# HG changeset patch # User goetz # Date 1374503506 -7200 # Node ID 7c3ed829505824990f6bf61b048f9f692fdba5e0 # Parent 438e13354adf109452928df85b3d3eda0614706a 8019972: PPC64 (part 9): platform files for interpreter only VM. Summary: With this change the HotSpot core build works on Linux/PPC64. The VM successfully executes simple test programs. Reviewed-by: kvn diff --git a/src/cpu/ppc/vm/assembler_ppc.cpp b/src/cpu/ppc/vm/assembler_ppc.cpp new file mode 100644 --- /dev/null +++ b/src/cpu/ppc/vm/assembler_ppc.cpp @@ -0,0 +1,699 @@ +/* + * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright 2012, 2013 SAP AG. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/assembler.hpp" +#include "asm/assembler.inline.hpp" +#include "gc_interface/collectedHeap.inline.hpp" +#include "interpreter/interpreter.hpp" +#include "memory/cardTableModRefBS.hpp" +#include "memory/resourceArea.hpp" +#include "prims/methodHandles.hpp" +#include "runtime/biasedLocking.hpp" +#include "runtime/interfaceSupport.hpp" +#include "runtime/objectMonitor.hpp" +#include "runtime/os.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" +#if INCLUDE_ALL_GCS +#include "gc_implementation/g1/g1CollectedHeap.inline.hpp" +#include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp" +#include "gc_implementation/g1/heapRegion.hpp" +#endif // INCLUDE_ALL_GCS + +#ifdef PRODUCT +#define BLOCK_COMMENT(str) // nothing +#else +#define BLOCK_COMMENT(str) block_comment(str) +#endif + +int AbstractAssembler::code_fill_byte() { + return 0x00; // illegal instruction 0x00000000 +} + +void Assembler::print_instruction(int inst) { + Unimplemented(); +} + +// Patch instruction `inst' at offset `inst_pos' to refer to +// `dest_pos' and return the resulting instruction. We should have +// pcs, not offsets, but since all is relative, it will work out fine. +int Assembler::patched_branch(int dest_pos, int inst, int inst_pos) { + int m = 0; // mask for displacement field + int v = 0; // new value for displacement field + + switch (inv_op_ppc(inst)) { + case b_op: m = li(-1); v = li(disp(dest_pos, inst_pos)); break; + case bc_op: m = bd(-1); v = bd(disp(dest_pos, inst_pos)); break; + default: ShouldNotReachHere(); + } + return inst & ~m | v; +} + +// Return the offset, relative to _code_begin, of the destination of +// the branch inst at offset pos.
+int Assembler::branch_destination(int inst, int pos) { + int r = 0; + switch (inv_op_ppc(inst)) { + case b_op: r = bxx_destination_offset(inst, pos); break; + case bc_op: r = inv_bd_field(inst, pos); break; + default: ShouldNotReachHere(); + } + return r; +} + +// Low-level andi-one-instruction-macro. +void Assembler::andi(Register a, Register s, const int ui16) { + assert(is_uimm(ui16, 16), "must be 16-bit unsigned immediate"); + if (is_power_of_2_long(((jlong) ui16)+1)) { + // pow2minus1 + clrldi(a, s, 64-log2_long((((jlong) ui16)+1))); + } else if (is_power_of_2_long((jlong) ui16)) { + // pow2 + rlwinm(a, s, 0, 31-log2_long((jlong) ui16), 31-log2_long((jlong) ui16)); + } else if (is_power_of_2_long((jlong)-ui16)) { + // negpow2 + clrrdi(a, s, log2_long((jlong)-ui16)); + } else { + andi_(a, s, ui16); + } +} + +// RegisterOrConstant version. +void Assembler::ld(Register d, RegisterOrConstant roc, Register s1) { + if (roc.is_constant()) { + if (s1 == noreg) { + int simm16_rest = load_const_optimized(d, roc.as_constant(), noreg, true); + Assembler::ld(d, simm16_rest, d); + } else if (is_simm(roc.as_constant(), 16)) { + Assembler::ld(d, roc.as_constant(), s1); + } else { + load_const_optimized(d, roc.as_constant()); + Assembler::ldx(d, d, s1); + } + } else { + if (s1 == noreg) + Assembler::ld(d, 0, roc.as_register()); + else + Assembler::ldx(d, roc.as_register(), s1); + } +} + +void Assembler::lwa(Register d, RegisterOrConstant roc, Register s1) { + if (roc.is_constant()) { + if (s1 == noreg) { + int simm16_rest = load_const_optimized(d, roc.as_constant(), noreg, true); + Assembler::lwa(d, simm16_rest, d); + } else if (is_simm(roc.as_constant(), 16)) { + Assembler::lwa(d, roc.as_constant(), s1); + } else { + load_const_optimized(d, roc.as_constant()); + Assembler::lwax(d, d, s1); + } + } else { + if (s1 == noreg) + Assembler::lwa(d, 0, roc.as_register()); + else + Assembler::lwax(d, roc.as_register(), s1); + } +} + +void Assembler::lwz(Register d, RegisterOrConstant roc, Register s1) { + if (roc.is_constant()) { + if (s1 == noreg) { + int simm16_rest = load_const_optimized(d, roc.as_constant(), noreg, true); + Assembler::lwz(d, simm16_rest, d); + } else if (is_simm(roc.as_constant(), 16)) { + Assembler::lwz(d, roc.as_constant(), s1); + } else { + load_const_optimized(d, roc.as_constant()); + Assembler::lwzx(d, d, s1); + } + } else { + if (s1 == noreg) + Assembler::lwz(d, 0, roc.as_register()); + else + Assembler::lwzx(d, roc.as_register(), s1); + } +} + +void Assembler::lha(Register d, RegisterOrConstant roc, Register s1) { + if (roc.is_constant()) { + if (s1 == noreg) { + int simm16_rest = load_const_optimized(d, roc.as_constant(), noreg, true); + Assembler::lha(d, simm16_rest, d); + } else if (is_simm(roc.as_constant(), 16)) { + Assembler::lha(d, roc.as_constant(), s1); + } else { + load_const_optimized(d, roc.as_constant()); + Assembler::lhax(d, d, s1); + } + } else { + if (s1 == noreg) + Assembler::lha(d, 0, roc.as_register()); + else + Assembler::lhax(d, roc.as_register(), s1); + } +} + +void Assembler::lhz(Register d, RegisterOrConstant roc, Register s1) { + if (roc.is_constant()) { + if (s1 == noreg) { + int simm16_rest = load_const_optimized(d, roc.as_constant(), noreg, true); + Assembler::lhz(d, simm16_rest, d); + } else if (is_simm(roc.as_constant(), 16)) { + Assembler::lhz(d, roc.as_constant(), s1); + } else { + load_const_optimized(d, roc.as_constant()); + Assembler::lhzx(d, d, s1); + } + } else { + if (s1 == noreg) + Assembler::lhz(d, 0, roc.as_register()); + else + 
Assembler::lhzx(d, roc.as_register(), s1); + } +} + +void Assembler::lbz(Register d, RegisterOrConstant roc, Register s1) { + if (roc.is_constant()) { + if (s1 == noreg) { + int simm16_rest = load_const_optimized(d, roc.as_constant(), noreg, true); + Assembler::lbz(d, simm16_rest, d); + } else if (is_simm(roc.as_constant(), 16)) { + Assembler::lbz(d, roc.as_constant(), s1); + } else { + load_const_optimized(d, roc.as_constant()); + Assembler::lbzx(d, d, s1); + } + } else { + if (s1 == noreg) + Assembler::lbz(d, 0, roc.as_register()); + else + Assembler::lbzx(d, roc.as_register(), s1); + } +} + +void Assembler::std(Register d, RegisterOrConstant roc, Register s1, Register tmp) { + if (roc.is_constant()) { + if (s1 == noreg) { + guarantee(tmp != noreg, "Need tmp reg to encode large constants"); + int simm16_rest = load_const_optimized(tmp, roc.as_constant(), noreg, true); + Assembler::std(d, simm16_rest, tmp); + } else if (is_simm(roc.as_constant(), 16)) { + Assembler::std(d, roc.as_constant(), s1); + } else { + guarantee(tmp != noreg, "Need tmp reg to encode large constants"); + load_const_optimized(tmp, roc.as_constant()); + Assembler::stdx(d, tmp, s1); + } + } else { + if (s1 == noreg) + Assembler::std(d, 0, roc.as_register()); + else + Assembler::stdx(d, roc.as_register(), s1); + } +} + +void Assembler::stw(Register d, RegisterOrConstant roc, Register s1, Register tmp) { + if (roc.is_constant()) { + if (s1 == noreg) { + guarantee(tmp != noreg, "Need tmp reg to encode large constants"); + int simm16_rest = load_const_optimized(tmp, roc.as_constant(), noreg, true); + Assembler::stw(d, simm16_rest, tmp); + } else if (is_simm(roc.as_constant(), 16)) { + Assembler::stw(d, roc.as_constant(), s1); + } else { + guarantee(tmp != noreg, "Need tmp reg to encode large constants"); + load_const_optimized(tmp, roc.as_constant()); + Assembler::stwx(d, tmp, s1); + } + } else { + if (s1 == noreg) + Assembler::stw(d, 0, roc.as_register()); + else + Assembler::stwx(d, roc.as_register(), s1); + } +} + +void Assembler::sth(Register d, RegisterOrConstant roc, Register s1, Register tmp) { + if (roc.is_constant()) { + if (s1 == noreg) { + guarantee(tmp != noreg, "Need tmp reg to encode large constants"); + int simm16_rest = load_const_optimized(tmp, roc.as_constant(), noreg, true); + Assembler::sth(d, simm16_rest, tmp); + } else if (is_simm(roc.as_constant(), 16)) { + Assembler::sth(d, roc.as_constant(), s1); + } else { + guarantee(tmp != noreg, "Need tmp reg to encode large constants"); + load_const_optimized(tmp, roc.as_constant()); + Assembler::sthx(d, tmp, s1); + } + } else { + if (s1 == noreg) + Assembler::sth(d, 0, roc.as_register()); + else + Assembler::sthx(d, roc.as_register(), s1); + } +} + +void Assembler::stb(Register d, RegisterOrConstant roc, Register s1, Register tmp) { + if (roc.is_constant()) { + if (s1 == noreg) { + guarantee(tmp != noreg, "Need tmp reg to encode large constants"); + int simm16_rest = load_const_optimized(tmp, roc.as_constant(), noreg, true); + Assembler::stb(d, simm16_rest, tmp); + } else if (is_simm(roc.as_constant(), 16)) { + Assembler::stb(d, roc.as_constant(), s1); + } else { + guarantee(tmp != noreg, "Need tmp reg to encode large constants"); + load_const_optimized(tmp, roc.as_constant()); + Assembler::stbx(d, tmp, s1); + } + } else { + if (s1 == noreg) + Assembler::stb(d, 0, roc.as_register()); + else + Assembler::stbx(d, roc.as_register(), s1); + } +} + +void Assembler::add(Register d, RegisterOrConstant roc, Register s1) { + if (roc.is_constant()) { + intptr_t c = 
roc.as_constant(); + assert(is_simm(c, 16), "too big"); + addi(d, s1, (int)c); + } + else add(d, roc.as_register(), s1); +} + +void Assembler::subf(Register d, RegisterOrConstant roc, Register s1) { + if (roc.is_constant()) { + intptr_t c = roc.as_constant(); + assert(is_simm(-c, 16), "too big"); + addi(d, s1, (int)-c); + } + else subf(d, roc.as_register(), s1); +} + +void Assembler::cmpd(ConditionRegister d, RegisterOrConstant roc, Register s1) { + if (roc.is_constant()) { + intptr_t c = roc.as_constant(); + assert(is_simm(c, 16), "too big"); + cmpdi(d, s1, (int)c); + } + else cmpd(d, roc.as_register(), s1); +} + +// Load a 64 bit constant. Patchable. +void Assembler::load_const(Register d, long x, Register tmp) { + // 64-bit value: x = xa xb xc xd + int xa = (x >> 48) & 0xffff; + int xb = (x >> 32) & 0xffff; + int xc = (x >> 16) & 0xffff; + int xd = (x >> 0) & 0xffff; + if (tmp == noreg) { + Assembler::lis( d, (int)(short)xa); + Assembler::ori( d, d, (unsigned int)xb); + Assembler::sldi(d, d, 32); + Assembler::oris(d, d, (unsigned int)xc); + Assembler::ori( d, d, (unsigned int)xd); + } else { + // exploit instruction level parallelism if we have a tmp register + assert_different_registers(d, tmp); + Assembler::lis(tmp, (int)(short)xa); + Assembler::lis(d, (int)(short)xc); + Assembler::ori(tmp, tmp, (unsigned int)xb); + Assembler::ori(d, d, (unsigned int)xd); + Assembler::insrdi(d, tmp, 32, 0); + } +} + +// Load a 64 bit constant, optimized, not identifiable. +// Tmp can be used to increase ILP. Set return_simm16_rest=true to get a +// 16 bit immediate offset. +int Assembler::load_const_optimized(Register d, long x, Register tmp, bool return_simm16_rest) { + // Avoid accidentally trying to use R0 for indexed addressing. + assert(d != R0, "R0 not allowed"); + assert_different_registers(d, tmp); + + short xa, xb, xc, xd; // Four 16-bit chunks of const. + long rem = x; // Remaining part of const. + + xd = rem & 0xFFFF; // Lowest 16-bit chunk. + rem = (rem >> 16) + ((unsigned short)xd >> 15); // Compensation for sign extend. + + if (rem == 0) { // opt 1: simm16 + li(d, xd); + return 0; + } + + xc = rem & 0xFFFF; // Next 16-bit chunk. + rem = (rem >> 16) + ((unsigned short)xc >> 15); // Compensation for sign extend. + + if (rem == 0) { // opt 2: simm32 + lis(d, xc); + } else { // High 32 bits needed. + + if (tmp != noreg) { // opt 3: We have a temp reg. + // No carry propagation between xc and higher chunks here (use logical instructions). + xa = (x >> 48) & 0xffff; + xb = (x >> 32) & 0xffff; // No sign compensation, we use lis+ori or li to allow usage of R0. + bool load_xa = (xa != 0) || (xb < 0); + bool return_xd = false; + + if (load_xa) lis(tmp, xa); + if (xc) lis(d, xc); + if (load_xa) { + if (xb) ori(tmp, tmp, xb); // No addi, we support tmp == R0. + } else { + li(tmp, xb); // non-negative + } + if (xc) { + if (return_simm16_rest && xd >= 0) { return_xd = true; } // >= 0 to avoid carry propagation after insrdi/rldimi. + else if (xd) { addi(d, d, xd); } + } else { + li(d, xd); + } + insrdi(d, tmp, 32, 0); + return return_xd ? xd : 0; // non-negative + } + + xb = rem & 0xFFFF; // Next 16-bit chunk. + rem = (rem >> 16) + ((unsigned short)xb >> 15); // Compensation for sign extend. + + xa = rem & 0xFFFF; // Highest 16-bit chunk. + + // opt 4: avoid adding 0 + if (xa) { // Highest 16-bit needed? + lis(d, xa); + if (xb) addi(d, d, xb); + } else { + li(d, xb); + } + sldi(d, d, 32); + if (xc) addis(d, d, xc); + } + + // opt 5: Return offset to be inserted into following instruction.
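+  // (This is what the RegisterOrConstant load/store helpers above rely on: with
+  //  return_simm16_rest=true the low 16-bit chunk xd is handed back and folded
+  //  into the displacement of the following ld/lwz/std instead of an extra addi.)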
+ if (return_simm16_rest) return xd; + + if (xd) addi(d, d, xd); + return 0; +} + +#ifndef PRODUCT +// Test of ppc assembler. +void Assembler::test_asm() { + // PPC 1, section 3.3.8, Fixed-Point Arithmetic Instructions + addi( R0, R1, 10); + addis( R5, R2, 11); + addic_( R3, R31, 42); + subfic( R21, R12, 2112); + add( R3, R2, R1); + add_( R11, R22, R30); + subf( R7, R6, R5); + subf_( R8, R9, R4); + addc( R11, R12, R13); + addc_( R14, R14, R14); + subfc( R15, R16, R17); + subfc_( R18, R20, R19); + adde( R20, R22, R24); + adde_( R29, R27, R26); + subfe( R28, R1, R0); + subfe_( R21, R11, R29); + neg( R21, R22); + neg_( R13, R23); + mulli( R0, R11, -31); + mulld( R1, R18, R21); + mulld_( R2, R17, R22); + mullw( R3, R16, R23); + mullw_( R4, R15, R24); + divd( R5, R14, R25); + divd_( R6, R13, R26); + divw( R7, R12, R27); + divw_( R8, R11, R28); + + li( R3, -4711); + + // PPC 1, section 3.3.9, Fixed-Point Compare Instructions + cmpi( CCR7, 0, R27, 4711); + cmp( CCR0, 1, R14, R11); + cmpli( CCR5, 1, R17, 45); + cmpl( CCR3, 0, R9, R10); + + cmpwi( CCR7, R27, 4711); + cmpw( CCR0, R14, R11); + cmplwi( CCR5, R17, 45); + cmplw( CCR3, R9, R10); + + cmpdi( CCR7, R27, 4711); + cmpd( CCR0, R14, R11); + cmpldi( CCR5, R17, 45); + cmpld( CCR3, R9, R10); + + // PPC 1, section 3.3.11, Fixed-Point Logical Instructions + andi_( R4, R5, 0xff); + andis_( R12, R13, 0x7b51); + ori( R1, R4, 13); + oris( R3, R5, 177); + xori( R7, R6, 51); + xoris( R29, R0, 1); + andr( R17, R21, R16); + and_( R3, R5, R15); + orr( R2, R1, R9); + or_( R17, R15, R11); + xorr( R19, R18, R10); + xor_( R31, R21, R11); + nand( R5, R7, R3); + nand_( R3, R1, R0); + nor( R2, R3, R5); + nor_( R3, R6, R8); + andc( R25, R12, R11); + andc_( R24, R22, R21); + orc( R20, R10, R12); + orc_( R22, R2, R13); + + nop(); + + // PPC 1, section 3.3.12, Fixed-Point Rotate and Shift Instructions + sld( R5, R6, R8); + sld_( R3, R5, R9); + slw( R2, R1, R10); + slw_( R6, R26, R16); + srd( R16, R24, R8); + srd_( R21, R14, R7); + srw( R22, R25, R29); + srw_( R5, R18, R17); + srad( R7, R11, R0); + srad_( R9, R13, R1); + sraw( R7, R15, R2); + sraw_( R4, R17, R3); + sldi( R3, R18, 63); + sldi_( R2, R20, 30); + slwi( R1, R21, 30); + slwi_( R7, R23, 8); + srdi( R0, R19, 2); + srdi_( R12, R24, 5); + srwi( R13, R27, 6); + srwi_( R14, R29, 7); + sradi( R15, R30, 9); + sradi_( R16, R31, 19); + srawi( R17, R31, 15); + srawi_( R18, R31, 12); + + clrrdi( R3, R30, 5); + clrldi( R9, R10, 11); + + rldicr( R19, R20, 13, 15); + rldicr_(R20, R20, 16, 14); + rldicl( R21, R21, 30, 33); + rldicl_(R22, R1, 20, 25); + rlwinm( R23, R2, 25, 10, 11); + rlwinm_(R24, R3, 12, 13, 14); + + // PPC 1, section 3.3.2 Fixed-Point Load Instructions + lwzx( R3, R5, R7); + lwz( R11, 0, R1); + lwzu( R31, -4, R11); + + lwax( R3, R5, R7); + lwa( R31, -4, R11); + lhzx( R3, R5, R7); + lhz( R31, -4, R11); + lhzu( R31, -4, R11); + + + lhax( R3, R5, R7); + lha( R31, -4, R11); + lhau( R11, 0, R1); + + lbzx( R3, R5, R7); + lbz( R31, -4, R11); + lbzu( R11, 0, R1); + + ld( R31, -4, R11); + ldx( R3, R5, R7); + ldu( R31, -4, R11); + + // PPC 1, section 3.3.3 Fixed-Point Store Instructions + stwx( R3, R5, R7); + stw( R31, -4, R11); + stwu( R11, 0, R1); + + sthx( R3, R5, R7 ); + sth( R31, -4, R11); + sthu( R31, -4, R11); + + stbx( R3, R5, R7); + stb( R31, -4, R11); + stbu( R31, -4, R11); + + std( R31, -4, R11); + stdx( R3, R5, R7); + stdu( R31, -4, R11); + + // PPC 1, section 3.3.13 Move To/From System Register Instructions + mtlr( R3); + mflr( R3); + mtctr( R3); + mfctr( R3); + mtcrf( 0xff, R15); + mtcr( R15); + mtcrf( 
0x03, R15); + mtcr( R15); + mfcr( R15); + + // PPC 1, section 2.4.1 Branch Instructions + Label lbl1, lbl2, lbl3; + bind(lbl1); + + b(pc()); + b(pc() - 8); + b(lbl1); + b(lbl2); + b(lbl3); + + bl(pc() - 8); + bl(lbl1); + bl(lbl2); + + bcl(4, 10, pc() - 8); + bcl(4, 10, lbl1); + bcl(4, 10, lbl2); + + bclr( 4, 6, 0); + bclrl(4, 6, 0); + + bind(lbl2); + + bcctr( 4, 6, 0); + bcctrl(4, 6, 0); + + blt(CCR0, lbl2); + bgt(CCR1, lbl2); + beq(CCR2, lbl2); + bso(CCR3, lbl2); + bge(CCR4, lbl2); + ble(CCR5, lbl2); + bne(CCR6, lbl2); + bns(CCR7, lbl2); + + bltl(CCR0, lbl2); + bgtl(CCR1, lbl2); + beql(CCR2, lbl2); + bsol(CCR3, lbl2); + bgel(CCR4, lbl2); + blel(CCR5, lbl2); + bnel(CCR6, lbl2); + bnsl(CCR7, lbl2); + blr(); + + sync(); + icbi( R1, R2); + dcbst(R2, R3); + + // FLOATING POINT instructions ppc. + // PPC 1, section 4.6.2 Floating-Point Load Instructions + lfs( F1, -11, R3); + lfsu(F2, 123, R4); + lfsx(F3, R5, R6); + lfd( F4, 456, R7); + lfdu(F5, 789, R8); + lfdx(F6, R10, R11); + + // PPC 1, section 4.6.3 Floating-Point Store Instructions + stfs( F7, 876, R12); + stfsu( F8, 543, R13); + stfsx( F9, R14, R15); + stfd( F10, 210, R16); + stfdu( F11, 111, R17); + stfdx( F12, R18, R19); + + // PPC 1, section 4.6.4 Floating-Point Move Instructions + fmr( F13, F14); + fmr_( F14, F15); + fneg( F16, F17); + fneg_( F18, F19); + fabs( F20, F21); + fabs_( F22, F23); + fnabs( F24, F25); + fnabs_(F26, F27); + + // PPC 1, section 4.6.5.1 Floating-Point Elementary Arithmetic + // Instructions + fadd( F28, F29, F30); + fadd_( F31, F0, F1); + fadds( F2, F3, F4); + fadds_(F5, F6, F7); + fsub( F8, F9, F10); + fsub_( F11, F12, F13); + fsubs( F14, F15, F16); + fsubs_(F17, F18, F19); + fmul( F20, F21, F22); + fmul_( F23, F24, F25); + fmuls( F26, F27, F28); + fmuls_(F29, F30, F31); + fdiv( F0, F1, F2); + fdiv_( F3, F4, F5); + fdivs( F6, F7, F8); + fdivs_(F9, F10, F11); + + // PPC 1, section 4.6.6 Floating-Point Rounding and Conversion + // Instructions + frsp( F12, F13); + fctid( F14, F15); + fctidz(F16, F17); + fctiw( F18, F19); + fctiwz(F20, F21); + fcfid( F22, F23); + + // PPC 1, section 4.6.7 Floating-Point Compare Instructions + fcmpu( CCR7, F24, F25); + + tty->print_cr("\ntest_asm disassembly (0x%lx 0x%lx):", code()->insts_begin(), code()->insts_end()); + code()->decode(); +} +#endif // !PRODUCT diff --git a/src/cpu/ppc/vm/assembler_ppc.hpp b/src/cpu/ppc/vm/assembler_ppc.hpp new file mode 100644 --- /dev/null +++ b/src/cpu/ppc/vm/assembler_ppc.hpp @@ -0,0 +1,1963 @@ +/* + * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright 2012, 2013 SAP AG. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_PPC_VM_ASSEMBLER_PPC_HPP +#define CPU_PPC_VM_ASSEMBLER_PPC_HPP + +#include "asm/register.hpp" + +// Address is an abstraction used to represent a memory location +// as used in assembler instructions. +// PPC instructions grok either baseReg + indexReg or baseReg + disp. +// So far we do not use it, as the simplification this class provides is low +// on PPC with its simple addressing modes. Use RegisterOrConstant to +// represent an offset. +class Address VALUE_OBJ_CLASS_SPEC { +}; + +class AddressLiteral VALUE_OBJ_CLASS_SPEC { + private: + address _address; + RelocationHolder _rspec; + + RelocationHolder rspec_from_rtype(relocInfo::relocType rtype, address addr) { + switch (rtype) { + case relocInfo::external_word_type: + return external_word_Relocation::spec(addr); + case relocInfo::internal_word_type: + return internal_word_Relocation::spec(addr); + case relocInfo::opt_virtual_call_type: + return opt_virtual_call_Relocation::spec(); + case relocInfo::static_call_type: + return static_call_Relocation::spec(); + case relocInfo::runtime_call_type: + return runtime_call_Relocation::spec(); + case relocInfo::none: + return RelocationHolder(); + default: + ShouldNotReachHere(); + return RelocationHolder(); + } + } + + protected: + // creation + AddressLiteral() : _address(NULL), _rspec(NULL) {} + + public: + AddressLiteral(address addr, RelocationHolder const& rspec) + : _address(addr), + _rspec(rspec) {} + + AddressLiteral(address addr, relocInfo::relocType rtype = relocInfo::none) + : _address((address) addr), + _rspec(rspec_from_rtype(rtype, (address) addr)) {} + + AddressLiteral(oop* addr, relocInfo::relocType rtype = relocInfo::none) + : _address((address) addr), + _rspec(rspec_from_rtype(rtype, (address) addr)) {} + + intptr_t value() const { return (intptr_t) _address; } + + const RelocationHolder& rspec() const { return _rspec; } +}; + +// Argument is an abstraction used to represent an outgoing +// actual argument or an incoming formal parameter, whether +// it resides in memory or in a register, in a manner consistent +// with the PPC Application Binary Interface, or ABI. This is +// often referred to as the native or C calling convention. + +class Argument VALUE_OBJ_CLASS_SPEC { + private: + int _number; // The number of the argument. + public: + enum { + // Only 8 registers may contain integer parameters. + n_register_parameters = 8, + // Can have up to 8 floating registers. + n_float_register_parameters = 8 + }; + // creation + Argument(int number) : _number(number) {} + + int number() const { return _number; } + + // Locating register-based arguments: + bool is_register() const { return _number < n_register_parameters; } + + Register as_register() const { + assert(is_register(), "must be a register argument"); + return as_Register(number() + R3_ARG1->encoding()); + } +}; + +// A ppc64 function descriptor.
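+// Under the 64-bit PowerPC ELF ABI a function pointer does not point at code;
+// it points at a descriptor holding the code entry address, the callee's TOC
+// (R2) value, and an environment pointer, mirrored by the three fields below.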
+struct FunctionDescriptor VALUE_OBJ_CLASS_SPEC { + private: + address _entry; + address _toc; + address _env; + + public: + inline address entry() const { return _entry; } + inline address toc() const { return _toc; } + inline address env() const { return _env; } + + inline void set_entry(address entry) { _entry = entry; } + inline void set_toc( address toc) { _toc = toc; } + inline void set_env( address env) { _env = env; } + + inline static ByteSize entry_offset() { return byte_offset_of(FunctionDescriptor, _entry); } + inline static ByteSize toc_offset() { return byte_offset_of(FunctionDescriptor, _toc); } + inline static ByteSize env_offset() { return byte_offset_of(FunctionDescriptor, _env); } + + // Friend functions can be called without loading toc and env. + enum { + friend_toc = 0xcafe, + friend_env = 0xc0de + }; + + inline bool is_friend_function() const { + return (toc() == (address) friend_toc) && (env() == (address) friend_env); + } + + // Constructor for stack-allocated instances. + FunctionDescriptor() { + _entry = (address) 0xbad; + _toc = (address) 0xbad; + _env = (address) 0xbad; + } +}; + +class Assembler : public AbstractAssembler { + protected: + // Displacement routines + static void print_instruction(int inst); + static int patched_branch(int dest_pos, int inst, int inst_pos); + static int branch_destination(int inst, int pos); + + friend class AbstractAssembler; + + // Code patchers need various routines like inv_wdisp() + friend class NativeInstruction; + friend class NativeGeneralJump; + friend class Relocation; + + public: + + enum shifts { + XO_21_29_SHIFT = 2, + XO_21_30_SHIFT = 1, + XO_27_29_SHIFT = 2, + XO_30_31_SHIFT = 0, + SPR_5_9_SHIFT = 11u, // SPR_5_9 field in bits 11 -- 15 + SPR_0_4_SHIFT = 16u, // SPR_0_4 field in bits 16 -- 20 + RS_SHIFT = 21u, // RS field in bits 21 -- 25 + OPCODE_SHIFT = 26u, // opcode in bits 26 -- 31 + }; + + enum opcdxos_masks { + XL_FORM_OPCODE_MASK = (63u << OPCODE_SHIFT) | (1023u << 1), + ADDI_OPCODE_MASK = (63u << OPCODE_SHIFT), + ADDIS_OPCODE_MASK = (63u << OPCODE_SHIFT), + BXX_OPCODE_MASK = (63u << OPCODE_SHIFT), + BCXX_OPCODE_MASK = (63u << OPCODE_SHIFT), + // trap instructions + TDI_OPCODE_MASK = (63u << OPCODE_SHIFT), + TWI_OPCODE_MASK = (63u << OPCODE_SHIFT), + TD_OPCODE_MASK = (63u << OPCODE_SHIFT) | (1023u << 1), + TW_OPCODE_MASK = (63u << OPCODE_SHIFT) | (1023u << 1), + LD_OPCODE_MASK = (63u << OPCODE_SHIFT) | (3u << XO_30_31_SHIFT), // DS-FORM + STD_OPCODE_MASK = LD_OPCODE_MASK, + STDU_OPCODE_MASK = STD_OPCODE_MASK, + STDX_OPCODE_MASK = (63u << OPCODE_SHIFT) | (1023u << 1), + STDUX_OPCODE_MASK = STDX_OPCODE_MASK, + STW_OPCODE_MASK = (63u << OPCODE_SHIFT), + STWU_OPCODE_MASK = STW_OPCODE_MASK, + STWX_OPCODE_MASK = (63u << OPCODE_SHIFT) | (1023u << 1), + STWUX_OPCODE_MASK = STWX_OPCODE_MASK, + MTCTR_OPCODE_MASK = ~(31u << RS_SHIFT), + ORI_OPCODE_MASK = (63u << OPCODE_SHIFT), + ORIS_OPCODE_MASK = (63u << OPCODE_SHIFT), + RLDICR_OPCODE_MASK = (63u << OPCODE_SHIFT) | (7u << XO_27_29_SHIFT) + }; + + enum opcdxos { + ADD_OPCODE = (31u << OPCODE_SHIFT | 266u << 1), + ADDC_OPCODE = (31u << OPCODE_SHIFT | 10u << 1), + ADDI_OPCODE = (14u << OPCODE_SHIFT), + ADDIS_OPCODE = (15u << OPCODE_SHIFT), + ADDIC__OPCODE = (13u << OPCODE_SHIFT), + ADDE_OPCODE = (31u << OPCODE_SHIFT | 138u << 1), + SUBF_OPCODE = (31u << OPCODE_SHIFT | 40u << 1), + SUBFC_OPCODE = (31u << OPCODE_SHIFT | 8u << 1), + SUBFE_OPCODE = (31u << OPCODE_SHIFT | 136u << 1), + SUBFIC_OPCODE = (8u << OPCODE_SHIFT), + SUBFZE_OPCODE = (31u << OPCODE_SHIFT | 200u << 1), 
+ DIVW_OPCODE = (31u << OPCODE_SHIFT | 491u << 1), + MULLW_OPCODE = (31u << OPCODE_SHIFT | 235u << 1), + MULHW_OPCODE = (31u << OPCODE_SHIFT | 75u << 1), + MULHWU_OPCODE = (31u << OPCODE_SHIFT | 11u << 1), + MULLI_OPCODE = (7u << OPCODE_SHIFT), + AND_OPCODE = (31u << OPCODE_SHIFT | 28u << 1), + ANDI_OPCODE = (28u << OPCODE_SHIFT), + ANDIS_OPCODE = (29u << OPCODE_SHIFT), + ANDC_OPCODE = (31u << OPCODE_SHIFT | 60u << 1), + ORC_OPCODE = (31u << OPCODE_SHIFT | 412u << 1), + OR_OPCODE = (31u << OPCODE_SHIFT | 444u << 1), + ORI_OPCODE = (24u << OPCODE_SHIFT), + ORIS_OPCODE = (25u << OPCODE_SHIFT), + XOR_OPCODE = (31u << OPCODE_SHIFT | 316u << 1), + XORI_OPCODE = (26u << OPCODE_SHIFT), + XORIS_OPCODE = (27u << OPCODE_SHIFT), + + NEG_OPCODE = (31u << OPCODE_SHIFT | 104u << 1), + + RLWINM_OPCODE = (21u << OPCODE_SHIFT), + CLRRWI_OPCODE = RLWINM_OPCODE, + CLRLWI_OPCODE = RLWINM_OPCODE, + + RLWIMI_OPCODE = (20u << OPCODE_SHIFT), + + SLW_OPCODE = (31u << OPCODE_SHIFT | 24u << 1), + SLWI_OPCODE = RLWINM_OPCODE, + SRW_OPCODE = (31u << OPCODE_SHIFT | 536u << 1), + SRWI_OPCODE = RLWINM_OPCODE, + SRAW_OPCODE = (31u << OPCODE_SHIFT | 792u << 1), + SRAWI_OPCODE = (31u << OPCODE_SHIFT | 824u << 1), + + CMP_OPCODE = (31u << OPCODE_SHIFT | 0u << 1), + CMPI_OPCODE = (11u << OPCODE_SHIFT), + CMPL_OPCODE = (31u << OPCODE_SHIFT | 32u << 1), + CMPLI_OPCODE = (10u << OPCODE_SHIFT), + + ISEL_OPCODE = (31u << OPCODE_SHIFT | 15u << 1), + + MTLR_OPCODE = (31u << OPCODE_SHIFT | 467u << 1 | 8 << SPR_0_4_SHIFT), + MFLR_OPCODE = (31u << OPCODE_SHIFT | 339u << 1 | 8 << SPR_0_4_SHIFT), + + MTCRF_OPCODE = (31u << OPCODE_SHIFT | 144u << 1), + MFCR_OPCODE = (31u << OPCODE_SHIFT | 19u << 1), + MCRF_OPCODE = (19u << OPCODE_SHIFT | 0u << 1), + + // condition register logic instructions + CRAND_OPCODE = (19u << OPCODE_SHIFT | 257u << 1), + CRNAND_OPCODE = (19u << OPCODE_SHIFT | 225u << 1), + CROR_OPCODE = (19u << OPCODE_SHIFT | 449u << 1), + CRXOR_OPCODE = (19u << OPCODE_SHIFT | 193u << 1), + CRNOR_OPCODE = (19u << OPCODE_SHIFT | 33u << 1), + CREQV_OPCODE = (19u << OPCODE_SHIFT | 289u << 1), + CRANDC_OPCODE = (19u << OPCODE_SHIFT | 129u << 1), + CRORC_OPCODE = (19u << OPCODE_SHIFT | 417u << 1), + + BCLR_OPCODE = (19u << OPCODE_SHIFT | 16u << 1), + BXX_OPCODE = (18u << OPCODE_SHIFT), + BCXX_OPCODE = (16u << OPCODE_SHIFT), + + // CTR-related opcodes + BCCTR_OPCODE = (19u << OPCODE_SHIFT | 528u << 1), + MTCTR_OPCODE = (31u << OPCODE_SHIFT | 467u << 1 | 9 << SPR_0_4_SHIFT), + MFCTR_OPCODE = (31u << OPCODE_SHIFT | 339u << 1 | 9 << SPR_0_4_SHIFT), + + + LWZ_OPCODE = (32u << OPCODE_SHIFT), + LWZX_OPCODE = (31u << OPCODE_SHIFT | 23u << 1), + LWZU_OPCODE = (33u << OPCODE_SHIFT), + + LHA_OPCODE = (42u << OPCODE_SHIFT), + LHAX_OPCODE = (31u << OPCODE_SHIFT | 343u << 1), + LHAU_OPCODE = (43u << OPCODE_SHIFT), + + LHZ_OPCODE = (40u << OPCODE_SHIFT), + LHZX_OPCODE = (31u << OPCODE_SHIFT | 279u << 1), + LHZU_OPCODE = (41u << OPCODE_SHIFT), + + LBZ_OPCODE = (34u << OPCODE_SHIFT), + LBZX_OPCODE = (31u << OPCODE_SHIFT | 87u << 1), + LBZU_OPCODE = (35u << OPCODE_SHIFT), + + STW_OPCODE = (36u << OPCODE_SHIFT), + STWX_OPCODE = (31u << OPCODE_SHIFT | 151u << 1), + STWU_OPCODE = (37u << OPCODE_SHIFT), + STWUX_OPCODE = (31u << OPCODE_SHIFT | 183u << 1), + + STH_OPCODE = (44u << OPCODE_SHIFT), + STHX_OPCODE = (31u << OPCODE_SHIFT | 407u << 1), + STHU_OPCODE = (45u << OPCODE_SHIFT), + + STB_OPCODE = (38u << OPCODE_SHIFT), + STBX_OPCODE = (31u << OPCODE_SHIFT | 215u << 1), + STBU_OPCODE = (39u << OPCODE_SHIFT), + + EXTSB_OPCODE = (31u << OPCODE_SHIFT | 954u << 
1), + EXTSH_OPCODE = (31u << OPCODE_SHIFT | 922u << 1), + EXTSW_OPCODE = (31u << OPCODE_SHIFT | 986u << 1), // X-FORM + + // 32 bit opcode encodings + + LWA_OPCODE = (58u << OPCODE_SHIFT | 2u << XO_30_31_SHIFT), // DS-FORM + LWAX_OPCODE = (31u << OPCODE_SHIFT | 341u << XO_21_30_SHIFT), // X-FORM + + CNTLZW_OPCODE = (31u << OPCODE_SHIFT | 26u << XO_21_30_SHIFT), // X-FORM + + // 64 bit opcode encodings + + LD_OPCODE = (58u << OPCODE_SHIFT | 0u << XO_30_31_SHIFT), // DS-FORM + LDU_OPCODE = (58u << OPCODE_SHIFT | 1u << XO_30_31_SHIFT), // DS-FORM + LDX_OPCODE = (31u << OPCODE_SHIFT | 21u << XO_21_30_SHIFT), // X-FORM + + STD_OPCODE = (62u << OPCODE_SHIFT | 0u << XO_30_31_SHIFT), // DS-FORM + STDU_OPCODE = (62u << OPCODE_SHIFT | 1u << XO_30_31_SHIFT), // DS-FORM + STDUX_OPCODE = (31u << OPCODE_SHIFT | 181u << 1), // X-FORM + STDX_OPCODE = (31u << OPCODE_SHIFT | 149u << XO_21_30_SHIFT), // X-FORM + + RLDICR_OPCODE = (30u << OPCODE_SHIFT | 1u << XO_27_29_SHIFT), // MD-FORM + RLDICL_OPCODE = (30u << OPCODE_SHIFT | 0u << XO_27_29_SHIFT), // MD-FORM + RLDIC_OPCODE = (30u << OPCODE_SHIFT | 2u << XO_27_29_SHIFT), // MD-FORM + RLDIMI_OPCODE = (30u << OPCODE_SHIFT | 3u << XO_27_29_SHIFT), // MD-FORM + + SRADI_OPCODE = (31u << OPCODE_SHIFT | 413u << XO_21_29_SHIFT), // XS-FORM + + SLD_OPCODE = (31u << OPCODE_SHIFT | 27u << 1), // X-FORM + SRD_OPCODE = (31u << OPCODE_SHIFT | 539u << 1), // X-FORM + SRAD_OPCODE = (31u << OPCODE_SHIFT | 794u << 1), // X-FORM + + MULLD_OPCODE = (31u << OPCODE_SHIFT | 233u << 1), // XO-FORM + MULHD_OPCODE = (31u << OPCODE_SHIFT | 73u << 1), // XO-FORM + MULHDU_OPCODE = (31u << OPCODE_SHIFT | 9u << 1), // XO-FORM + DIVD_OPCODE = (31u << OPCODE_SHIFT | 489u << 1), // XO-FORM + + CNTLZD_OPCODE = (31u << OPCODE_SHIFT | 58u << XO_21_30_SHIFT), // X-FORM + NAND_OPCODE = (31u << OPCODE_SHIFT | 476u << XO_21_30_SHIFT), // X-FORM + NOR_OPCODE = (31u << OPCODE_SHIFT | 124u << XO_21_30_SHIFT), // X-FORM + + + // opcodes only used for floating arithmetic + FADD_OPCODE = (63u << OPCODE_SHIFT | 21u << 1), + FADDS_OPCODE = (59u << OPCODE_SHIFT | 21u << 1), + FCMPU_OPCODE = (63u << OPCODE_SHIFT | 00u << 1), + FDIV_OPCODE = (63u << OPCODE_SHIFT | 18u << 1), + FDIVS_OPCODE = (59u << OPCODE_SHIFT | 18u << 1), + FMR_OPCODE = (63u << OPCODE_SHIFT | 72u << 1), + // These are special Power6 opcodes, reused for "lfdepx" and "stfdepx" + // on Power7. Do not use. + // MFFGPR_OPCODE = (31u << OPCODE_SHIFT | 607u << 1), + // MFTGPR_OPCODE = (31u << OPCODE_SHIFT | 735u << 1), + CMPB_OPCODE = (31u << OPCODE_SHIFT | 508 << 1), + POPCNTB_OPCODE = (31u << OPCODE_SHIFT | 122 << 1), + POPCNTW_OPCODE = (31u << OPCODE_SHIFT | 378 << 1), + POPCNTD_OPCODE = (31u << OPCODE_SHIFT | 506 << 1), + FABS_OPCODE = (63u << OPCODE_SHIFT | 264u << 1), + FNABS_OPCODE = (63u << OPCODE_SHIFT | 136u << 1), + FMUL_OPCODE = (63u << OPCODE_SHIFT | 25u << 1), + FMULS_OPCODE = (59u << OPCODE_SHIFT | 25u << 1), + FNEG_OPCODE = (63u << OPCODE_SHIFT | 40u << 1), + FSUB_OPCODE = (63u << OPCODE_SHIFT | 20u << 1), + FSUBS_OPCODE = (59u << OPCODE_SHIFT | 20u << 1), + + // PPC64-internal FPU conversion opcodes + FCFID_OPCODE = (63u << OPCODE_SHIFT | 846u << 1), + FCFIDS_OPCODE = (59u << OPCODE_SHIFT | 846u << 1), + FCTID_OPCODE = (63u << OPCODE_SHIFT | 814u << 1), + FCTIDZ_OPCODE = (63u << OPCODE_SHIFT | 815u << 1), + FCTIW_OPCODE = (63u << OPCODE_SHIFT | 14u << 1), + FCTIWZ_OPCODE = (63u << OPCODE_SHIFT | 15u << 1), + FRSP_OPCODE = (63u << OPCODE_SHIFT | 12u << 1), + + // WARNING: using fmadd results in a non-compliant vm. 
Some floating + // point tck tests will fail. + FMADD_OPCODE = (59u << OPCODE_SHIFT | 29u << 1), + DMADD_OPCODE = (63u << OPCODE_SHIFT | 29u << 1), + FMSUB_OPCODE = (59u << OPCODE_SHIFT | 28u << 1), + DMSUB_OPCODE = (63u << OPCODE_SHIFT | 28u << 1), + FNMADD_OPCODE = (59u << OPCODE_SHIFT | 31u << 1), + DNMADD_OPCODE = (63u << OPCODE_SHIFT | 31u << 1), + FNMSUB_OPCODE = (59u << OPCODE_SHIFT | 30u << 1), + DNMSUB_OPCODE = (63u << OPCODE_SHIFT | 30u << 1), + + LFD_OPCODE = (50u << OPCODE_SHIFT | 00u << 1), + LFDU_OPCODE = (51u << OPCODE_SHIFT | 00u << 1), + LFDX_OPCODE = (31u << OPCODE_SHIFT | 599u << 1), + LFS_OPCODE = (48u << OPCODE_SHIFT | 00u << 1), + LFSU_OPCODE = (49u << OPCODE_SHIFT | 00u << 1), + LFSX_OPCODE = (31u << OPCODE_SHIFT | 535u << 1), + + STFD_OPCODE = (54u << OPCODE_SHIFT | 00u << 1), + STFDU_OPCODE = (55u << OPCODE_SHIFT | 00u << 1), + STFDX_OPCODE = (31u << OPCODE_SHIFT | 727u << 1), + STFS_OPCODE = (52u << OPCODE_SHIFT | 00u << 1), + STFSU_OPCODE = (53u << OPCODE_SHIFT | 00u << 1), + STFSX_OPCODE = (31u << OPCODE_SHIFT | 663u << 1), + + FSQRT_OPCODE = (63u << OPCODE_SHIFT | 22u << 1), // A-FORM + FSQRTS_OPCODE = (59u << OPCODE_SHIFT | 22u << 1), // A-FORM + + // Vector instruction support for >= Power6 + // Vector Storage Access + LVEBX_OPCODE = (31u << OPCODE_SHIFT | 7u << 1), + LVEHX_OPCODE = (31u << OPCODE_SHIFT | 39u << 1), + LVEWX_OPCODE = (31u << OPCODE_SHIFT | 71u << 1), + LVX_OPCODE = (31u << OPCODE_SHIFT | 103u << 1), + LVXL_OPCODE = (31u << OPCODE_SHIFT | 359u << 1), + STVEBX_OPCODE = (31u << OPCODE_SHIFT | 135u << 1), + STVEHX_OPCODE = (31u << OPCODE_SHIFT | 167u << 1), + STVEWX_OPCODE = (31u << OPCODE_SHIFT | 199u << 1), + STVX_OPCODE = (31u << OPCODE_SHIFT | 231u << 1), + STVXL_OPCODE = (31u << OPCODE_SHIFT | 487u << 1), + LVSL_OPCODE = (31u << OPCODE_SHIFT | 6u << 1), + LVSR_OPCODE = (31u << OPCODE_SHIFT | 38u << 1), + + // Vector Permute and Formatting + VPKPX_OPCODE = (4u << OPCODE_SHIFT | 782u ), + VPKSHSS_OPCODE = (4u << OPCODE_SHIFT | 398u ), + VPKSWSS_OPCODE = (4u << OPCODE_SHIFT | 462u ), + VPKSHUS_OPCODE = (4u << OPCODE_SHIFT | 270u ), + VPKSWUS_OPCODE = (4u << OPCODE_SHIFT | 334u ), + VPKUHUM_OPCODE = (4u << OPCODE_SHIFT | 14u ), + VPKUWUM_OPCODE = (4u << OPCODE_SHIFT | 78u ), + VPKUHUS_OPCODE = (4u << OPCODE_SHIFT | 142u ), + VPKUWUS_OPCODE = (4u << OPCODE_SHIFT | 206u ), + VUPKHPX_OPCODE = (4u << OPCODE_SHIFT | 846u ), + VUPKHSB_OPCODE = (4u << OPCODE_SHIFT | 526u ), + VUPKHSH_OPCODE = (4u << OPCODE_SHIFT | 590u ), + VUPKLPX_OPCODE = (4u << OPCODE_SHIFT | 974u ), + VUPKLSB_OPCODE = (4u << OPCODE_SHIFT | 654u ), + VUPKLSH_OPCODE = (4u << OPCODE_SHIFT | 718u ), + + VMRGHB_OPCODE = (4u << OPCODE_SHIFT | 12u ), + VMRGHW_OPCODE = (4u << OPCODE_SHIFT | 140u ), + VMRGHH_OPCODE = (4u << OPCODE_SHIFT | 76u ), + VMRGLB_OPCODE = (4u << OPCODE_SHIFT | 268u ), + VMRGLW_OPCODE = (4u << OPCODE_SHIFT | 396u ), + VMRGLH_OPCODE = (4u << OPCODE_SHIFT | 332u ), + + VSPLT_OPCODE = (4u << OPCODE_SHIFT | 524u ), + VSPLTH_OPCODE = (4u << OPCODE_SHIFT | 588u ), + VSPLTW_OPCODE = (4u << OPCODE_SHIFT | 652u ), + VSPLTISB_OPCODE= (4u << OPCODE_SHIFT | 780u ), + VSPLTISH_OPCODE= (4u << OPCODE_SHIFT | 844u ), + VSPLTISW_OPCODE= (4u << OPCODE_SHIFT | 908u ), + + VPERM_OPCODE = (4u << OPCODE_SHIFT | 43u ), + VSEL_OPCODE = (4u << OPCODE_SHIFT | 42u ), + + VSL_OPCODE = (4u << OPCODE_SHIFT | 452u ), + VSLDOI_OPCODE = (4u << OPCODE_SHIFT | 44u ), + VSLO_OPCODE = (4u << OPCODE_SHIFT | 1036u ), + VSR_OPCODE = (4u << OPCODE_SHIFT | 708u ), + VSRO_OPCODE = (4u << OPCODE_SHIFT | 1100u 
), + + // Vector Integer + VADDCUW_OPCODE = (4u << OPCODE_SHIFT | 384u ), + VADDSHS_OPCODE = (4u << OPCODE_SHIFT | 832u ), + VADDSBS_OPCODE = (4u << OPCODE_SHIFT | 768u ), + VADDSWS_OPCODE = (4u << OPCODE_SHIFT | 896u ), + VADDUBM_OPCODE = (4u << OPCODE_SHIFT | 0u ), + VADDUWM_OPCODE = (4u << OPCODE_SHIFT | 128u ), + VADDUHM_OPCODE = (4u << OPCODE_SHIFT | 64u ), + VADDUBS_OPCODE = (4u << OPCODE_SHIFT | 512u ), + VADDUWS_OPCODE = (4u << OPCODE_SHIFT | 640u ), + VADDUHS_OPCODE = (4u << OPCODE_SHIFT | 576u ), + VSUBCUW_OPCODE = (4u << OPCODE_SHIFT | 1408u ), + VSUBSHS_OPCODE = (4u << OPCODE_SHIFT | 1856u ), + VSUBSBS_OPCODE = (4u << OPCODE_SHIFT | 1792u ), + VSUBSWS_OPCODE = (4u << OPCODE_SHIFT | 1920u ), + VSUBUBM_OPCODE = (4u << OPCODE_SHIFT | 1024u ), + VSUBUWM_OPCODE = (4u << OPCODE_SHIFT | 1152u ), + VSUBUHM_OPCODE = (4u << OPCODE_SHIFT | 1088u ), + VSUBUBS_OPCODE = (4u << OPCODE_SHIFT | 1536u ), + VSUBUWS_OPCODE = (4u << OPCODE_SHIFT | 1664u ), + VSUBUHS_OPCODE = (4u << OPCODE_SHIFT | 1600u ), + + VMULESB_OPCODE = (4u << OPCODE_SHIFT | 776u ), + VMULEUB_OPCODE = (4u << OPCODE_SHIFT | 520u ), + VMULESH_OPCODE = (4u << OPCODE_SHIFT | 840u ), + VMULEUH_OPCODE = (4u << OPCODE_SHIFT | 584u ), + VMULOSB_OPCODE = (4u << OPCODE_SHIFT | 264u ), + VMULOUB_OPCODE = (4u << OPCODE_SHIFT | 8u ), + VMULOSH_OPCODE = (4u << OPCODE_SHIFT | 328u ), + VMULOUH_OPCODE = (4u << OPCODE_SHIFT | 72u ), + VMHADDSHS_OPCODE=(4u << OPCODE_SHIFT | 32u ), + VMHRADDSHS_OPCODE=(4u << OPCODE_SHIFT | 33u ), + VMLADDUHM_OPCODE=(4u << OPCODE_SHIFT | 34u ), + VMSUBUHM_OPCODE= (4u << OPCODE_SHIFT | 36u ), + VMSUMMBM_OPCODE= (4u << OPCODE_SHIFT | 37u ), + VMSUMSHM_OPCODE= (4u << OPCODE_SHIFT | 40u ), + VMSUMSHS_OPCODE= (4u << OPCODE_SHIFT | 41u ), + VMSUMUHM_OPCODE= (4u << OPCODE_SHIFT | 38u ), + VMSUMUHS_OPCODE= (4u << OPCODE_SHIFT | 39u ), + + VSUMSWS_OPCODE = (4u << OPCODE_SHIFT | 1928u ), + VSUM2SWS_OPCODE= (4u << OPCODE_SHIFT | 1672u ), + VSUM4SBS_OPCODE= (4u << OPCODE_SHIFT | 1800u ), + VSUM4UBS_OPCODE= (4u << OPCODE_SHIFT | 1544u ), + VSUM4SHS_OPCODE= (4u << OPCODE_SHIFT | 1608u ), + + VAVGSB_OPCODE = (4u << OPCODE_SHIFT | 1282u ), + VAVGSW_OPCODE = (4u << OPCODE_SHIFT | 1410u ), + VAVGSH_OPCODE = (4u << OPCODE_SHIFT | 1346u ), + VAVGUB_OPCODE = (4u << OPCODE_SHIFT | 1026u ), + VAVGUW_OPCODE = (4u << OPCODE_SHIFT | 1154u ), + VAVGUH_OPCODE = (4u << OPCODE_SHIFT | 1090u ), + + VMAXSB_OPCODE = (4u << OPCODE_SHIFT | 258u ), + VMAXSW_OPCODE = (4u << OPCODE_SHIFT | 386u ), + VMAXSH_OPCODE = (4u << OPCODE_SHIFT | 322u ), + VMAXUB_OPCODE = (4u << OPCODE_SHIFT | 2u ), + VMAXUW_OPCODE = (4u << OPCODE_SHIFT | 130u ), + VMAXUH_OPCODE = (4u << OPCODE_SHIFT | 66u ), + VMINSB_OPCODE = (4u << OPCODE_SHIFT | 770u ), + VMINSW_OPCODE = (4u << OPCODE_SHIFT | 898u ), + VMINSH_OPCODE = (4u << OPCODE_SHIFT | 834u ), + VMINUB_OPCODE = (4u << OPCODE_SHIFT | 514u ), + VMINUW_OPCODE = (4u << OPCODE_SHIFT | 642u ), + VMINUH_OPCODE = (4u << OPCODE_SHIFT | 578u ), + + VCMPEQUB_OPCODE= (4u << OPCODE_SHIFT | 6u ), + VCMPEQUH_OPCODE= (4u << OPCODE_SHIFT | 70u ), + VCMPEQUW_OPCODE= (4u << OPCODE_SHIFT | 134u ), + VCMPGTSH_OPCODE= (4u << OPCODE_SHIFT | 838u ), + VCMPGTSB_OPCODE= (4u << OPCODE_SHIFT | 774u ), + VCMPGTSW_OPCODE= (4u << OPCODE_SHIFT | 902u ), + VCMPGTUB_OPCODE= (4u << OPCODE_SHIFT | 518u ), + VCMPGTUH_OPCODE= (4u << OPCODE_SHIFT | 582u ), + VCMPGTUW_OPCODE= (4u << OPCODE_SHIFT | 646u ), + + VAND_OPCODE = (4u << OPCODE_SHIFT | 1028u ), + VANDC_OPCODE = (4u << OPCODE_SHIFT | 1092u ), + VNOR_OPCODE = (4u << OPCODE_SHIFT | 1284u ), + 
VOR_OPCODE = (4u << OPCODE_SHIFT | 1156u ), + VXOR_OPCODE = (4u << OPCODE_SHIFT | 1220u ), + VRLB_OPCODE = (4u << OPCODE_SHIFT | 4u ), + VRLW_OPCODE = (4u << OPCODE_SHIFT | 132u ), + VRLH_OPCODE = (4u << OPCODE_SHIFT | 68u ), + VSLB_OPCODE = (4u << OPCODE_SHIFT | 260u ), + VSKW_OPCODE = (4u << OPCODE_SHIFT | 388u ), + VSLH_OPCODE = (4u << OPCODE_SHIFT | 324u ), + VSRB_OPCODE = (4u << OPCODE_SHIFT | 516u ), + VSRW_OPCODE = (4u << OPCODE_SHIFT | 644u ), + VSRH_OPCODE = (4u << OPCODE_SHIFT | 580u ), + VSRAB_OPCODE = (4u << OPCODE_SHIFT | 772u ), + VSRAW_OPCODE = (4u << OPCODE_SHIFT | 900u ), + VSRAH_OPCODE = (4u << OPCODE_SHIFT | 836u ), + + // Vector Floating-Point + // not implemented yet + + // Vector Status and Control + MTVSCR_OPCODE = (4u << OPCODE_SHIFT | 1604u ), + MFVSCR_OPCODE = (4u << OPCODE_SHIFT | 1540u ), + + // Icache and dcache related instructions + DCBA_OPCODE = (31u << OPCODE_SHIFT | 758u << 1), + DCBZ_OPCODE = (31u << OPCODE_SHIFT | 1014u << 1), + DCBST_OPCODE = (31u << OPCODE_SHIFT | 54u << 1), + DCBF_OPCODE = (31u << OPCODE_SHIFT | 86u << 1), + + DCBT_OPCODE = (31u << OPCODE_SHIFT | 278u << 1), + DCBTST_OPCODE = (31u << OPCODE_SHIFT | 246u << 1), + ICBI_OPCODE = (31u << OPCODE_SHIFT | 982u << 1), + + // Instruction synchronization + ISYNC_OPCODE = (19u << OPCODE_SHIFT | 150u << 1), + // Memory barriers + SYNC_OPCODE = (31u << OPCODE_SHIFT | 598u << 1), + EIEIO_OPCODE = (31u << OPCODE_SHIFT | 854u << 1), + + // Trap instructions + TDI_OPCODE = (2u << OPCODE_SHIFT), + TWI_OPCODE = (3u << OPCODE_SHIFT), + TD_OPCODE = (31u << OPCODE_SHIFT | 68u << 1), + TW_OPCODE = (31u << OPCODE_SHIFT | 4u << 1), + + // Atomics. + LWARX_OPCODE = (31u << OPCODE_SHIFT | 20u << 1), + LDARX_OPCODE = (31u << OPCODE_SHIFT | 84u << 1), + STWCX_OPCODE = (31u << OPCODE_SHIFT | 150u << 1), + STDCX_OPCODE = (31u << OPCODE_SHIFT | 214u << 1) + + }; + + // Trap instructions TO bits + enum trap_to_bits { + // single bits + traptoLessThanSigned = 1 << 4, // 0, left end + traptoGreaterThanSigned = 1 << 3, + traptoEqual = 1 << 2, + traptoLessThanUnsigned = 1 << 1, + traptoGreaterThanUnsigned = 1 << 0, // 4, right end + + // compound ones + traptoUnconditional = (traptoLessThanSigned | + traptoGreaterThanSigned | + traptoEqual | + traptoLessThanUnsigned | + traptoGreaterThanUnsigned) + }; + + // Branch hints BH field + enum branch_hint_bh { + // bclr cases: + bhintbhBCLRisReturn = 0, + bhintbhBCLRisNotReturnButSame = 1, + bhintbhBCLRisNotPredictable = 3, + + // bcctr cases: + bhintbhBCCTRisNotReturnButSame = 0, + bhintbhBCCTRisNotPredictable = 3 + }; + + // Branch prediction hints AT field + enum branch_hint_at { + bhintatNoHint = 0, // at=00 + bhintatIsNotTaken = 2, // at=10 + bhintatIsTaken = 3 // at=11 + }; + + // Branch prediction hints + enum branch_hint_concept { + // Use the same encoding as branch_hint_at to simply code. + bhintNoHint = bhintatNoHint, + bhintIsNotTaken = bhintatIsNotTaken, + bhintIsTaken = bhintatIsTaken + }; + + // Used in BO field of branch instruction. + enum branch_condition { + bcondCRbiIs0 = 4, // bo=001at + bcondCRbiIs1 = 12, // bo=011at + bcondAlways = 20 // bo=10100 + }; + + // Branch condition with combined prediction hints. 
+ enum branch_condition_with_hint { + bcondCRbiIs0_bhintNoHint = bcondCRbiIs0 | bhintatNoHint, + bcondCRbiIs0_bhintIsNotTaken = bcondCRbiIs0 | bhintatIsNotTaken, + bcondCRbiIs0_bhintIsTaken = bcondCRbiIs0 | bhintatIsTaken, + bcondCRbiIs1_bhintNoHint = bcondCRbiIs1 | bhintatNoHint, + bcondCRbiIs1_bhintIsNotTaken = bcondCRbiIs1 | bhintatIsNotTaken, + bcondCRbiIs1_bhintIsTaken = bcondCRbiIs1 | bhintatIsTaken, + }; + + // Branch prediction hints. + inline static int add_bhint_to_boint(const int bhint, const int boint) { + switch (boint) { + case bcondCRbiIs0: + case bcondCRbiIs1: + // branch_hint and branch_hint_at have same encodings + assert( (int)bhintNoHint == (int)bhintatNoHint + && (int)bhintIsNotTaken == (int)bhintatIsNotTaken + && (int)bhintIsTaken == (int)bhintatIsTaken, + "wrong encodings"); + assert((bhint & 0x03) == bhint, "wrong encodings"); + return (boint & ~0x03) | bhint; + case bcondAlways: + // no branch_hint + return boint; + default: + ShouldNotReachHere(); + return 0; + } + } + + // Extract bcond from boint. + inline static int inv_boint_bcond(const int boint) { + int r_bcond = boint & ~0x03; + assert(r_bcond == bcondCRbiIs0 || + r_bcond == bcondCRbiIs1 || + r_bcond == bcondAlways, + "bad branch condition"); + return r_bcond; + } + + // Extract bhint from boint. + inline static int inv_boint_bhint(const int boint) { + int r_bhint = boint & 0x03; + assert(r_bhint == bhintatNoHint || + r_bhint == bhintatIsNotTaken || + r_bhint == bhintatIsTaken, + "bad branch hint"); + return r_bhint; + } + + // Calculate opposite of given bcond. + inline static int opposite_bcond(const int bcond) { + switch (bcond) { + case bcondCRbiIs0: + return bcondCRbiIs1; + case bcondCRbiIs1: + return bcondCRbiIs0; + default: + ShouldNotReachHere(); + return 0; + } + } + + // Calculate opposite of given bhint. + inline static int opposite_bhint(const int bhint) { + switch (bhint) { + case bhintatNoHint: + return bhintatNoHint; + case bhintatIsNotTaken: + return bhintatIsTaken; + case bhintatIsTaken: + return bhintatIsNotTaken; + default: + ShouldNotReachHere(); + return 0; + } + } + + // PPC branch instructions + enum ppcops { + b_op = 18, + bc_op = 16, + bcr_op = 19 + }; + + enum Condition { + negative = 0, + less = 0, + positive = 1, + greater = 1, + zero = 2, + equal = 2, + summary_overflow = 3, + }; + + public: + // Helper functions for groups of instructions + + enum Predict { pt = 1, pn = 0 }; // pt = predict taken + + enum Membar_mask_bits { // page 184, v9 + StoreStore = 1 << 3, + LoadStore = 1 << 2, + StoreLoad = 1 << 1, + LoadLoad = 1 << 0, + + Sync = 1 << 6, + MemIssue = 1 << 5, + Lookaside = 1 << 4 + }; + + // instruction must start at passed address + static int instr_len(unsigned char *instr) { return BytesPerInstWord; } + + // instruction must be left-justified in argument + static int instr_len(unsigned long instr) { return BytesPerInstWord; } + + // longest instructions + static int instr_maxlen() { return BytesPerInstWord; } + + // Test if x is within signed immediate range for nbits. 
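+  // E.g. is_simm(x, 16) accepts -32768 <= x <= 32767, the range of the signed
+  // 16-bit immediate field of D-form instructions such as addi.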
+ static bool is_simm(int x, unsigned int nbits) { + assert(0 < nbits && nbits < 32, "out of bounds"); + const int min = -( ((int)1) << nbits-1 ); + const int maxplus1 = ( ((int)1) << nbits-1 ); + return min <= x && x < maxplus1; + } + + static bool is_simm(jlong x, unsigned int nbits) { + assert(0 < nbits && nbits < 64, "out of bounds"); + const jlong min = -( ((jlong)1) << nbits-1 ); + const jlong maxplus1 = ( ((jlong)1) << nbits-1 ); + return min <= x && x < maxplus1; + } + + // Test if x is within unsigned immediate range for nbits + static bool is_uimm(int x, unsigned int nbits) { + assert(0 < nbits && nbits < 32, "out of bounds"); + const int maxplus1 = ( ((int)1) << nbits ); + return 0 <= x && x < maxplus1; + } + + static bool is_uimm(jlong x, unsigned int nbits) { + assert(0 < nbits && nbits < 64, "out of bounds"); + const jlong maxplus1 = ( ((jlong)1) << nbits ); + return 0 <= x && x < maxplus1; + } + + protected: + // helpers + + // X is supposed to fit in a field "nbits" wide + // and be sign-extended. Check the range. + static void assert_signed_range(intptr_t x, int nbits) { + assert(nbits == 32 || (-(1 << nbits-1) <= x && x < (1 << nbits-1)), + "value out of range"); + } + + static void assert_signed_word_disp_range(intptr_t x, int nbits) { + assert((x & 3) == 0, "not word aligned"); + assert_signed_range(x, nbits + 2); + } + + static void assert_unsigned_const(int x, int nbits) { + assert(juint(x) < juint(1 << nbits), "unsigned constant out of range"); + } + + static int fmask(juint hi_bit, juint lo_bit) { + assert(hi_bit >= lo_bit && hi_bit < 32, "bad bits"); + return (1 << ( hi_bit-lo_bit + 1 )) - 1; + } + + // inverse of u_field + static int inv_u_field(int x, int hi_bit, int lo_bit) { + juint r = juint(x) >> lo_bit; + r &= fmask(hi_bit, lo_bit); + return int(r); + } + + // signed version: extract from field and sign-extend + static int inv_s_field_ppc(int x, int hi_bit, int lo_bit) { + x = x << (31-hi_bit); + x = x >> (31-hi_bit+lo_bit); + return x; + } + + static int u_field(int x, int hi_bit, int lo_bit) { + assert((x & ~fmask(hi_bit, lo_bit)) == 0, "value out of range"); + int r = x << lo_bit; + assert(inv_u_field(r, hi_bit, lo_bit) == x, "just checking"); + return r; + } + + // Same as u_field for signed values + static int s_field(int x, int hi_bit, int lo_bit) { + int nbits = hi_bit - lo_bit + 1; + assert(nbits == 32 || (-(1 << nbits-1) <= x && x < (1 << nbits-1)), + "value out of range"); + x &= fmask(hi_bit, lo_bit); + int r = x << lo_bit; + return r; + } + + // inv_op for ppc instructions + static int inv_op_ppc(int x) { return inv_u_field(x, 31, 26); } + + // Determine target address from li, bd field of branch instruction. + static intptr_t inv_li_field(int x) { + intptr_t r = inv_s_field_ppc(x, 25, 2); + r = (r << 2); + return r; + } + static intptr_t inv_bd_field(int x, intptr_t pos) { + intptr_t r = inv_s_field_ppc(x, 15, 2); + r = (r << 2) + pos; + return r; + } + + #define inv_opp_u_field(x, hi_bit, lo_bit) inv_u_field(x, 31-(lo_bit), 31-(hi_bit)) + #define inv_opp_s_field(x, hi_bit, lo_bit) inv_s_field_ppc(x, 31-(lo_bit), 31-(hi_bit)) + // Extract instruction fields from instruction words. + public: + static int inv_ra_field(int x) { return inv_opp_u_field(x, 15, 11); } + static int inv_rb_field(int x) { return inv_opp_u_field(x, 20, 16); } + static int inv_rt_field(int x) { return inv_opp_u_field(x, 10, 6); } + static int inv_rs_field(int x) { return inv_opp_u_field(x, 10, 6); } + // Ds uses opp_s_field(x, 31, 16), but lowest 2 bits must be 0. 
+ // Inv_ds_field uses range (x, 29, 16) but shifts by 2 to ensure that lowest bits are 0. + static int inv_ds_field(int x) { return inv_opp_s_field(x, 29, 16) << 2; } + static int inv_d1_field(int x) { return inv_opp_s_field(x, 31, 16); } + static int inv_si_field(int x) { return inv_opp_s_field(x, 31, 16); } + static int inv_to_field(int x) { return inv_opp_u_field(x, 10, 6); } + static int inv_lk_field(int x) { return inv_opp_u_field(x, 31, 31); } + static int inv_bo_field(int x) { return inv_opp_u_field(x, 10, 6); } + static int inv_bi_field(int x) { return inv_opp_u_field(x, 15, 11); } + + #define opp_u_field(x, hi_bit, lo_bit) u_field(x, 31-(lo_bit), 31-(hi_bit)) + #define opp_s_field(x, hi_bit, lo_bit) s_field(x, 31-(lo_bit), 31-(hi_bit)) + + // instruction fields + static int aa( int x) { return opp_u_field(x, 30, 30); } + static int ba( int x) { return opp_u_field(x, 15, 11); } + static int bb( int x) { return opp_u_field(x, 20, 16); } + static int bc( int x) { return opp_u_field(x, 25, 21); } + static int bd( int x) { return opp_s_field(x, 29, 16); } + static int bf( ConditionRegister cr) { return bf(cr->encoding()); } + static int bf( int x) { return opp_u_field(x, 8, 6); } + static int bfa(ConditionRegister cr) { return bfa(cr->encoding()); } + static int bfa( int x) { return opp_u_field(x, 13, 11); } + static int bh( int x) { return opp_u_field(x, 20, 19); } + static int bi( int x) { return opp_u_field(x, 15, 11); } + static int bi0(ConditionRegister cr, Condition c) { return (cr->encoding() << 2) | c; } + static int bo( int x) { return opp_u_field(x, 10, 6); } + static int bt( int x) { return opp_u_field(x, 10, 6); } + static int d1( int x) { return opp_s_field(x, 31, 16); } + static int ds( int x) { assert((x & 0x3) == 0, "unaligned offset"); return opp_s_field(x, 31, 16); } + static int eh( int x) { return opp_u_field(x, 31, 31); } + static int flm( int x) { return opp_u_field(x, 14, 7); } + static int fra( FloatRegister r) { return fra(r->encoding());} + static int frb( FloatRegister r) { return frb(r->encoding());} + static int frc( FloatRegister r) { return frc(r->encoding());} + static int frs( FloatRegister r) { return frs(r->encoding());} + static int frt( FloatRegister r) { return frt(r->encoding());} + static int fra( int x) { return opp_u_field(x, 15, 11); } + static int frb( int x) { return opp_u_field(x, 20, 16); } + static int frc( int x) { return opp_u_field(x, 25, 21); } + static int frs( int x) { return opp_u_field(x, 10, 6); } + static int frt( int x) { return opp_u_field(x, 10, 6); } + static int fxm( int x) { return opp_u_field(x, 19, 12); } + static int l10( int x) { return opp_u_field(x, 10, 10); } + static int l15( int x) { return opp_u_field(x, 15, 15); } + static int l910( int x) { return opp_u_field(x, 10, 9); } + static int lev( int x) { return opp_u_field(x, 26, 20); } + static int li( int x) { return opp_s_field(x, 29, 6); } + static int lk( int x) { return opp_u_field(x, 31, 31); } + static int mb2125( int x) { return opp_u_field(x, 25, 21); } + static int me2630( int x) { return opp_u_field(x, 30, 26); } + static int mb2126( int x) { return opp_u_field(((x & 0x1f) << 1) | ((x & 0x20) >> 5), 26, 21); } + static int me2126( int x) { return mb2126(x); } + static int nb( int x) { return opp_u_field(x, 20, 16); } + //static int opcd( int x) { return opp_u_field(x, 5, 0); } // is contained in our opcodes + static int oe( int x) { return opp_u_field(x, 21, 21); } + static int ra( Register r) { return ra(r->encoding()); } + static int ra( int x) { 
return opp_u_field(x, 15, 11); } + static int rb( Register r) { return rb(r->encoding()); } + static int rb( int x) { return opp_u_field(x, 20, 16); } + static int rc( int x) { return opp_u_field(x, 31, 31); } + static int rs( Register r) { return rs(r->encoding()); } + static int rs( int x) { return opp_u_field(x, 10, 6); } + // we don't want to use R0 in memory accesses, because it has value `0' then + static int ra0mem( Register r) { assert(r != R0, "cannot use register R0 in memory access"); return ra(r); } + static int ra0mem( int x) { assert(x != 0, "cannot use register 0 in memory access"); return ra(x); } + + // register r is target + static int rt( Register r) { return rs(r); } + static int rt( int x) { return rs(x); } + static int rta( Register r) { return ra(r); } + static int rta0mem( Register r) { rta(r); return ra0mem(r); } + + static int sh1620( int x) { return opp_u_field(x, 20, 16); } + static int sh30( int x) { return opp_u_field(x, 30, 30); } + static int sh162030( int x) { return sh1620(x & 0x1f) | sh30((x & 0x20) >> 5); } + static int si( int x) { return opp_s_field(x, 31, 16); } + static int spr( int x) { return opp_u_field(x, 20, 11); } + static int sr( int x) { return opp_u_field(x, 15, 12); } + static int tbr( int x) { return opp_u_field(x, 20, 11); } + static int th( int x) { return opp_u_field(x, 10, 7); } + static int thct( int x) { assert((x&8)==0, "must be valid cache specification"); return th(x); } + static int thds( int x) { assert((x&8)==8, "must be valid stream specification"); return th(x); } + static int to( int x) { return opp_u_field(x, 10, 6); } + static int u( int x) { return opp_u_field(x, 19, 16); } + static int ui( int x) { return opp_u_field(x, 31, 16); } + + // support vector instructions for >= Power6 + static int vra( int x) { return opp_u_field(x, 15, 11); } + static int vrb( int x) { return opp_u_field(x, 20, 16); } + static int vrc( int x) { return opp_u_field(x, 25, 21); } + static int vrs( int x) { return opp_u_field(x, 10, 6); } + static int vrt( int x) { return opp_u_field(x, 10, 6); } + + static int vra( VectorRegister r) { return vra(r->encoding());} + static int vrb( VectorRegister r) { return vrb(r->encoding());} + static int vrc( VectorRegister r) { return vrc(r->encoding());} + static int vrs( VectorRegister r) { return vrs(r->encoding());} + static int vrt( VectorRegister r) { return vrt(r->encoding());} + + static int vsplt_uim( int x) { return opp_u_field(x, 15, 12); } // for vsplt* instructions + static int vsplti_sim(int x) { return opp_u_field(x, 15, 11); } // for vsplti* instructions + static int vsldoi_shb(int x) { return opp_u_field(x, 25, 22); } // for vsldoi instruction + static int vcmp_rc( int x) { return opp_u_field(x, 21, 21); } // for vcmp* instructions + + //static int xo1( int x) { return opp_u_field(x, 29, 21); }// is contained in our opcodes + //static int xo2( int x) { return opp_u_field(x, 30, 21); }// is contained in our opcodes + //static int xo3( int x) { return opp_u_field(x, 30, 22); }// is contained in our opcodes + //static int xo4( int x) { return opp_u_field(x, 30, 26); }// is contained in our opcodes + //static int xo5( int x) { return opp_u_field(x, 29, 27); }// is contained in our opcodes + //static int xo6( int x) { return opp_u_field(x, 30, 27); }// is contained in our opcodes + //static int xo7( int x) { return opp_u_field(x, 31, 30); }// is contained in our opcodes + + protected: + // Compute relative address for branch. 
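+  // The result is in words, not bytes: the low two bits of a branch
+  // displacement are always zero and are not encoded, hence the >> 2 below.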
+ static intptr_t disp(intptr_t x, intptr_t off) { + int xx = x - off; + xx = xx >> 2; + return xx; + } + + public: + // signed immediate, in low bits, nbits long + static int simm(int x, int nbits) { + assert_signed_range(x, nbits); + return x & ((1 << nbits) - 1); + } + + // unsigned immediate, in low bits, nbits long + static int uimm(int x, int nbits) { + assert_unsigned_const(x, nbits); + return x & ((1 << nbits) - 1); + } + + static void set_imm(int* instr, short s) { + short* p = ((short *)instr) + 1; + *p = s; + } + + static int get_imm(address a, int instruction_number) { + short imm; + short *p =((short *)a)+2*instruction_number+1; + imm = *p; + return (int)imm; + } + + static inline int hi16_signed( int x) { return (int)(int16_t)(x >> 16); } + static inline int lo16_unsigned(int x) { return x & 0xffff; } + + protected: + + // Extract the top 32 bits in a 64 bit word. + static int32_t hi32(int64_t x) { + int32_t r = int32_t((uint64_t)x >> 32); + return r; + } + + public: + + static inline unsigned int align_addr(unsigned int addr, unsigned int a) { + return ((addr + (a - 1)) & ~(a - 1)); + } + + static inline bool is_aligned(unsigned int addr, unsigned int a) { + return (0 == addr % a); + } + + void flush() { + AbstractAssembler::flush(); + } + + inline void emit_int32(int); // shadows AbstractAssembler::emit_int32 + inline void emit_data(int); + inline void emit_data(int, RelocationHolder const&); + inline void emit_data(int, relocInfo::relocType rtype); + + // Emit an address. + inline address emit_addr(const address addr = NULL); + + // Emit a function descriptor with the specified entry point, TOC, + // and ENV. If the entry point is NULL, the descriptor will point + // just past the descriptor. + // Use values from friend functions as defaults. + inline address emit_fd(address entry = NULL, + address toc = (address) FunctionDescriptor::friend_toc, + address env = (address) FunctionDescriptor::friend_env); + + ///////////////////////////////////////////////////////////////////////////////////// + // PPC instructions + ///////////////////////////////////////////////////////////////////////////////////// + + // Memory instructions use r0 as hard coded 0, e.g. to simulate loading + // immediates. The normal instruction encoders enforce that r0 is not + // passed to them. Use either extended mnemonics encoders or the special ra0 + // versions. + + // Issue an illegal instruction. 
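To make the hi16_signed/lo16_unsigned helpers above concrete: they split a 32-bit value into the two 16-bit immediates that a lis/ori-style pair consumes. A minimal self-contained check, with everything outside the two one-liners being illustrative scaffolding:

#include <cstdint>
#include <cstdio>

static inline int hi16_signed(int x)   { return (int)(int16_t)(x >> 16); }
static inline int lo16_unsigned(int x) { return x & 0xffff; }

int main() {
  int x  = 0x12348765;
  int hi = hi16_signed(x);     // immediate for an addis/lis-style instruction
  int lo = lo16_unsigned(x);   // immediate for a following ori
  // Shifting the (sign-extended) high half back and OR-ing the low half
  // reproduces the original 32-bit pattern.
  uint32_t back = ((uint32_t)hi << 16) | (uint32_t)lo;
  printf("hi=0x%04x lo=0x%04x back=0x%08x\n", hi & 0xffff, lo, back);
  return 0;
}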
+ inline void illtrap(); + static inline bool is_illtrap(int x); + + // PPC 1, section 3.3.8, Fixed-Point Arithmetic Instructions + inline void addi( Register d, Register a, int si16); + inline void addis(Register d, Register a, int si16); + private: + inline void addi_r0ok( Register d, Register a, int si16); + inline void addis_r0ok(Register d, Register a, int si16); + public: + inline void addic_( Register d, Register a, int si16); + inline void subfic( Register d, Register a, int si16); + inline void add( Register d, Register a, Register b); + inline void add_( Register d, Register a, Register b); + inline void subf( Register d, Register a, Register b); + inline void sub( Register d, Register a, Register b); + inline void subf_( Register d, Register a, Register b); + inline void addc( Register d, Register a, Register b); + inline void addc_( Register d, Register a, Register b); + inline void subfc( Register d, Register a, Register b); + inline void subfc_( Register d, Register a, Register b); + inline void adde( Register d, Register a, Register b); + inline void adde_( Register d, Register a, Register b); + inline void subfe( Register d, Register a, Register b); + inline void subfe_( Register d, Register a, Register b); + inline void neg( Register d, Register a); + inline void neg_( Register d, Register a); + inline void mulli( Register d, Register a, int si16); + inline void mulld( Register d, Register a, Register b); + inline void mulld_( Register d, Register a, Register b); + inline void mullw( Register d, Register a, Register b); + inline void mullw_( Register d, Register a, Register b); + inline void mulhw( Register d, Register a, Register b); + inline void mulhw_( Register d, Register a, Register b); + inline void mulhd( Register d, Register a, Register b); + inline void mulhd_( Register d, Register a, Register b); + inline void mulhdu( Register d, Register a, Register b); + inline void mulhdu_(Register d, Register a, Register b); + inline void divd( Register d, Register a, Register b); + inline void divd_( Register d, Register a, Register b); + inline void divw( Register d, Register a, Register b); + inline void divw_( Register d, Register a, Register b); + + // extended mnemonics + inline void li( Register d, int si16); + inline void lis( Register d, int si16); + inline void addir(Register d, int si16, Register a); + + static bool is_addi(int x) { + return ADDI_OPCODE == (x & ADDI_OPCODE_MASK); + } + static bool is_addis(int x) { + return ADDIS_OPCODE == (x & ADDIS_OPCODE_MASK); + } + static bool is_bxx(int x) { + return BXX_OPCODE == (x & BXX_OPCODE_MASK); + } + static bool is_b(int x) { + return BXX_OPCODE == (x & BXX_OPCODE_MASK) && inv_lk_field(x) == 0; + } + static bool is_bl(int x) { + return BXX_OPCODE == (x & BXX_OPCODE_MASK) && inv_lk_field(x) == 1; + } + static bool is_bcxx(int x) { + return BCXX_OPCODE == (x & BCXX_OPCODE_MASK); + } + static bool is_bxx_or_bcxx(int x) { + return is_bxx(x) || is_bcxx(x); + } + static bool is_bctrl(int x) { + return x == 0x4e800421; + } + static bool is_bctr(int x) { + return x == 0x4e800420; + } + static bool is_bclr(int x) { + return BCLR_OPCODE == (x & XL_FORM_OPCODE_MASK); + } + static bool is_li(int x) { + return is_addi(x) && inv_ra_field(x)==0; + } + static bool is_lis(int x) { + return is_addis(x) && inv_ra_field(x)==0; + } + static bool is_mtctr(int x) { + return MTCTR_OPCODE == (x & MTCTR_OPCODE_MASK); + } + static bool is_ld(int x) { + return LD_OPCODE == (x & LD_OPCODE_MASK); + } + static bool is_std(int x) { + return 
STD_OPCODE == (x & STD_OPCODE_MASK); + } + static bool is_stdu(int x) { + return STDU_OPCODE == (x & STDU_OPCODE_MASK); + } + static bool is_stdx(int x) { + return STDX_OPCODE == (x & STDX_OPCODE_MASK); + } + static bool is_stdux(int x) { + return STDUX_OPCODE == (x & STDUX_OPCODE_MASK); + } + static bool is_stwx(int x) { + return STWX_OPCODE == (x & STWX_OPCODE_MASK); + } + static bool is_stwux(int x) { + return STWUX_OPCODE == (x & STWUX_OPCODE_MASK); + } + static bool is_stw(int x) { + return STW_OPCODE == (x & STW_OPCODE_MASK); + } + static bool is_stwu(int x) { + return STWU_OPCODE == (x & STWU_OPCODE_MASK); + } + static bool is_ori(int x) { + return ORI_OPCODE == (x & ORI_OPCODE_MASK); + }; + static bool is_oris(int x) { + return ORIS_OPCODE == (x & ORIS_OPCODE_MASK); + }; + static bool is_rldicr(int x) { + return (RLDICR_OPCODE == (x & RLDICR_OPCODE_MASK)); + }; + static bool is_nop(int x) { + return x == 0x60000000; + } + // endgroup opcode for Power6 + static bool is_endgroup(int x) { + return is_ori(x) && inv_ra_field(x)==1 && inv_rs_field(x)==1 && inv_d1_field(x)==0; + } + + + private: + // PPC 1, section 3.3.9, Fixed-Point Compare Instructions + inline void cmpi( ConditionRegister bf, int l, Register a, int si16); + inline void cmp( ConditionRegister bf, int l, Register a, Register b); + inline void cmpli(ConditionRegister bf, int l, Register a, int ui16); + inline void cmpl( ConditionRegister bf, int l, Register a, Register b); + + public: + // extended mnemonics of Compare Instructions + inline void cmpwi( ConditionRegister crx, Register a, int si16); + inline void cmpdi( ConditionRegister crx, Register a, int si16); + inline void cmpw( ConditionRegister crx, Register a, Register b); + inline void cmpd( ConditionRegister crx, Register a, Register b); + inline void cmplwi(ConditionRegister crx, Register a, int ui16); + inline void cmpldi(ConditionRegister crx, Register a, int ui16); + inline void cmplw( ConditionRegister crx, Register a, Register b); + inline void cmpld( ConditionRegister crx, Register a, Register b); + + inline void isel( Register d, Register a, Register b, int bc); + + // PPC 1, section 3.3.11, Fixed-Point Logical Instructions + void andi( Register a, Register s, int ui16); // optimized version + inline void andi_( Register a, Register s, int ui16); + inline void andis_( Register a, Register s, int ui16); + inline void ori( Register a, Register s, int ui16); + inline void oris( Register a, Register s, int ui16); + inline void xori( Register a, Register s, int ui16); + inline void xoris( Register a, Register s, int ui16); + inline void andr( Register a, Register s, Register b); // suffixed by 'r' as 'and' is C++ keyword + inline void and_( Register a, Register s, Register b); + // Turn or0(rx,rx,rx) into a nop and avoid that we accidently emit a + // SMT-priority change instruction (see SMT instructions below). 
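The is_b/is_bl/is_bxx predicates above test the primary opcode and the LK (link) bit of an I-form branch. The following sketch re-derives that decoding directly from the Power ISA field layout; the helper names and bodies are local to this example, not the VM's inverse-field functions, and an arithmetic right shift is assumed.

#include <cstdint>
#include <cstdio>

// I-form branch: ISA bits 0..5 primary opcode (18), bits 6..29 LI (word offset),
// bit 30 AA (absolute), bit 31 LK (link).
static bool    is_bxx(uint32_t x) { return (x >> 26) == 18; }
static bool    is_bl(uint32_t x)  { return is_bxx(x) && (x & 1) != 0; }
static int64_t bxx_offset(uint32_t x) {
  // Sign-extend the 24-bit LI field and scale by the 4-byte instruction size.
  return (int64_t)(((int32_t)(x << 6)) >> 8) * 4;
}

int main() {
  uint32_t insn = 0x48000021;   // bl to pc+0x20 (LI = 8, LK = 1)
  if (is_bl(insn))
    printf("bl, offset = %+lld bytes\n", (long long)bxx_offset(insn));
  return 0;
}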
+ inline void or_unchecked(Register a, Register s, Register b); + inline void orr( Register a, Register s, Register b); // suffixed by 'r' as 'or' is C++ keyword + inline void or_( Register a, Register s, Register b); + inline void xorr( Register a, Register s, Register b); // suffixed by 'r' as 'xor' is C++ keyword + inline void xor_( Register a, Register s, Register b); + inline void nand( Register a, Register s, Register b); + inline void nand_( Register a, Register s, Register b); + inline void nor( Register a, Register s, Register b); + inline void nor_( Register a, Register s, Register b); + inline void andc( Register a, Register s, Register b); + inline void andc_( Register a, Register s, Register b); + inline void orc( Register a, Register s, Register b); + inline void orc_( Register a, Register s, Register b); + inline void extsb( Register a, Register s); + inline void extsh( Register a, Register s); + inline void extsw( Register a, Register s); + + // extended mnemonics + inline void nop(); + // NOP for FP and BR units (different versions to allow them to be in one group) + inline void fpnop0(); + inline void fpnop1(); + inline void brnop0(); + inline void brnop1(); + inline void brnop2(); + + inline void mr( Register d, Register s); + inline void ori_opt( Register d, int ui16); + inline void oris_opt(Register d, int ui16); + + // endgroup opcode for Power6 + inline void endgroup(); + + // count instructions + inline void cntlzw( Register a, Register s); + inline void cntlzw_( Register a, Register s); + inline void cntlzd( Register a, Register s); + inline void cntlzd_( Register a, Register s); + + // PPC 1, section 3.3.12, Fixed-Point Rotate and Shift Instructions + inline void sld( Register a, Register s, Register b); + inline void sld_( Register a, Register s, Register b); + inline void slw( Register a, Register s, Register b); + inline void slw_( Register a, Register s, Register b); + inline void srd( Register a, Register s, Register b); + inline void srd_( Register a, Register s, Register b); + inline void srw( Register a, Register s, Register b); + inline void srw_( Register a, Register s, Register b); + inline void srad( Register a, Register s, Register b); + inline void srad_( Register a, Register s, Register b); + inline void sraw( Register a, Register s, Register b); + inline void sraw_( Register a, Register s, Register b); + inline void sradi( Register a, Register s, int sh6); + inline void sradi_( Register a, Register s, int sh6); + inline void srawi( Register a, Register s, int sh5); + inline void srawi_( Register a, Register s, int sh5); + + // extended mnemonics for Shift Instructions + inline void sldi( Register a, Register s, int sh6); + inline void sldi_( Register a, Register s, int sh6); + inline void slwi( Register a, Register s, int sh5); + inline void slwi_( Register a, Register s, int sh5); + inline void srdi( Register a, Register s, int sh6); + inline void srdi_( Register a, Register s, int sh6); + inline void srwi( Register a, Register s, int sh5); + inline void srwi_( Register a, Register s, int sh5); + + inline void clrrdi( Register a, Register s, int ui6); + inline void clrrdi_( Register a, Register s, int ui6); + inline void clrldi( Register a, Register s, int ui6); + inline void clrldi_( Register a, Register s, int ui6); + inline void clrlsldi(Register a, Register s, int clrl6, int shl6); + inline void clrlsldi_(Register a, Register s, int clrl6, int shl6); + inline void extrdi( Register a, Register s, int n, int b); + // testbit with condition 
register + inline void testbitdi(ConditionRegister cr, Register a, Register s, int ui6); + + // rotate instructions + inline void rotldi( Register a, Register s, int n); + inline void rotrdi( Register a, Register s, int n); + inline void rotlwi( Register a, Register s, int n); + inline void rotrwi( Register a, Register s, int n); + + // Rotate Instructions + inline void rldic( Register a, Register s, int sh6, int mb6); + inline void rldic_( Register a, Register s, int sh6, int mb6); + inline void rldicr( Register a, Register s, int sh6, int mb6); + inline void rldicr_( Register a, Register s, int sh6, int mb6); + inline void rldicl( Register a, Register s, int sh6, int mb6); + inline void rldicl_( Register a, Register s, int sh6, int mb6); + inline void rlwinm( Register a, Register s, int sh5, int mb5, int me5); + inline void rlwinm_( Register a, Register s, int sh5, int mb5, int me5); + inline void rldimi( Register a, Register s, int sh6, int mb6); + inline void rldimi_( Register a, Register s, int sh6, int mb6); + inline void rlwimi( Register a, Register s, int sh5, int mb5, int me5); + inline void insrdi( Register a, Register s, int n, int b); + inline void insrwi( Register a, Register s, int n, int b); + + // PPC 1, section 3.3.2 Fixed-Point Load Instructions + // 4 bytes + inline void lwzx( Register d, Register s1, Register s2); + inline void lwz( Register d, int si16, Register s1); + inline void lwzu( Register d, int si16, Register s1); + + // 4 bytes + inline void lwax( Register d, Register s1, Register s2); + inline void lwa( Register d, int si16, Register s1); + + // 2 bytes + inline void lhzx( Register d, Register s1, Register s2); + inline void lhz( Register d, int si16, Register s1); + inline void lhzu( Register d, int si16, Register s1); + + // 2 bytes + inline void lhax( Register d, Register s1, Register s2); + inline void lha( Register d, int si16, Register s1); + inline void lhau( Register d, int si16, Register s1); + + // 1 byte + inline void lbzx( Register d, Register s1, Register s2); + inline void lbz( Register d, int si16, Register s1); + inline void lbzu( Register d, int si16, Register s1); + + // 8 bytes + inline void ldx( Register d, Register s1, Register s2); + inline void ld( Register d, int si16, Register s1); + inline void ldu( Register d, int si16, Register s1); + + // PPC 1, section 3.3.3 Fixed-Point Store Instructions + inline void stwx( Register d, Register s1, Register s2); + inline void stw( Register d, int si16, Register s1); + inline void stwu( Register d, int si16, Register s1); + + inline void sthx( Register d, Register s1, Register s2); + inline void sth( Register d, int si16, Register s1); + inline void sthu( Register d, int si16, Register s1); + + inline void stbx( Register d, Register s1, Register s2); + inline void stb( Register d, int si16, Register s1); + inline void stbu( Register d, int si16, Register s1); + + inline void stdx( Register d, Register s1, Register s2); + inline void std( Register d, int si16, Register s1); + inline void stdu( Register d, int si16, Register s1); + inline void stdux(Register s, Register a, Register b); + + // PPC 1, section 3.3.13 Move To/From System Register Instructions + inline void mtlr( Register s1); + inline void mflr( Register d); + inline void mtctr(Register s1); + inline void mfctr(Register d); + inline void mtcrf(int fxm, Register s); + inline void mfcr( Register d); + inline void mcrf( ConditionRegister crd, ConditionRegister cra); + inline void mtcr( Register s); + + // PPC 1, section 2.4.1 Branch 
Instructions + inline void b( address a, relocInfo::relocType rt = relocInfo::none); + inline void b( Label& L); + inline void bl( address a, relocInfo::relocType rt = relocInfo::none); + inline void bl( Label& L); + inline void bc( int boint, int biint, address a, relocInfo::relocType rt = relocInfo::none); + inline void bc( int boint, int biint, Label& L); + inline void bcl(int boint, int biint, address a, relocInfo::relocType rt = relocInfo::none); + inline void bcl(int boint, int biint, Label& L); + + inline void bclr( int boint, int biint, int bhint, relocInfo::relocType rt = relocInfo::none); + inline void bclrl( int boint, int biint, int bhint, relocInfo::relocType rt = relocInfo::none); + inline void bcctr( int boint, int biint, int bhint = bhintbhBCCTRisNotReturnButSame, + relocInfo::relocType rt = relocInfo::none); + inline void bcctrl(int boint, int biint, int bhint = bhintbhBCLRisReturn, + relocInfo::relocType rt = relocInfo::none); + + // helper function for b, bcxx + inline bool is_within_range_of_b(address a, address pc); + inline bool is_within_range_of_bcxx(address a, address pc); + + // get the destination of a bxx branch (b, bl, ba, bla) + static inline address bxx_destination(address baddr); + static inline address bxx_destination(int instr, address pc); + static inline intptr_t bxx_destination_offset(int instr, intptr_t bxx_pos); + + // extended mnemonics for branch instructions + inline void blt(ConditionRegister crx, Label& L); + inline void bgt(ConditionRegister crx, Label& L); + inline void beq(ConditionRegister crx, Label& L); + inline void bso(ConditionRegister crx, Label& L); + inline void bge(ConditionRegister crx, Label& L); + inline void ble(ConditionRegister crx, Label& L); + inline void bne(ConditionRegister crx, Label& L); + inline void bns(ConditionRegister crx, Label& L); + + // Branch instructions with static prediction hints. 
+ inline void blt_predict_taken( ConditionRegister crx, Label& L); + inline void bgt_predict_taken( ConditionRegister crx, Label& L); + inline void beq_predict_taken( ConditionRegister crx, Label& L); + inline void bso_predict_taken( ConditionRegister crx, Label& L); + inline void bge_predict_taken( ConditionRegister crx, Label& L); + inline void ble_predict_taken( ConditionRegister crx, Label& L); + inline void bne_predict_taken( ConditionRegister crx, Label& L); + inline void bns_predict_taken( ConditionRegister crx, Label& L); + inline void blt_predict_not_taken(ConditionRegister crx, Label& L); + inline void bgt_predict_not_taken(ConditionRegister crx, Label& L); + inline void beq_predict_not_taken(ConditionRegister crx, Label& L); + inline void bso_predict_not_taken(ConditionRegister crx, Label& L); + inline void bge_predict_not_taken(ConditionRegister crx, Label& L); + inline void ble_predict_not_taken(ConditionRegister crx, Label& L); + inline void bne_predict_not_taken(ConditionRegister crx, Label& L); + inline void bns_predict_not_taken(ConditionRegister crx, Label& L); + + // for use in conjunction with testbitdi: + inline void btrue( ConditionRegister crx, Label& L); + inline void bfalse(ConditionRegister crx, Label& L); + + inline void bltl(ConditionRegister crx, Label& L); + inline void bgtl(ConditionRegister crx, Label& L); + inline void beql(ConditionRegister crx, Label& L); + inline void bsol(ConditionRegister crx, Label& L); + inline void bgel(ConditionRegister crx, Label& L); + inline void blel(ConditionRegister crx, Label& L); + inline void bnel(ConditionRegister crx, Label& L); + inline void bnsl(ConditionRegister crx, Label& L); + + // extended mnemonics for Branch Instructions via LR + // We use `blr' for returns. + inline void blr(relocInfo::relocType rt = relocInfo::none); + + // extended mnemonics for Branch Instructions with CTR + // bdnz means `decrement CTR and jump to L if CTR is not zero' + inline void bdnz(Label& L); + // Decrement and branch if result is zero. + inline void bdz(Label& L); + // we use `bctr[l]' for jumps/calls in function descriptor glue + // code, e.g. calls to runtime functions + inline void bctr( relocInfo::relocType rt = relocInfo::none); + inline void bctrl(relocInfo::relocType rt = relocInfo::none); + // conditional jumps/branches via CTR + inline void beqctr( ConditionRegister crx, relocInfo::relocType rt = relocInfo::none); + inline void beqctrl(ConditionRegister crx, relocInfo::relocType rt = relocInfo::none); + inline void bnectr( ConditionRegister crx, relocInfo::relocType rt = relocInfo::none); + inline void bnectrl(ConditionRegister crx, relocInfo::relocType rt = relocInfo::none); + + // condition register logic instructions + inline void crand( int d, int s1, int s2); + inline void crnand(int d, int s1, int s2); + inline void cror( int d, int s1, int s2); + inline void crxor( int d, int s1, int s2); + inline void crnor( int d, int s1, int s2); + inline void creqv( int d, int s1, int s2); + inline void crandc(int d, int s1, int s2); + inline void crorc( int d, int s1, int s2); + + // icache and dcache related instructions + inline void icbi( Register s1, Register s2); + //inline void dcba(Register s1, Register s2); // Instruction for embedded processor only. 
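The bdnz/bdz mnemonics above rely on the CTR register being pre-loaded via mtctr; bdnz decrements CTR and branches while it is non-zero, so the body runs exactly CTR times. A plain C++ model of that semantics (no VM code involved):

#include <cstdint>
#include <cstdio>

int main() {
  uint64_t ctr = 4;          // mtctr 4
  int iterations = 0;
  do {
    ++iterations;            // loop body
  } while (--ctr != 0);      // bdnz Lloop: decrement CTR, branch if CTR != 0
  printf("iterations = %d\n", iterations);   // prints 4
  return 0;
}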
+ inline void dcbz( Register s1, Register s2); + inline void dcbst( Register s1, Register s2); + inline void dcbf( Register s1, Register s2); + + enum ct_cache_specification { + ct_primary_cache = 0, + ct_secondary_cache = 2 + }; + // dcache read hint + inline void dcbt( Register s1, Register s2); + inline void dcbtct( Register s1, Register s2, int ct); + inline void dcbtds( Register s1, Register s2, int ds); + // dcache write hint + inline void dcbtst( Register s1, Register s2); + inline void dcbtstct(Register s1, Register s2, int ct); + + // machine barrier instructions: + // + // - sync two-way memory barrier, aka fence + // - lwsync orders Store|Store, + // Load|Store, + // Load|Load, + // but not Store|Load + // - eieio orders memory accesses for device memory (only) + // - isync invalidates speculatively executed instructions + // From the Power ISA 2.06 documentation: + // "[...] an isync instruction prevents the execution of + // instructions following the isync until instructions + // preceding the isync have completed, [...]" + // From IBM's AIX assembler reference: + // "The isync [...] instructions causes the processor to + // refetch any instructions that might have been fetched + // prior to the isync instruction. The instruction isync + // causes the processor to wait for all previous instructions + // to complete. Then any instructions already fetched are + // discarded and instruction processing continues in the + // environment established by the previous instructions." + // + // semantic barrier instructions: + // (as defined in orderAccess.hpp) + // + // - release orders Store|Store, (maps to lwsync) + // Load|Store + // - acquire orders Load|Store, (maps to lwsync) + // Load|Load + // - fence orders Store|Store, (maps to sync) + // Load|Store, + // Load|Load, + // Store|Load + // + private: + inline void sync(int l); + public: + inline void sync(); + inline void lwsync(); + inline void ptesync(); + inline void eieio(); + inline void isync(); + + inline void release(); + inline void acquire(); + inline void fence(); + + // atomics + inline void lwarx_unchecked(Register d, Register a, Register b, int eh1 = 0); + inline void ldarx_unchecked(Register d, Register a, Register b, int eh1 = 0); + inline bool lxarx_hint_exclusive_access(); + inline void lwarx( Register d, Register a, Register b, bool hint_exclusive_access = false); + inline void ldarx( Register d, Register a, Register b, bool hint_exclusive_access = false); + inline void stwcx_( Register s, Register a, Register b); + inline void stdcx_( Register s, Register a, Register b); + + // Instructions for adjusting thread priority for simultaneous + // multithreading (SMT) on Power5. + private: + inline void smt_prio_very_low(); + inline void smt_prio_medium_high(); + inline void smt_prio_high(); + + public: + inline void smt_prio_low(); + inline void smt_prio_medium_low(); + inline void smt_prio_medium(); + + // trap instructions + inline void twi_0(Register a); // for load with acquire semantics use load+twi_0+isync (trap can't occur) + // NOT FOR DIRECT USE!! 
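The release/acquire/fence mapping described in the barrier comment above (release and acquire map to lwsync, fence maps to sync) can be spelled out in C++ with GCC-style inline assembly. This is only a sketch that compiles on PPC targets with a GCC-compatible compiler; it is not how the VM emits barriers, and the publish/consume pair is just the usual pattern the mapping is meant to support.

#include <cstdio>

static inline void ppc_release() { __asm__ __volatile__("lwsync" ::: "memory"); }
static inline void ppc_acquire() { __asm__ __volatile__("lwsync" ::: "memory"); }
static inline void ppc_fence()   { __asm__ __volatile__("sync"   ::: "memory"); }

static int          payload;
static volatile int ready;

// Writer: make the payload visible before publishing the flag (release).
void publish(int v) { payload = v; ppc_release(); ready = 1; }

// Reader: once the flag is seen, acquire before touching the payload.
int consume() { while (ready == 0) { /* spin */ } ppc_acquire(); return payload; }

int main() { publish(42); printf("%d\n", consume()); return 0; }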
+ protected: + inline void tdi_unchecked(int tobits, Register a, int si16); + inline void twi_unchecked(int tobits, Register a, int si16); + inline void tdi( int tobits, Register a, int si16); // asserts UseSIGTRAP + inline void twi( int tobits, Register a, int si16); // asserts UseSIGTRAP + inline void td( int tobits, Register a, Register b); // asserts UseSIGTRAP + inline void tw( int tobits, Register a, Register b); // asserts UseSIGTRAP + + static bool is_tdi(int x, int tobits, int ra, int si16) { + return (TDI_OPCODE == (x & TDI_OPCODE_MASK)) + && (tobits == inv_to_field(x)) + && (ra == -1/*any reg*/ || ra == inv_ra_field(x)) + && (si16 == inv_si_field(x)); + } + + static bool is_twi(int x, int tobits, int ra, int si16) { + return (TWI_OPCODE == (x & TWI_OPCODE_MASK)) + && (tobits == inv_to_field(x)) + && (ra == -1/*any reg*/ || ra == inv_ra_field(x)) + && (si16 == inv_si_field(x)); + } + + static bool is_twi(int x, int tobits, int ra) { + return (TWI_OPCODE == (x & TWI_OPCODE_MASK)) + && (tobits == inv_to_field(x)) + && (ra == -1/*any reg*/ || ra == inv_ra_field(x)); + } + + static bool is_td(int x, int tobits, int ra, int rb) { + return (TD_OPCODE == (x & TD_OPCODE_MASK)) + && (tobits == inv_to_field(x)) + && (ra == -1/*any reg*/ || ra == inv_ra_field(x)) + && (rb == -1/*any reg*/ || rb == inv_rb_field(x)); + } + + static bool is_tw(int x, int tobits, int ra, int rb) { + return (TW_OPCODE == (x & TW_OPCODE_MASK)) + && (tobits == inv_to_field(x)) + && (ra == -1/*any reg*/ || ra == inv_ra_field(x)) + && (rb == -1/*any reg*/ || rb == inv_rb_field(x)); + } + + public: + // PPC floating point instructions + // PPC 1, section 4.6.2 Floating-Point Load Instructions + inline void lfs( FloatRegister d, int si16, Register a); + inline void lfsu( FloatRegister d, int si16, Register a); + inline void lfsx( FloatRegister d, Register a, Register b); + inline void lfd( FloatRegister d, int si16, Register a); + inline void lfdu( FloatRegister d, int si16, Register a); + inline void lfdx( FloatRegister d, Register a, Register b); + + // PPC 1, section 4.6.3 Floating-Point Store Instructions + inline void stfs( FloatRegister s, int si16, Register a); + inline void stfsu( FloatRegister s, int si16, Register a); + inline void stfsx( FloatRegister s, Register a, Register b); + inline void stfd( FloatRegister s, int si16, Register a); + inline void stfdu( FloatRegister s, int si16, Register a); + inline void stfdx( FloatRegister s, Register a, Register b); + + // PPC 1, section 4.6.4 Floating-Point Move Instructions + inline void fmr( FloatRegister d, FloatRegister b); + inline void fmr_( FloatRegister d, FloatRegister b); + + // inline void mffgpr( FloatRegister d, Register b); + // inline void mftgpr( Register d, FloatRegister b); + inline void cmpb( Register a, Register s, Register b); + inline void popcntb(Register a, Register s); + inline void popcntw(Register a, Register s); + inline void popcntd(Register a, Register s); + + inline void fneg( FloatRegister d, FloatRegister b); + inline void fneg_( FloatRegister d, FloatRegister b); + inline void fabs( FloatRegister d, FloatRegister b); + inline void fabs_( FloatRegister d, FloatRegister b); + inline void fnabs( FloatRegister d, FloatRegister b); + inline void fnabs_(FloatRegister d, FloatRegister b); + + // PPC 1, section 4.6.5.1 Floating-Point Elementary Arithmetic Instructions + inline void fadd( FloatRegister d, FloatRegister a, FloatRegister b); + inline void fadd_( FloatRegister d, FloatRegister a, FloatRegister b); + inline void fadds( 
FloatRegister d, FloatRegister a, FloatRegister b); + inline void fadds_(FloatRegister d, FloatRegister a, FloatRegister b); + inline void fsub( FloatRegister d, FloatRegister a, FloatRegister b); + inline void fsub_( FloatRegister d, FloatRegister a, FloatRegister b); + inline void fsubs( FloatRegister d, FloatRegister a, FloatRegister b); + inline void fsubs_(FloatRegister d, FloatRegister a, FloatRegister b); + inline void fmul( FloatRegister d, FloatRegister a, FloatRegister c); + inline void fmul_( FloatRegister d, FloatRegister a, FloatRegister c); + inline void fmuls( FloatRegister d, FloatRegister a, FloatRegister c); + inline void fmuls_(FloatRegister d, FloatRegister a, FloatRegister c); + inline void fdiv( FloatRegister d, FloatRegister a, FloatRegister b); + inline void fdiv_( FloatRegister d, FloatRegister a, FloatRegister b); + inline void fdivs( FloatRegister d, FloatRegister a, FloatRegister b); + inline void fdivs_(FloatRegister d, FloatRegister a, FloatRegister b); + + // PPC 1, section 4.6.6 Floating-Point Rounding and Conversion Instructions + inline void frsp( FloatRegister d, FloatRegister b); + inline void fctid( FloatRegister d, FloatRegister b); + inline void fctidz(FloatRegister d, FloatRegister b); + inline void fctiw( FloatRegister d, FloatRegister b); + inline void fctiwz(FloatRegister d, FloatRegister b); + inline void fcfid( FloatRegister d, FloatRegister b); + inline void fcfids(FloatRegister d, FloatRegister b); + + // PPC 1, section 4.6.7 Floating-Point Compare Instructions + inline void fcmpu( ConditionRegister crx, FloatRegister a, FloatRegister b); + + inline void fsqrt( FloatRegister d, FloatRegister b); + inline void fsqrts(FloatRegister d, FloatRegister b); + + // Vector instructions for >= Power6. + inline void lvebx( VectorRegister d, Register s1, Register s2); + inline void lvehx( VectorRegister d, Register s1, Register s2); + inline void lvewx( VectorRegister d, Register s1, Register s2); + inline void lvx( VectorRegister d, Register s1, Register s2); + inline void lvxl( VectorRegister d, Register s1, Register s2); + inline void stvebx( VectorRegister d, Register s1, Register s2); + inline void stvehx( VectorRegister d, Register s1, Register s2); + inline void stvewx( VectorRegister d, Register s1, Register s2); + inline void stvx( VectorRegister d, Register s1, Register s2); + inline void stvxl( VectorRegister d, Register s1, Register s2); + inline void lvsl( VectorRegister d, Register s1, Register s2); + inline void lvsr( VectorRegister d, Register s1, Register s2); + inline void vpkpx( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vpkshss( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vpkswss( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vpkshus( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vpkswus( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vpkuhum( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vpkuwum( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vpkuhus( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vpkuwus( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vupkhpx( VectorRegister d, VectorRegister b); + inline void vupkhsb( VectorRegister d, VectorRegister b); + inline void vupkhsh( VectorRegister d, VectorRegister b); + inline void vupklpx( VectorRegister d, VectorRegister b); + inline void vupklsb( VectorRegister d, VectorRegister 
b); + inline void vupklsh( VectorRegister d, VectorRegister b); + inline void vmrghb( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vmrghw( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vmrghh( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vmrglb( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vmrglw( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vmrglh( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vsplt( VectorRegister d, int ui4, VectorRegister b); + inline void vsplth( VectorRegister d, int ui3, VectorRegister b); + inline void vspltw( VectorRegister d, int ui2, VectorRegister b); + inline void vspltisb( VectorRegister d, int si5); + inline void vspltish( VectorRegister d, int si5); + inline void vspltisw( VectorRegister d, int si5); + inline void vperm( VectorRegister d, VectorRegister a, VectorRegister b, VectorRegister c); + inline void vsel( VectorRegister d, VectorRegister a, VectorRegister b, VectorRegister c); + inline void vsl( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vsldoi( VectorRegister d, VectorRegister a, VectorRegister b, int si4); + inline void vslo( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vsr( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vsro( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vaddcuw( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vaddshs( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vaddsbs( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vaddsws( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vaddubm( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vadduwm( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vadduhm( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vaddubs( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vadduws( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vadduhs( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vsubcuw( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vsubshs( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vsubsbs( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vsubsws( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vsububm( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vsubuwm( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vsubuhm( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vsububs( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vsubuws( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vsubuhs( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vmulesb( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vmuleub( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vmulesh( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vmuleuh( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vmulosb( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vmuloub( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vmulosh( VectorRegister d, VectorRegister a, 
VectorRegister b); + inline void vmulouh( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vmhaddshs(VectorRegister d, VectorRegister a, VectorRegister b, VectorRegister c); + inline void vmhraddshs(VectorRegister d,VectorRegister a, VectorRegister b, VectorRegister c); + inline void vmladduhm(VectorRegister d, VectorRegister a, VectorRegister b, VectorRegister c); + inline void vmsubuhm( VectorRegister d, VectorRegister a, VectorRegister b, VectorRegister c); + inline void vmsummbm( VectorRegister d, VectorRegister a, VectorRegister b, VectorRegister c); + inline void vmsumshm( VectorRegister d, VectorRegister a, VectorRegister b, VectorRegister c); + inline void vmsumshs( VectorRegister d, VectorRegister a, VectorRegister b, VectorRegister c); + inline void vmsumuhm( VectorRegister d, VectorRegister a, VectorRegister b, VectorRegister c); + inline void vmsumuhs( VectorRegister d, VectorRegister a, VectorRegister b, VectorRegister c); + inline void vsumsws( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vsum2sws( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vsum4sbs( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vsum4ubs( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vsum4shs( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vavgsb( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vavgsw( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vavgsh( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vavgub( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vavguw( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vavguh( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vmaxsb( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vmaxsw( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vmaxsh( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vmaxub( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vmaxuw( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vmaxuh( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vminsb( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vminsw( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vminsh( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vminub( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vminuw( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vminuh( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vcmpequb( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vcmpequh( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vcmpequw( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vcmpgtsh( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vcmpgtsb( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vcmpgtsw( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vcmpgtub( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vcmpgtuh( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vcmpgtuw( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vcmpequb_(VectorRegister d, VectorRegister a, VectorRegister b); + inline void 
vcmpequh_(VectorRegister d, VectorRegister a, VectorRegister b); + inline void vcmpequw_(VectorRegister d, VectorRegister a, VectorRegister b); + inline void vcmpgtsh_(VectorRegister d, VectorRegister a, VectorRegister b); + inline void vcmpgtsb_(VectorRegister d, VectorRegister a, VectorRegister b); + inline void vcmpgtsw_(VectorRegister d, VectorRegister a, VectorRegister b); + inline void vcmpgtub_(VectorRegister d, VectorRegister a, VectorRegister b); + inline void vcmpgtuh_(VectorRegister d, VectorRegister a, VectorRegister b); + inline void vcmpgtuw_(VectorRegister d, VectorRegister a, VectorRegister b); + inline void vand( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vandc( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vnor( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vor( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vxor( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vrlb( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vrlw( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vrlh( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vslb( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vskw( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vslh( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vsrb( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vsrw( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vsrh( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vsrab( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vsraw( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vsrah( VectorRegister d, VectorRegister a, VectorRegister b); + // Vector Floating-Point not implemented yet + inline void mtvscr( VectorRegister b); + inline void mfvscr( VectorRegister d); + + // The following encoders use r0 as second operand. These instructions + // read r0 as '0'. + inline void lwzx( Register d, Register s2); + inline void lwz( Register d, int si16); + inline void lwax( Register d, Register s2); + inline void lwa( Register d, int si16); + inline void lhzx( Register d, Register s2); + inline void lhz( Register d, int si16); + inline void lhax( Register d, Register s2); + inline void lha( Register d, int si16); + inline void lbzx( Register d, Register s2); + inline void lbz( Register d, int si16); + inline void ldx( Register d, Register s2); + inline void ld( Register d, int si16); + inline void stwx( Register d, Register s2); + inline void stw( Register d, int si16); + inline void sthx( Register d, Register s2); + inline void sth( Register d, int si16); + inline void stbx( Register d, Register s2); + inline void stb( Register d, int si16); + inline void stdx( Register d, Register s2); + inline void std( Register d, int si16); + + // PPC 2, section 3.2.1 Instruction Cache Instructions + inline void icbi( Register s2); + // PPC 2, section 3.2.2 Data Cache Instructions + //inlinevoid dcba( Register s2); // Instruction for embedded processor only. 
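The "r0 reads as 0" convention mentioned above is a property of the Power ISA D-form loads and stores: the effective address is (RA|0) + displacement, so RA=0 means a literal zero base, not the contents of R0. That is also why ra0mem() rejects R0 as a base register. A plain C++ model of the effective-address rule:

#include <cstdint>
#include <cstdio>

static uint64_t effective_address(int ra, const uint64_t regs[32], int16_t d) {
  // D-form EA computation: (RA|0) + displacement.
  uint64_t base = (ra == 0) ? 0 : regs[ra];
  return base + (int64_t)d;
}

int main() {
  uint64_t regs[32] = {0};
  regs[0] = 0xdeadbeef;    // contents of R0 are ignored when RA encodes 0
  regs[3] = 0x1000;
  printf("EA(lwz d,8(r3)) = 0x%llx\n",
         (unsigned long long)effective_address(3, regs, 8));   // 0x1008
  printf("EA(lwz d,8(r0)) = 0x%llx\n",
         (unsigned long long)effective_address(0, regs, 8));   // 0x8
  return 0;
}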
+ inline void dcbz( Register s2); + inline void dcbst( Register s2); + inline void dcbf( Register s2); + // dcache read hint + inline void dcbt( Register s2); + inline void dcbtct( Register s2, int ct); + inline void dcbtds( Register s2, int ds); + // dcache write hint + inline void dcbtst( Register s2); + inline void dcbtstct(Register s2, int ct); + + // Atomics: use ra0mem to disallow R0 as base. + inline void lwarx_unchecked(Register d, Register b, int eh1); + inline void ldarx_unchecked(Register d, Register b, int eh1); + inline void lwarx( Register d, Register b, bool hint_exclusive_access); + inline void ldarx( Register d, Register b, bool hint_exclusive_access); + inline void stwcx_(Register s, Register b); + inline void stdcx_(Register s, Register b); + inline void lfs( FloatRegister d, int si16); + inline void lfsx( FloatRegister d, Register b); + inline void lfd( FloatRegister d, int si16); + inline void lfdx( FloatRegister d, Register b); + inline void stfs( FloatRegister s, int si16); + inline void stfsx( FloatRegister s, Register b); + inline void stfd( FloatRegister s, int si16); + inline void stfdx( FloatRegister s, Register b); + inline void lvebx( VectorRegister d, Register s2); + inline void lvehx( VectorRegister d, Register s2); + inline void lvewx( VectorRegister d, Register s2); + inline void lvx( VectorRegister d, Register s2); + inline void lvxl( VectorRegister d, Register s2); + inline void stvebx(VectorRegister d, Register s2); + inline void stvehx(VectorRegister d, Register s2); + inline void stvewx(VectorRegister d, Register s2); + inline void stvx( VectorRegister d, Register s2); + inline void stvxl( VectorRegister d, Register s2); + inline void lvsl( VectorRegister d, Register s2); + inline void lvsr( VectorRegister d, Register s2); + + // RegisterOrConstant versions. + // These emitters choose between the versions using two registers and + // those with register and immediate, depending on the content of roc. + // If the constant is not encodable as immediate, instructions to + // load the constant are emitted beforehand. Store instructions need a + // tmp reg if the constant is not encodable as immediate. + // Size unpredictable. + void ld( Register d, RegisterOrConstant roc, Register s1 = noreg); + void lwa( Register d, RegisterOrConstant roc, Register s1 = noreg); + void lwz( Register d, RegisterOrConstant roc, Register s1 = noreg); + void lha( Register d, RegisterOrConstant roc, Register s1 = noreg); + void lhz( Register d, RegisterOrConstant roc, Register s1 = noreg); + void lbz( Register d, RegisterOrConstant roc, Register s1 = noreg); + void std( Register d, RegisterOrConstant roc, Register s1 = noreg, Register tmp = noreg); + void stw( Register d, RegisterOrConstant roc, Register s1 = noreg, Register tmp = noreg); + void sth( Register d, RegisterOrConstant roc, Register s1 = noreg, Register tmp = noreg); + void stb( Register d, RegisterOrConstant roc, Register s1 = noreg, Register tmp = noreg); + void add( Register d, RegisterOrConstant roc, Register s1); + void subf(Register d, RegisterOrConstant roc, Register s1); + void cmpd(ConditionRegister d, RegisterOrConstant roc, Register s1); + + + // Emit several instructions to load a 64 bit constant. This issues a fixed + // instruction pattern so that the constant can be patched later on. 
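The fixed pattern referred to above needs five instructions for an arbitrary 64-bit constant, which is where the load_const_size constant that follows comes from. The VM's exact emission order is not reproduced here; the sketch below just prints one standard lis/ori/sldi/oris/ori sequence so the split into five 16-bit pieces is visible.

#include <cstdint>
#include <cstdio>

int main() {
  uint64_t x = 0x123456789abcdef0ull;
  uint16_t hi_hi = (uint16_t)(x >> 48), hi_lo = (uint16_t)(x >> 32);
  uint16_t lo_hi = (uint16_t)(x >> 16), lo_lo = (uint16_t)x;
  printf("lis  d, 0x%04x\n", hi_hi);     // d = hi_hi << 16 (sign-extended)
  printf("ori  d, d, 0x%04x\n", hi_lo);  // d |= hi_lo
  printf("sldi d, d, 32\n");             // move the upper half into place
  printf("oris d, d, 0x%04x\n", lo_hi);  // d |= lo_hi << 16
  printf("ori  d, d, 0x%04x\n", lo_lo);  // d |= lo_lo
  return 0;
}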
+ enum { + load_const_size = 5 * BytesPerInstWord + }; + void load_const(Register d, long a, Register tmp = noreg); + inline void load_const(Register d, void* a, Register tmp = noreg); + inline void load_const(Register d, Label& L, Register tmp = noreg); + inline void load_const(Register d, AddressLiteral& a, Register tmp = noreg); + + // Load a 64 bit constant, optimized, not identifyable. + // Tmp can be used to increase ILP. Set return_simm16_rest=true to get a + // 16 bit immediate offset. This is useful if the offset can be encoded in + // a succeeding instruction. + int load_const_optimized(Register d, long a, Register tmp = noreg, bool return_simm16_rest = false); + inline int load_const_optimized(Register d, void* a, Register tmp = noreg, bool return_simm16_rest = false) { + return load_const_optimized(d, (long)(unsigned long)a, tmp, return_simm16_rest); + } + + // Creation + Assembler(CodeBuffer* code) : AbstractAssembler(code) { +#ifdef CHECK_DELAY + delay_state = no_delay; +#endif + } + + // Testing +#ifndef PRODUCT + void test_asm(); +#endif +}; + + +#endif // CPU_PPC_VM_ASSEMBLER_PPC_HPP diff --git a/src/cpu/ppc/vm/assembler_ppc.inline.hpp b/src/cpu/ppc/vm/assembler_ppc.inline.hpp new file mode 100644 --- /dev/null +++ b/src/cpu/ppc/vm/assembler_ppc.inline.hpp @@ -0,0 +1,792 @@ +/* + * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright 2012, 2013 SAP AG. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_PPC_VM_ASSEMBLER_PPC_INLINE_HPP +#define CPU_PPC_VM_ASSEMBLER_PPC_INLINE_HPP + +#include "asm/assembler.inline.hpp" +#include "asm/codeBuffer.hpp" +#include "code/codeCache.hpp" + +inline void Assembler::emit_int32(int x) { + AbstractAssembler::emit_int32(x); +} + +inline void Assembler::emit_data(int x) { + emit_int32(x); +} + +inline void Assembler::emit_data(int x, relocInfo::relocType rtype) { + relocate(rtype); + emit_int32(x); +} + +inline void Assembler::emit_data(int x, RelocationHolder const& rspec) { + relocate(rspec); + emit_int32(x); +} + +// Emit an address +inline address Assembler::emit_addr(const address addr) { + address start = pc(); + emit_address(addr); + return start; +} + +// Emit a function descriptor with the specified entry point, TOC, and +// ENV. If the entry point is NULL, the descriptor will point just +// past the descriptor. 
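The function-descriptor emitter documented above reflects the 64-bit ELF ABI used on Linux/PPC64 at the time (ABIv1), where a function pointer designates a three-slot descriptor (entry, TOC, environment) rather than code. The portable model below illustrates that layout and the "NULL entry points just past the descriptor" rule; the struct and field names are this example's, not the VM's FunctionDescriptor class.

#include <cstdio>

struct FnDescriptor {
  void* entry;   // address of the first instruction of the function
  void* toc;     // TOC base (r2) the callee expects
  void* env;     // environment pointer (r11)
};

// Mimic the documented convention: a NULL entry means "the code starts
// immediately after the descriptor itself".
static void fill_descriptor(FnDescriptor* fd, void* entry, void* toc, void* env) {
  fd->entry = (entry != nullptr) ? entry : (void*)(fd + 1);
  fd->toc   = toc;
  fd->env   = env;
}

int main() {
  alignas(FnDescriptor) unsigned char buf[sizeof(FnDescriptor) + 16];  // descriptor + "code"
  FnDescriptor* fd = (FnDescriptor*)buf;
  fill_descriptor(fd, nullptr, nullptr, nullptr);
  printf("descriptor at %p, entry -> %p\n", (void*)fd, fd->entry);
  return 0;
}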
+inline address Assembler::emit_fd(address entry, address toc, address env) { + FunctionDescriptor* fd = (FunctionDescriptor*)pc(); + + assert(sizeof(FunctionDescriptor) == 3*sizeof(address), "function descriptor size"); + + (void)emit_addr(); + (void)emit_addr(); + (void)emit_addr(); + + fd->set_entry(entry == NULL ? pc() : entry); + fd->set_toc(toc); + fd->set_env(env); + + return (address)fd; +} + +// Issue an illegal instruction. 0 is guaranteed to be an illegal instruction. +inline void Assembler::illtrap() { Assembler::emit_int32(0); } +inline bool Assembler::is_illtrap(int x) { return x == 0; } + +// PPC 1, section 3.3.8, Fixed-Point Arithmetic Instructions +inline void Assembler::addi( Register d, Register a, int si16) { assert(a != R0, "r0 not allowed"); addi_r0ok( d, a, si16); } +inline void Assembler::addis( Register d, Register a, int si16) { assert(a != R0, "r0 not allowed"); addis_r0ok(d, a, si16); } +inline void Assembler::addi_r0ok(Register d,Register a,int si16) { emit_int32(ADDI_OPCODE | rt(d) | ra(a) | simm(si16, 16)); } +inline void Assembler::addis_r0ok(Register d,Register a,int si16) { emit_int32(ADDIS_OPCODE | rt(d) | ra(a) | simm(si16, 16)); } +inline void Assembler::addic_( Register d, Register a, int si16) { emit_int32(ADDIC__OPCODE | rt(d) | ra(a) | simm(si16, 16)); } +inline void Assembler::subfic( Register d, Register a, int si16) { emit_int32(SUBFIC_OPCODE | rt(d) | ra(a) | simm(si16, 16)); } +inline void Assembler::add( Register d, Register a, Register b) { emit_int32(ADD_OPCODE | rt(d) | ra(a) | rb(b) | oe(0) | rc(0)); } +inline void Assembler::add_( Register d, Register a, Register b) { emit_int32(ADD_OPCODE | rt(d) | ra(a) | rb(b) | oe(0) | rc(1)); } +inline void Assembler::subf( Register d, Register a, Register b) { emit_int32(SUBF_OPCODE | rt(d) | ra(a) | rb(b) | oe(0) | rc(0)); } +inline void Assembler::sub( Register d, Register a, Register b) { subf(d, b, a); } +inline void Assembler::subf_( Register d, Register a, Register b) { emit_int32(SUBF_OPCODE | rt(d) | ra(a) | rb(b) | oe(0) | rc(1)); } +inline void Assembler::addc( Register d, Register a, Register b) { emit_int32(ADDC_OPCODE | rt(d) | ra(a) | rb(b) | oe(0) | rc(0)); } +inline void Assembler::addc_( Register d, Register a, Register b) { emit_int32(ADDC_OPCODE | rt(d) | ra(a) | rb(b) | oe(0) | rc(1)); } +inline void Assembler::subfc( Register d, Register a, Register b) { emit_int32(SUBFC_OPCODE | rt(d) | ra(a) | rb(b) | oe(0) | rc(0)); } +inline void Assembler::subfc_( Register d, Register a, Register b) { emit_int32(SUBFC_OPCODE | rt(d) | ra(a) | rb(b) | oe(0) | rc(1)); } +inline void Assembler::adde( Register d, Register a, Register b) { emit_int32(ADDE_OPCODE | rt(d) | ra(a) | rb(b) | oe(0) | rc(0)); } +inline void Assembler::adde_( Register d, Register a, Register b) { emit_int32(ADDE_OPCODE | rt(d) | ra(a) | rb(b) | oe(0) | rc(1)); } +inline void Assembler::subfe( Register d, Register a, Register b) { emit_int32(SUBFE_OPCODE | rt(d) | ra(a) | rb(b) | oe(0) | rc(0)); } +inline void Assembler::subfe_( Register d, Register a, Register b) { emit_int32(SUBFE_OPCODE | rt(d) | ra(a) | rb(b) | oe(0) | rc(1)); } +inline void Assembler::neg( Register d, Register a) { emit_int32(NEG_OPCODE | rt(d) | ra(a) | oe(0) | rc(0)); } +inline void Assembler::neg_( Register d, Register a) { emit_int32(NEG_OPCODE | rt(d) | ra(a) | oe(0) | rc(1)); } +inline void Assembler::mulli( Register d, Register a, int si16) { emit_int32(MULLI_OPCODE | rt(d) | ra(a) | simm(si16, 16)); } +inline void Assembler::mulld( Register 
d, Register a, Register b) { emit_int32(MULLD_OPCODE | rt(d) | ra(a) | rb(b) | oe(0) | rc(0)); } +inline void Assembler::mulld_( Register d, Register a, Register b) { emit_int32(MULLD_OPCODE | rt(d) | ra(a) | rb(b) | oe(0) | rc(1)); } +inline void Assembler::mullw( Register d, Register a, Register b) { emit_int32(MULLW_OPCODE | rt(d) | ra(a) | rb(b) | oe(0) | rc(0)); } +inline void Assembler::mullw_( Register d, Register a, Register b) { emit_int32(MULLW_OPCODE | rt(d) | ra(a) | rb(b) | oe(0) | rc(1)); } +inline void Assembler::mulhw( Register d, Register a, Register b) { emit_int32(MULHW_OPCODE | rt(d) | ra(a) | rb(b) | rc(0)); } +inline void Assembler::mulhw_( Register d, Register a, Register b) { emit_int32(MULHW_OPCODE | rt(d) | ra(a) | rb(b) | rc(1)); } +inline void Assembler::mulhd( Register d, Register a, Register b) { emit_int32(MULHD_OPCODE | rt(d) | ra(a) | rb(b) | rc(0)); } +inline void Assembler::mulhd_( Register d, Register a, Register b) { emit_int32(MULHD_OPCODE | rt(d) | ra(a) | rb(b) | rc(1)); } +inline void Assembler::mulhdu( Register d, Register a, Register b) { emit_int32(MULHDU_OPCODE | rt(d) | ra(a) | rb(b) | rc(0)); } +inline void Assembler::mulhdu_(Register d, Register a, Register b) { emit_int32(MULHDU_OPCODE | rt(d) | ra(a) | rb(b) | rc(1)); } +inline void Assembler::divd( Register d, Register a, Register b) { emit_int32(DIVD_OPCODE | rt(d) | ra(a) | rb(b) | oe(0) | rc(0)); } +inline void Assembler::divd_( Register d, Register a, Register b) { emit_int32(DIVD_OPCODE | rt(d) | ra(a) | rb(b) | oe(0) | rc(1)); } +inline void Assembler::divw( Register d, Register a, Register b) { emit_int32(DIVW_OPCODE | rt(d) | ra(a) | rb(b) | oe(0) | rc(0)); } +inline void Assembler::divw_( Register d, Register a, Register b) { emit_int32(DIVW_OPCODE | rt(d) | ra(a) | rb(b) | oe(0) | rc(1)); } + +// extended mnemonics +inline void Assembler::li( Register d, int si16) { Assembler::addi_r0ok( d, R0, si16); } +inline void Assembler::lis( Register d, int si16) { Assembler::addis_r0ok(d, R0, si16); } +inline void Assembler::addir(Register d, int si16, Register a) { Assembler::addi(d, a, si16); } + +// PPC 1, section 3.3.9, Fixed-Point Compare Instructions +inline void Assembler::cmpi( ConditionRegister f, int l, Register a, int si16) { emit_int32( CMPI_OPCODE | bf(f) | l10(l) | ra(a) | simm(si16,16)); } +inline void Assembler::cmp( ConditionRegister f, int l, Register a, Register b) { emit_int32( CMP_OPCODE | bf(f) | l10(l) | ra(a) | rb(b)); } +inline void Assembler::cmpli( ConditionRegister f, int l, Register a, int ui16) { emit_int32( CMPLI_OPCODE | bf(f) | l10(l) | ra(a) | uimm(ui16,16)); } +inline void Assembler::cmpl( ConditionRegister f, int l, Register a, Register b) { emit_int32( CMPL_OPCODE | bf(f) | l10(l) | ra(a) | rb(b)); } + +// extended mnemonics of Compare Instructions +inline void Assembler::cmpwi( ConditionRegister crx, Register a, int si16) { Assembler::cmpi( crx, 0, a, si16); } +inline void Assembler::cmpdi( ConditionRegister crx, Register a, int si16) { Assembler::cmpi( crx, 1, a, si16); } +inline void Assembler::cmpw( ConditionRegister crx, Register a, Register b) { Assembler::cmp( crx, 0, a, b); } +inline void Assembler::cmpd( ConditionRegister crx, Register a, Register b) { Assembler::cmp( crx, 1, a, b); } +inline void Assembler::cmplwi(ConditionRegister crx, Register a, int ui16) { Assembler::cmpli(crx, 0, a, ui16); } +inline void Assembler::cmpldi(ConditionRegister crx, Register a, int ui16) { Assembler::cmpli(crx, 1, a, ui16); } +inline void Assembler::cmplw( 
ConditionRegister crx, Register a, Register b) { Assembler::cmpl( crx, 0, a, b); } +inline void Assembler::cmpld( ConditionRegister crx, Register a, Register b) { Assembler::cmpl( crx, 1, a, b); } + +inline void Assembler::isel(Register d, Register a, Register b, int c) { emit_int32(ISEL_OPCODE | rt(d) | ra(a) | rb(b) | bc(c)); } + +// PPC 1, section 3.3.11, Fixed-Point Logical Instructions +inline void Assembler::andi_( Register a, Register s, int ui16) { emit_int32(ANDI_OPCODE | rta(a) | rs(s) | uimm(ui16, 16)); } +inline void Assembler::andis_( Register a, Register s, int ui16) { emit_int32(ANDIS_OPCODE | rta(a) | rs(s) | uimm(ui16, 16)); } +inline void Assembler::ori( Register a, Register s, int ui16) { emit_int32(ORI_OPCODE | rta(a) | rs(s) | uimm(ui16, 16)); } +inline void Assembler::oris( Register a, Register s, int ui16) { emit_int32(ORIS_OPCODE | rta(a) | rs(s) | uimm(ui16, 16)); } +inline void Assembler::xori( Register a, Register s, int ui16) { emit_int32(XORI_OPCODE | rta(a) | rs(s) | uimm(ui16, 16)); } +inline void Assembler::xoris( Register a, Register s, int ui16) { emit_int32(XORIS_OPCODE | rta(a) | rs(s) | uimm(ui16, 16)); } +inline void Assembler::andr( Register a, Register s, Register b) { emit_int32(AND_OPCODE | rta(a) | rs(s) | rb(b) | rc(0)); } +inline void Assembler::and_( Register a, Register s, Register b) { emit_int32(AND_OPCODE | rta(a) | rs(s) | rb(b) | rc(1)); } + +inline void Assembler::or_unchecked(Register a, Register s, Register b){ emit_int32(OR_OPCODE | rta(a) | rs(s) | rb(b) | rc(0)); } +inline void Assembler::orr( Register a, Register s, Register b) { if (a==s && s==b) { Assembler::nop(); } else { Assembler::or_unchecked(a,s,b); } } +inline void Assembler::or_( Register a, Register s, Register b) { emit_int32(OR_OPCODE | rta(a) | rs(s) | rb(b) | rc(1)); } +inline void Assembler::xorr( Register a, Register s, Register b) { emit_int32(XOR_OPCODE | rta(a) | rs(s) | rb(b) | rc(0)); } +inline void Assembler::xor_( Register a, Register s, Register b) { emit_int32(XOR_OPCODE | rta(a) | rs(s) | rb(b) | rc(1)); } +inline void Assembler::nand( Register a, Register s, Register b) { emit_int32(NAND_OPCODE | rta(a) | rs(s) | rb(b) | rc(0)); } +inline void Assembler::nand_( Register a, Register s, Register b) { emit_int32(NAND_OPCODE | rta(a) | rs(s) | rb(b) | rc(1)); } +inline void Assembler::nor( Register a, Register s, Register b) { emit_int32(NOR_OPCODE | rta(a) | rs(s) | rb(b) | rc(0)); } +inline void Assembler::nor_( Register a, Register s, Register b) { emit_int32(NOR_OPCODE | rta(a) | rs(s) | rb(b) | rc(1)); } +inline void Assembler::andc( Register a, Register s, Register b) { emit_int32(ANDC_OPCODE | rta(a) | rs(s) | rb(b) | rc(0)); } +inline void Assembler::andc_( Register a, Register s, Register b) { emit_int32(ANDC_OPCODE | rta(a) | rs(s) | rb(b) | rc(1)); } +inline void Assembler::orc( Register a, Register s, Register b) { emit_int32(ORC_OPCODE | rta(a) | rs(s) | rb(b) | rc(0)); } +inline void Assembler::orc_( Register a, Register s, Register b) { emit_int32(ORC_OPCODE | rta(a) | rs(s) | rb(b) | rc(1)); } +inline void Assembler::extsb( Register a, Register s) { emit_int32(EXTSB_OPCODE | rta(a) | rs(s) | rc(0)); } +inline void Assembler::extsh( Register a, Register s) { emit_int32(EXTSH_OPCODE | rta(a) | rs(s) | rc(0)); } +inline void Assembler::extsw( Register a, Register s) { emit_int32(EXTSW_OPCODE | rta(a) | rs(s) | rc(0)); } + +// extended mnemonics +inline void Assembler::nop() { Assembler::ori(R0, R0, 0); } +// NOP for FP and BR units (different 
versions to allow them to be in one group) +inline void Assembler::fpnop0() { Assembler::fmr(F30, F30); } +inline void Assembler::fpnop1() { Assembler::fmr(F31, F31); } +inline void Assembler::brnop0() { Assembler::mcrf(CCR2, CCR2); } +inline void Assembler::brnop1() { Assembler::mcrf(CCR3, CCR3); } +inline void Assembler::brnop2() { Assembler::mcrf(CCR4, CCR4); } + +inline void Assembler::mr( Register d, Register s) { Assembler::orr(d, s, s); } +inline void Assembler::ori_opt( Register d, int ui16) { if (ui16!=0) Assembler::ori( d, d, ui16); } +inline void Assembler::oris_opt(Register d, int ui16) { if (ui16!=0) Assembler::oris(d, d, ui16); } + +inline void Assembler::endgroup() { Assembler::ori(R1, R1, 0); } + +// count instructions +inline void Assembler::cntlzw( Register a, Register s) { emit_int32(CNTLZW_OPCODE | rta(a) | rs(s) | rc(0)); } +inline void Assembler::cntlzw_( Register a, Register s) { emit_int32(CNTLZW_OPCODE | rta(a) | rs(s) | rc(1)); } +inline void Assembler::cntlzd( Register a, Register s) { emit_int32(CNTLZD_OPCODE | rta(a) | rs(s) | rc(0)); } +inline void Assembler::cntlzd_( Register a, Register s) { emit_int32(CNTLZD_OPCODE | rta(a) | rs(s) | rc(1)); } + +// PPC 1, section 3.3.12, Fixed-Point Rotate and Shift Instructions +inline void Assembler::sld( Register a, Register s, Register b) { emit_int32(SLD_OPCODE | rta(a) | rs(s) | rb(b) | rc(0)); } +inline void Assembler::sld_( Register a, Register s, Register b) { emit_int32(SLD_OPCODE | rta(a) | rs(s) | rb(b) | rc(1)); } +inline void Assembler::slw( Register a, Register s, Register b) { emit_int32(SLW_OPCODE | rta(a) | rs(s) | rb(b) | rc(0)); } +inline void Assembler::slw_( Register a, Register s, Register b) { emit_int32(SLW_OPCODE | rta(a) | rs(s) | rb(b) | rc(1)); } +inline void Assembler::srd( Register a, Register s, Register b) { emit_int32(SRD_OPCODE | rta(a) | rs(s) | rb(b) | rc(0)); } +inline void Assembler::srd_( Register a, Register s, Register b) { emit_int32(SRD_OPCODE | rta(a) | rs(s) | rb(b) | rc(1)); } +inline void Assembler::srw( Register a, Register s, Register b) { emit_int32(SRW_OPCODE | rta(a) | rs(s) | rb(b) | rc(0)); } +inline void Assembler::srw_( Register a, Register s, Register b) { emit_int32(SRW_OPCODE | rta(a) | rs(s) | rb(b) | rc(1)); } +inline void Assembler::srad( Register a, Register s, Register b) { emit_int32(SRAD_OPCODE | rta(a) | rs(s) | rb(b) | rc(0)); } +inline void Assembler::srad_( Register a, Register s, Register b) { emit_int32(SRAD_OPCODE | rta(a) | rs(s) | rb(b) | rc(1)); } +inline void Assembler::sraw( Register a, Register s, Register b) { emit_int32(SRAW_OPCODE | rta(a) | rs(s) | rb(b) | rc(0)); } +inline void Assembler::sraw_( Register a, Register s, Register b) { emit_int32(SRAW_OPCODE | rta(a) | rs(s) | rb(b) | rc(1)); } +inline void Assembler::sradi( Register a, Register s, int sh6) { emit_int32(SRADI_OPCODE | rta(a) | rs(s) | sh162030(sh6) | rc(0)); } +inline void Assembler::sradi_( Register a, Register s, int sh6) { emit_int32(SRADI_OPCODE | rta(a) | rs(s) | sh162030(sh6) | rc(1)); } +inline void Assembler::srawi( Register a, Register s, int sh5) { emit_int32(SRAWI_OPCODE | rta(a) | rs(s) | sh1620(sh5) | rc(0)); } +inline void Assembler::srawi_( Register a, Register s, int sh5) { emit_int32(SRAWI_OPCODE | rta(a) | rs(s) | sh1620(sh5) | rc(1)); } + +// extended mnemonics for Shift Instructions +inline void Assembler::sldi( Register a, Register s, int sh6) { Assembler::rldicr(a, s, sh6, 63-sh6); } +inline void Assembler::sldi_( Register a, Register s, int sh6) { 
Assembler::rldicr_(a, s, sh6, 63-sh6); } +inline void Assembler::slwi( Register a, Register s, int sh5) { Assembler::rlwinm(a, s, sh5, 0, 31-sh5); } +inline void Assembler::slwi_( Register a, Register s, int sh5) { Assembler::rlwinm_(a, s, sh5, 0, 31-sh5); } +inline void Assembler::srdi( Register a, Register s, int sh6) { Assembler::rldicl(a, s, 64-sh6, sh6); } +inline void Assembler::srdi_( Register a, Register s, int sh6) { Assembler::rldicl_(a, s, 64-sh6, sh6); } +inline void Assembler::srwi( Register a, Register s, int sh5) { Assembler::rlwinm(a, s, 32-sh5, sh5, 31); } +inline void Assembler::srwi_( Register a, Register s, int sh5) { Assembler::rlwinm_(a, s, 32-sh5, sh5, 31); } + +inline void Assembler::clrrdi( Register a, Register s, int ui6) { Assembler::rldicr(a, s, 0, 63-ui6); } +inline void Assembler::clrrdi_( Register a, Register s, int ui6) { Assembler::rldicr_(a, s, 0, 63-ui6); } +inline void Assembler::clrldi( Register a, Register s, int ui6) { Assembler::rldicl(a, s, 0, ui6); } +inline void Assembler::clrldi_( Register a, Register s, int ui6) { Assembler::rldicl_(a, s, 0, ui6); } +inline void Assembler::clrlsldi( Register a, Register s, int clrl6, int shl6) { Assembler::rldic( a, s, shl6, clrl6-shl6); } +inline void Assembler::clrlsldi_(Register a, Register s, int clrl6, int shl6) { Assembler::rldic_(a, s, shl6, clrl6-shl6); } +inline void Assembler::extrdi( Register a, Register s, int n, int b){ Assembler::rldicl(a, s, b+n, 64-n); } +// testbit with condition register. +inline void Assembler::testbitdi(ConditionRegister cr, Register a, Register s, int ui6) { + Assembler::rldicr(a, s, 63-ui6, 0); + Assembler::cmpdi(cr, a, 0); +} + +// rotate instructions +inline void Assembler::rotldi( Register a, Register s, int n) { Assembler::rldicl(a, s, n, 0); } +inline void Assembler::rotrdi( Register a, Register s, int n) { Assembler::rldicl(a, s, 64-n, 0); } +inline void Assembler::rotlwi( Register a, Register s, int n) { Assembler::rlwinm(a, s, n, 0, 31); } +inline void Assembler::rotrwi( Register a, Register s, int n) { Assembler::rlwinm(a, s, 32-n, 0, 31); } + +inline void Assembler::rldic( Register a, Register s, int sh6, int mb6) { emit_int32(RLDIC_OPCODE | rta(a) | rs(s) | sh162030(sh6) | mb2126(mb6) | rc(0)); } +inline void Assembler::rldic_( Register a, Register s, int sh6, int mb6) { emit_int32(RLDIC_OPCODE | rta(a) | rs(s) | sh162030(sh6) | mb2126(mb6) | rc(1)); } +inline void Assembler::rldicr( Register a, Register s, int sh6, int mb6) { emit_int32(RLDICR_OPCODE | rta(a) | rs(s) | sh162030(sh6) | mb2126(mb6) | rc(0)); } +inline void Assembler::rldicr_( Register a, Register s, int sh6, int mb6) { emit_int32(RLDICR_OPCODE | rta(a) | rs(s) | sh162030(sh6) | mb2126(mb6) | rc(1)); } +inline void Assembler::rldicl( Register a, Register s, int sh6, int me6) { emit_int32(RLDICL_OPCODE | rta(a) | rs(s) | sh162030(sh6) | me2126(me6) | rc(0)); } +inline void Assembler::rldicl_( Register a, Register s, int sh6, int me6) { emit_int32(RLDICL_OPCODE | rta(a) | rs(s) | sh162030(sh6) | me2126(me6) | rc(1)); } +inline void Assembler::rlwinm( Register a, Register s, int sh5, int mb5, int me5){ emit_int32(RLWINM_OPCODE | rta(a) | rs(s) | sh1620(sh5) | mb2125(mb5) | me2630(me5) | rc(0)); } +inline void Assembler::rlwinm_( Register a, Register s, int sh5, int mb5, int me5){ emit_int32(RLWINM_OPCODE | rta(a) | rs(s) | sh1620(sh5) | mb2125(mb5) | me2630(me5) | rc(1)); } +inline void Assembler::rldimi( Register a, Register s, int sh6, int mb6) { emit_int32(RLDIMI_OPCODE | rta(a) | rs(s) | 
sh162030(sh6) | mb2126(mb6) | rc(0)); } +inline void Assembler::rlwimi( Register a, Register s, int sh5, int mb5, int me5){ emit_int32(RLWIMI_OPCODE | rta(a) | rs(s) | sh1620(sh5) | mb2125(mb5) | me2630(me5) | rc(0)); } +inline void Assembler::rldimi_( Register a, Register s, int sh6, int mb6) { emit_int32(RLDIMI_OPCODE | rta(a) | rs(s) | sh162030(sh6) | mb2126(mb6) | rc(1)); } +inline void Assembler::insrdi( Register a, Register s, int n, int b) { Assembler::rldimi(a, s, 64-(b+n), b); } +inline void Assembler::insrwi( Register a, Register s, int n, int b) { Assembler::rlwimi(a, s, 32-(b+n), b, b+n-1); } + +// PPC 1, section 3.3.2 Fixed-Point Load Instructions +inline void Assembler::lwzx( Register d, Register s1, Register s2) { emit_int32(LWZX_OPCODE | rt(d) | ra0mem(s1) | rb(s2));} +inline void Assembler::lwz( Register d, int si16, Register s1) { emit_int32(LWZ_OPCODE | rt(d) | d1(si16) | ra0mem(s1));} +inline void Assembler::lwzu( Register d, int si16, Register s1) { assert(d != s1, "according to ibm manual"); emit_int32(LWZU_OPCODE | rt(d) | d1(si16) | rta0mem(s1));} + +inline void Assembler::lwax( Register d, Register s1, Register s2) { emit_int32(LWAX_OPCODE | rt(d) | ra0mem(s1) | rb(s2));} +inline void Assembler::lwa( Register d, int si16, Register s1) { emit_int32(LWA_OPCODE | rt(d) | ds(si16) | ra0mem(s1));} + +inline void Assembler::lhzx( Register d, Register s1, Register s2) { emit_int32(LHZX_OPCODE | rt(d) | ra0mem(s1) | rb(s2));} +inline void Assembler::lhz( Register d, int si16, Register s1) { emit_int32(LHZ_OPCODE | rt(d) | d1(si16) | ra0mem(s1));} +inline void Assembler::lhzu( Register d, int si16, Register s1) { assert(d != s1, "according to ibm manual"); emit_int32(LHZU_OPCODE | rt(d) | d1(si16) | rta0mem(s1));} + +inline void Assembler::lhax( Register d, Register s1, Register s2) { emit_int32(LHAX_OPCODE | rt(d) | ra0mem(s1) | rb(s2));} +inline void Assembler::lha( Register d, int si16, Register s1) { emit_int32(LHA_OPCODE | rt(d) | d1(si16) | ra0mem(s1));} +inline void Assembler::lhau( Register d, int si16, Register s1) { assert(d != s1, "according to ibm manual"); emit_int32(LHAU_OPCODE | rt(d) | d1(si16) | rta0mem(s1));} + +inline void Assembler::lbzx( Register d, Register s1, Register s2) { emit_int32(LBZX_OPCODE | rt(d) | ra0mem(s1) | rb(s2));} +inline void Assembler::lbz( Register d, int si16, Register s1) { emit_int32(LBZ_OPCODE | rt(d) | d1(si16) | ra0mem(s1));} +inline void Assembler::lbzu( Register d, int si16, Register s1) { assert(d != s1, "according to ibm manual"); emit_int32(LBZU_OPCODE | rt(d) | d1(si16) | rta0mem(s1));} + +inline void Assembler::ld( Register d, int si16, Register s1) { emit_int32(LD_OPCODE | rt(d) | ds(si16) | ra0mem(s1));} +inline void Assembler::ldx( Register d, Register s1, Register s2) { emit_int32(LDX_OPCODE | rt(d) | ra0mem(s1) | rb(s2));} +inline void Assembler::ldu( Register d, int si16, Register s1) { assert(d != s1, "according to ibm manual"); emit_int32(LDU_OPCODE | rt(d) | ds(si16) | rta0mem(s1));} + +// PPC 1, section 3.3.3 Fixed-Point Store Instructions +inline void Assembler::stwx( Register d, Register s1, Register s2) { emit_int32(STWX_OPCODE | rs(d) | ra0mem(s1) | rb(s2));} +inline void Assembler::stw( Register d, int si16, Register s1) { emit_int32(STW_OPCODE | rs(d) | d1(si16) | ra0mem(s1));} +inline void Assembler::stwu( Register d, int si16, Register s1) { emit_int32(STWU_OPCODE | rs(d) | d1(si16) | rta0mem(s1));} + +inline void Assembler::sthx( Register d, Register s1, Register s2) { emit_int32(STHX_OPCODE | rs(d) | 
ra0mem(s1) | rb(s2));} +inline void Assembler::sth( Register d, int si16, Register s1) { emit_int32(STH_OPCODE | rs(d) | d1(si16) | ra0mem(s1));} +inline void Assembler::sthu( Register d, int si16, Register s1) { emit_int32(STHU_OPCODE | rs(d) | d1(si16) | rta0mem(s1));} + +inline void Assembler::stbx( Register d, Register s1, Register s2) { emit_int32(STBX_OPCODE | rs(d) | ra0mem(s1) | rb(s2));} +inline void Assembler::stb( Register d, int si16, Register s1) { emit_int32(STB_OPCODE | rs(d) | d1(si16) | ra0mem(s1));} +inline void Assembler::stbu( Register d, int si16, Register s1) { emit_int32(STBU_OPCODE | rs(d) | d1(si16) | rta0mem(s1));} + +inline void Assembler::std( Register d, int si16, Register s1) { emit_int32(STD_OPCODE | rs(d) | ds(si16) | ra0mem(s1));} +inline void Assembler::stdx( Register d, Register s1, Register s2) { emit_int32(STDX_OPCODE | rs(d) | ra0mem(s1) | rb(s2));} +inline void Assembler::stdu( Register d, int si16, Register s1) { emit_int32(STDU_OPCODE | rs(d) | ds(si16) | rta0mem(s1));} +inline void Assembler::stdux(Register s, Register a, Register b) { emit_int32(STDUX_OPCODE| rs(s) | rta0mem(a) | rb(b));} + +// PPC 1, section 3.3.13 Move To/From System Register Instructions +inline void Assembler::mtlr( Register s1) { emit_int32(MTLR_OPCODE | rs(s1)); } +inline void Assembler::mflr( Register d ) { emit_int32(MFLR_OPCODE | rt(d)); } +inline void Assembler::mtctr(Register s1) { emit_int32(MTCTR_OPCODE | rs(s1)); } +inline void Assembler::mfctr(Register d ) { emit_int32(MFCTR_OPCODE | rt(d)); } +inline void Assembler::mtcrf(int afxm, Register s){ emit_int32(MTCRF_OPCODE | fxm(afxm) | rs(s)); } +inline void Assembler::mfcr( Register d ) { emit_int32(MFCR_OPCODE | rt(d)); } +inline void Assembler::mcrf( ConditionRegister crd, ConditionRegister cra) + { emit_int32(MCRF_OPCODE | bf(crd) | bfa(cra)); } +inline void Assembler::mtcr( Register s) { Assembler::mtcrf(0xff, s); } + +// SAP JVM 2006-02-13 PPC branch instruction. 
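The PC-relative branch forms defined in the next block have limited displacement fields, so call sites pair them with the range helpers is_within_range_of_b / is_within_range_of_bcxx and fall back to a CTR-based branch when the target may be out of reach. A minimal sketch of that pattern, assuming a HotSpot MacroAssembler-style context; the helper name and the registers are hypothetical, only b(), is_within_range_of_b(), load_const() (declared later in this file), mtctr() and bctr() come from this patch:

// Hypothetical far-branch helper: only b(), is_within_range_of_b(), load_const(),
// mtctr() and bctr() are taken from this file; everything else is assumed.
static void branch_possibly_far(Assembler* a, Register R_tmp, address dest) {
  if (a->is_within_range_of_b(dest, a->pc())) {
    a->b(dest);                               // one PC-relative branch is enough
  } else {
    a->load_const(R_tmp, (void*)dest, noreg); // materialize the 64-bit target
    a->mtctr(R_tmp);                          // move it into the count register
    a->bctr(relocInfo::none);                 // branch via CTR, no displacement limit
  }
}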
+// PPC 1, section 2.4.1 Branch Instructions
+inline void Assembler::b( address a, relocInfo::relocType rt) { emit_data(BXX_OPCODE| li(disp( intptr_t(a), intptr_t(pc()))) |aa(0)|lk(0), rt); }
+inline void Assembler::b( Label& L) { b( target(L)); }
+inline void Assembler::bl(address a, relocInfo::relocType rt) { emit_data(BXX_OPCODE| li(disp( intptr_t(a), intptr_t(pc()))) |aa(0)|lk(1), rt); }
+inline void Assembler::bl(Label& L) { bl(target(L)); }
+inline void Assembler::bc( int boint, int biint, address a, relocInfo::relocType rt) { emit_data(BCXX_OPCODE| bo(boint) | bi(biint) | bd(disp( intptr_t(a), intptr_t(pc()))) | aa(0) | lk(0), rt); }
+inline void Assembler::bc( int boint, int biint, Label& L) { bc(boint, biint, target(L)); }
+inline void Assembler::bcl(int boint, int biint, address a, relocInfo::relocType rt) { emit_data(BCXX_OPCODE| bo(boint) | bi(biint) | bd(disp( intptr_t(a), intptr_t(pc()))) | aa(0)|lk(1)); }
+inline void Assembler::bcl(int boint, int biint, Label& L) { bcl(boint, biint, target(L)); }
+
+inline void Assembler::bclr( int boint, int biint, int bhint, relocInfo::relocType rt) { emit_data(BCLR_OPCODE | bo(boint) | bi(biint) | bh(bhint) | aa(0) | lk(0), rt); }
+inline void Assembler::bclrl( int boint, int biint, int bhint, relocInfo::relocType rt) { emit_data(BCLR_OPCODE | bo(boint) | bi(biint) | bh(bhint) | aa(0) | lk(1), rt); }
+inline void Assembler::bcctr( int boint, int biint, int bhint, relocInfo::relocType rt) { emit_data(BCCTR_OPCODE| bo(boint) | bi(biint) | bh(bhint) | aa(0) | lk(0), rt); }
+inline void Assembler::bcctrl(int boint, int biint, int bhint, relocInfo::relocType rt) { emit_data(BCCTR_OPCODE| bo(boint) | bi(biint) | bh(bhint) | aa(0) | lk(1), rt); }
+
+// helper function for b
+inline bool Assembler::is_within_range_of_b(address a, address pc) {
+  // Guard against illegal branch targets, e.g. -1 (see CompiledStaticCall and ad-file).
+  if ((((uint64_t)a) & 0x3) != 0) return false;
+
+  const int range = 1 << (29-6); // li field is from bit 6 to bit 29.
+  int value = disp(intptr_t(a), intptr_t(pc));
+  bool result = -range <= value && value < range-1;
+#ifdef ASSERT
+  if (result) li(value); // Assert that value is in correct range.
+#endif
+  return result;
+}
+
+// helper functions for bcxx.
+inline bool Assembler::is_within_range_of_bcxx(address a, address pc) {
+  // Guard against illegal branch targets, e.g. -1 (see CompiledStaticCall and ad-file).
+  if ((((uint64_t)a) & 0x3) != 0) return false;
+
+  const int range = 1 << (29-16); // bd field is from bit 16 to bit 29.
+  int value = disp(intptr_t(a), intptr_t(pc));
+  bool result = -range <= value && value < range-1;
+#ifdef ASSERT
+  if (result) bd(value); // Assert that value is in correct range.
+#endif
+  return result;
+}
+
+// Get the destination of a bxx branch (b, bl, ba, bla). 
+address Assembler::bxx_destination(address baddr) { return bxx_destination(*(int*)baddr, baddr); } +address Assembler::bxx_destination(int instr, address pc) { return (address)bxx_destination_offset(instr, (intptr_t)pc); } +intptr_t Assembler::bxx_destination_offset(int instr, intptr_t bxx_pos) { + intptr_t displ = inv_li_field(instr); + return bxx_pos + displ; +} + +// Extended mnemonics for Branch Instructions +inline void Assembler::blt(ConditionRegister crx, Label& L) { Assembler::bc(bcondCRbiIs1, bi0(crx, less), L); } +inline void Assembler::bgt(ConditionRegister crx, Label& L) { Assembler::bc(bcondCRbiIs1, bi0(crx, greater), L); } +inline void Assembler::beq(ConditionRegister crx, Label& L) { Assembler::bc(bcondCRbiIs1, bi0(crx, equal), L); } +inline void Assembler::bso(ConditionRegister crx, Label& L) { Assembler::bc(bcondCRbiIs1, bi0(crx, summary_overflow), L); } +inline void Assembler::bge(ConditionRegister crx, Label& L) { Assembler::bc(bcondCRbiIs0, bi0(crx, less), L); } +inline void Assembler::ble(ConditionRegister crx, Label& L) { Assembler::bc(bcondCRbiIs0, bi0(crx, greater), L); } +inline void Assembler::bne(ConditionRegister crx, Label& L) { Assembler::bc(bcondCRbiIs0, bi0(crx, equal), L); } +inline void Assembler::bns(ConditionRegister crx, Label& L) { Assembler::bc(bcondCRbiIs0, bi0(crx, summary_overflow), L); } + +// Branch instructions with static prediction hints. +inline void Assembler::blt_predict_taken (ConditionRegister crx, Label& L) { bc(bcondCRbiIs1_bhintIsTaken, bi0(crx, less), L); } +inline void Assembler::bgt_predict_taken (ConditionRegister crx, Label& L) { bc(bcondCRbiIs1_bhintIsTaken, bi0(crx, greater), L); } +inline void Assembler::beq_predict_taken (ConditionRegister crx, Label& L) { bc(bcondCRbiIs1_bhintIsTaken, bi0(crx, equal), L); } +inline void Assembler::bso_predict_taken (ConditionRegister crx, Label& L) { bc(bcondCRbiIs1_bhintIsTaken, bi0(crx, summary_overflow), L); } +inline void Assembler::bge_predict_taken (ConditionRegister crx, Label& L) { bc(bcondCRbiIs0_bhintIsTaken, bi0(crx, less), L); } +inline void Assembler::ble_predict_taken (ConditionRegister crx, Label& L) { bc(bcondCRbiIs0_bhintIsTaken, bi0(crx, greater), L); } +inline void Assembler::bne_predict_taken (ConditionRegister crx, Label& L) { bc(bcondCRbiIs0_bhintIsTaken, bi0(crx, equal), L); } +inline void Assembler::bns_predict_taken (ConditionRegister crx, Label& L) { bc(bcondCRbiIs0_bhintIsTaken, bi0(crx, summary_overflow), L); } +inline void Assembler::blt_predict_not_taken(ConditionRegister crx, Label& L) { bc(bcondCRbiIs1_bhintIsNotTaken, bi0(crx, less), L); } +inline void Assembler::bgt_predict_not_taken(ConditionRegister crx, Label& L) { bc(bcondCRbiIs1_bhintIsNotTaken, bi0(crx, greater), L); } +inline void Assembler::beq_predict_not_taken(ConditionRegister crx, Label& L) { bc(bcondCRbiIs1_bhintIsNotTaken, bi0(crx, equal), L); } +inline void Assembler::bso_predict_not_taken(ConditionRegister crx, Label& L) { bc(bcondCRbiIs1_bhintIsNotTaken, bi0(crx, summary_overflow), L); } +inline void Assembler::bge_predict_not_taken(ConditionRegister crx, Label& L) { bc(bcondCRbiIs0_bhintIsNotTaken, bi0(crx, less), L); } +inline void Assembler::ble_predict_not_taken(ConditionRegister crx, Label& L) { bc(bcondCRbiIs0_bhintIsNotTaken, bi0(crx, greater), L); } +inline void Assembler::bne_predict_not_taken(ConditionRegister crx, Label& L) { bc(bcondCRbiIs0_bhintIsNotTaken, bi0(crx, equal), L); } +inline void Assembler::bns_predict_not_taken(ConditionRegister crx, Label& L) { 
bc(bcondCRbiIs0_bhintIsNotTaken, bi0(crx, summary_overflow), L); } + +// For use in conjunction with testbitdi: +inline void Assembler::btrue( ConditionRegister crx, Label& L) { Assembler::bne(crx, L); } +inline void Assembler::bfalse(ConditionRegister crx, Label& L) { Assembler::beq(crx, L); } + +inline void Assembler::bltl(ConditionRegister crx, Label& L) { Assembler::bcl(bcondCRbiIs1, bi0(crx, less), L); } +inline void Assembler::bgtl(ConditionRegister crx, Label& L) { Assembler::bcl(bcondCRbiIs1, bi0(crx, greater), L); } +inline void Assembler::beql(ConditionRegister crx, Label& L) { Assembler::bcl(bcondCRbiIs1, bi0(crx, equal), L); } +inline void Assembler::bsol(ConditionRegister crx, Label& L) { Assembler::bcl(bcondCRbiIs1, bi0(crx, summary_overflow), L); } +inline void Assembler::bgel(ConditionRegister crx, Label& L) { Assembler::bcl(bcondCRbiIs0, bi0(crx, less), L); } +inline void Assembler::blel(ConditionRegister crx, Label& L) { Assembler::bcl(bcondCRbiIs0, bi0(crx, greater), L); } +inline void Assembler::bnel(ConditionRegister crx, Label& L) { Assembler::bcl(bcondCRbiIs0, bi0(crx, equal), L); } +inline void Assembler::bnsl(ConditionRegister crx, Label& L) { Assembler::bcl(bcondCRbiIs0, bi0(crx, summary_overflow), L); } + +// Extended mnemonics for Branch Instructions via LR. +// We use `blr' for returns. +inline void Assembler::blr(relocInfo::relocType rt) { Assembler::bclr(bcondAlways, 0, bhintbhBCLRisReturn, rt); } + +// Extended mnemonics for Branch Instructions with CTR. +// Bdnz means `decrement CTR and jump to L if CTR is not zero'. +inline void Assembler::bdnz(Label& L) { Assembler::bc(16, 0, L); } +// Decrement and branch if result is zero. +inline void Assembler::bdz(Label& L) { Assembler::bc(18, 0, L); } +// We use `bctr[l]' for jumps/calls in function descriptor glue +// code, e.g. for calls to runtime functions. +inline void Assembler::bctr( relocInfo::relocType rt) { Assembler::bcctr(bcondAlways, 0, bhintbhBCCTRisNotReturnButSame, rt); } +inline void Assembler::bctrl(relocInfo::relocType rt) { Assembler::bcctrl(bcondAlways, 0, bhintbhBCCTRisNotReturnButSame, rt); } +// Conditional jumps/branches via CTR. 
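Before the conditional CTR branches, a short usage illustration for the CTR mnemonics just defined: a counted loop is set up with mtctr and closed with bdnz. The helper and register names below are hypothetical; the sketch assumes the surrounding HotSpot headers:

// Hypothetical helper that stores R_val into R_cnt consecutive 8-byte slots;
// only mtctr(), bind(), std(), addi() and bdnz() are taken from this file.
static void fill_doublewords(Assembler* a, Register R_cnt, Register R_ptr, Register R_val) {
  Label loop;
  a->mtctr(R_cnt);           // CTR = number of iterations
  a->bind(loop);
  a->std(R_val, 0, R_ptr);   // store one doubleword at R_ptr
  a->addi(R_ptr, R_ptr, 8);  // advance the pointer (R_ptr must not be R0 for addi)
  a->bdnz(loop);             // CTR--; branch back while CTR != 0
}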
+inline void Assembler::beqctr( ConditionRegister crx, relocInfo::relocType rt) { Assembler::bcctr( bcondCRbiIs1, bi0(crx, equal), bhintbhBCCTRisNotReturnButSame, rt); } +inline void Assembler::beqctrl(ConditionRegister crx, relocInfo::relocType rt) { Assembler::bcctrl(bcondCRbiIs1, bi0(crx, equal), bhintbhBCCTRisNotReturnButSame, rt); } +inline void Assembler::bnectr( ConditionRegister crx, relocInfo::relocType rt) { Assembler::bcctr( bcondCRbiIs0, bi0(crx, equal), bhintbhBCCTRisNotReturnButSame, rt); } +inline void Assembler::bnectrl(ConditionRegister crx, relocInfo::relocType rt) { Assembler::bcctrl(bcondCRbiIs0, bi0(crx, equal), bhintbhBCCTRisNotReturnButSame, rt); } + +// condition register logic instructions +inline void Assembler::crand( int d, int s1, int s2) { emit_int32(CRAND_OPCODE | bt(d) | ba(s1) | bb(s2)); } +inline void Assembler::crnand(int d, int s1, int s2) { emit_int32(CRNAND_OPCODE | bt(d) | ba(s1) | bb(s2)); } +inline void Assembler::cror( int d, int s1, int s2) { emit_int32(CROR_OPCODE | bt(d) | ba(s1) | bb(s2)); } +inline void Assembler::crxor( int d, int s1, int s2) { emit_int32(CRXOR_OPCODE | bt(d) | ba(s1) | bb(s2)); } +inline void Assembler::crnor( int d, int s1, int s2) { emit_int32(CRNOR_OPCODE | bt(d) | ba(s1) | bb(s2)); } +inline void Assembler::creqv( int d, int s1, int s2) { emit_int32(CREQV_OPCODE | bt(d) | ba(s1) | bb(s2)); } +inline void Assembler::crandc(int d, int s1, int s2) { emit_int32(CRANDC_OPCODE | bt(d) | ba(s1) | bb(s2)); } +inline void Assembler::crorc( int d, int s1, int s2) { emit_int32(CRORC_OPCODE | bt(d) | ba(s1) | bb(s2)); } + +// PPC 2, section 3.2.1 Instruction Cache Instructions +inline void Assembler::icbi( Register s1, Register s2) { emit_int32( ICBI_OPCODE | ra0mem(s1) | rb(s2) ); } +// PPC 2, section 3.2.2 Data Cache Instructions +//inline void Assembler::dcba( Register s1, Register s2) { emit_int32( DCBA_OPCODE | ra0mem(s1) | rb(s2) ); } +inline void Assembler::dcbz( Register s1, Register s2) { emit_int32( DCBZ_OPCODE | ra0mem(s1) | rb(s2) ); } +inline void Assembler::dcbst( Register s1, Register s2) { emit_int32( DCBST_OPCODE | ra0mem(s1) | rb(s2) ); } +inline void Assembler::dcbf( Register s1, Register s2) { emit_int32( DCBF_OPCODE | ra0mem(s1) | rb(s2) ); } +// dcache read hint +inline void Assembler::dcbt( Register s1, Register s2) { emit_int32( DCBT_OPCODE | ra0mem(s1) | rb(s2) ); } +inline void Assembler::dcbtct( Register s1, Register s2, int ct) { emit_int32( DCBT_OPCODE | ra0mem(s1) | rb(s2) | thct(ct)); } +inline void Assembler::dcbtds( Register s1, Register s2, int ds) { emit_int32( DCBT_OPCODE | ra0mem(s1) | rb(s2) | thds(ds)); } +// dcache write hint +inline void Assembler::dcbtst( Register s1, Register s2) { emit_int32( DCBTST_OPCODE | ra0mem(s1) | rb(s2) ); } +inline void Assembler::dcbtstct(Register s1, Register s2, int ct) { emit_int32( DCBTST_OPCODE | ra0mem(s1) | rb(s2) | thct(ct)); } + +// machine barrier instructions: +inline void Assembler::sync(int a) { emit_int32( SYNC_OPCODE | l910(a)); } +inline void Assembler::sync() { Assembler::sync(0); } +inline void Assembler::lwsync() { Assembler::sync(1); } +inline void Assembler::ptesync() { Assembler::sync(2); } +inline void Assembler::eieio() { emit_int32( EIEIO_OPCODE); } +inline void Assembler::isync() { emit_int32( ISYNC_OPCODE); } + +inline void Assembler::release() { Assembler::lwsync(); } +inline void Assembler::acquire() { Assembler::lwsync(); } +inline void Assembler::fence() { Assembler::sync(); } + +// atomics +// Use ra0mem to disallow R0 as base. 
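The load-reserve/store-conditional pair defined next is normally wrapped in a retry loop and bracketed by the barrier mnemonics above. A minimal compare-and-swap sketch, assuming a HotSpot MacroAssembler-style context; the helper and register names are hypothetical:

// Hypothetical 32-bit compare-and-swap: if *R_addr == R_compare, store R_exchange.
// lwarx()/stwcx_()/cmpw()/bne()/release()/acquire() are the forms defined in this
// file; the register names and the use of CCR0 are placeholders.
static void cas_word(Assembler* a, Register R_addr, Register R_compare,
                     Register R_exchange, Register R_current) {
  Label retry, done;
  a->release();                         // lwsync: publish earlier stores first
  a->bind(retry);
  a->lwarx(R_current, R_addr, false);   // load the word and set the reservation
  a->cmpw(CCR0, R_current, R_compare);  // is the expected value still there?
  a->bne(CCR0, done);                   // no -> give up
  a->stwcx_(R_exchange, R_addr);        // conditional store, sets CCR0.eq on success
  a->bne(CCR0, retry);                  // reservation was lost -> try again
  a->bind(done);
  a->acquire();                         // lwsync: order subsequent loads
}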
+inline void Assembler::lwarx_unchecked(Register d, Register a, Register b, int eh1) { emit_int32( LWARX_OPCODE | rt(d) | ra0mem(a) | rb(b) | eh(eh1)); } +inline void Assembler::ldarx_unchecked(Register d, Register a, Register b, int eh1) { emit_int32( LDARX_OPCODE | rt(d) | ra0mem(a) | rb(b) | eh(eh1)); } +inline bool Assembler::lxarx_hint_exclusive_access() { return VM_Version::has_lxarxeh(); } +inline void Assembler::lwarx( Register d, Register a, Register b, bool hint_exclusive_access) { lwarx_unchecked(d, a, b, (hint_exclusive_access && lxarx_hint_exclusive_access() && UseExtendedLoadAndReserveInstructionsPPC64) ? 1 : 0); } +inline void Assembler::ldarx( Register d, Register a, Register b, bool hint_exclusive_access) { ldarx_unchecked(d, a, b, (hint_exclusive_access && lxarx_hint_exclusive_access() && UseExtendedLoadAndReserveInstructionsPPC64) ? 1 : 0); } +inline void Assembler::stwcx_(Register s, Register a, Register b) { emit_int32( STWCX_OPCODE | rs(s) | ra0mem(a) | rb(b) | rc(1)); } +inline void Assembler::stdcx_(Register s, Register a, Register b) { emit_int32( STDCX_OPCODE | rs(s) | ra0mem(a) | rb(b) | rc(1)); } + +// Instructions for adjusting thread priority +// for simultaneous multithreading (SMT) on POWER5. +inline void Assembler::smt_prio_very_low() { Assembler::or_unchecked(R31, R31, R31); } +inline void Assembler::smt_prio_low() { Assembler::or_unchecked(R1, R1, R1); } +inline void Assembler::smt_prio_medium_low() { Assembler::or_unchecked(R6, R6, R6); } +inline void Assembler::smt_prio_medium() { Assembler::or_unchecked(R2, R2, R2); } +inline void Assembler::smt_prio_medium_high() { Assembler::or_unchecked(R5, R5, R5); } +inline void Assembler::smt_prio_high() { Assembler::or_unchecked(R3, R3, R3); } + +inline void Assembler::twi_0(Register a) { twi_unchecked(0, a, 0);} + +// trap instructions +inline void Assembler::tdi_unchecked(int tobits, Register a, int si16){ emit_int32( TDI_OPCODE | to(tobits) | ra(a) | si(si16)); } +inline void Assembler::twi_unchecked(int tobits, Register a, int si16){ emit_int32( TWI_OPCODE | to(tobits) | ra(a) | si(si16)); } +inline void Assembler::tdi(int tobits, Register a, int si16) { assert(UseSIGTRAP, "precondition"); tdi_unchecked(tobits, a, si16); } +inline void Assembler::twi(int tobits, Register a, int si16) { assert(UseSIGTRAP, "precondition"); twi_unchecked(tobits, a, si16); } +inline void Assembler::td( int tobits, Register a, Register b) { assert(UseSIGTRAP, "precondition"); emit_int32( TD_OPCODE | to(tobits) | ra(a) | rb(b)); } +inline void Assembler::tw( int tobits, Register a, Register b) { assert(UseSIGTRAP, "precondition"); emit_int32( TW_OPCODE | to(tobits) | ra(a) | rb(b)); } + +// FLOATING POINT instructions ppc. +// PPC 1, section 4.6.2 Floating-Point Load Instructions +// Use ra0mem instead of ra in some instructions below. 
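Before the floating-point loads, a brief illustration of the two addressing forms used throughout this file: the D-/DS-form instructions take a 16-bit signed displacement, while the X-form instructions add two registers (R0 cannot be used as the base, since the hardware reads it as the constant 0, which is what the ra0mem encoding enforces). The snippet is hypothetical and assumes the surrounding HotSpot headers:

// Hypothetical snippets contrasting the addressing forms defined in this file.
static void addressing_forms(Assembler* a, Register R_val, Register R_base, Register R_index) {
  a->ld (R_val, 16, R_base);       // DS-form: R_val = *(int64_t*)(R_base + 16)
  a->ldx(R_val, R_base, R_index);  // X-form:  R_val = *(int64_t*)(R_base + R_index)
  a->lfd(F1, 24, R_base);          // D-form FP load: F1 = *(double*)(R_base + 24)
  a->stw(R_val, 0, R_base);        // store the low 32 bits of R_val at R_base
}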
+inline void Assembler::lfs( FloatRegister d, int si16, Register a) { emit_int32( LFS_OPCODE | frt(d) | ra0mem(a) | simm(si16,16)); } +inline void Assembler::lfsu(FloatRegister d, int si16, Register a) { emit_int32( LFSU_OPCODE | frt(d) | ra(a) | simm(si16,16)); } +inline void Assembler::lfsx(FloatRegister d, Register a, Register b) { emit_int32( LFSX_OPCODE | frt(d) | ra0mem(a) | rb(b)); } +inline void Assembler::lfd( FloatRegister d, int si16, Register a) { emit_int32( LFD_OPCODE | frt(d) | ra0mem(a) | simm(si16,16)); } +inline void Assembler::lfdu(FloatRegister d, int si16, Register a) { emit_int32( LFDU_OPCODE | frt(d) | ra(a) | simm(si16,16)); } +inline void Assembler::lfdx(FloatRegister d, Register a, Register b) { emit_int32( LFDX_OPCODE | frt(d) | ra0mem(a) | rb(b)); } + +// PPC 1, section 4.6.3 Floating-Point Store Instructions +// Use ra0mem instead of ra in some instructions below. +inline void Assembler::stfs( FloatRegister s, int si16, Register a) { emit_int32( STFS_OPCODE | frs(s) | ra0mem(a) | simm(si16,16)); } +inline void Assembler::stfsu(FloatRegister s, int si16, Register a) { emit_int32( STFSU_OPCODE | frs(s) | ra(a) | simm(si16,16)); } +inline void Assembler::stfsx(FloatRegister s, Register a, Register b){ emit_int32( STFSX_OPCODE | frs(s) | ra0mem(a) | rb(b)); } +inline void Assembler::stfd( FloatRegister s, int si16, Register a) { emit_int32( STFD_OPCODE | frs(s) | ra0mem(a) | simm(si16,16)); } +inline void Assembler::stfdu(FloatRegister s, int si16, Register a) { emit_int32( STFDU_OPCODE | frs(s) | ra(a) | simm(si16,16)); } +inline void Assembler::stfdx(FloatRegister s, Register a, Register b){ emit_int32( STFDX_OPCODE | frs(s) | ra0mem(a) | rb(b)); } + +// PPC 1, section 4.6.4 Floating-Point Move Instructions +inline void Assembler::fmr( FloatRegister d, FloatRegister b) { emit_int32( FMR_OPCODE | frt(d) | frb(b) | rc(0)); } +inline void Assembler::fmr_(FloatRegister d, FloatRegister b) { emit_int32( FMR_OPCODE | frt(d) | frb(b) | rc(1)); } + +// These are special Power6 opcodes, reused for "lfdepx" and "stfdepx" +// on Power7. Do not use. +//inline void Assembler::mffgpr( FloatRegister d, Register b) { emit_int32( MFFGPR_OPCODE | frt(d) | rb(b) | rc(0)); } +//inline void Assembler::mftgpr( Register d, FloatRegister b) { emit_int32( MFTGPR_OPCODE | rt(d) | frb(b) | rc(0)); } +// add cmpb and popcntb to detect ppc power version. 
+inline void Assembler::cmpb( Register a, Register s, Register b) { emit_int32( CMPB_OPCODE | rta(a) | rs(s) | rb(b) | rc(0)); } +inline void Assembler::popcntb(Register a, Register s) { emit_int32( POPCNTB_OPCODE | rta(a) | rs(s)); }; +inline void Assembler::popcntw(Register a, Register s) { emit_int32( POPCNTW_OPCODE | rta(a) | rs(s)); }; +inline void Assembler::popcntd(Register a, Register s) { emit_int32( POPCNTD_OPCODE | rta(a) | rs(s)); }; + +inline void Assembler::fneg( FloatRegister d, FloatRegister b) { emit_int32( FNEG_OPCODE | frt(d) | frb(b) | rc(0)); } +inline void Assembler::fneg_( FloatRegister d, FloatRegister b) { emit_int32( FNEG_OPCODE | frt(d) | frb(b) | rc(1)); } +inline void Assembler::fabs( FloatRegister d, FloatRegister b) { emit_int32( FABS_OPCODE | frt(d) | frb(b) | rc(0)); } +inline void Assembler::fabs_( FloatRegister d, FloatRegister b) { emit_int32( FABS_OPCODE | frt(d) | frb(b) | rc(1)); } +inline void Assembler::fnabs( FloatRegister d, FloatRegister b) { emit_int32( FNABS_OPCODE | frt(d) | frb(b) | rc(0)); } +inline void Assembler::fnabs_(FloatRegister d, FloatRegister b) { emit_int32( FNABS_OPCODE | frt(d) | frb(b) | rc(1)); } + +// PPC 1, section 4.6.5.1 Floating-Point Elementary Arithmetic Instructions +inline void Assembler::fadd( FloatRegister d, FloatRegister a, FloatRegister b) { emit_int32( FADD_OPCODE | frt(d) | fra(a) | frb(b) | rc(0)); } +inline void Assembler::fadd_( FloatRegister d, FloatRegister a, FloatRegister b) { emit_int32( FADD_OPCODE | frt(d) | fra(a) | frb(b) | rc(1)); } +inline void Assembler::fadds( FloatRegister d, FloatRegister a, FloatRegister b) { emit_int32( FADDS_OPCODE | frt(d) | fra(a) | frb(b) | rc(0)); } +inline void Assembler::fadds_(FloatRegister d, FloatRegister a, FloatRegister b) { emit_int32( FADDS_OPCODE | frt(d) | fra(a) | frb(b) | rc(1)); } +inline void Assembler::fsub( FloatRegister d, FloatRegister a, FloatRegister b) { emit_int32( FSUB_OPCODE | frt(d) | fra(a) | frb(b) | rc(0)); } +inline void Assembler::fsub_( FloatRegister d, FloatRegister a, FloatRegister b) { emit_int32( FSUB_OPCODE | frt(d) | fra(a) | frb(b) | rc(1)); } +inline void Assembler::fsubs( FloatRegister d, FloatRegister a, FloatRegister b) { emit_int32( FSUBS_OPCODE | frt(d) | fra(a) | frb(b) | rc(0)); } +inline void Assembler::fsubs_(FloatRegister d, FloatRegister a, FloatRegister b) { emit_int32( FSUBS_OPCODE | frt(d) | fra(a) | frb(b) | rc(1)); } +inline void Assembler::fmul( FloatRegister d, FloatRegister a, FloatRegister c) { emit_int32( FMUL_OPCODE | frt(d) | fra(a) | frc(c) | rc(0)); } +inline void Assembler::fmul_( FloatRegister d, FloatRegister a, FloatRegister c) { emit_int32( FMUL_OPCODE | frt(d) | fra(a) | frc(c) | rc(1)); } +inline void Assembler::fmuls( FloatRegister d, FloatRegister a, FloatRegister c) { emit_int32( FMULS_OPCODE | frt(d) | fra(a) | frc(c) | rc(0)); } +inline void Assembler::fmuls_(FloatRegister d, FloatRegister a, FloatRegister c) { emit_int32( FMULS_OPCODE | frt(d) | fra(a) | frc(c) | rc(1)); } +inline void Assembler::fdiv( FloatRegister d, FloatRegister a, FloatRegister b) { emit_int32( FDIV_OPCODE | frt(d) | fra(a) | frb(b) | rc(0)); } +inline void Assembler::fdiv_( FloatRegister d, FloatRegister a, FloatRegister b) { emit_int32( FDIV_OPCODE | frt(d) | fra(a) | frb(b) | rc(1)); } +inline void Assembler::fdivs( FloatRegister d, FloatRegister a, FloatRegister b) { emit_int32( FDIVS_OPCODE | frt(d) | fra(a) | frb(b) | rc(0)); } +inline void Assembler::fdivs_(FloatRegister d, FloatRegister a, FloatRegister b) { 
emit_int32( FDIVS_OPCODE | frt(d) | fra(a) | frb(b) | rc(1)); } + +// PPC 1, section 4.6.6 Floating-Point Rounding and Conversion Instructions +inline void Assembler::frsp( FloatRegister d, FloatRegister b) { emit_int32( FRSP_OPCODE | frt(d) | frb(b) | rc(0)); } +inline void Assembler::fctid( FloatRegister d, FloatRegister b) { emit_int32( FCTID_OPCODE | frt(d) | frb(b) | rc(0)); } +inline void Assembler::fctidz(FloatRegister d, FloatRegister b) { emit_int32( FCTIDZ_OPCODE | frt(d) | frb(b) | rc(0)); } +inline void Assembler::fctiw( FloatRegister d, FloatRegister b) { emit_int32( FCTIW_OPCODE | frt(d) | frb(b) | rc(0)); } +inline void Assembler::fctiwz(FloatRegister d, FloatRegister b) { emit_int32( FCTIWZ_OPCODE | frt(d) | frb(b) | rc(0)); } +inline void Assembler::fcfid( FloatRegister d, FloatRegister b) { emit_int32( FCFID_OPCODE | frt(d) | frb(b) | rc(0)); } +inline void Assembler::fcfids(FloatRegister d, FloatRegister b) { emit_int32( FCFIDS_OPCODE | frt(d) | frb(b) | rc(0)); } + +// PPC 1, section 4.6.7 Floating-Point Compare Instructions +inline void Assembler::fcmpu( ConditionRegister crx, FloatRegister a, FloatRegister b) { emit_int32( FCMPU_OPCODE | bf(crx) | fra(a) | frb(b)); } + +// PPC 1, section 5.2.1 Floating-Point Arithmetic Instructions +inline void Assembler::fsqrt( FloatRegister d, FloatRegister b) { emit_int32( FSQRT_OPCODE | frt(d) | frb(b) | rc(0)); } +inline void Assembler::fsqrts(FloatRegister d, FloatRegister b) { emit_int32( FSQRTS_OPCODE | frt(d) | frb(b) | rc(0)); } + +// Vector instructions for >= Power6. +inline void Assembler::lvebx( VectorRegister d, Register s1, Register s2) { emit_int32( LVEBX_OPCODE | vrt(d) | ra0mem(s1) | rb(s2)); } +inline void Assembler::lvehx( VectorRegister d, Register s1, Register s2) { emit_int32( LVEHX_OPCODE | vrt(d) | ra0mem(s1) | rb(s2)); } +inline void Assembler::lvewx( VectorRegister d, Register s1, Register s2) { emit_int32( LVEWX_OPCODE | vrt(d) | ra0mem(s1) | rb(s2)); } +inline void Assembler::lvx( VectorRegister d, Register s1, Register s2) { emit_int32( LVX_OPCODE | vrt(d) | ra0mem(s1) | rb(s2)); } +inline void Assembler::lvxl( VectorRegister d, Register s1, Register s2) { emit_int32( LVXL_OPCODE | vrt(d) | ra0mem(s1) | rb(s2)); } +inline void Assembler::stvebx(VectorRegister d, Register s1, Register s2) { emit_int32( STVEBX_OPCODE | vrt(d) | ra0mem(s1) | rb(s2)); } +inline void Assembler::stvehx(VectorRegister d, Register s1, Register s2) { emit_int32( STVEHX_OPCODE | vrt(d) | ra0mem(s1) | rb(s2)); } +inline void Assembler::stvewx(VectorRegister d, Register s1, Register s2) { emit_int32( STVEWX_OPCODE | vrt(d) | ra0mem(s1) | rb(s2)); } +inline void Assembler::stvx( VectorRegister d, Register s1, Register s2) { emit_int32( STVX_OPCODE | vrt(d) | ra0mem(s1) | rb(s2)); } +inline void Assembler::stvxl( VectorRegister d, Register s1, Register s2) { emit_int32( STVXL_OPCODE | vrt(d) | ra0mem(s1) | rb(s2)); } +inline void Assembler::lvsl( VectorRegister d, Register s1, Register s2) { emit_int32( LVSL_OPCODE | vrt(d) | ra0mem(s1) | rb(s2)); } +inline void Assembler::lvsr( VectorRegister d, Register s1, Register s2) { emit_int32( LVSR_OPCODE | vrt(d) | ra0mem(s1) | rb(s2)); } + +inline void Assembler::vpkpx( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VPKPX_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vpkshss( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VPKSHSS_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vpkswss( VectorRegister d, VectorRegister 
a, VectorRegister b) { emit_int32( VPKSWSS_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vpkshus( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VPKSHUS_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vpkswus( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VPKSWUS_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vpkuhum( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VPKUHUM_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vpkuwum( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VPKUWUM_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vpkuhus( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VPKUHUS_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vpkuwus( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VPKUWUS_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vupkhpx( VectorRegister d, VectorRegister b) { emit_int32( VUPKHPX_OPCODE | vrt(d) | vrb(b)); } +inline void Assembler::vupkhsb( VectorRegister d, VectorRegister b) { emit_int32( VUPKHSB_OPCODE | vrt(d) | vrb(b)); } +inline void Assembler::vupkhsh( VectorRegister d, VectorRegister b) { emit_int32( VUPKHSH_OPCODE | vrt(d) | vrb(b)); } +inline void Assembler::vupklpx( VectorRegister d, VectorRegister b) { emit_int32( VUPKLPX_OPCODE | vrt(d) | vrb(b)); } +inline void Assembler::vupklsb( VectorRegister d, VectorRegister b) { emit_int32( VUPKLSB_OPCODE | vrt(d) | vrb(b)); } +inline void Assembler::vupklsh( VectorRegister d, VectorRegister b) { emit_int32( VUPKLSH_OPCODE | vrt(d) | vrb(b)); } +inline void Assembler::vmrghb( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VMRGHB_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vmrghw( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VMRGHW_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vmrghh( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VMRGHH_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vmrglb( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VMRGLB_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vmrglw( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VMRGLW_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vmrglh( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VMRGLH_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vsplt( VectorRegister d, int ui4, VectorRegister b) { emit_int32( VSPLT_OPCODE | vrt(d) | vsplt_uim(uimm(ui4,4)) | vrb(b)); } +inline void Assembler::vsplth( VectorRegister d, int ui3, VectorRegister b) { emit_int32( VSPLTH_OPCODE | vrt(d) | vsplt_uim(uimm(ui3,3)) | vrb(b)); } +inline void Assembler::vspltw( VectorRegister d, int ui2, VectorRegister b) { emit_int32( VSPLTW_OPCODE | vrt(d) | vsplt_uim(uimm(ui2,2)) | vrb(b)); } +inline void Assembler::vspltisb(VectorRegister d, int si5) { emit_int32( VSPLTISB_OPCODE| vrt(d) | vsplti_sim(simm(si5,5))); } +inline void Assembler::vspltish(VectorRegister d, int si5) { emit_int32( VSPLTISH_OPCODE| vrt(d) | vsplti_sim(simm(si5,5))); } +inline void Assembler::vspltisw(VectorRegister d, int si5) { emit_int32( VSPLTISW_OPCODE| vrt(d) | vsplti_sim(simm(si5,5))); } +inline void Assembler::vperm( VectorRegister d, VectorRegister a, VectorRegister b, VectorRegister c){ emit_int32( VPERM_OPCODE | vrt(d) | vra(a) | vrb(b) | 
vrc(c)); } +inline void Assembler::vsel( VectorRegister d, VectorRegister a, VectorRegister b, VectorRegister c){ emit_int32( VSEL_OPCODE | vrt(d) | vra(a) | vrb(b) | vrc(c)); } +inline void Assembler::vsl( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VSL_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vsldoi( VectorRegister d, VectorRegister a, VectorRegister b, int si4) { emit_int32( VSLDOI_OPCODE| vrt(d) | vra(a) | vrb(b) | vsldoi_shb(simm(si4,4))); } +inline void Assembler::vslo( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VSLO_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vsr( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VSR_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vsro( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VSRO_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vaddcuw( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VADDCUW_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vaddshs( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VADDSHS_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vaddsbs( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VADDSBS_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vaddsws( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VADDSWS_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vaddubm( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VADDUBM_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vadduwm( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VADDUWM_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vadduhm( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VADDUHM_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vaddubs( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VADDUBS_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vadduws( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VADDUWS_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vadduhs( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VADDUHS_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vsubcuw( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VSUBCUW_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vsubshs( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VSUBSHS_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vsubsbs( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VSUBSBS_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vsubsws( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VSUBSWS_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vsububm( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VSUBUBM_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vsubuwm( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VSUBUWM_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vsubuhm( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VSUBUHM_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vsububs( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VSUBUBS_OPCODE | vrt(d) | vra(a) | 
vrb(b)); } +inline void Assembler::vsubuws( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VSUBUWS_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vsubuhs( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VSUBUHS_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vmulesb( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VMULESB_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vmuleub( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VMULEUB_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vmulesh( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VMULESH_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vmuleuh( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VMULEUH_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vmulosb( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VMULOSB_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vmuloub( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VMULOUB_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vmulosh( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VMULOSH_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vmulouh( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VMULOUH_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vmhaddshs(VectorRegister d,VectorRegister a, VectorRegister b, VectorRegister c) { emit_int32( VMHADDSHS_OPCODE | vrt(d) | vra(a) | vrb(b)| vrc(c)); } +inline void Assembler::vmhraddshs(VectorRegister d,VectorRegister a,VectorRegister b, VectorRegister c) { emit_int32( VMHRADDSHS_OPCODE| vrt(d) | vra(a) | vrb(b)| vrc(c)); } +inline void Assembler::vmladduhm(VectorRegister d,VectorRegister a, VectorRegister b, VectorRegister c) { emit_int32( VMLADDUHM_OPCODE | vrt(d) | vra(a) | vrb(b)| vrc(c)); } +inline void Assembler::vmsubuhm(VectorRegister d, VectorRegister a, VectorRegister b, VectorRegister c) { emit_int32( VMSUBUHM_OPCODE | vrt(d) | vra(a) | vrb(b)| vrc(c)); } +inline void Assembler::vmsummbm(VectorRegister d, VectorRegister a, VectorRegister b, VectorRegister c) { emit_int32( VMSUMMBM_OPCODE | vrt(d) | vra(a) | vrb(b)| vrc(c)); } +inline void Assembler::vmsumshm(VectorRegister d, VectorRegister a, VectorRegister b, VectorRegister c) { emit_int32( VMSUMSHM_OPCODE | vrt(d) | vra(a) | vrb(b)| vrc(c)); } +inline void Assembler::vmsumshs(VectorRegister d, VectorRegister a, VectorRegister b, VectorRegister c) { emit_int32( VMSUMSHS_OPCODE | vrt(d) | vra(a) | vrb(b)| vrc(c)); } +inline void Assembler::vmsumuhm(VectorRegister d, VectorRegister a, VectorRegister b, VectorRegister c) { emit_int32( VMSUMUHM_OPCODE | vrt(d) | vra(a) | vrb(b)| vrc(c)); } +inline void Assembler::vmsumuhs(VectorRegister d, VectorRegister a, VectorRegister b, VectorRegister c) { emit_int32( VMSUMUHS_OPCODE | vrt(d) | vra(a) | vrb(b)| vrc(c)); } +inline void Assembler::vsumsws( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VSUMSWS_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vsum2sws(VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VSUM2SWS_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vsum4sbs(VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VSUM4SBS_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vsum4ubs(VectorRegister d, 
VectorRegister a, VectorRegister b) { emit_int32( VSUM4UBS_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vsum4shs(VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VSUM4SHS_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vavgsb( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VAVGSB_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vavgsw( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VAVGSW_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vavgsh( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VAVGSH_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vavgub( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VAVGUB_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vavguw( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VAVGUW_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vavguh( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VAVGUH_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vmaxsb( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VMAXSB_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vmaxsw( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VMAXSW_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vmaxsh( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VMAXSH_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vmaxub( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VMAXUB_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vmaxuw( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VMAXUW_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vmaxuh( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VMAXUH_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vminsb( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VMINSB_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vminsw( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VMINSW_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vminsh( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VMINSH_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vminub( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VMINUB_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vminuw( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VMINUW_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vminuh( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VMINUH_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vcmpequb(VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VCMPEQUB_OPCODE | vrt(d) | vra(a) | vrb(b) | vcmp_rc(0)); } +inline void Assembler::vcmpequh(VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VCMPEQUH_OPCODE | vrt(d) | vra(a) | vrb(b) | vcmp_rc(0)); } +inline void Assembler::vcmpequw(VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VCMPEQUW_OPCODE | vrt(d) | vra(a) | vrb(b) | vcmp_rc(0)); } +inline void Assembler::vcmpgtsh(VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VCMPGTSH_OPCODE | vrt(d) | vra(a) | vrb(b) | vcmp_rc(0)); } +inline void Assembler::vcmpgtsb(VectorRegister d, 
VectorRegister a, VectorRegister b) { emit_int32( VCMPGTSB_OPCODE | vrt(d) | vra(a) | vrb(b) | vcmp_rc(0)); } +inline void Assembler::vcmpgtsw(VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VCMPGTSW_OPCODE | vrt(d) | vra(a) | vrb(b) | vcmp_rc(0)); } +inline void Assembler::vcmpgtub(VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VCMPGTUB_OPCODE | vrt(d) | vra(a) | vrb(b) | vcmp_rc(0)); } +inline void Assembler::vcmpgtuh(VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VCMPGTUH_OPCODE | vrt(d) | vra(a) | vrb(b) | vcmp_rc(0)); } +inline void Assembler::vcmpgtuw(VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VCMPGTUW_OPCODE | vrt(d) | vra(a) | vrb(b) | vcmp_rc(0)); } +inline void Assembler::vcmpequb_(VectorRegister d,VectorRegister a, VectorRegister b) { emit_int32( VCMPEQUB_OPCODE | vrt(d) | vra(a) | vrb(b) | vcmp_rc(1)); } +inline void Assembler::vcmpequh_(VectorRegister d,VectorRegister a, VectorRegister b) { emit_int32( VCMPEQUH_OPCODE | vrt(d) | vra(a) | vrb(b) | vcmp_rc(1)); } +inline void Assembler::vcmpequw_(VectorRegister d,VectorRegister a, VectorRegister b) { emit_int32( VCMPEQUW_OPCODE | vrt(d) | vra(a) | vrb(b) | vcmp_rc(1)); } +inline void Assembler::vcmpgtsh_(VectorRegister d,VectorRegister a, VectorRegister b) { emit_int32( VCMPGTSH_OPCODE | vrt(d) | vra(a) | vrb(b) | vcmp_rc(1)); } +inline void Assembler::vcmpgtsb_(VectorRegister d,VectorRegister a, VectorRegister b) { emit_int32( VCMPGTSB_OPCODE | vrt(d) | vra(a) | vrb(b) | vcmp_rc(1)); } +inline void Assembler::vcmpgtsw_(VectorRegister d,VectorRegister a, VectorRegister b) { emit_int32( VCMPGTSW_OPCODE | vrt(d) | vra(a) | vrb(b) | vcmp_rc(1)); } +inline void Assembler::vcmpgtub_(VectorRegister d,VectorRegister a, VectorRegister b) { emit_int32( VCMPGTUB_OPCODE | vrt(d) | vra(a) | vrb(b) | vcmp_rc(1)); } +inline void Assembler::vcmpgtuh_(VectorRegister d,VectorRegister a, VectorRegister b) { emit_int32( VCMPGTUH_OPCODE | vrt(d) | vra(a) | vrb(b) | vcmp_rc(1)); } +inline void Assembler::vcmpgtuw_(VectorRegister d,VectorRegister a, VectorRegister b) { emit_int32( VCMPGTUW_OPCODE | vrt(d) | vra(a) | vrb(b) | vcmp_rc(1)); } +inline void Assembler::vand( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VAND_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vandc( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VANDC_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vnor( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VNOR_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vor( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VOR_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vxor( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VXOR_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vrlb( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VRLB_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vrlw( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VRLW_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vrlh( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VRLH_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vslb( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VSLB_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vskw( VectorRegister d, VectorRegister a, VectorRegister 
b) { emit_int32( VSKW_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vslh( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VSLH_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vsrb( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VSRB_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vsrw( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VSRW_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vsrh( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VSRH_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vsrab( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VSRAB_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vsraw( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VSRAW_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vsrah( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VSRAH_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::mtvscr( VectorRegister b) { emit_int32( MTVSCR_OPCODE | vrb(b)); } +inline void Assembler::mfvscr( VectorRegister d) { emit_int32( MFVSCR_OPCODE | vrt(d)); } + +// ra0 version +inline void Assembler::lwzx( Register d, Register s2) { emit_int32( LWZX_OPCODE | rt(d) | rb(s2));} +inline void Assembler::lwz( Register d, int si16 ) { emit_int32( LWZ_OPCODE | rt(d) | d1(si16));} +inline void Assembler::lwax( Register d, Register s2) { emit_int32( LWAX_OPCODE | rt(d) | rb(s2));} +inline void Assembler::lwa( Register d, int si16 ) { emit_int32( LWA_OPCODE | rt(d) | ds(si16));} +inline void Assembler::lhzx( Register d, Register s2) { emit_int32( LHZX_OPCODE | rt(d) | rb(s2));} +inline void Assembler::lhz( Register d, int si16 ) { emit_int32( LHZ_OPCODE | rt(d) | d1(si16));} +inline void Assembler::lhax( Register d, Register s2) { emit_int32( LHAX_OPCODE | rt(d) | rb(s2));} +inline void Assembler::lha( Register d, int si16 ) { emit_int32( LHA_OPCODE | rt(d) | d1(si16));} +inline void Assembler::lbzx( Register d, Register s2) { emit_int32( LBZX_OPCODE | rt(d) | rb(s2));} +inline void Assembler::lbz( Register d, int si16 ) { emit_int32( LBZ_OPCODE | rt(d) | d1(si16));} +inline void Assembler::ld( Register d, int si16 ) { emit_int32( LD_OPCODE | rt(d) | ds(si16));} +inline void Assembler::ldx( Register d, Register s2) { emit_int32( LDX_OPCODE | rt(d) | rb(s2));} +inline void Assembler::stwx( Register d, Register s2) { emit_int32( STWX_OPCODE | rs(d) | rb(s2));} +inline void Assembler::stw( Register d, int si16 ) { emit_int32( STW_OPCODE | rs(d) | d1(si16));} +inline void Assembler::sthx( Register d, Register s2) { emit_int32( STHX_OPCODE | rs(d) | rb(s2));} +inline void Assembler::sth( Register d, int si16 ) { emit_int32( STH_OPCODE | rs(d) | d1(si16));} +inline void Assembler::stbx( Register d, Register s2) { emit_int32( STBX_OPCODE | rs(d) | rb(s2));} +inline void Assembler::stb( Register d, int si16 ) { emit_int32( STB_OPCODE | rs(d) | d1(si16));} +inline void Assembler::std( Register d, int si16 ) { emit_int32( STD_OPCODE | rs(d) | ds(si16));} +inline void Assembler::stdx( Register d, Register s2) { emit_int32( STDX_OPCODE | rs(d) | rb(s2));} + +// ra0 version +inline void Assembler::icbi( Register s2) { emit_int32( ICBI_OPCODE | rb(s2) ); } +//inline void Assembler::dcba( Register s2) { emit_int32( DCBA_OPCODE | rb(s2) ); } +inline void Assembler::dcbz( Register s2) { emit_int32( DCBZ_OPCODE | rb(s2) ); } +inline void Assembler::dcbst( 
Register s2) { emit_int32( DCBST_OPCODE | rb(s2) ); } +inline void Assembler::dcbf( Register s2) { emit_int32( DCBF_OPCODE | rb(s2) ); } +inline void Assembler::dcbt( Register s2) { emit_int32( DCBT_OPCODE | rb(s2) ); } +inline void Assembler::dcbtct( Register s2, int ct) { emit_int32( DCBT_OPCODE | rb(s2) | thct(ct)); } +inline void Assembler::dcbtds( Register s2, int ds) { emit_int32( DCBT_OPCODE | rb(s2) | thds(ds)); } +inline void Assembler::dcbtst( Register s2) { emit_int32( DCBTST_OPCODE | rb(s2) ); } +inline void Assembler::dcbtstct(Register s2, int ct) { emit_int32( DCBTST_OPCODE | rb(s2) | thct(ct)); } + +// ra0 version +inline void Assembler::lwarx_unchecked(Register d, Register b, int eh1) { emit_int32( LWARX_OPCODE | rt(d) | rb(b) | eh(eh1)); } +inline void Assembler::ldarx_unchecked(Register d, Register b, int eh1) { emit_int32( LDARX_OPCODE | rt(d) | rb(b) | eh(eh1)); } +inline void Assembler::lwarx( Register d, Register b, bool hint_exclusive_access){ lwarx_unchecked(d, b, (hint_exclusive_access && lxarx_hint_exclusive_access() && UseExtendedLoadAndReserveInstructionsPPC64) ? 1 : 0); } +inline void Assembler::ldarx( Register d, Register b, bool hint_exclusive_access){ ldarx_unchecked(d, b, (hint_exclusive_access && lxarx_hint_exclusive_access() && UseExtendedLoadAndReserveInstructionsPPC64) ? 1 : 0); } +inline void Assembler::stwcx_(Register s, Register b) { emit_int32( STWCX_OPCODE | rs(s) | rb(b) | rc(1)); } +inline void Assembler::stdcx_(Register s, Register b) { emit_int32( STDCX_OPCODE | rs(s) | rb(b) | rc(1)); } + +// ra0 version +inline void Assembler::lfs( FloatRegister d, int si16) { emit_int32( LFS_OPCODE | frt(d) | simm(si16,16)); } +inline void Assembler::lfsx(FloatRegister d, Register b) { emit_int32( LFSX_OPCODE | frt(d) | rb(b)); } +inline void Assembler::lfd( FloatRegister d, int si16) { emit_int32( LFD_OPCODE | frt(d) | simm(si16,16)); } +inline void Assembler::lfdx(FloatRegister d, Register b) { emit_int32( LFDX_OPCODE | frt(d) | rb(b)); } + +// ra0 version +inline void Assembler::stfs( FloatRegister s, int si16) { emit_int32( STFS_OPCODE | frs(s) | simm(si16, 16)); } +inline void Assembler::stfsx(FloatRegister s, Register b) { emit_int32( STFSX_OPCODE | frs(s) | rb(b)); } +inline void Assembler::stfd( FloatRegister s, int si16) { emit_int32( STFD_OPCODE | frs(s) | simm(si16, 16)); } +inline void Assembler::stfdx(FloatRegister s, Register b) { emit_int32( STFDX_OPCODE | frs(s) | rb(b)); } + +// ra0 version +inline void Assembler::lvebx( VectorRegister d, Register s2) { emit_int32( LVEBX_OPCODE | vrt(d) | rb(s2)); } +inline void Assembler::lvehx( VectorRegister d, Register s2) { emit_int32( LVEHX_OPCODE | vrt(d) | rb(s2)); } +inline void Assembler::lvewx( VectorRegister d, Register s2) { emit_int32( LVEWX_OPCODE | vrt(d) | rb(s2)); } +inline void Assembler::lvx( VectorRegister d, Register s2) { emit_int32( LVX_OPCODE | vrt(d) | rb(s2)); } +inline void Assembler::lvxl( VectorRegister d, Register s2) { emit_int32( LVXL_OPCODE | vrt(d) | rb(s2)); } +inline void Assembler::stvebx(VectorRegister d, Register s2) { emit_int32( STVEBX_OPCODE | vrt(d) | rb(s2)); } +inline void Assembler::stvehx(VectorRegister d, Register s2) { emit_int32( STVEHX_OPCODE | vrt(d) | rb(s2)); } +inline void Assembler::stvewx(VectorRegister d, Register s2) { emit_int32( STVEWX_OPCODE | vrt(d) | rb(s2)); } +inline void Assembler::stvx( VectorRegister d, Register s2) { emit_int32( STVX_OPCODE | vrt(d) | rb(s2)); } +inline void Assembler::stvxl( VectorRegister d, Register s2) { emit_int32( 
STVXL_OPCODE | vrt(d) | rb(s2)); } +inline void Assembler::lvsl( VectorRegister d, Register s2) { emit_int32( LVSL_OPCODE | vrt(d) | rb(s2)); } +inline void Assembler::lvsr( VectorRegister d, Register s2) { emit_int32( LVSR_OPCODE | vrt(d) | rb(s2)); } + + +inline void Assembler::load_const(Register d, void* x, Register tmp) { + load_const(d, (long)x, tmp); +} + +// Load a 64 bit constant encoded by a `Label'. This works for bound +// labels as well as unbound ones. For unbound labels, the code will +// be patched as soon as the label gets bound. +inline void Assembler::load_const(Register d, Label& L, Register tmp) { + load_const(d, target(L), tmp); +} + +// Load a 64 bit constant encoded by an AddressLiteral. patchable. +inline void Assembler::load_const(Register d, AddressLiteral& a, Register tmp) { + assert(d != R0, "R0 not allowed"); + // First relocate (we don't change the offset in the RelocationHolder, + // just pass a.rspec()), then delegate to load_const(Register, long). + relocate(a.rspec()); + load_const(d, (long)a.value(), tmp); +} + + +#endif // CPU_PPC_VM_ASSEMBLER_PPC_INLINE_HPP diff --git a/src/cpu/ppc/vm/bytecodeInterpreter_ppc.hpp b/src/cpu/ppc/vm/bytecodeInterpreter_ppc.hpp new file mode 100644 --- /dev/null +++ b/src/cpu/ppc/vm/bytecodeInterpreter_ppc.hpp @@ -0,0 +1,105 @@ +/* + * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright 2012, 2013 SAP AG. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_PPC_VM_BYTECODEINTERPRETER_PPC_HPP +#define CPU_PPC_VM_BYTECODEINTERPRETER_PPC_HPP + +// Platform specific for C++ based Interpreter +#define LOTS_OF_REGS /* Lets interpreter use plenty of registers */ + +private: + + // Save the bottom of the stack after frame manager setup. For ease of restoration after return + // from recursive interpreter call. + intptr_t* _frame_bottom; // Saved bottom of frame manager frame. + address _last_Java_pc; // Pc to return to in frame manager. + intptr_t* _last_Java_fp; // frame pointer + intptr_t* _last_Java_sp; // stack pointer + interpreterState _self_link; // Previous interpreter state // sometimes points to self??? + double _native_fresult; // Save result of native calls that might return floats. + intptr_t _native_lresult; // Save result of native calls that might return handle/longs. 
+ +public: + address last_Java_pc(void) { return _last_Java_pc; } + intptr_t* last_Java_fp(void) { return _last_Java_fp; } + + static ByteSize native_lresult_offset() { + return byte_offset_of(BytecodeInterpreter, _native_lresult); + } + + static ByteSize native_fresult_offset() { + return byte_offset_of(BytecodeInterpreter, _native_fresult); + } + + static void pd_layout_interpreterState(interpreterState istate, address last_Java_pc, intptr_t* last_Java_fp); + +#define SET_LAST_JAVA_FRAME() THREAD->frame_anchor()->set(istate->_last_Java_sp, istate->_last_Java_pc); +#define RESET_LAST_JAVA_FRAME() THREAD->frame_anchor()->clear(); + + +// Macros for accessing the stack. +#undef STACK_INT +#undef STACK_FLOAT +#undef STACK_ADDR +#undef STACK_OBJECT +#undef STACK_DOUBLE +#undef STACK_LONG + +// JavaStack Implementation +#define STACK_SLOT(offset) ((address) &topOfStack[-(offset)]) +#define STACK_INT(offset) (*((jint*) &topOfStack[-(offset)])) +#define STACK_FLOAT(offset) (*((jfloat *) &topOfStack[-(offset)])) +#define STACK_OBJECT(offset) (*((oop *) &topOfStack [-(offset)])) +#define STACK_DOUBLE(offset) (((VMJavaVal64*) &topOfStack[-(offset)])->d) +#define STACK_LONG(offset) (((VMJavaVal64 *) &topOfStack[-(offset)])->l) + +#define SET_STACK_SLOT(value, offset) (*(intptr_t*)&topOfStack[-(offset)] = *(intptr_t*)(value)) +#define SET_STACK_ADDR(value, offset) (*((address *)&topOfStack[-(offset)]) = (value)) +#define SET_STACK_INT(value, offset) (*((jint *)&topOfStack[-(offset)]) = (value)) +#define SET_STACK_FLOAT(value, offset) (*((jfloat *)&topOfStack[-(offset)]) = (value)) +#define SET_STACK_OBJECT(value, offset) (*((oop *)&topOfStack[-(offset)]) = (value)) +#define SET_STACK_DOUBLE(value, offset) (((VMJavaVal64*)&topOfStack[-(offset)])->d = (value)) +#define SET_STACK_DOUBLE_FROM_ADDR(addr, offset) (((VMJavaVal64*)&topOfStack[-(offset)])->d = \ + ((VMJavaVal64*)(addr))->d) +#define SET_STACK_LONG(value, offset) (((VMJavaVal64*)&topOfStack[-(offset)])->l = (value)) +#define SET_STACK_LONG_FROM_ADDR(addr, offset) (((VMJavaVal64*)&topOfStack[-(offset)])->l = \ + ((VMJavaVal64*)(addr))->l) +// JavaLocals implementation + +#define LOCALS_SLOT(offset) ((intptr_t*)&locals[-(offset)]) +#define LOCALS_ADDR(offset) ((address)locals[-(offset)]) +#define LOCALS_INT(offset) (*(jint*)&(locals[-(offset)])) +#define LOCALS_OBJECT(offset) ((oop)locals[-(offset)]) +#define LOCALS_LONG_AT(offset) (((address)&locals[-((offset) + 1)])) +#define LOCALS_DOUBLE_AT(offset) (((address)&locals[-((offset) + 1)])) + +#define SET_LOCALS_SLOT(value, offset) (*(intptr_t*)&locals[-(offset)] = *(intptr_t *)(value)) +#define SET_LOCALS_INT(value, offset) (*((jint *)&locals[-(offset)]) = (value)) +#define SET_LOCALS_DOUBLE(value, offset) (((VMJavaVal64*)&locals[-((offset)+1)])->d = (value)) +#define SET_LOCALS_LONG(value, offset) (((VMJavaVal64*)&locals[-((offset)+1)])->l = (value)) +#define SET_LOCALS_DOUBLE_FROM_ADDR(addr, offset) (((VMJavaVal64*)&locals[-((offset)+1)])->d = \ + + +#endif // CPU_PPC_VM_BYTECODEINTERPRETER_PPC_PP diff --git a/src/cpu/ppc/vm/bytecodeInterpreter_ppc.inline.hpp b/src/cpu/ppc/vm/bytecodeInterpreter_ppc.inline.hpp new file mode 100644 --- /dev/null +++ b/src/cpu/ppc/vm/bytecodeInterpreter_ppc.inline.hpp @@ -0,0 +1,290 @@ +/* + * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright 2012, 2013 SAP AG. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_PPC_VM_BYTECODEINTERPRETER_PPC_INLINE_HPP +#define CPU_PPC_VM_BYTECODEINTERPRETER_PPC_INLINE_HPP + +#ifdef CC_INTERP + +// Inline interpreter functions for ppc. + +#include + +inline jfloat BytecodeInterpreter::VMfloatAdd(jfloat op1, jfloat op2) { return op1 + op2; } +inline jfloat BytecodeInterpreter::VMfloatSub(jfloat op1, jfloat op2) { return op1 - op2; } +inline jfloat BytecodeInterpreter::VMfloatMul(jfloat op1, jfloat op2) { return op1 * op2; } +inline jfloat BytecodeInterpreter::VMfloatDiv(jfloat op1, jfloat op2) { return op1 / op2; } +inline jfloat BytecodeInterpreter::VMfloatRem(jfloat op1, jfloat op2) { return (jfloat)fmod((double)op1, (double)op2); } + +inline jfloat BytecodeInterpreter::VMfloatNeg(jfloat op) { return -op; } + +inline int32_t BytecodeInterpreter::VMfloatCompare(jfloat op1, jfloat op2, int32_t direction) { + return ( op1 < op2 ? -1 : + op1 > op2 ? 1 : + op1 == op2 ? 0 : + (direction == -1 || direction == 1) ? direction : 0); + +} + +inline void BytecodeInterpreter::VMmemCopy64(uint32_t to[2], const uint32_t from[2]) { + to[0] = from[0]; to[1] = from[1]; +} + +// The long operations depend on compiler support for "long long" on ppc. 
+ +inline jlong BytecodeInterpreter::VMlongAdd(jlong op1, jlong op2) { + return op1 + op2; +} + +inline jlong BytecodeInterpreter::VMlongAnd(jlong op1, jlong op2) { + return op1 & op2; +} + +inline jlong BytecodeInterpreter::VMlongDiv(jlong op1, jlong op2) { + if (op1 == min_jlong && op2 == -1) return op1; + return op1 / op2; +} + +inline jlong BytecodeInterpreter::VMlongMul(jlong op1, jlong op2) { + return op1 * op2; +} + +inline jlong BytecodeInterpreter::VMlongOr(jlong op1, jlong op2) { + return op1 | op2; +} + +inline jlong BytecodeInterpreter::VMlongSub(jlong op1, jlong op2) { + return op1 - op2; +} + +inline jlong BytecodeInterpreter::VMlongXor(jlong op1, jlong op2) { + return op1 ^ op2; +} + +inline jlong BytecodeInterpreter::VMlongRem(jlong op1, jlong op2) { + if (op1 == min_jlong && op2 == -1) return 0; + return op1 % op2; +} + +inline jlong BytecodeInterpreter::VMlongUshr(jlong op1, jint op2) { + return ((uint64_t) op1) >> (op2 & 0x3F); +} + +inline jlong BytecodeInterpreter::VMlongShr(jlong op1, jint op2) { + return op1 >> (op2 & 0x3F); +} + +inline jlong BytecodeInterpreter::VMlongShl(jlong op1, jint op2) { + return op1 << (op2 & 0x3F); +} + +inline jlong BytecodeInterpreter::VMlongNeg(jlong op) { + return -op; +} + +inline jlong BytecodeInterpreter::VMlongNot(jlong op) { + return ~op; +} + +inline int32_t BytecodeInterpreter::VMlongLtz(jlong op) { + return (op <= 0); +} + +inline int32_t BytecodeInterpreter::VMlongGez(jlong op) { + return (op >= 0); +} + +inline int32_t BytecodeInterpreter::VMlongEqz(jlong op) { + return (op == 0); +} + +inline int32_t BytecodeInterpreter::VMlongEq(jlong op1, jlong op2) { + return (op1 == op2); +} + +inline int32_t BytecodeInterpreter::VMlongNe(jlong op1, jlong op2) { + return (op1 != op2); +} + +inline int32_t BytecodeInterpreter::VMlongGe(jlong op1, jlong op2) { + return (op1 >= op2); +} + +inline int32_t BytecodeInterpreter::VMlongLe(jlong op1, jlong op2) { + return (op1 <= op2); +} + +inline int32_t BytecodeInterpreter::VMlongLt(jlong op1, jlong op2) { + return (op1 < op2); +} + +inline int32_t BytecodeInterpreter::VMlongGt(jlong op1, jlong op2) { + return (op1 > op2); +} + +inline int32_t BytecodeInterpreter::VMlongCompare(jlong op1, jlong op2) { + return (VMlongLt(op1, op2) ? -1 : VMlongGt(op1, op2) ? 1 : 0); +} + +// Long conversions + +inline jdouble BytecodeInterpreter::VMlong2Double(jlong val) { + return (jdouble) val; +} + +inline jfloat BytecodeInterpreter::VMlong2Float(jlong val) { + return (jfloat) val; +} + +inline jint BytecodeInterpreter::VMlong2Int(jlong val) { + return (jint) val; +} + +// Double Arithmetic + +inline jdouble BytecodeInterpreter::VMdoubleAdd(jdouble op1, jdouble op2) { + return op1 + op2; +} + +inline jdouble BytecodeInterpreter::VMdoubleDiv(jdouble op1, jdouble op2) { + return op1 / op2; +} + +inline jdouble BytecodeInterpreter::VMdoubleMul(jdouble op1, jdouble op2) { + return op1 * op2; +} + +inline jdouble BytecodeInterpreter::VMdoubleNeg(jdouble op) { + return -op; +} + +inline jdouble BytecodeInterpreter::VMdoubleRem(jdouble op1, jdouble op2) { + return fmod(op1, op2); +} + +inline jdouble BytecodeInterpreter::VMdoubleSub(jdouble op1, jdouble op2) { + return op1 - op2; +} + +inline int32_t BytecodeInterpreter::VMdoubleCompare(jdouble op1, jdouble op2, int32_t direction) { + return ( op1 < op2 ? -1 : + op1 > op2 ? 1 : + op1 == op2 ? 0 : + (direction == -1 || direction == 1) ? 
direction : 0); +} + +// Double Conversions + +inline jfloat BytecodeInterpreter::VMdouble2Float(jdouble val) { + return (jfloat) val; +} + +// Float Conversions + +inline jdouble BytecodeInterpreter::VMfloat2Double(jfloat op) { + return (jdouble) op; +} + +// Integer Arithmetic + +inline jint BytecodeInterpreter::VMintAdd(jint op1, jint op2) { + return op1 + op2; +} + +inline jint BytecodeInterpreter::VMintAnd(jint op1, jint op2) { + return op1 & op2; +} + +inline jint BytecodeInterpreter::VMintDiv(jint op1, jint op2) { + /* it's possible we could catch this special case implicitly */ + if ((juint)op1 == 0x80000000 && op2 == -1) return op1; + else return op1 / op2; +} + +inline jint BytecodeInterpreter::VMintMul(jint op1, jint op2) { + return op1 * op2; +} + +inline jint BytecodeInterpreter::VMintNeg(jint op) { + return -op; +} + +inline jint BytecodeInterpreter::VMintOr(jint op1, jint op2) { + return op1 | op2; +} + +inline jint BytecodeInterpreter::VMintRem(jint op1, jint op2) { + /* it's possible we could catch this special case implicitly */ + if ((juint)op1 == 0x80000000 && op2 == -1) return 0; + else return op1 % op2; +} + +inline jint BytecodeInterpreter::VMintShl(jint op1, jint op2) { + return op1 << (op2 & 0x1f); +} + +inline jint BytecodeInterpreter::VMintShr(jint op1, jint op2) { + return op1 >> (op2 & 0x1f); +} + +inline jint BytecodeInterpreter::VMintSub(jint op1, jint op2) { + return op1 - op2; +} + +inline juint BytecodeInterpreter::VMintUshr(jint op1, jint op2) { + return ((juint) op1) >> (op2 & 0x1f); +} + +inline jint BytecodeInterpreter::VMintXor(jint op1, jint op2) { + return op1 ^ op2; +} + +inline jdouble BytecodeInterpreter::VMint2Double(jint val) { + return (jdouble) val; +} + +inline jfloat BytecodeInterpreter::VMint2Float(jint val) { + return (jfloat) val; +} + +inline jlong BytecodeInterpreter::VMint2Long(jint val) { + return (jlong) val; +} + +inline jchar BytecodeInterpreter::VMint2Char(jint val) { + return (jchar) val; +} + +inline jshort BytecodeInterpreter::VMint2Short(jint val) { + return (jshort) val; +} + +inline jbyte BytecodeInterpreter::VMint2Byte(jint val) { + return (jbyte) val; +} + +#endif // CC_INTERP + +#endif // CPU_PPC_VM_BYTECODEINTERPRETER_PPC_INLINE_HPP diff --git a/src/cpu/ppc/vm/bytecodes_ppc.cpp b/src/cpu/ppc/vm/bytecodes_ppc.cpp new file mode 100644 --- /dev/null +++ b/src/cpu/ppc/vm/bytecodes_ppc.cpp @@ -0,0 +1,31 @@ +/* + * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright 2012, 2013 SAP AG. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "interpreter/bytecodes.hpp" + +void Bytecodes::pd_initialize() { + // No ppc specific initialization. +} diff --git a/src/cpu/ppc/vm/bytecodes_ppc.hpp b/src/cpu/ppc/vm/bytecodes_ppc.hpp new file mode 100644 --- /dev/null +++ b/src/cpu/ppc/vm/bytecodes_ppc.hpp @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2001, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright 2012, 2013 SAP AG. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_PPC_VM_BYTECODES_PPC_HPP +#define CPU_PPC_VM_BYTECODES_PPC_HPP + +// No ppc64 specific bytecodes + +#endif // CPU_PPC_VM_BYTECODES_PPC_HPP diff --git a/src/cpu/ppc/vm/bytes_ppc.hpp b/src/cpu/ppc/vm/bytes_ppc.hpp new file mode 100644 --- /dev/null +++ b/src/cpu/ppc/vm/bytes_ppc.hpp @@ -0,0 +1,156 @@ +/* + * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright 2012, 2013 SAP AG. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_PPC_VM_BYTES_PPC_HPP +#define CPU_PPC_VM_BYTES_PPC_HPP + +#include "memory/allocation.hpp" + +class Bytes: AllStatic { + public: + // Efficient reading and writing of unaligned unsigned data in platform-specific byte ordering + // PowerPC needs to check for alignment. + + // can I count on address always being a pointer to an unsigned char? Yes + + // Returns true, if the byte ordering used by Java is different from the nativ byte ordering + // of the underlying machine. For example, true for Intel x86, False, for Solaris on Sparc. 
+ static inline bool is_Java_byte_ordering_different() { return false; } + + // Thus, a swap between native and Java ordering is always a no-op: + static inline u2 swap_u2(u2 x) { return x; } + static inline u4 swap_u4(u4 x) { return x; } + static inline u8 swap_u8(u8 x) { return x; } + + static inline u2 get_native_u2(address p) { + return (intptr_t(p) & 1) == 0 + ? *(u2*)p + : ( u2(p[0]) << 8 ) + | ( u2(p[1]) ); + } + + static inline u4 get_native_u4(address p) { + switch (intptr_t(p) & 3) { + case 0: return *(u4*)p; + + case 2: return ( u4( ((u2*)p)[0] ) << 16 ) + | ( u4( ((u2*)p)[1] ) ); + + default: return ( u4(p[0]) << 24 ) + | ( u4(p[1]) << 16 ) + | ( u4(p[2]) << 8 ) + | u4(p[3]); + } + } + + static inline u8 get_native_u8(address p) { + switch (intptr_t(p) & 7) { + case 0: return *(u8*)p; + + case 4: return ( u8( ((u4*)p)[0] ) << 32 ) + | ( u8( ((u4*)p)[1] ) ); + + case 2: return ( u8( ((u2*)p)[0] ) << 48 ) + | ( u8( ((u2*)p)[1] ) << 32 ) + | ( u8( ((u2*)p)[2] ) << 16 ) + | ( u8( ((u2*)p)[3] ) ); + + default: return ( u8(p[0]) << 56 ) + | ( u8(p[1]) << 48 ) + | ( u8(p[2]) << 40 ) + | ( u8(p[3]) << 32 ) + | ( u8(p[4]) << 24 ) + | ( u8(p[5]) << 16 ) + | ( u8(p[6]) << 8 ) + | u8(p[7]); + } + } + + + + static inline void put_native_u2(address p, u2 x) { + if ( (intptr_t(p) & 1) == 0 ) { *(u2*)p = x; } + else { + p[0] = x >> 8; + p[1] = x; + } + } + + static inline void put_native_u4(address p, u4 x) { + switch ( intptr_t(p) & 3 ) { + case 0: *(u4*)p = x; + break; + + case 2: ((u2*)p)[0] = x >> 16; + ((u2*)p)[1] = x; + break; + + default: ((u1*)p)[0] = x >> 24; + ((u1*)p)[1] = x >> 16; + ((u1*)p)[2] = x >> 8; + ((u1*)p)[3] = x; + break; + } + } + + static inline void put_native_u8(address p, u8 x) { + switch ( intptr_t(p) & 7 ) { + case 0: *(u8*)p = x; + break; + + case 4: ((u4*)p)[0] = x >> 32; + ((u4*)p)[1] = x; + break; + + case 2: ((u2*)p)[0] = x >> 48; + ((u2*)p)[1] = x >> 32; + ((u2*)p)[2] = x >> 16; + ((u2*)p)[3] = x; + break; + + default: ((u1*)p)[0] = x >> 56; + ((u1*)p)[1] = x >> 48; + ((u1*)p)[2] = x >> 40; + ((u1*)p)[3] = x >> 32; + ((u1*)p)[4] = x >> 24; + ((u1*)p)[5] = x >> 16; + ((u1*)p)[6] = x >> 8; + ((u1*)p)[7] = x; + } + } + + + // Efficient reading and writing of unaligned unsigned data in Java byte ordering (i.e. big-endian ordering) + // (no byte-order reversal is needed since Power CPUs are big-endian oriented). + static inline u2 get_Java_u2(address p) { return get_native_u2(p); } + static inline u4 get_Java_u4(address p) { return get_native_u4(p); } + static inline u8 get_Java_u8(address p) { return get_native_u8(p); } + + static inline void put_Java_u2(address p, u2 x) { put_native_u2(p, x); } + static inline void put_Java_u4(address p, u4 x) { put_native_u4(p, x); } + static inline void put_Java_u8(address p, u8 x) { put_native_u8(p, x); } +}; + +#endif // CPU_PPC_VM_BYTES_PPC_HPP diff --git a/src/cpu/ppc/vm/codeBuffer_ppc.hpp b/src/cpu/ppc/vm/codeBuffer_ppc.hpp new file mode 100644 --- /dev/null +++ b/src/cpu/ppc/vm/codeBuffer_ppc.hpp @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright 2012, 2013 SAP AG. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_PPC_VM_CODEBUFFER_PPC_HPP +#define CPU_PPC_VM_CODEBUFFER_PPC_HPP + +private: + void pd_initialize() {} + +public: + void flush_bundle(bool start_new_bundle) {} + +#endif // CPU_PPC_VM_CODEBUFFER_PPC_HPP diff --git a/src/cpu/ppc/vm/compiledIC_ppc.cpp b/src/cpu/ppc/vm/compiledIC_ppc.cpp new file mode 100644 --- /dev/null +++ b/src/cpu/ppc/vm/compiledIC_ppc.cpp @@ -0,0 +1,261 @@ +/* + * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "code/compiledIC.hpp" +#include "code/icBuffer.hpp" +#include "code/nmethod.hpp" +#include "memory/resourceArea.hpp" +#include "runtime/mutexLocker.hpp" +#include "runtime/safepoint.hpp" +#ifdef COMPILER2 +#include "opto/matcher.hpp" +#endif + +// Release the CompiledICHolder* associated with this call site is there is one. +void CompiledIC::cleanup_call_site(virtual_call_Relocation* call_site) { + // This call site might have become stale so inspect it carefully. + NativeCall* call = nativeCall_at(call_site->addr()); + if (is_icholder_entry(call->destination())) { + NativeMovConstReg* value = nativeMovConstReg_at(call_site->cached_value()); + InlineCacheBuffer::queue_for_release((CompiledICHolder*)value->data()); + } +} + +bool CompiledIC::is_icholder_call_site(virtual_call_Relocation* call_site) { + // This call site might have become stale so inspect it carefully. + NativeCall* call = nativeCall_at(call_site->addr()); + return is_icholder_entry(call->destination()); +} + +//----------------------------------------------------------------------------- +// High-level access to an inline cache. Guaranteed to be MT-safe. 
+ +CompiledIC::CompiledIC(nmethod* nm, NativeCall* call) + : _ic_call(call) +{ + address ic_call = call->instruction_address(); + + assert(ic_call != NULL, "ic_call address must be set"); + assert(nm != NULL, "must pass nmethod"); + assert(nm->contains(ic_call), "must be in nmethod"); + + // Search for the ic_call at the given address. + RelocIterator iter(nm, ic_call, ic_call+1); + bool ret = iter.next(); + assert(ret == true, "relocInfo must exist at this address"); + assert(iter.addr() == ic_call, "must find ic_call"); + if (iter.type() == relocInfo::virtual_call_type) { + virtual_call_Relocation* r = iter.virtual_call_reloc(); + _is_optimized = false; + _value = nativeMovConstReg_at(r->cached_value()); + } else { + assert(iter.type() == relocInfo::opt_virtual_call_type, "must be a virtual call"); + _is_optimized = true; + _value = NULL; + } +} + +// ---------------------------------------------------------------------------- + +// A PPC CompiledStaticCall looks like this: +// +// >>>> consts +// +// [call target1] +// [IC cache] +// [call target2] +// +// <<<< consts +// >>>> insts +// +// bl offset16 -+ -+ ??? // How many bits available? +// | | +// <<<< insts | | +// >>>> stubs | | +// | |- trampoline_stub_Reloc +// trampoline stub: | <-+ +// r2 = toc | +// r2 = [r2 + offset] | // Load call target1 from const section +// mtctr r2 | +// bctr |- static_stub_Reloc +// comp_to_interp_stub: <---+ +// r1 = toc +// ICreg = [r1 + IC_offset] // Load IC from const section +// r1 = [r1 + offset] // Load call target2 from const section +// mtctr r1 +// bctr +// +// <<<< stubs +// +// The call instruction in the code either +// - branches directly to a compiled method if offset encodable in instruction +// - branches to the trampoline stub if offset to compiled method not encodable +// - branches to the compiled_to_interp stub if target interpreted +// +// Further there are three relocations from the loads to the constants in +// the constant section. +// +// Usage of r1 and r2 in the stubs allows to distinguish them. + +const int IC_pos_in_java_to_interp_stub = 8; +#define __ _masm. +void CompiledStaticCall::emit_to_interp_stub(CodeBuffer &cbuf) { +#ifdef COMPILER2 + // Get the mark within main instrs section which is set to the address of the call. + address call_addr = cbuf.insts_mark(); + + // Note that the code buffer's insts_mark is always relative to insts. + // That's why we must use the macroassembler to generate a stub. + MacroAssembler _masm(&cbuf); + + // Start the stub. + address stub = __ start_a_stub(CompiledStaticCall::to_interp_stub_size()); + if (stub == NULL) { + Compile::current()->env()->record_out_of_memory_failure(); + return; + } + + // For java_to_interp stubs we use R11_scratch1 as scratch register + // and in call trampoline stubs we use R12_scratch2. This way we + // can distinguish them (see is_NativeCallTrampolineStub_at()). + Register reg_scratch = R11_scratch1; + + // Create a static stub relocation which relates this stub + // with the call instruction at insts_call_instruction_offset in the + // instructions code-section. 
+ __ relocate(static_stub_Relocation::spec(call_addr)); + const int stub_start_offset = __ offset(); + + // Now, create the stub's code: + // - load the TOC + // - load the inline cache oop from the constant pool + // - load the call target from the constant pool + // - call + __ calculate_address_from_global_toc(reg_scratch, __ method_toc()); + AddressLiteral ic = __ allocate_metadata_address((Metadata *)NULL); + __ load_const_from_method_toc(as_Register(Matcher::inline_cache_reg_encode()), ic, reg_scratch); + + if (ReoptimizeCallSequences) { + __ b64_patchable((address)-1, relocInfo::none); + } else { + AddressLiteral a((address)-1); + __ load_const_from_method_toc(reg_scratch, a, reg_scratch); + __ mtctr(reg_scratch); + __ bctr(); + } + + // FIXME: Assert that the stub can be identified and patched. + + // Java_to_interp_stub_size should be good. + assert((__ offset() - stub_start_offset) <= CompiledStaticCall::to_interp_stub_size(), + "should be good size"); + assert(!is_NativeCallTrampolineStub_at(__ addr_at(stub_start_offset)), + "must not confuse java_to_interp with trampoline stubs"); + + // End the stub. + __ end_a_stub(); +#else + ShouldNotReachHere(); +#endif +} +#undef __ + +// Size of java_to_interp stub, this doesn't need to be accurate but it must +// be larger or equal to the real size of the stub. +// Used for optimization in Compile::Shorten_branches. +int CompiledStaticCall::to_interp_stub_size() { + return 12 * BytesPerInstWord; +} + +// Relocation entries for call stub, compiled java to interpreter. +// Used for optimization in Compile::Shorten_branches. +int CompiledStaticCall::reloc_to_interp_stub() { + return 5; +} + +void CompiledStaticCall::set_to_interpreted(methodHandle callee, address entry) { + address stub = find_stub(); + guarantee(stub != NULL, "stub not found"); + + if (TraceICs) { + ResourceMark rm; + tty->print_cr("CompiledStaticCall@" INTPTR_FORMAT ": set_to_interpreted %s", + instruction_address(), + callee->name_and_sig_as_C_string()); + } + + // Creation also verifies the object. + NativeMovConstReg* method_holder = nativeMovConstReg_at(stub + IC_pos_in_java_to_interp_stub); + NativeJump* jump = nativeJump_at(method_holder->next_instruction_address()); + + assert(method_holder->data() == 0 || method_holder->data() == (intptr_t)callee(), + "a) MT-unsafe modification of inline cache"); + assert(jump->jump_destination() == (address)-1 || jump->jump_destination() == entry, + "b) MT-unsafe modification of inline cache"); + + // Update stub. + method_holder->set_data((intptr_t)callee()); + jump->set_jump_destination(entry); + + // Update jump to call. + set_destination_mt_safe(stub); +} + +void CompiledStaticCall::set_stub_to_clean(static_stub_Relocation* static_stub) { + assert (CompiledIC_lock->is_locked() || SafepointSynchronize::is_at_safepoint(), "mt unsafe call"); + // Reset stub. + address stub = static_stub->addr(); + assert(stub != NULL, "stub not found"); + // Creation also verifies the object. + NativeMovConstReg* method_holder = nativeMovConstReg_at(stub + IC_pos_in_java_to_interp_stub); + NativeJump* jump = nativeJump_at(method_holder->next_instruction_address()); + method_holder->set_data(0); + jump->set_jump_destination((address)-1); +} + +//----------------------------------------------------------------------------- +// Non-product mode code +#ifndef PRODUCT + +void CompiledStaticCall::verify() { + // Verify call. + NativeCall::verify(); + if (os::is_MP()) { + verify_alignment(); + } + + // Verify stub. 
+ address stub = find_stub(); + assert(stub != NULL, "no stub found for static call"); + // Creation also verifies the object. + NativeMovConstReg* method_holder = nativeMovConstReg_at(stub + IC_pos_in_java_to_interp_stub); + NativeJump* jump = nativeJump_at(method_holder->next_instruction_address()); + + // Verify state. + assert(is_clean() || is_call_to_compiled() || is_call_to_interpreted(), "sanity check"); +} + +#endif // !PRODUCT diff --git a/src/cpu/ppc/vm/copy_ppc.hpp b/src/cpu/ppc/vm/copy_ppc.hpp new file mode 100644 --- /dev/null +++ b/src/cpu/ppc/vm/copy_ppc.hpp @@ -0,0 +1,167 @@ +/* + * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright 2012, 2013 SAP AG. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_PPC_VM_COPY_PPC_HPP +#define CPU_PPC_VM_COPY_PPC_HPP + +#ifndef PPC64 +#error "copy currently only implemented for PPC64" +#endif + +// Inline functions for memory copy and fill. + +static void pd_conjoint_words(HeapWord* from, HeapWord* to, size_t count) { + (void)memmove(to, from, count * HeapWordSize); +} + +static void pd_disjoint_words(HeapWord* from, HeapWord* to, size_t count) { + switch (count) { + case 8: to[7] = from[7]; + case 7: to[6] = from[6]; + case 6: to[5] = from[5]; + case 5: to[4] = from[4]; + case 4: to[3] = from[3]; + case 3: to[2] = from[2]; + case 2: to[1] = from[1]; + case 1: to[0] = from[0]; + case 0: break; + default: (void)memcpy(to, from, count * HeapWordSize); + break; + } +} + +static void pd_disjoint_words_atomic(HeapWord* from, HeapWord* to, size_t count) { + switch (count) { + case 8: to[7] = from[7]; + case 7: to[6] = from[6]; + case 6: to[5] = from[5]; + case 5: to[4] = from[4]; + case 4: to[3] = from[3]; + case 3: to[2] = from[2]; + case 2: to[1] = from[1]; + case 1: to[0] = from[0]; + case 0: break; + default: while (count-- > 0) { + *to++ = *from++; + } + break; + } +} + +static void pd_aligned_conjoint_words(HeapWord* from, HeapWord* to, size_t count) { + (void)memmove(to, from, count * HeapWordSize); +} + +static void pd_aligned_disjoint_words(HeapWord* from, HeapWord* to, size_t count) { + pd_disjoint_words(from, to, count); +} + +static void pd_conjoint_bytes(void* from, void* to, size_t count) { + (void)memmove(to, from, count); +} + +static void pd_conjoint_bytes_atomic(void* from, void* to, size_t count) { + (void)memmove(to, from, count); +} + +// Template for atomic, element-wise copy. 
+template +static void copy_conjoint_atomic(T* from, T* to, size_t count) { + if (from > to) { + while (count-- > 0) { + // Copy forwards + *to++ = *from++; + } + } else { + from += count - 1; + to += count - 1; + while (count-- > 0) { + // Copy backwards + *to-- = *from--; + } + } +} + +static void pd_conjoint_jshorts_atomic(jshort* from, jshort* to, size_t count) { + copy_conjoint_atomic(from, to, count); +} + +static void pd_conjoint_jints_atomic(jint* from, jint* to, size_t count) { + copy_conjoint_atomic(from, to, count); +} + +static void pd_conjoint_jlongs_atomic(jlong* from, jlong* to, size_t count) { + copy_conjoint_atomic(from, to, count); +} + +static void pd_conjoint_oops_atomic(oop* from, oop* to, size_t count) { + copy_conjoint_atomic(from, to, count); +} + +static void pd_arrayof_conjoint_bytes(HeapWord* from, HeapWord* to, size_t count) { + pd_conjoint_bytes_atomic(from, to, count); +} + +static void pd_arrayof_conjoint_jshorts(HeapWord* from, HeapWord* to, size_t count) { + pd_conjoint_jshorts_atomic((jshort*)from, (jshort*)to, count); +} + +static void pd_arrayof_conjoint_jints(HeapWord* from, HeapWord* to, size_t count) { + pd_conjoint_jints_atomic((jint*)from, (jint*)to, count); +} + +static void pd_arrayof_conjoint_jlongs(HeapWord* from, HeapWord* to, size_t count) { + pd_conjoint_jlongs_atomic((jlong*)from, (jlong*)to, count); +} + +static void pd_arrayof_conjoint_oops(HeapWord* from, HeapWord* to, size_t count) { + pd_conjoint_oops_atomic((oop*)from, (oop*)to, count); +} + +static void pd_fill_to_words(HeapWord* tohw, size_t count, juint value) { + julong* to = (julong*)tohw; + julong v = ((julong)value << 32) | value; + while (count-- > 0) { + *to++ = v; + } +} + +static void pd_fill_to_aligned_words(HeapWord* tohw, size_t count, juint value) { + pd_fill_to_words(tohw, count, value); +} + +static void pd_fill_to_bytes(void* to, size_t count, jubyte value) { + (void)memset(to, value, count); +} + +static void pd_zero_to_words(HeapWord* tohw, size_t count) { + pd_fill_to_words(tohw, count, 0); +} + +static void pd_zero_to_bytes(void* to, size_t count) { + (void)memset(to, 0, count); +} + +#endif // CPU_PPC_VM_COPY_PPC_HPP diff --git a/src/cpu/ppc/vm/cppInterpreterGenerator_ppc.hpp b/src/cpu/ppc/vm/cppInterpreterGenerator_ppc.hpp new file mode 100644 --- /dev/null +++ b/src/cpu/ppc/vm/cppInterpreterGenerator_ppc.hpp @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright 2012, 2013 SAP AG. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef CPU_PPC_VM_CPPINTERPRETERGENERATOR_PPC_HPP +#define CPU_PPC_VM_CPPINTERPRETERGENERATOR_PPC_HPP + + address generate_normal_entry(void); + address generate_native_entry(void); + + void lock_method(void); + void unlock_method(void); + + void generate_counter_incr(Label& overflow); + void generate_counter_overflow(Label& do_continue); + + void generate_more_monitors(); + void generate_deopt_handling(Register result_index); + + void generate_compute_interpreter_state(Label& exception_return); + +#endif // CPU_PPC_VM_CPPINTERPRETERGENERATOR_PPC_HPP diff --git a/src/cpu/ppc/vm/cppInterpreter_ppc.cpp b/src/cpu/ppc/vm/cppInterpreter_ppc.cpp new file mode 100644 --- /dev/null +++ b/src/cpu/ppc/vm/cppInterpreter_ppc.cpp @@ -0,0 +1,3044 @@ +/* + * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright 2012, 2013 SAP AG. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/assembler.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "interpreter/bytecodeHistogram.hpp" +#include "interpreter/cppInterpreter.hpp" +#include "interpreter/interpreter.hpp" +#include "interpreter/interpreterGenerator.hpp" +#include "interpreter/interpreterRuntime.hpp" +#include "oops/arrayOop.hpp" +#include "oops/methodData.hpp" +#include "oops/method.hpp" +#include "oops/oop.inline.hpp" +#include "prims/jvmtiExport.hpp" +#include "prims/jvmtiThreadState.hpp" +#include "runtime/arguments.hpp" +#include "runtime/deoptimization.hpp" +#include "runtime/frame.inline.hpp" +#include "runtime/interfaceSupport.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" +#include "runtime/synchronizer.hpp" +#include "runtime/timer.hpp" +#include "runtime/vframeArray.hpp" +#include "utilities/debug.hpp" +#ifdef SHARK +#include "shark/shark_globals.hpp" +#endif + +#ifdef CC_INTERP + +#define __ _masm-> + +// Contains is used for identifying interpreter frames during a stack-walk. +// A frame with a PC in InterpretMethod must be identified as a normal C frame. 
+bool CppInterpreter::contains(address pc) { + return _code->contains(pc); +} + +#ifdef PRODUCT +#define BLOCK_COMMENT(str) // nothing +#else +#define BLOCK_COMMENT(str) __ block_comment(str) +#endif + +#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") + +static address interpreter_frame_manager = NULL; +static address frame_manager_specialized_return = NULL; +static address native_entry = NULL; + +static address interpreter_return_address = NULL; + +static address unctrap_frame_manager_entry = NULL; + +static address deopt_frame_manager_return_atos = NULL; +static address deopt_frame_manager_return_btos = NULL; +static address deopt_frame_manager_return_itos = NULL; +static address deopt_frame_manager_return_ltos = NULL; +static address deopt_frame_manager_return_ftos = NULL; +static address deopt_frame_manager_return_dtos = NULL; +static address deopt_frame_manager_return_vtos = NULL; + +// A result handler converts/unboxes a native call result into +// a java interpreter/compiler result. The current frame is an +// interpreter frame. +address CppInterpreterGenerator::generate_result_handler_for(BasicType type) { + return AbstractInterpreterGenerator::generate_result_handler_for(type); +} + +// tosca based result to c++ interpreter stack based result. +address CppInterpreterGenerator::generate_tosca_to_stack_converter(BasicType type) { + // + // A result is in the native abi result register from a native + // method call. We need to return this result to the interpreter by + // pushing the result on the interpreter's stack. + // + // Registers alive: + // R3_ARG1(R3_RET)/F1_ARG1(F1_RET) - result to move + // R4_ARG2 - address of tos + // LR + // + // Registers updated: + // R3_RET(R3_ARG1) - address of new tos (== R17_tos for T_VOID) + // + + int number_of_used_slots = 1; + + const Register tos = R4_ARG2; + Label done; + Label is_false; + + address entry = __ pc(); + + switch (type) { + case T_BOOLEAN: + __ cmpwi(CCR0, R3_RET, 0); + __ beq(CCR0, is_false); + __ li(R3_RET, 1); + __ stw(R3_RET, 0, tos); + __ b(done); + __ bind(is_false); + __ li(R3_RET, 0); + __ stw(R3_RET, 0, tos); + break; + case T_BYTE: + case T_CHAR: + case T_SHORT: + case T_INT: + __ stw(R3_RET, 0, tos); + break; + case T_LONG: + number_of_used_slots = 2; + // mark unused slot for debugging + // long goes to topmost slot + __ std(R3_RET, -BytesPerWord, tos); + __ li(R3_RET, 0); + __ std(R3_RET, 0, tos); + break; + case T_OBJECT: + __ verify_oop(R3_RET); + __ std(R3_RET, 0, tos); + break; + case T_FLOAT: + __ stfs(F1_RET, 0, tos); + break; + case T_DOUBLE: + number_of_used_slots = 2; + // mark unused slot for debugging + __ li(R3_RET, 0); + __ std(R3_RET, 0, tos); + // double goes to topmost slot + __ stfd(F1_RET, -BytesPerWord, tos); + break; + case T_VOID: + number_of_used_slots = 0; + break; + default: + ShouldNotReachHere(); + } + + __ BIND(done); + + // new expression stack top + __ addi(R3_RET, tos, -BytesPerWord * number_of_used_slots); + + __ blr(); + + return entry; +} + +address CppInterpreterGenerator::generate_stack_to_stack_converter(BasicType type) { + // + // Copy the result from the callee's stack to the caller's stack, + // caller and callee both being interpreted. + // + // Registers alive + // R3_ARG1 - address of callee's tos + BytesPerWord + // R4_ARG2 - address of caller's tos [i.e. free location] + // LR + // + // stack grows upwards, memory grows downwards. + // + // [ free ] <-- callee's tos + // [ optional result ] <-- R3_ARG1 + // [ optional dummy ] + // ... 
+ // [ free ] <-- caller's tos, R4_ARG2 + // ... + // Registers updated + // R3_RET(R3_ARG1) - address of caller's new tos + // + // stack grows upwards, memory grows downwards. + // + // [ free ] <-- current tos, R3_RET + // [ optional result ] + // [ optional dummy ] + // ... + // + + const Register from = R3_ARG1; + const Register ret = R3_ARG1; + const Register tos = R4_ARG2; + const Register tmp1 = R21_tmp1; + const Register tmp2 = R22_tmp2; + + address entry = __ pc(); + + switch (type) { + case T_BOOLEAN: + case T_BYTE: + case T_CHAR: + case T_SHORT: + case T_INT: + case T_FLOAT: + __ lwz(tmp1, 0, from); + __ stw(tmp1, 0, tos); + // New expression stack top. + __ addi(ret, tos, - BytesPerWord); + break; + case T_LONG: + case T_DOUBLE: + // Move both entries for debug purposes even though only one is live. + __ ld(tmp1, BytesPerWord, from); + __ ld(tmp2, 0, from); + __ std(tmp1, 0, tos); + __ std(tmp2, -BytesPerWord, tos); + // New expression stack top. + __ addi(ret, tos, - 2 * BytesPerWord); // two slots + break; + case T_OBJECT: + __ ld(tmp1, 0, from); + __ verify_oop(tmp1); + __ std(tmp1, 0, tos); + // New expression stack top. + __ addi(ret, tos, - BytesPerWord); + break; + case T_VOID: + // New expression stack top. + __ mr(ret, tos); + break; + default: + ShouldNotReachHere(); + } + + __ blr(); + + return entry; +} + +address CppInterpreterGenerator::generate_stack_to_native_abi_converter(BasicType type) { + // + // Load a result from the callee's stack into the caller's expecting + // return register, callee being interpreted, caller being call stub + // or jit code. + // + // Registers alive + // R3_ARG1 - callee expression tos + BytesPerWord + // LR + // + // stack grows upwards, memory grows downwards. + // + // [ free ] <-- callee's tos + // [ optional result ] <-- R3_ARG1 + // [ optional dummy ] + // ... + // + // Registers updated + // R3_RET(R3_ARG1)/F1_RET - result + // + + const Register from = R3_ARG1; + const Register ret = R3_ARG1; + const FloatRegister fret = F1_ARG1; + + address entry = __ pc(); + + // Implemented uniformly for both kinds of endianness. The interpreter + // implements boolean, byte, char, and short as jint (4 bytes). 
+ switch (type) { + case T_BOOLEAN: + case T_CHAR: + // zero extension + __ lwz(ret, 0, from); + break; + case T_BYTE: + case T_SHORT: + case T_INT: + // sign extension + __ lwa(ret, 0, from); + break; + case T_LONG: + __ ld(ret, 0, from); + break; + case T_OBJECT: + __ ld(ret, 0, from); + __ verify_oop(ret); + break; + case T_FLOAT: + __ lfs(fret, 0, from); + break; + case T_DOUBLE: + __ lfd(fret, 0, from); + break; + case T_VOID: + break; + default: + ShouldNotReachHere(); + } + + __ blr(); + + return entry; +} + +address CppInterpreter::return_entry(TosState state, int length) { + assert(interpreter_return_address != NULL, "Not initialized"); + return interpreter_return_address; +} + +address CppInterpreter::deopt_entry(TosState state, int length) { + address ret = NULL; + if (length != 0) { + switch (state) { + case atos: ret = deopt_frame_manager_return_atos; break; + case btos: ret = deopt_frame_manager_return_itos; break; + case ctos: + case stos: + case itos: ret = deopt_frame_manager_return_itos; break; + case ltos: ret = deopt_frame_manager_return_ltos; break; + case ftos: ret = deopt_frame_manager_return_ftos; break; + case dtos: ret = deopt_frame_manager_return_dtos; break; + case vtos: ret = deopt_frame_manager_return_vtos; break; + default: ShouldNotReachHere(); + } + } else { + ret = unctrap_frame_manager_entry; // re-execute the bytecode (e.g. uncommon trap, popframe) + } + assert(ret != NULL, "Not initialized"); + return ret; +} + +// +// Helpers for commoning out cases in the various type of method entries. +// + +// +// Registers alive +// R16_thread - JavaThread* +// R1_SP - old stack pointer +// R19_method - callee's Method +// R17_tos - address of caller's tos (prepushed) +// R15_prev_state - address of caller's BytecodeInterpreter or 0 +// return_pc in R21_tmp15 (only when called within generate_native_entry) +// +// Registers updated +// R14_state - address of callee's interpreter state +// R1_SP - new stack pointer +// CCR4_is_synced - current method is synchronized +// +void CppInterpreterGenerator::generate_compute_interpreter_state(Label& stack_overflow_return) { + // + // Stack layout at this point: + // + // F1 [TOP_IJAVA_FRAME_ABI] <-- R1_SP + // alignment (optional) + // [F1's outgoing Java arguments] <-- R17_tos + // ... + // F2 [PARENT_IJAVA_FRAME_ABI] + // ... + + //============================================================================= + // Allocate space for locals other than the parameters, the + // interpreter state, monitors, and the expression stack. + + const Register local_count = R21_tmp1; + const Register parameter_count = R22_tmp2; + const Register max_stack = R23_tmp3; + // Must not be overwritten within this method! 
+ // const Register return_pc = R29_tmp9; + + const ConditionRegister is_synced = CCR4_is_synced; + const ConditionRegister is_native = CCR6; + const ConditionRegister is_static = CCR7; + + assert(is_synced != is_native, "condition code registers must be distinct"); + assert(is_synced != is_static, "condition code registers must be distinct"); + assert(is_native != is_static, "condition code registers must be distinct"); + + { + + // Local registers + const Register top_frame_size = R24_tmp4; + const Register access_flags = R25_tmp5; + const Register state_offset = R26_tmp6; + Register mem_stack_limit = R27_tmp7; + const Register page_size = R28_tmp8; + + BLOCK_COMMENT("compute_interpreter_state {"); + + // access_flags = method->access_flags(); + // TODO: PPC port: assert(4 == methodOopDesc::sz_access_flags(), "unexpected field size"); + __ lwa(access_flags, method_(access_flags)); + + // parameter_count = method->constMethod->size_of_parameters(); + // TODO: PPC port: assert(2 == ConstMethod::sz_size_of_parameters(), "unexpected field size"); + __ ld(max_stack, in_bytes(Method::const_offset()), R19_method); // Max_stack holds constMethod for a while. + __ lhz(parameter_count, in_bytes(ConstMethod::size_of_parameters_offset()), max_stack); + + // local_count = method->constMethod()->max_locals(); + // TODO: PPC port: assert(2 == ConstMethod::sz_max_locals(), "unexpected field size"); + __ lhz(local_count, in_bytes(ConstMethod::size_of_locals_offset()), max_stack); + + // max_stack = method->constMethod()->max_stack(); + // TODO: PPC port: assert(2 == ConstMethod::sz_max_stack(), "unexpected field size"); + __ lhz(max_stack, in_bytes(ConstMethod::max_stack_offset()), max_stack); + + if (EnableInvokeDynamic) { + // Take into account 'extra_stack_entries' needed by method handles (see method.hpp). + __ addi(max_stack, max_stack, Method::extra_stack_entries()); + } + + // mem_stack_limit = thread->stack_limit(); + __ ld(mem_stack_limit, thread_(stack_overflow_limit)); + + // Point locals at the first argument. Method's locals are the + // parameters on top of caller's expression stack. + + // tos points past last Java argument + __ sldi(R18_locals, parameter_count, Interpreter::logStackElementSize); + __ add(R18_locals, R17_tos, R18_locals); + + // R18_locals - i*BytesPerWord points to i-th Java local (i starts at 0) + + // Set is_native, is_synced, is_static - will be used later. + __ testbitdi(is_native, R0, access_flags, JVM_ACC_NATIVE_BIT); + __ testbitdi(is_synced, R0, access_flags, JVM_ACC_SYNCHRONIZED_BIT); + assert(is_synced->is_nonvolatile(), "is_synced must be non-volatile"); + __ testbitdi(is_static, R0, access_flags, JVM_ACC_STATIC_BIT); + + // PARENT_IJAVA_FRAME_ABI + // + // frame_size = + // round_to((local_count - parameter_count)*BytesPerWord + + // 2*BytesPerWord + + // alignment + + // frame::interpreter_frame_cinterpreterstate_size_in_bytes() + // sizeof(PARENT_IJAVA_FRAME_ABI) + // method->is_synchronized() ? sizeof(BasicObjectLock) : 0 + + // max_stack*BytesPerWord, + // 16) + // + // Note that this calculation is exactly mirrored by + // AbstractInterpreter::layout_activation_impl() [ and + // AbstractInterpreter::size_activation() ]. Which is used by + // deoptimization so that it can allocate the proper sized + // frame. This only happens for interpreted frames so the extra + // notes below about max_stack below are not important. 
The other + // thing to note is that for interpreter frames other than the + // current activation the size of the stack is the size of the live + // portion of the stack at the particular bcp and NOT the maximum + // stack that the method might use. + // + // If we're calling a native method, we replace max_stack (which is + // zero) with space for the worst-case signature handler varargs + // vector, which is: + // + // max_stack = max(Argument::n_register_parameters, parameter_count+2); + // + // We add two slots to the parameter_count, one for the jni + // environment and one for a possible native mirror. We allocate + // space for at least the number of ABI registers, even though + // InterpreterRuntime::slow_signature_handler won't write more than + // parameter_count+2 words when it creates the varargs vector at the + // top of the stack. The generated slow signature handler will just + // load trash into registers beyond the necessary number. We're + // still going to cut the stack back by the ABI register parameter + // count so as to get SP+16 pointing at the ABI outgoing parameter + // area, so we need to allocate at least that much even though we're + // going to throw it away. + // + + // Adjust max_stack for native methods: + Label skip_native_calculate_max_stack; + __ bfalse(is_native, skip_native_calculate_max_stack); + // if (is_native) { + // max_stack = max(Argument::n_register_parameters, parameter_count+2); + __ addi(max_stack, parameter_count, 2*Interpreter::stackElementWords); + __ cmpwi(CCR0, max_stack, Argument::n_register_parameters); + __ bge(CCR0, skip_native_calculate_max_stack); + __ li(max_stack, Argument::n_register_parameters); + // } + __ bind(skip_native_calculate_max_stack); + // max_stack is now in bytes + __ slwi(max_stack, max_stack, Interpreter::logStackElementSize); + + // Calculate number of non-parameter locals (in slots): + Label not_java; + __ btrue(is_native, not_java); + // if (!is_native) { + // local_count = non-parameter local count + __ sub(local_count, local_count, parameter_count); + // } else { + // // nothing to do: method->max_locals() == 0 for native methods + // } + __ bind(not_java); + + + // Calculate top_frame_size and parent_frame_resize. 
+ { + const Register parent_frame_resize = R12_scratch2; + + BLOCK_COMMENT("Compute top_frame_size."); + // top_frame_size = TOP_IJAVA_FRAME_ABI + // + size of interpreter state + __ li(top_frame_size, frame::top_ijava_frame_abi_size + + frame::interpreter_frame_cinterpreterstate_size_in_bytes()); + // + max_stack + __ add(top_frame_size, top_frame_size, max_stack); + // + stack slots for a BasicObjectLock for synchronized methods + { + Label not_synced; + __ bfalse(is_synced, not_synced); + __ addi(top_frame_size, top_frame_size, frame::interpreter_frame_monitor_size_in_bytes()); + __ bind(not_synced); + } + // align + __ round_to(top_frame_size, frame::alignment_in_bytes); + + + BLOCK_COMMENT("Compute parent_frame_resize."); + // parent_frame_resize = R1_SP - R17_tos + __ sub(parent_frame_resize, R1_SP, R17_tos); + //__ li(parent_frame_resize, 0); + // + PARENT_IJAVA_FRAME_ABI + // + extra two slots for the no-parameter/no-locals + // method result + __ addi(parent_frame_resize, parent_frame_resize, + frame::parent_ijava_frame_abi_size + + 2*Interpreter::stackElementSize); + // + (locals_count - params_count) + __ sldi(R0, local_count, Interpreter::logStackElementSize); + __ add(parent_frame_resize, parent_frame_resize, R0); + // align + __ round_to(parent_frame_resize, frame::alignment_in_bytes); + + // + // Stack layout at this point: + // + // The new frame F0 hasn't yet been pushed, F1 is still the top frame. + // + // F0 [TOP_IJAVA_FRAME_ABI] + // alignment (optional) + // [F0's full operand stack] + // [F0's monitors] (optional) + // [F0's BytecodeInterpreter object] + // F1 [PARENT_IJAVA_FRAME_ABI] + // alignment (optional) + // [F0's Java result] + // [F0's non-arg Java locals] + // [F1's outgoing Java arguments] <-- R17_tos + // ... + // F2 [PARENT_IJAVA_FRAME_ABI] + // ... + + + // Calculate new R14_state + // and + // test that the new memory stack pointer is above the limit, + // throw a StackOverflowError otherwise. + __ sub(R11_scratch1/*F1's SP*/, R1_SP, parent_frame_resize); + __ addi(R14_state, R11_scratch1/*F1's SP*/, + -frame::interpreter_frame_cinterpreterstate_size_in_bytes()); + __ sub(R11_scratch1/*F0's SP*/, + R11_scratch1/*F1's SP*/, top_frame_size); + + BLOCK_COMMENT("Test for stack overflow:"); + __ cmpld(CCR0/*is_stack_overflow*/, R11_scratch1, mem_stack_limit); + __ blt(CCR0/*is_stack_overflow*/, stack_overflow_return); + + + //============================================================================= + // Frame_size doesn't overflow the stack. Allocate new frame and + // initialize interpreter state. + + // Register state + // + // R15 - local_count + // R16 - parameter_count + // R17 - max_stack + // + // R18 - frame_size + // R19 - access_flags + // CCR4_is_synced - is_synced + // + // GR_Lstate - pointer to the uninitialized new BytecodeInterpreter. + + // _last_Java_pc just needs to be close enough that we can identify + // the frame as an interpreted frame. It does not need to be the + // exact return address from either calling + // BytecodeInterpreter::InterpretMethod or the call to a jni native method. + // So we can initialize it here with a value of a bundle in this + // code fragment. We only do this initialization for java frames + // where InterpretMethod needs a a way to get a good pc value to + // store in the thread state. For interpreter frames used to call + // jni native code we just zero the value in the state and move an + // ip as needed in the native entry code. 
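The top_frame_size computation above adds the fixed ABI and interpreter-state sizes to the (byte-sized) expression stack, makes room for one BasicObjectLock if the method is synchronized, and rounds the sum up to the 16-byte frame alignment. A self-contained sketch of that arithmetic; the parameter names are stand-ins for the real frame:: and Interpreter:: values:

#include <cstddef>

// Round x up to a power-of-two alignment, like __ round_to(reg, 16).
static size_t round_to(size_t x, size_t alignment) {
  return (x + alignment - 1) & ~(alignment - 1);
}

// Stand-in for the computation above; real sizes come from frame:: and Interpreter::.
static size_t top_frame_size(size_t abi_size,         // frame::top_ijava_frame_abi_size
                             size_t istate_size,      // interpreter_frame_cinterpreterstate_size_in_bytes()
                             size_t max_stack_bytes,  // max_stack << logStackElementSize
                             bool   is_synchronized,
                             size_t monitor_size) {   // interpreter_frame_monitor_size_in_bytes()
  size_t size = abi_size + istate_size + max_stack_bytes;
  if (is_synchronized) size += monitor_size;          // one BasicObjectLock
  return round_to(size, 16);                          // frame::alignment_in_bytes
}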
+ // + // const Register last_Java_pc_addr = GR24_SCRATCH; // QQQ 27 + // const Register last_Java_pc = GR26_SCRATCH; + + // Must reference stack before setting new SP since Windows + // will not be able to deliver the exception on a bad SP. + // Windows also insists that we bang each page one at a time in order + // for the OS to map in the reserved pages. If we bang only + // the final page, Windows stops delivering exceptions to our + // VectoredExceptionHandler and terminates our program. + // Linux only requires a single bang but it's rare to have + // to bang more than 1 page so the code is enabled for both OS's. + + // BANG THE STACK + // + // Nothing to do for PPC, because updating the SP will automatically + // bang the page. + + // Up to here we have calculated the delta for the new C-frame and + // checked for a stack-overflow. Now we can savely update SP and + // resize the C-frame. + + // R14_state has already been calculated. + __ push_interpreter_frame(top_frame_size, parent_frame_resize, + R25_tmp5, R26_tmp6, R27_tmp7, R28_tmp8); + + } + + // + // Stack layout at this point: + // + // F0 has been been pushed! + // + // F0 [TOP_IJAVA_FRAME_ABI] <-- R1_SP + // alignment (optional) (now it's here, if required) + // [F0's full operand stack] + // [F0's monitors] (optional) + // [F0's BytecodeInterpreter object] + // F1 [PARENT_IJAVA_FRAME_ABI] + // alignment (optional) (now it's here, if required) + // [F0's Java result] + // [F0's non-arg Java locals] + // [F1's outgoing Java arguments] + // ... + // F2 [PARENT_IJAVA_FRAME_ABI] + // ... + // + // R14_state points to F0's BytecodeInterpreter object. + // + + } + + //============================================================================= + // new BytecodeInterpreter-object is save, let's initialize it: + BLOCK_COMMENT("New BytecodeInterpreter-object is save."); + + { + // Locals + const Register bytecode_addr = R24_tmp4; + const Register constants = R25_tmp5; + const Register tos = R26_tmp6; + const Register stack_base = R27_tmp7; + const Register local_addr = R28_tmp8; + { + Label L; + __ btrue(is_native, L); + // if (!is_native) { + // bytecode_addr = constMethod->codes(); + __ ld(bytecode_addr, method_(const)); + __ addi(bytecode_addr, bytecode_addr, in_bytes(ConstMethod::codes_offset())); + // } + __ bind(L); + } + + __ ld(constants, in_bytes(Method::const_offset()), R19_method); + __ ld(constants, in_bytes(ConstMethod::constants_offset()), constants); + + // state->_prev_link = prev_state; + __ std(R15_prev_state, state_(_prev_link)); + + // For assertions only. + // TODO: not needed anyway because it coincides with `_monitor_base'. remove! + // state->_self_link = state; + DEBUG_ONLY(__ std(R14_state, state_(_self_link));) + + // state->_thread = thread; + __ std(R16_thread, state_(_thread)); + + // state->_method = method; + __ std(R19_method, state_(_method)); + + // state->_locals = locals; + __ std(R18_locals, state_(_locals)); + + // state->_oop_temp = NULL; + __ li(R0, 0); + __ std(R0, state_(_oop_temp)); + + // state->_last_Java_fp = *R1_SP // Use *R1_SP as fp + __ ld(R0, _abi(callers_sp), R1_SP); + __ std(R0, state_(_last_Java_fp)); + + BLOCK_COMMENT("load Stack base:"); + { + // Stack_base. 
+ // if (!method->synchronized()) { + // stack_base = state; + // } else { + // stack_base = (uintptr_t)state - sizeof(BasicObjectLock); + // } + Label L; + __ mr(stack_base, R14_state); + __ bfalse(is_synced, L); + __ addi(stack_base, stack_base, -frame::interpreter_frame_monitor_size_in_bytes()); + __ bind(L); + } + + // state->_mdx = NULL; + __ li(R0, 0); + __ std(R0, state_(_mdx)); + + { + // if (method->is_native()) state->_bcp = NULL; + // else state->_bcp = bytecode_addr; + Label label1, label2; + __ bfalse(is_native, label1); + __ std(R0, state_(_bcp)); + __ b(label2); + __ bind(label1); + __ std(bytecode_addr, state_(_bcp)); + __ bind(label2); + } + + + // state->_result._to_call._callee = NULL; + __ std(R0, state_(_result._to_call._callee)); + + // state->_monitor_base = state; + __ std(R14_state, state_(_monitor_base)); + + // state->_msg = BytecodeInterpreter::method_entry; + __ li(R0, BytecodeInterpreter::method_entry); + __ stw(R0, state_(_msg)); + + // state->_last_Java_sp = R1_SP; + __ std(R1_SP, state_(_last_Java_sp)); + + // state->_stack_base = stack_base; + __ std(stack_base, state_(_stack_base)); + + // tos = stack_base - 1 slot (prepushed); + // state->_stack.Tos(tos); + __ addi(tos, stack_base, - Interpreter::stackElementSize); + __ std(tos, state_(_stack)); + + + { + BLOCK_COMMENT("get last_Java_pc:"); + // if (!is_native) state->_last_Java_pc = ; + // else state->_last_Java_pc = NULL; (just for neatness) + Label label1, label2; + __ btrue(is_native, label1); + __ get_PC_trash_LR(R0); + __ std(R0, state_(_last_Java_pc)); + __ b(label2); + __ bind(label1); + __ li(R0, 0); + __ std(R0, state_(_last_Java_pc)); + __ bind(label2); + } + + + // stack_limit = tos - max_stack; + __ sub(R0, tos, max_stack); + // state->_stack_limit = stack_limit; + __ std(R0, state_(_stack_limit)); + + + // cache = method->constants()->cache(); + __ ld(R0, ConstantPool::cache_offset_in_bytes(), constants); + // state->_constants = method->constants()->cache(); + __ std(R0, state_(_constants)); + + + + //============================================================================= + // synchronized method, allocate and initialize method object lock. 
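A compact way to read the stack bookkeeping initialized above: the expression stack grows towards lower addresses, stack_base sits at the interpreter state (or one monitor below it for synchronized methods), tos is pre-pushed by one slot, and stack_limit lies max_stack slots below tos. A hypothetical, slot-based C++ sketch (the generated code works in bytes):

#include <cstddef>
#include <cstdint>

struct StackBoundsSketch {   // illustrative names, not the real BytecodeInterpreter fields
  intptr_t* stack_base;
  intptr_t* tos;
  intptr_t* stack_limit;
};

StackBoundsSketch compute_stack_bounds(intptr_t* state, bool is_synchronized,
                                       ptrdiff_t monitor_slots, ptrdiff_t max_stack_slots) {
  StackBoundsSketch b;
  b.stack_base  = is_synchronized ? state - monitor_slots : state;  // room for the method's BasicObjectLock
  b.tos         = b.stack_base - 1;                                 // prepushed tos
  b.stack_limit = b.tos - max_stack_slots;                          // _stack_limit
  return b;
}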
+ // if (!method->is_synchronized()) goto fill_locals_with_0x0s; + Label fill_locals_with_0x0s; + __ bfalse(is_synced, fill_locals_with_0x0s); + + // pool_holder = method->constants()->pool_holder(); + const int mirror_offset = in_bytes(Klass::java_mirror_offset()); + { + Label label1, label2; + // lockee = NULL; for java methods, correct value will be inserted in BytecodeInterpretMethod.hpp + __ li(R0,0); + __ bfalse(is_native, label2); + + __ bfalse(is_static, label1); + // if (method->is_static()) lockee = + // pool_holder->klass_part()->java_mirror(); + __ ld(R11_scratch1/*pool_holder*/, ConstantPool::pool_holder_offset_in_bytes(), constants); + __ ld(R0/*lockee*/, mirror_offset, R11_scratch1/*pool_holder*/); + __ b(label2); + + __ bind(label1); + // else lockee = *(oop*)locals; + __ ld(R0/*lockee*/, 0, R18_locals); + __ bind(label2); + + // monitor->set_obj(lockee); + __ std(R0/*lockee*/, BasicObjectLock::obj_offset_in_bytes(), stack_base); + } + + // See if we need to zero the locals + __ BIND(fill_locals_with_0x0s); + + + //============================================================================= + // fill locals with 0x0s + Label locals_zeroed; + __ btrue(is_native, locals_zeroed); + + if (true /* zerolocals */ || ClearInterpreterLocals) { + // local_count is already num_locals_slots - num_param_slots + __ sldi(R0, parameter_count, Interpreter::logStackElementSize); + __ sub(local_addr, R18_locals, R0); + __ cmpdi(CCR0, local_count, 0); + __ ble(CCR0, locals_zeroed); + + __ mtctr(local_count); + //__ ld_const_addr(R0, (address) 0xcafe0000babe); + __ li(R0, 0); + + Label zero_slot; + __ bind(zero_slot); + + // first local is at local_addr + __ std(R0, 0, local_addr); + __ addi(local_addr, local_addr, -BytesPerWord); + __ bdnz(zero_slot); + } + + __ BIND(locals_zeroed); + + } + BLOCK_COMMENT("} compute_interpreter_state"); +} + +// Generate code to initiate compilation on invocation counter overflow. +void CppInterpreterGenerator::generate_counter_overflow(Label& continue_entry) { + // Registers alive + // R14_state + // R16_thread + // + // Registers updated + // R14_state + // R3_ARG1 (=R3_RET) + // R4_ARG2 + + // After entering the vm we remove the activation and retry the + // entry point in case the compilation is complete. + + // InterpreterRuntime::frequency_counter_overflow takes one argument + // that indicates if the counter overflow occurs at a backwards + // branch (NULL bcp). We pass zero. The call returns the address + // of the verified entry point for the method or NULL if the + // compilation did not complete (either went background or bailed + // out). + __ li(R4_ARG2, 0); + + // Pass false to call_VM so it doesn't check for pending exceptions, + // since at this point in the method invocation the exception + // handler would try to exit the monitor of synchronized methods + // which haven't been entered yet. + // + // Returns verified_entry_point or NULL, we don't care which. + // + // Do not use the variant `frequency_counter_overflow' that returns + // a structure, because this will change the argument list by a + // hidden parameter (gcc 4.1). + + __ call_VM(noreg, + CAST_FROM_FN_PTR(address, InterpreterRuntime::frequency_counter_overflow), + R4_ARG2, + false); + // Returns verified_entry_point or NULL, we don't care which as we ignore it + // and run interpreted. + + // Reload method, it may have moved. + __ ld(R19_method, state_(_method)); + + // We jump now to the label "continue_after_compile". 
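The locals-zeroing loop above walks downwards from the first non-parameter local, one machine word per slot, using CTR as the loop counter. The same walk in plain C++ with hypothetical parameter names (recall that R18_locals points at the first local and local i lives at R18_locals - i*BytesPerWord):

#include <cstdint>

void zero_non_parameter_locals(intptr_t* locals,        // what R18_locals points at
                               int parameter_count,     // size_of_parameters()
                               int non_param_locals) {  // local_count after the subtraction above
  intptr_t* slot = locals - parameter_count;            // first non-parameter local
  for (int i = 0; i < non_param_locals; i++) {
    *slot = 0;                                          // std(R0, 0, local_addr)
    slot--;                                             // addi(local_addr, local_addr, -BytesPerWord)
  }
}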
+ __ b(continue_entry); +} + +// Increment invocation count and check for overflow. +// +// R19_method must contain Method* of method to profile. +void CppInterpreterGenerator::generate_counter_incr(Label& overflow) { + Label done; + const Register Rcounters = R12_scratch2; + const Register iv_be_count = R11_scratch1; + const Register invocation_limit = R12_scratch2; + const Register invocation_limit_addr = invocation_limit; + + // Load and ev. allocate MethodCounters object. + __ get_method_counters(R19_method, Rcounters, done); + + // Update standard invocation counters. + __ increment_invocation_counter(Rcounters, iv_be_count, R0); + + // Compare against limit. + BLOCK_COMMENT("Compare counter against limit:"); + assert(4 == sizeof(InvocationCounter::InterpreterInvocationLimit), + "must be 4 bytes"); + __ load_const(invocation_limit_addr, (address)&InvocationCounter::InterpreterInvocationLimit); + __ lwa(invocation_limit, 0, invocation_limit_addr); + __ cmpw(CCR0, iv_be_count, invocation_limit); + __ bge(CCR0, overflow); + __ bind(done); +} + +// +// Call a JNI method. +// +// Interpreter stub for calling a native method. (C++ interpreter) +// This sets up a somewhat different looking stack for calling the native method +// than the typical interpreter frame setup. +// +address CppInterpreterGenerator::generate_native_entry(void) { + if (native_entry != NULL) return native_entry; + address entry = __ pc(); + + // Read + // R16_thread + // R15_prev_state - address of caller's BytecodeInterpreter, if this snippet + // gets called by the frame manager. + // R19_method - callee's Method + // R17_tos - address of caller's tos + // R1_SP - caller's stack pointer + // R21_sender_SP - initial caller sp + // + // Update + // R14_state - address of caller's BytecodeInterpreter + // R3_RET - integer result, if any. + // F1_RET - float result, if any. + // + // + // Stack layout at this point: + // + // 0 [TOP_IJAVA_FRAME_ABI] <-- R1_SP + // alignment (optional) + // [outgoing Java arguments] <-- R17_tos + // ... + // PARENT [PARENT_IJAVA_FRAME_ABI] + // ... + // + + const bool inc_counter = UseCompiler || CountCompiledCalls; + + const Register signature_handler_fd = R21_tmp1; + const Register pending_exception = R22_tmp2; + const Register result_handler_addr = R23_tmp3; + const Register native_method_fd = R24_tmp4; + const Register access_flags = R25_tmp5; + const Register active_handles = R26_tmp6; + const Register sync_state = R27_tmp7; + const Register sync_state_addr = sync_state; // Address is dead after use. + const Register suspend_flags = R24_tmp4; + + const Register return_pc = R28_tmp8; // Register will be locked for some time. + + const ConditionRegister is_synced = CCR4_is_synced; // Live-on-exit from compute_interpreter_state. + + + // R1_SP still points to caller's SP at this point. + + // Save initial_caller_sp to caller's abi. The caller frame must be + // resized before returning to get rid of the c2i arguments (if + // any). + // Override the saved SP with the senderSP so we can pop c2i + // arguments (if any) off when we return + __ std(R21_sender_SP, _top_ijava_frame_abi(initial_caller_sp), R1_SP); + + // Save LR to caller's frame. We don't use _abi(lr) here, because it is not safe. 
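generate_counter_incr above boils down to a bump-and-compare against InvocationCounter::InterpreterInvocationLimit; on overflow the caller branches to the compilation path. A trivial scalar sketch of that test (the real counters are packed fields updated by increment_invocation_counter):

// Illustrative only: true when the method should be handed to the compiler.
bool invocation_counter_overflows(int invocation_count, int interpreter_invocation_limit) {
  return invocation_count >= interpreter_invocation_limit;   // __ bge(CCR0, overflow)
}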
+ __ mflr(return_pc); + __ std(return_pc, _top_ijava_frame_abi(frame_manager_lr), R1_SP); + + assert(return_pc->is_nonvolatile(), "return_pc must be a non-volatile register"); + + __ verify_method_ptr(R19_method); + + //============================================================================= + + // If this snippet gets called by the frame manager (at label + // `call_special'), then R15_prev_state is valid. If this snippet + // is not called by the frame manager, but e.g. by the call stub or + // by compiled code, then R15_prev_state is invalid. + { + // Set R15_prev_state to 0 if we don't return to the frame + // manager; we will return to the call_stub or to compiled code + // instead. If R15_prev_state is 0 there will be only one + // interpreter frame (we will set this up later) in this C frame! + // So we must take care about retrieving prev_state_(_prev_link) + // and restoring R1_SP when popping that interpreter. + Label prev_state_is_valid; + + __ load_const(R11_scratch1/*frame_manager_returnpc_addr*/, (address)&frame_manager_specialized_return); + __ ld(R12_scratch2/*frame_manager_returnpc*/, 0, R11_scratch1/*frame_manager_returnpc_addr*/); + __ cmpd(CCR0, return_pc, R12_scratch2/*frame_manager_returnpc*/); + __ beq(CCR0, prev_state_is_valid); + + __ li(R15_prev_state, 0); + + __ BIND(prev_state_is_valid); + } + + //============================================================================= + // Allocate new frame and initialize interpreter state. + + Label exception_return; + Label exception_return_sync_check; + Label stack_overflow_return; + + // Generate new interpreter state and jump to stack_overflow_return in case of + // a stack overflow. + generate_compute_interpreter_state(stack_overflow_return); + + //============================================================================= + // Increment invocation counter. On overflow, entry to JNI method + // will be compiled. + Label invocation_counter_overflow; + if (inc_counter) { + generate_counter_incr(invocation_counter_overflow); + } + + Label continue_after_compile; + __ BIND(continue_after_compile); + + // access_flags = method->access_flags(); + // Load access flags. + assert(access_flags->is_nonvolatile(), + "access_flags must be in a non-volatile register"); + // Type check. + // TODO: PPC port: assert(4 == methodOopDesc::sz_access_flags(), "unexpected field size"); + __ lwz(access_flags, method_(access_flags)); + + // We don't want to reload R19_method and access_flags after calls + // to some helper functions. + assert(R19_method->is_nonvolatile(), "R19_method must be a non-volatile register"); + + // Check for synchronized methods. Must happen AFTER invocation counter + // check, so method is not locked if counter overflows. + + { + Label method_is_not_synced; + // Is_synced is still alive. + assert(is_synced->is_nonvolatile(), "is_synced must be non-volatile"); + __ bfalse(is_synced, method_is_not_synced); + + lock_method(); + // Reload method, it may have moved. + __ ld(R19_method, state_(_method)); + + __ BIND(method_is_not_synced); + } + + // jvmti/jvmpi support + __ notify_method_entry(); + + // Reload method, it may have moved. + __ ld(R19_method, state_(_method)); + + //============================================================================= + // Get and call the signature handler + + __ ld(signature_handler_fd, method_(signature_handler)); + Label call_signature_handler; + + __ cmpdi(CCR0, signature_handler_fd, 0); + __ bne(CCR0, call_signature_handler); + + // Method has never been called. 
Either generate a specialized + // handler or point to the slow one. + // + // Pass parameter 'false' to avoid exception check in call_VM. + __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::prepare_native_call), R19_method, false); + + // Check for an exception while looking up the target method. If we + // incurred one, bail. + __ ld(pending_exception, thread_(pending_exception)); + __ cmpdi(CCR0, pending_exception, 0); + __ bne(CCR0, exception_return_sync_check); // has pending exception + + // reload method + __ ld(R19_method, state_(_method)); + + // Reload signature handler, it may have been created/assigned in the meanwhile + __ ld(signature_handler_fd, method_(signature_handler)); + + __ BIND(call_signature_handler); + + // Before we call the signature handler we push a new frame to + // protect the interpreter frame volatile registers when we return + // from jni but before we can get back to Java. + + // First set the frame anchor while the SP/FP registers are + // convenient and the slow signature handler can use this same frame + // anchor. + + // We have a TOP_IJAVA_FRAME here, which belongs to us. + __ set_top_ijava_frame_at_SP_as_last_Java_frame(R1_SP, R12_scratch2/*tmp*/); + + // Now the interpreter frame (and its call chain) have been + // invalidated and flushed. We are now protected against eager + // being enabled in native code. Even if it goes eager the + // registers will be reloaded as clean and we will invalidate after + // the call so no spurious flush should be possible. + + // Call signature handler and pass locals address. + // + // Our signature handlers copy required arguments to the C stack + // (outgoing C args), R3_ARG1 to R10_ARG8, and F1_ARG1 to + // F13_ARG13. + __ mr(R3_ARG1, R18_locals); + __ ld(signature_handler_fd, 0, signature_handler_fd); + __ call_stub(signature_handler_fd); + // reload method + __ ld(R19_method, state_(_method)); + + // Remove the register parameter varargs slots we allocated in + // compute_interpreter_state. SP+16 ends up pointing to the ABI + // outgoing argument area. + // + // Not needed on PPC64. + //__ add(SP, SP, Argument::n_register_parameters*BytesPerWord); + + assert(result_handler_addr->is_nonvolatile(), "result_handler_addr must be in a non-volatile register"); + // Save across call to native method. + __ mr(result_handler_addr, R3_RET); + + // Set up fixed parameters and call the native method. + // If the method is static, get mirror into R4_ARG2. 
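The static-method case handled in the block that follows stores the class mirror into state->_oop_temp and passes the address of that slot, so the native method receives a JNI handle that stays visible to the GC rather than a raw oop. A hypothetical C++ sketch of that hand-off:

// Illustrative only; "oop" here is just an opaque pointer.
typedef void* oop;

oop* static_receiver_handle(oop mirror, oop* oop_temp_slot) {
  *oop_temp_slot = mirror;   // state->_oop_temp = pool_holder->java_mirror()
  return oop_temp_slot;      // R4_ARG2 = &state->_oop_temp (a handle, not the oop itself)
}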
+ + { + Label method_is_not_static; + // access_flags is non-volatile and still, no need to restore it + + // restore access flags + __ testbitdi(CCR0, R0, access_flags, JVM_ACC_STATIC_BIT); + __ bfalse(CCR0, method_is_not_static); + + // constants = method->constants(); + __ ld(R11_scratch1, in_bytes(Method::const_offset()), R19_method); + __ ld(R11_scratch1/*constants*/, in_bytes(ConstMethod::constants_offset()), R11_scratch1); + // pool_holder = method->constants()->pool_holder(); + __ ld(R11_scratch1/*pool_holder*/, ConstantPool::pool_holder_offset_in_bytes(), + R11_scratch1/*constants*/); + + const int mirror_offset = in_bytes(Klass::java_mirror_offset()); + + // mirror = pool_holder->klass_part()->java_mirror(); + __ ld(R0/*mirror*/, mirror_offset, R11_scratch1/*pool_holder*/); + // state->_native_mirror = mirror; + __ std(R0/*mirror*/, state_(_oop_temp)); + // R4_ARG2 = &state->_oop_temp; + __ addir(R4_ARG2, state_(_oop_temp)); + + __ BIND(method_is_not_static); + } + + // At this point, arguments have been copied off the stack into + // their JNI positions. Oops are boxed in-place on the stack, with + // handles copied to arguments. The result handler address is in a + // register. + + // pass JNIEnv address as first parameter + __ addir(R3_ARG1, thread_(jni_environment)); + + // Load the native_method entry before we change the thread state. + __ ld(native_method_fd, method_(native_function)); + + //============================================================================= + // Transition from _thread_in_Java to _thread_in_native. As soon as + // we make this change the safepoint code needs to be certain that + // the last Java frame we established is good. The pc in that frame + // just needs to be near here not an actual return address. + + // We use release_store_fence to update values like the thread state, where + // we don't want the current thread to continue until all our prior memory + // accesses (including the new thread state) are visible to other threads. + __ li(R0, _thread_in_native); + __ release(); + + // TODO: PPC port: assert(4 == JavaThread::sz_thread_state(), "unexpected field size"); + __ stw(R0, thread_(thread_state)); + + if (UseMembar) { + __ fence(); + } + + //============================================================================= + // Call the native method. Argument registers must not have been + // overwritten since "__ call_stub(signature_handler);" (except for + // ARG1 and ARG2 for static methods) + __ call_c(native_method_fd); + + __ std(R3_RET, state_(_native_lresult)); + __ stfd(F1_RET, state_(_native_fresult)); + + // The frame_manager_lr field, which we use for setting the last + // java frame, gets overwritten by the signature handler. Restore + // it now. + __ get_PC_trash_LR(R11_scratch1); + __ std(R11_scratch1, _top_ijava_frame_abi(frame_manager_lr), R1_SP); + + // Because of GC R19_method may no longer be valid. + + // Block, if necessary, before resuming in _thread_in_Java state. + // In order for GC to work, don't clear the last_Java_sp until after + // blocking. + + + + //============================================================================= + // Switch thread to "native transition" state before reading the + // synchronization state. This additional state is necessary + // because reading and testing the synchronization state is not + // atomic w.r.t. GC, as this scenario demonstrates: Java thread A, + // in _thread_in_native state, loads _not_synchronized and is + // preempted. 
VM thread changes sync state to synchronizing and + // suspends threads for GC. Thread A is resumed to finish this + // native method, but doesn't block here since it didn't see any + // synchronization in progress, and escapes. + + // We use release_store_fence to update values like the thread state, where + // we don't want the current thread to continue until all our prior memory + // accesses (including the new thread state) are visible to other threads. + __ li(R0/*thread_state*/, _thread_in_native_trans); + __ release(); + __ stw(R0/*thread_state*/, thread_(thread_state)); + if (UseMembar) { + __ fence(); + } + // Write serialization page so that the VM thread can do a pseudo remote + // membar. We use the current thread pointer to calculate a thread + // specific offset to write to within the page. This minimizes bus + // traffic due to cache line collision. + else { + __ serialize_memory(R16_thread, R11_scratch1, R12_scratch2); + } + + // Now before we return to java we must look for a current safepoint + // (a new safepoint can not start since we entered native_trans). + // We must check here because a current safepoint could be modifying + // the callers registers right this moment. + + // Acquire isn't strictly necessary here because of the fence, but + // sync_state is declared to be volatile, so we do it anyway. + __ load_const(sync_state_addr, SafepointSynchronize::address_of_state()); + + // TODO: PPC port: assert(4 == SafepointSynchronize::sz_state(), "unexpected field size"); + __ lwz(sync_state, 0, sync_state_addr); + + // TODO: PPC port: assert(4 == Thread::sz_suspend_flags(), "unexpected field size"); + __ lwz(suspend_flags, thread_(suspend_flags)); + + __ acquire(); + + Label sync_check_done; + Label do_safepoint; + // No synchronization in progress nor yet synchronized + __ cmpwi(CCR0, sync_state, SafepointSynchronize::_not_synchronized); + // not suspended + __ cmpwi(CCR1, suspend_flags, 0); + + __ bne(CCR0, do_safepoint); + __ beq(CCR1, sync_check_done); + __ bind(do_safepoint); + // Block. We do the call directly and leave the current + // last_Java_frame setup undisturbed. We must save any possible + // native result acrosss the call. No oop is present + + __ mr(R3_ARG1, R16_thread); + __ call_c(CAST_FROM_FN_PTR(FunctionDescriptor*, JavaThread::check_special_condition_for_native_trans), + relocInfo::none); + __ bind(sync_check_done); + + //============================================================================= + // <<<<<< Back in Interpreter Frame >>>>> + + // We are in thread_in_native_trans here and back in the normal + // interpreter frame. We don't have to do anything special about + // safepoints and we can switch to Java mode anytime we are ready. + + // Note: frame::interpreter_frame_result has a dependency on how the + // method result is saved across the call to post_method_exit. For + // native methods it assumes that the non-FPU/non-void result is + // saved in _native_lresult and a FPU result in _native_fresult. If + // this changes then the interpreter_frame_result implementation + // will need to be updated too. + + // On PPC64, we have stored the result directly after the native call. + + //============================================================================= + // back in Java + + // We use release_store_fence to update values like the thread state, where + // we don't want the current thread to continue until all our prior memory + // accesses (including the new thread state) are visible to other threads. 
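The pair of compares above implements "block if a safepoint is in progress or this thread is suspended" before going back to Java. The same predicate in plain C++ (the value and field names are stand-ins for SafepointSynchronize::_not_synchronized and the thread's suspend flags):

bool must_block_for_vm(int safepoint_state, int not_synchronized_value, int suspend_flags) {
  bool safepoint_pending = (safepoint_state != not_synchronized_value);  // cmpwi(CCR0, sync_state, ...)
  bool suspended         = (suspend_flags != 0);                         // cmpwi(CCR1, suspend_flags, 0)
  return safepoint_pending || suspended;                                 // branch to do_safepoint
}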
+ __ li(R0/*thread_state*/, _thread_in_Java); + __ release(); + __ stw(R0/*thread_state*/, thread_(thread_state)); + if (UseMembar) { + __ fence(); + } + + __ reset_last_Java_frame(); + + // Reload GR27_method, call killed it. We can't look at + // state->_method until we're back in java state because in java + // state gc can't happen until we get to a safepoint. + // + // We've set thread_state to _thread_in_Java already, so restoring + // R19_method from R14_state works; R19_method is invalid, because + // GC may have happened. + __ ld(R19_method, state_(_method)); // reload method, may have moved + + // jvmdi/jvmpi support. Whether we've got an exception pending or + // not, and whether unlocking throws an exception or not, we notify + // on native method exit. If we do have an exception, we'll end up + // in the caller's context to handle it, so if we don't do the + // notify here, we'll drop it on the floor. + + __ notify_method_exit(true/*native method*/, + ilgl /*illegal state (not used for native methods)*/); + + + + //============================================================================= + // Handle exceptions + + // See if we must unlock. + // + { + Label method_is_not_synced; + // is_synced is still alive + assert(is_synced->is_nonvolatile(), "is_synced must be non-volatile"); + __ bfalse(is_synced, method_is_not_synced); + + unlock_method(); + + __ bind(method_is_not_synced); + } + + // Reset active handles after returning from native. + // thread->active_handles()->clear(); + __ ld(active_handles, thread_(active_handles)); + // JNIHandleBlock::_top is an int. + // TODO: PPC port: assert(4 == JNIHandleBlock::top_size_in_bytes(), "unexpected field size"); + __ li(R0, 0); + __ stw(R0, JNIHandleBlock::top_offset_in_bytes(), active_handles); + + Label no_pending_exception_from_native_method; + __ ld(R0/*pending_exception*/, thread_(pending_exception)); + __ cmpdi(CCR0, R0/*pending_exception*/, 0); + __ beq(CCR0, no_pending_exception_from_native_method); + + + //----------------------------------------------------------------------------- + // An exception is pending. We call into the runtime only if the + // caller was not interpreted. If it was interpreted the + // interpreter will do the correct thing. If it isn't interpreted + // (call stub/compiled code) we will change our return and continue. + __ BIND(exception_return); + + Label return_to_initial_caller_with_pending_exception; + __ cmpdi(CCR0, R15_prev_state, 0); + __ beq(CCR0, return_to_initial_caller_with_pending_exception); + + // We are returning to an interpreter activation, just pop the state, + // pop our frame, leave the exception pending, and return. + __ pop_interpreter_state(/*prev_state_may_be_0=*/false); + __ pop_interpreter_frame(R11_scratch1, R12_scratch2, R21_tmp1 /* set to return pc */, R22_tmp2); + __ mtlr(R21_tmp1); + __ blr(); + + __ BIND(exception_return_sync_check); + + assert(is_synced->is_nonvolatile(), "is_synced must be non-volatile"); + __ bfalse(is_synced, exception_return); + unlock_method(); + __ b(exception_return); + + + __ BIND(return_to_initial_caller_with_pending_exception); + // We are returning to a c2i-adapter / call-stub, get the address of the + // exception handler, pop the frame and return to the handler. + + // First, pop to caller's frame. + __ pop_interpreter_frame(R11_scratch1, R12_scratch2, R21_tmp1 /* set to return pc */, R22_tmp2); + + __ push_frame_abi112(0, R11_scratch1); + // Get the address of the exception handler. 
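Resetting the JNI handle block above is a single 32-bit store: JNIHandleBlock keeps its fill level in an int "top" field, and zeroing it releases every handle the native method created. A stand-alone sketch with a stand-in struct:

struct JNIHandleBlockSketch {   // stand-in for JNIHandleBlock; only the field that matters here
  int top;                      // number of handles currently in use
};

void clear_active_handles(JNIHandleBlockSketch* block) {
  block->top = 0;               // stw(R0, JNIHandleBlock::top_offset_in_bytes(), active_handles)
}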
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), + R16_thread, + R21_tmp1 /* return pc */); + __ pop_frame(); + + // Load the PC of the the exception handler into LR. + __ mtlr(R3_RET); + + // Load exception into R3_ARG1 and clear pending exception in thread. + __ ld(R3_ARG1/*exception*/, thread_(pending_exception)); + __ li(R4_ARG2, 0); + __ std(R4_ARG2, thread_(pending_exception)); + + // Load the original return pc into R4_ARG2. + __ mr(R4_ARG2/*issuing_pc*/, R21_tmp1); + + // Resize frame to get rid of a potential extension. + __ resize_frame_to_initial_caller(R11_scratch1, R12_scratch2); + + // Return to exception handler. + __ blr(); + + + //----------------------------------------------------------------------------- + // No exception pending. + __ BIND(no_pending_exception_from_native_method); + + // Move native method result back into proper registers and return. + // Invoke result handler (may unbox/promote). + __ ld(R3_RET, state_(_native_lresult)); + __ lfd(F1_RET, state_(_native_fresult)); + __ call_stub(result_handler_addr); + + // We have created a new BytecodeInterpreter object, now we must destroy it. + // + // Restore previous R14_state and caller's SP. R15_prev_state may + // be 0 here, because our caller may be the call_stub or compiled + // code. + __ pop_interpreter_state(/*prev_state_may_be_0=*/true); + __ pop_interpreter_frame(R11_scratch1, R12_scratch2, R21_tmp1 /* set to return pc */, R22_tmp2); + // Resize frame to get rid of a potential extension. + __ resize_frame_to_initial_caller(R11_scratch1, R12_scratch2); + + // Must use the return pc which was loaded from the caller's frame + // as the VM uses return-pc-patching for deoptimization. + __ mtlr(R21_tmp1); + __ blr(); + + + + //============================================================================= + // We encountered an exception while computing the interpreter + // state, so R14_state isn't valid. Act as if we just returned from + // the callee method with a pending exception. + __ BIND(stack_overflow_return); + + // + // Register state: + // R14_state invalid; trashed by compute_interpreter_state + // R15_prev_state valid, but may be 0 + // + // R1_SP valid, points to caller's SP; wasn't yet updated by + // compute_interpreter_state + // + + // Create exception oop and make it pending. + + // Throw the exception via RuntimeStub "throw_StackOverflowError_entry". + // + // Previously, we called C-Code directly. As a consequence, a + // possible GC tried to process the argument oops of the top frame + // (see RegisterMap::clear, which sets the corresponding flag to + // true). This lead to crashes because: + // 1. The top register map did not contain locations for the argument registers + // 2. The arguments are dead anyway, could be already overwritten in the worst case + // Solution: Call via special runtime stub that pushes it's own + // frame. This runtime stub has the flag "CodeBlob::caller_must_gc_arguments()" + // set to "false", what prevents the dead arguments getting GC'd. + // + // 2 cases exist: + // 1. We were called by the c2i adapter / call stub + // 2. We were called by the frame manager + // + // Both cases are handled by this code: + // 1. - initial_caller_sp was saved in both cases on entry, so it's safe to load it back even if it was not changed. + // - control flow will be: + // throw_stackoverflow_stub->VM->throw_stackoverflow_stub->forward_excep->excp_blob of caller method + // 2. 
- control flow will be: + // throw_stackoverflow_stub->VM->throw_stackoverflow_stub->forward_excep->rethrow_excp_entry of frame manager->resume_method + // Since we restored the caller SP above, the rethrow_excp_entry can restore the original interpreter state + // registers using the stack and resume the calling method with a pending excp. + + // Pop any c2i extension from the stack, restore LR just to be sure + __ ld(R0, _top_ijava_frame_abi(frame_manager_lr), R1_SP); + __ mtlr(R0); + // Resize frame to get rid of a potential extension. + __ resize_frame_to_initial_caller(R11_scratch1, R12_scratch2); + + // Load target address of the runtime stub. + __ load_const(R12_scratch2, (StubRoutines::throw_StackOverflowError_entry())); + __ mtctr(R12_scratch2); + __ bctr(); + + + //============================================================================= + // Counter overflow. + + if (inc_counter) { + // Handle invocation counter overflow + __ bind(invocation_counter_overflow); + + generate_counter_overflow(continue_after_compile); + } + + native_entry = entry; + return entry; +} + +bool AbstractInterpreter::can_be_compiled(methodHandle m) { + // No special entry points that preclude compilation. + return true; +} + +// Unlock the current method. +// +void CppInterpreterGenerator::unlock_method(void) { + // Find preallocated monitor and unlock method. Method monitor is + // the first one. + + // Registers alive + // R14_state + // + // Registers updated + // volatiles + // + const Register monitor = R4_ARG2; + + // Pass address of initial monitor we allocated. + // + // First monitor. + __ addi(monitor, R14_state, -frame::interpreter_frame_monitor_size_in_bytes()); + + // Unlock method + __ unlock_object(monitor); +} + +// Lock the current method. +// +void CppInterpreterGenerator::lock_method(void) { + // Find preallocated monitor and lock method. Method monitor is the + // first one. + + // + // Registers alive + // R14_state + // + // Registers updated + // volatiles + // + + const Register monitor = R4_ARG2; + const Register object = R5_ARG3; + + // Pass address of initial monitor we allocated. + __ addi(monitor, R14_state, -frame::interpreter_frame_monitor_size_in_bytes()); + + // Pass object address. + __ ld(object, BasicObjectLock::obj_offset_in_bytes(), monitor); + + // Lock method. + __ lock_object(monitor, object); +} + +// Generate code for handling resuming a deopted method. +void CppInterpreterGenerator::generate_deopt_handling(Register result_index) { + + //============================================================================= + // Returning from a compiled method into a deopted method. The + // bytecode at the bcp has completed. The result of the bytecode is + // in the native abi (the tosca for the template based + // interpreter). Any stack space that was used by the bytecode that + // has completed has been removed (e.g. parameters for an invoke) so + // all that we have to do is place any pending result on the + // expression stack and resume execution on the next bytecode. + + Label return_from_deopt_common; + + // R3_RET and F1_RET are live here! Load the array index of the + // required result stub address and continue at return_from_deopt_common. + + // Deopt needs to jump to here to enter the interpreter (return a result). 
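Both lock_method() and unlock_method() above locate the method's monitor the same way: the first (and only preallocated) BasicObjectLock sits directly below the BytecodeInterpreter object. A one-line pointer sketch of that address computation:

#include <cstddef>

// Illustrative: where the method monitor lives relative to the interpreter state.
void* method_monitor(char* state, size_t monitor_size_in_bytes) {
  return state - monitor_size_in_bytes;  // addi(monitor, R14_state, -frame::interpreter_frame_monitor_size_in_bytes())
}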
+ deopt_frame_manager_return_atos = __ pc(); + __ li(result_index, AbstractInterpreter::BasicType_as_index(T_OBJECT)); + __ b(return_from_deopt_common); + + deopt_frame_manager_return_btos = __ pc(); + __ li(result_index, AbstractInterpreter::BasicType_as_index(T_BOOLEAN)); + __ b(return_from_deopt_common); + + deopt_frame_manager_return_itos = __ pc(); + __ li(result_index, AbstractInterpreter::BasicType_as_index(T_INT)); + __ b(return_from_deopt_common); + + deopt_frame_manager_return_ltos = __ pc(); + __ li(result_index, AbstractInterpreter::BasicType_as_index(T_LONG)); + __ b(return_from_deopt_common); + + deopt_frame_manager_return_ftos = __ pc(); + __ li(result_index, AbstractInterpreter::BasicType_as_index(T_FLOAT)); + __ b(return_from_deopt_common); + + deopt_frame_manager_return_dtos = __ pc(); + __ li(result_index, AbstractInterpreter::BasicType_as_index(T_DOUBLE)); + __ b(return_from_deopt_common); + + deopt_frame_manager_return_vtos = __ pc(); + __ li(result_index, AbstractInterpreter::BasicType_as_index(T_VOID)); + // Last one, fall-through to return_from_deopt_common. + + // Deopt return common. An index is present that lets us move any + // possible result being return to the interpreter's stack. + // + __ BIND(return_from_deopt_common); + +} + +// Generate the code to handle a more_monitors message from the c++ interpreter. +void CppInterpreterGenerator::generate_more_monitors() { + + // + // Registers alive + // R16_thread - JavaThread* + // R15_prev_state - previous BytecodeInterpreter or 0 + // R14_state - BytecodeInterpreter* address of receiver's interpreter state + // R1_SP - old stack pointer + // + // Registers updated + // R1_SP - new stack pointer + // + + // Very-local scratch registers. + const Register old_tos = R21_tmp1; + const Register new_tos = R22_tmp2; + const Register stack_base = R23_tmp3; + const Register stack_limit = R24_tmp4; + const Register slot = R25_tmp5; + const Register n_slots = R25_tmp5; + + // Interpreter state fields. + const Register msg = R24_tmp4; + + // Load up relevant interpreter state. + + __ ld(stack_base, state_(_stack_base)); // Old stack_base + __ ld(old_tos, state_(_stack)); // Old tos + __ ld(stack_limit, state_(_stack_limit)); // Old stack_limit + + // extracted monitor_size + int monitor_size = frame::interpreter_frame_monitor_size_in_bytes(); + assert(Assembler::is_aligned((unsigned int)monitor_size, + (unsigned int)frame::alignment_in_bytes), + "size of a monitor must respect alignment of SP"); + + // Save and restore top LR + __ ld(R12_scratch2, _top_ijava_frame_abi(frame_manager_lr), R1_SP); + __ resize_frame(-monitor_size, R11_scratch1);// Allocate space for new monitor + __ std(R12_scratch2, _top_ijava_frame_abi(frame_manager_lr), R1_SP); + // Initial_caller_sp is used as unextended_sp for non initial callers. + __ std(R1_SP, _top_ijava_frame_abi(initial_caller_sp), R1_SP); + __ addi(stack_base, stack_base, -monitor_size); // New stack_base + __ addi(new_tos, old_tos, -monitor_size); // New tos + __ addi(stack_limit, stack_limit, -monitor_size); // New stack_limit + + __ std(R1_SP, state_(_last_Java_sp)); // Update frame_bottom + + __ std(stack_base, state_(_stack_base)); // Update stack_base + __ std(new_tos, state_(_stack)); // Update tos + __ std(stack_limit, state_(_stack_limit)); // Update stack_limit + + __ li(msg, BytecodeInterpreter::got_monitors); // Tell interpreter we allocated the lock + __ stw(msg, state_(_msg)); + + // Shuffle expression stack down. 
Recall that stack_base points + // just above the new expression stack bottom. Old_tos and new_tos + // are used to scan thru the old and new expression stacks. + + Label copy_slot, copy_slot_finished; + __ sub(n_slots, stack_base, new_tos); + __ srdi_(n_slots, n_slots, LogBytesPerWord); // compute number of slots to copy + assert(LogBytesPerWord == 3, "conflicts assembler instructions"); + __ beq(CCR0, copy_slot_finished); // nothing to copy + + __ mtctr(n_slots); + + // loop + __ bind(copy_slot); + __ ldu(slot, BytesPerWord, old_tos); // slot = *++old_tos; + __ stdu(slot, BytesPerWord, new_tos); // *++new_tos = slot; + __ bdnz(copy_slot); + + __ bind(copy_slot_finished); + + // Restart interpreter + __ li(R0, 0); + __ std(R0, BasicObjectLock::obj_offset_in_bytes(), stack_base); // Mark lock as unused +} + +address CppInterpreterGenerator::generate_normal_entry(void) { + if (interpreter_frame_manager != NULL) return interpreter_frame_manager; + + address entry = __ pc(); + + address return_from_native_pc = (address) NULL; + + // Initial entry to frame manager (from call_stub or c2i_adapter) + + // + // Registers alive + // R16_thread - JavaThread* + // R19_method - callee's Method (method to be invoked) + // R17_tos - address of sender tos (prepushed) + // R1_SP - SP prepared by call stub such that caller's outgoing args are near top + // LR - return address to caller (call_stub or c2i_adapter) + // R21_sender_SP - initial caller sp + // + // Registers updated + // R15_prev_state - 0 + // + // Stack layout at this point: + // + // 0 [TOP_IJAVA_FRAME_ABI] <-- R1_SP + // alignment (optional) + // [outgoing Java arguments] <-- R17_tos + // ... + // PARENT [PARENT_IJAVA_FRAME_ABI] + // ... + // + + // Save initial_caller_sp to caller's abi. + // The caller frame must be resized before returning to get rid of + // the c2i part on top of the calling compiled frame (if any). + // R21_tmp1 must match sender_sp in gen_c2i_adapter. + // Now override the saved SP with the senderSP so we can pop c2i + // arguments (if any) off when we return. + __ std(R21_sender_SP, _top_ijava_frame_abi(initial_caller_sp), R1_SP); + + // Save LR to caller's frame. We don't use _abi(lr) here, + // because it is not safe. + __ mflr(R0); + __ std(R0, _top_ijava_frame_abi(frame_manager_lr), R1_SP); + + // If we come here, it is the first invocation of the frame manager. + // So there is no previous interpreter state. + __ li(R15_prev_state, 0); + + + // Fall through to where "recursive" invocations go. + + //============================================================================= + // Dispatch an instance of the interpreter. Recursive activations + // come here. + + Label re_dispatch; + __ BIND(re_dispatch); + + // + // Registers alive + // R16_thread - JavaThread* + // R19_method - callee's Method + // R17_tos - address of caller's tos (prepushed) + // R15_prev_state - address of caller's BytecodeInterpreter or 0 + // R1_SP - caller's SP trimmed such that caller's outgoing args are near top. + // + // Stack layout at this point: + // + // 0 [TOP_IJAVA_FRAME_ABI] + // alignment (optional) + // [outgoing Java arguments] + // ... + // PARENT [PARENT_IJAVA_FRAME_ABI] + // ... + + // fall through to interpreted execution + + //============================================================================= + // Allocate a new Java frame and initialize the new interpreter state. 
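The copy loop above moves every live expression-stack slot down by one monitor size after the frame has been extended; ldu/stdu are update-form accesses, so both pointers advance by one word before each load and store. An equivalent plain-C++ loop over word-sized slots (hypothetical signature):

#include <cstddef>
#include <cstdint>

void shuffle_stack_down(intptr_t* old_tos, intptr_t* new_tos, intptr_t* stack_base) {
  ptrdiff_t n_slots = stack_base - new_tos;   // srdi_(n_slots, stack_base - new_tos, LogBytesPerWord)
  for (ptrdiff_t i = 0; i < n_slots; i++) {
    *++new_tos = *++old_tos;                  // ldu(slot, BytesPerWord, old_tos); stdu(slot, BytesPerWord, new_tos)
  }
}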
+ + Label stack_overflow_return; + + // Create a suitable new Java frame plus a new BytecodeInterpreter instance + // in the current (frame manager's) C frame. + generate_compute_interpreter_state(stack_overflow_return); + + // fall through + + //============================================================================= + // Interpreter dispatch. + + Label call_interpreter; + __ BIND(call_interpreter); + + // + // Registers alive + // R16_thread - JavaThread* + // R15_prev_state - previous BytecodeInterpreter or 0 + // R14_state - address of receiver's BytecodeInterpreter + // R1_SP - receiver's stack pointer + // + + // Thread fields. + const Register pending_exception = R21_tmp1; + + // Interpreter state fields. + const Register msg = R24_tmp4; + + // MethodOop fields. + const Register parameter_count = R25_tmp5; + const Register result_index = R26_tmp6; + + const Register dummy = R28_tmp8; + + // Address of various interpreter stubs. + // R29_tmp9 is reserved. + const Register stub_addr = R27_tmp7; + + // Uncommon trap needs to jump to here to enter the interpreter + // (re-execute current bytecode). + unctrap_frame_manager_entry = __ pc(); + + // If we are profiling, store our fp (BSP) in the thread so we can + // find it during a tick. + if (Arguments::has_profile()) { + // On PPC64 we store the pointer to the current BytecodeInterpreter, + // instead of the bsp of ia64. This should suffice to be able to + // find all interesting information. + __ std(R14_state, thread_(last_interpreter_fp)); + } + + // R16_thread, R14_state and R15_prev_state are nonvolatile + // registers. There is no need to save these. If we needed to save + // some state in the current Java frame, this could be a place to do + // so. + + // Call Java bytecode dispatcher passing "BytecodeInterpreter* istate". + __ call_VM_leaf(CAST_FROM_FN_PTR(address, + JvmtiExport::can_post_interpreter_events() + ? BytecodeInterpreter::runWithChecks + : BytecodeInterpreter::run), + R14_state); + + interpreter_return_address = __ last_calls_return_pc(); + + // R16_thread, R14_state and R15_prev_state have their values preserved. + + // If we are profiling, clear the fp in the thread to tell + // the profiler that we are no longer in the interpreter. + if (Arguments::has_profile()) { + __ li(R11_scratch1, 0); + __ std(R11_scratch1, thread_(last_interpreter_fp)); + } + + // Load message from bytecode dispatcher. 
+ // TODO: PPC port: guarantee(4 == BytecodeInterpreter::sz_msg(), "unexpected field size"); + __ lwz(msg, state_(_msg)); + + + Label more_monitors; + Label return_from_native; + Label return_from_native_common; + Label return_from_native_no_exception; + Label return_from_interpreted_method; + Label return_from_recursive_activation; + Label unwind_recursive_activation; + Label resume_interpreter; + Label return_to_initial_caller; + Label unwind_initial_activation; + Label unwind_initial_activation_pending_exception; + Label call_method; + Label call_special; + Label retry_method; + Label retry_method_osr; + Label popping_frame; + Label throwing_exception; + + // Branch according to the received message + + __ cmpwi(CCR1, msg, BytecodeInterpreter::call_method); + __ cmpwi(CCR2, msg, BytecodeInterpreter::return_from_method); + + __ beq(CCR1, call_method); + __ beq(CCR2, return_from_interpreted_method); + + __ cmpwi(CCR3, msg, BytecodeInterpreter::more_monitors); + __ cmpwi(CCR4, msg, BytecodeInterpreter::throwing_exception); + + __ beq(CCR3, more_monitors); + __ beq(CCR4, throwing_exception); + + __ cmpwi(CCR5, msg, BytecodeInterpreter::popping_frame); + __ cmpwi(CCR6, msg, BytecodeInterpreter::do_osr); + + __ beq(CCR5, popping_frame); + __ beq(CCR6, retry_method_osr); + + __ stop("bad message from interpreter"); + + + //============================================================================= + // Add a monitor just below the existing one(s). State->_stack_base + // points to the lowest existing one, so we insert the new one just + // below it and shuffle the expression stack down. Ref. the above + // stack layout picture, we must update _stack_base, _stack, _stack_limit + // and _last_Java_sp in the interpreter state. + + __ BIND(more_monitors); + + generate_more_monitors(); + __ b(call_interpreter); + + generate_deopt_handling(result_index); + + // Restoring the R14_state is already done by the deopt_blob. + + // Current tos includes no parameter slots. + __ ld(R17_tos, state_(_stack)); + __ li(msg, BytecodeInterpreter::deopt_resume); + __ b(return_from_native_common); + + // We are sent here when we are unwinding from a native method or + // adapter with an exception pending. We need to notify the interpreter + // that there is an exception to process. + // We arrive here also if the frame manager called an (interpreted) target + // which returns with a StackOverflow exception. + // The control flow is in this case is: + // frame_manager->throw_excp_stub->forward_excp->rethrow_excp_entry + + AbstractInterpreter::_rethrow_exception_entry = __ pc(); + + // Restore R14_state. + __ ld(R14_state, 0, R1_SP); + __ addi(R14_state, R14_state, + -frame::interpreter_frame_cinterpreterstate_size_in_bytes()); + + // Store exception oop into thread object. + __ std(R3_RET, thread_(pending_exception)); + __ li(msg, BytecodeInterpreter::method_resume /*rethrow_exception*/); + // + // NOTE: the interpreter frame as setup be deopt does NOT include + // any parameter slots (good thing since we have no callee here + // and couldn't remove them) so we don't have to do any calculations + // here to figure it out. + // + __ ld(R17_tos, state_(_stack)); + __ b(return_from_native_common); + + + //============================================================================= + // Returning from a native method. Result is in the native abi + // location so we must move it to the java expression stack. 
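The chain of cmpwi/beq pairs above is simply a switch over the message the bytecode interpreter left in state->_msg; anything unrecognized stops the VM. The same dispatch written as a C++ switch over stand-in enum values:

#include <cstdio>
#include <cstdlib>

// Stand-in for the subset of BytecodeInterpreter messages handled above.
enum MsgSketch { call_method, return_from_method, more_monitors,
                 throwing_exception, popping_frame, do_osr };

void dispatch(MsgSketch msg) {
  switch (msg) {
    case call_method:        /* goto call_method                       */ break;
    case return_from_method: /* goto return_from_interpreted_method    */ break;
    case more_monitors:      /* goto more_monitors                     */ break;
    case throwing_exception: /* goto throwing_exception                */ break;
    case popping_frame:      /* goto popping_frame                     */ break;
    case do_osr:             /* goto retry_method_osr                  */ break;
    default:
      std::fprintf(stderr, "bad message from interpreter\n");  // __ stop(...)
      std::abort();
  }
}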
+ + __ BIND(return_from_native); + guarantee(return_from_native_pc == (address) NULL, "precondition"); + return_from_native_pc = __ pc(); + + // Restore R14_state. + __ ld(R14_state, 0, R1_SP); + __ addi(R14_state, R14_state, + -frame::interpreter_frame_cinterpreterstate_size_in_bytes()); + + // + // Registers alive + // R16_thread + // R14_state - address of caller's BytecodeInterpreter. + // R3_RET - integer result, if any. + // F1_RET - float result, if any. + // + // Registers updated + // R19_method - callee's Method + // R17_tos - caller's tos, with outgoing args popped + // result_index - index of result handler. + // msg - message for resuming interpreter. + // + + // Very-local scratch registers. + + const ConditionRegister have_pending_exception = CCR0; + + // Load callee Method, gc may have moved it. + __ ld(R19_method, state_(_result._to_call._callee)); + + // Load address of caller's tos. includes parameter slots. + __ ld(R17_tos, state_(_stack)); + + // Pop callee's parameters. + + __ ld(parameter_count, in_bytes(Method::const_offset()), R19_method); + __ lhz(parameter_count, in_bytes(ConstMethod::size_of_parameters_offset()), parameter_count); + __ sldi(parameter_count, parameter_count, Interpreter::logStackElementSize); + __ add(R17_tos, R17_tos, parameter_count); + + // Result stub address array index + // TODO: PPC port: assert(4 == methodOopDesc::sz_result_index(), "unexpected field size"); + __ lwa(result_index, method_(result_index)); + + __ li(msg, BytecodeInterpreter::method_resume); + + // + // Registers alive + // R16_thread + // R14_state - address of caller's BytecodeInterpreter. + // R17_tos - address of caller's tos with outgoing args already popped + // R3_RET - integer return value, if any. + // F1_RET - float return value, if any. + // result_index - index of result handler. + // msg - message for resuming interpreter. + // + // Registers updated + // R3_RET - new address of caller's tos, including result, if any + // + + __ BIND(return_from_native_common); + + // Check for pending exception + __ ld(pending_exception, thread_(pending_exception)); + __ cmpdi(CCR0, pending_exception, 0); + __ beq(CCR0, return_from_native_no_exception); + + // If there's a pending exception, we really have no result, so + // R3_RET is dead. Resume_interpreter assumes the new tos is in + // R3_RET. + __ mr(R3_RET, R17_tos); + // `resume_interpreter' expects R15_prev_state to be alive. + __ ld(R15_prev_state, state_(_prev_link)); + __ b(resume_interpreter); + + __ BIND(return_from_native_no_exception); + + // No pending exception, copy method result from native ABI register + // to tos. + + // Address of stub descriptor address array. + __ load_const(stub_addr, CppInterpreter::tosca_result_to_stack()); + + // Pass address of tos to stub. + __ mr(R4_ARG2, R17_tos); + + // Address of stub descriptor address. + __ sldi(result_index, result_index, LogBytesPerWord); + __ add(stub_addr, stub_addr, result_index); + + // Stub descriptor address. + __ ld(stub_addr, 0, stub_addr); + + // TODO: don't do this via a call, do it in place! + // + // call stub via descriptor + // in R3_ARG1/F1_ARG1: result value (R3_RET or F1_RET) + __ call_stub(stub_addr); + + // new tos = result of call in R3_RET + + // `resume_interpreter' expects R15_prev_state to be alive. 
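Popping the callee's arguments above is plain pointer arithmetic: the caller's tos still covers the outgoing parameters, so it moves up by size_of_parameters() slots (the Java expression stack grows downwards, so "up" is towards higher addresses). A sketch:

#include <cstdint>

intptr_t* pop_callee_parameters(intptr_t* tos, int parameter_count) {
  return tos + parameter_count;  // add(R17_tos, R17_tos, parameter_count << logStackElementSize)
}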
+ __ ld(R15_prev_state, state_(_prev_link));
+ __ b(resume_interpreter);
+
+ //=============================================================================
+ // We encountered an exception while computing the interpreter
+ // state, so R14_state isn't valid. Act as if we just returned from
+ // the callee method with a pending exception.
+ __ BIND(stack_overflow_return);
+
+ //
+ // Registers alive
+ // R16_thread - JavaThread*
+ // R1_SP - old stack pointer
+ // R19_method - callee's Method
+ // R17_tos - address of caller's tos (prepushed)
+ // R15_prev_state - address of caller's BytecodeInterpreter or 0
+ // R18_locals - address of callee's locals array
+ //
+ // Registers updated
+ // R3_RET - address of resuming tos, if recursive unwind
+
+ Label Lskip_unextend_SP;
+
+ {
+ const ConditionRegister is_initial_call = CCR0;
+ const Register tos_save = R21_tmp1;
+ const Register tmp = R22_tmp2;
+
+ assert(tos_save->is_nonvolatile(), "need a nonvolatile");
+
+ // Is the exception thrown in the initial Java frame of this frame
+ // manager frame?
+ __ cmpdi(is_initial_call, R15_prev_state, 0);
+ __ bne(is_initial_call, Lskip_unextend_SP);
+
+ // Pop any c2i extension from the stack. This is necessary in the
+ // non-recursive case (that is we were called by the c2i adapter,
+ // meaning we have no prev state). In this case we entered the frame
+ // manager through a special entry which pushes the original
+ // unextended SP to the stack. Here we load it back.
+ __ ld(R0, _top_ijava_frame_abi(frame_manager_lr), R1_SP);
+ __ mtlr(R0);
+ // Resize frame to get rid of a potential extension.
+ __ resize_frame_to_initial_caller(R11_scratch1, R12_scratch2);
+
+ // Fall through
+
+ __ bind(Lskip_unextend_SP);
+
+ // Throw the exception via RuntimeStub "throw_StackOverflowError_entry".
+ //
+ // Previously, we called C-Code directly. As a consequence, a
+ // possible GC tried to process the argument oops of the top frame
+ // (see RegisterMap::clear, which sets the corresponding flag to
+ // true). This led to crashes because:
+ // 1. The top register map did not contain locations for the argument registers
+ // 2. The arguments are dead anyway, could be already overwritten in the worst case
+ // Solution: Call via special runtime stub that pushes its own frame. This runtime stub has the flag
+ // "CodeBlob::caller_must_gc_arguments()" set to "false", which prevents the dead arguments from getting GC'd.
+ //
+ // 2 cases exist:
+ // 1. We were called by the c2i adapter / call stub
+ // 2. We were called by the frame manager
+ //
+ // Both cases are handled by this code:
+ // 1. - initial_caller_sp was saved on stack => Load it back and we're ok
+ // - control flow will be:
+ // throw_stackoverflow_stub->VM->throw_stackoverflow_stub->forward_excep->excp_blob of calling method
+ // 2. - control flow will be:
+ // throw_stackoverflow_stub->VM->throw_stackoverflow_stub->forward_excep->
+ // ->rethrow_excp_entry of frame manager->resume_method
+ // Since we restored the caller SP above, the rethrow_excp_entry can restore the original interpreter state
+ // registers using the stack and resume the calling method with a pending excp.
+
+ __ load_const(R3_ARG1, (StubRoutines::throw_StackOverflowError_entry()));
+ __ mtctr(R3_ARG1);
+ __ bctr();
+ }
+ //=============================================================================
+ // We have popped a frame from an interpreted call. We are assured
+ // of returning to an interpreted call by the popframe abi.
We have + // no return value all we have to do is pop the current frame and + // then make sure that the top of stack (of the caller) gets set to + // where it was when we entered the callee (i.e. the args are still + // in place). Or we are returning to the interpreter. In the first + // case we must extract result (if any) from the java expression + // stack and store it in the location the native abi would expect + // for a call returning this type. In the second case we must simply + // do a stack to stack move as we unwind. + + __ BIND(popping_frame); + + // Registers alive + // R14_state + // R15_prev_state + // R17_tos + // + // Registers updated + // R19_method + // R3_RET + // msg + { + Label L; + + // Reload callee method, gc may have moved it. + __ ld(R19_method, state_(_method)); + + // We may be returning to a deoptimized frame in which case the + // usual assumption of a recursive return is not true. + + // not equal = is recursive call + __ cmpdi(CCR0, R15_prev_state, 0); + + __ bne(CCR0, L); + + // Pop_frame capability. + // The pop_frame api says that the underlying frame is a Java frame, in this case + // (prev_state==null) it must be a compiled frame: + // + // Stack at this point: I, C2I + C, ... + // + // The outgoing arguments of the call have just been copied (popframe_preserve_args). + // By the pop_frame api, we must end up in an interpreted frame. So the compiled frame + // will be deoptimized. Deoptimization will restore the outgoing arguments from + // popframe_preserve_args, adjust the tos such that it includes the popframe_preserve_args, + // and adjust the bci such that the call will be executed again. + // We have no results, just pop the interpreter frame, resize the compiled frame to get rid + // of the c2i extension and return to the deopt_handler. + __ b(unwind_initial_activation); + + // is recursive call + __ bind(L); + + // Resume_interpreter expects the original tos in R3_RET. + __ ld(R3_RET, prev_state_(_stack)); + + // We're done. + __ li(msg, BytecodeInterpreter::popping_frame); + + __ b(unwind_recursive_activation); + } + + + //============================================================================= + + // We have finished an interpreted call. We are either returning to + // native (call_stub/c2) or we are returning to the interpreter. + // When returning to native, we must extract the result (if any) + // from the java expression stack and store it in the location the + // native abi expects. When returning to the interpreter we must + // simply do a stack to stack move as we unwind. + + __ BIND(return_from_interpreted_method); + + // + // Registers alive + // R16_thread - JavaThread* + // R15_prev_state - address of caller's BytecodeInterpreter or 0 + // R14_state - address of callee's interpreter state + // R1_SP - callee's stack pointer + // + // Registers updated + // R19_method - callee's method + // R3_RET - address of result (new caller's tos), + // + // if returning to interpreted + // msg - message for interpreter, + // if returning to interpreted + // + + // Check if this is the initial invocation of the frame manager. + // If so, R15_prev_state will be null. + __ cmpdi(CCR0, R15_prev_state, 0); + + // Reload callee method, gc may have moved it. + __ ld(R19_method, state_(_method)); + + // Load the method's result type. + __ lwz(result_index, method_(result_index)); + + // Go to return_to_initial_caller if R15_prev_state is null. 
+ __ beq(CCR0, return_to_initial_caller); + + // Copy callee's result to caller's expression stack via inline stack-to-stack + // converters. + { + Register new_tos = R3_RET; + Register from_temp = R4_ARG2; + Register from = R5_ARG3; + Register tos = R6_ARG4; + Register tmp1 = R7_ARG5; + Register tmp2 = R8_ARG6; + + ConditionRegister result_type_is_void = CCR1; + ConditionRegister result_type_is_long = CCR2; + ConditionRegister result_type_is_double = CCR3; + + Label stack_to_stack_void; + Label stack_to_stack_double_slot; // T_LONG, T_DOUBLE + Label stack_to_stack_single_slot; // T_BOOLEAN, T_BYTE, T_CHAR, T_SHORT, T_INT, T_FLOAT, T_OBJECT + Label stack_to_stack_done; + + // Pass callee's address of tos + BytesPerWord + __ ld(from_temp, state_(_stack)); + + // result type: void + __ cmpwi(result_type_is_void, result_index, AbstractInterpreter::BasicType_as_index(T_VOID)); + + // Pass caller's tos == callee's locals address + __ ld(tos, state_(_locals)); + + // result type: long + __ cmpwi(result_type_is_long, result_index, AbstractInterpreter::BasicType_as_index(T_LONG)); + + __ addi(from, from_temp, Interpreter::stackElementSize); + + // !! don't branch above this line !! + + // handle void + __ beq(result_type_is_void, stack_to_stack_void); + + // result type: double + __ cmpwi(result_type_is_double, result_index, AbstractInterpreter::BasicType_as_index(T_DOUBLE)); + + // handle long or double + __ beq(result_type_is_long, stack_to_stack_double_slot); + __ beq(result_type_is_double, stack_to_stack_double_slot); + + // fall through to single slot types (incl. object) + + { + __ BIND(stack_to_stack_single_slot); + // T_BOOLEAN, T_BYTE, T_CHAR, T_SHORT, T_INT, T_FLOAT, T_OBJECT + + __ ld(tmp1, 0, from); + __ std(tmp1, 0, tos); + // New expression stack top + __ addi(new_tos, tos, - BytesPerWord); + + __ b(stack_to_stack_done); + } + + { + __ BIND(stack_to_stack_double_slot); + // T_LONG, T_DOUBLE + + // Move both entries for debug purposes even though only one is live + __ ld(tmp1, BytesPerWord, from); + __ ld(tmp2, 0, from); + __ std(tmp1, 0, tos); + __ std(tmp2, -BytesPerWord, tos); + + // new expression stack top + __ addi(new_tos, tos, - 2 * BytesPerWord); // two slots + __ b(stack_to_stack_done); + } + + { + __ BIND(stack_to_stack_void); + // T_VOID + + // new expression stack top + __ mr(new_tos, tos); + // fall through to stack_to_stack_done + } + + __ BIND(stack_to_stack_done); + } + + // new tos = R3_RET + + // Get the message for the interpreter + __ li(msg, BytecodeInterpreter::method_resume); + + // And fall thru + + + //============================================================================= + // Restore caller's interpreter state and pass pointer to caller's + // new tos to caller. + + __ BIND(unwind_recursive_activation); + + // + // Registers alive + // R15_prev_state - address of caller's BytecodeInterpreter + // R3_RET - address of caller's tos + // msg - message for caller's BytecodeInterpreter + // R1_SP - callee's stack pointer + // + // Registers updated + // R14_state - address of caller's BytecodeInterpreter + // R15_prev_state - address of its parent or 0 + // + + // Pop callee's interpreter and set R14_state to caller's interpreter. + __ pop_interpreter_state(/*prev_state_may_be_0=*/false); + + // And fall thru + + + //============================================================================= + // Resume the (calling) interpreter after a call. 
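Side note (illustration only, not part of the patch): the inline stack-to-stack converters a few lines above reduce to a small slot copy. A self-contained C++ sketch under the assumptions of 8-byte stack slots and the pre-pushed, downward-growing expression stack described earlier; names are hypothetical:

#include <cstdint>

// result_slots: 0 for T_VOID, 1 for the single-slot kinds (int, float, object, ...),
// 2 for T_LONG / T_DOUBLE. 'from' points at the callee's result, 'tos' at the
// caller's current tos slot. Returns the caller's new tos.
intptr_t* copy_result_to_caller(const intptr_t* from, intptr_t* tos, int result_slots) {
  if (result_slots == 2) {      // T_LONG / T_DOUBLE: copy both slots, as in the assembly
    tos[0]  = from[1];
    tos[-1] = from[0];
    return tos - 2;
  }
  if (result_slots == 1) {      // single-slot kinds: copy one slot
    tos[0] = from[0];
    return tos - 1;
  }
  return tos;                   // T_VOID: tos unchanged
}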
+ + __ BIND(resume_interpreter); + + // + // Registers alive + // R14_state - address of resuming BytecodeInterpreter + // R15_prev_state - address of its parent or 0 + // R3_RET - address of resuming tos + // msg - message for resuming interpreter + // R1_SP - callee's stack pointer + // + // Registers updated + // R1_SP - caller's stack pointer + // + + // Restore C stack pointer of caller (resuming interpreter), + // R14_state already points to the resuming BytecodeInterpreter. + __ pop_interpreter_frame_to_state(R14_state, R21_tmp1, R11_scratch1, R12_scratch2); + + // Store new address of tos (holding return value) in interpreter state. + __ std(R3_RET, state_(_stack)); + + // Store message for interpreter. + __ stw(msg, state_(_msg)); + + __ b(call_interpreter); + + //============================================================================= + // Interpreter returning to native code (call_stub/c1/c2) from + // initial activation. Convert stack result and unwind activation. + + __ BIND(return_to_initial_caller); + + // + // Registers alive + // R19_method - callee's Method + // R14_state - address of callee's interpreter state + // R16_thread - JavaThread + // R1_SP - callee's stack pointer + // + // Registers updated + // R3_RET/F1_RET - result in expected output register + // + + // If we have an exception pending we have no result and we + // must figure out where to really return to. + // + __ ld(pending_exception, thread_(pending_exception)); + __ cmpdi(CCR0, pending_exception, 0); + __ bne(CCR0, unwind_initial_activation_pending_exception); + + __ lwa(result_index, method_(result_index)); + + // Address of stub descriptor address array. + __ load_const(stub_addr, CppInterpreter::stack_result_to_native()); + + // Pass address of callee's tos + BytesPerWord. + // Will then point directly to result. + __ ld(R3_ARG1, state_(_stack)); + __ addi(R3_ARG1, R3_ARG1, Interpreter::stackElementSize); + + // Address of stub descriptor address + __ sldi(result_index, result_index, LogBytesPerWord); + __ add(stub_addr, stub_addr, result_index); + + // Stub descriptor address + __ ld(stub_addr, 0, stub_addr); + + // TODO: don't do this via a call, do it in place! + // + // call stub via descriptor + __ call_stub(stub_addr); + + __ BIND(unwind_initial_activation); + + // Unwind from initial activation. No exception is pending. + + // + // Stack layout at this point: + // + // 0 [TOP_IJAVA_FRAME_ABI] <-- R1_SP + // ... + // CALLER [PARENT_IJAVA_FRAME_ABI] + // ... + // CALLER [unextended ABI] + // ... + // + // The CALLER frame has a C2I adapter or is an entry-frame. + // + + // An interpreter frame exists, we may pop the TOP_IJAVA_FRAME and + // turn the caller's PARENT_IJAVA_FRAME back into a TOP_IJAVA_FRAME. + // But, we simply restore the return pc from the caller's frame and + // use the caller's initial_caller_sp as the new SP which pops the + // interpreter frame and "resizes" the caller's frame to its "unextended" + // size. + + // get rid of top frame + __ pop_frame(); + + // Load return PC from parent frame. + __ ld(R21_tmp1, _parent_ijava_frame_abi(lr), R1_SP); + + // Resize frame to get rid of a potential extension. + __ resize_frame_to_initial_caller(R11_scratch1, R12_scratch2); + + // update LR + __ mtlr(R21_tmp1); + + // return + __ blr(); + + //============================================================================= + // Unwind from initial activation. 
An exception is pending + + __ BIND(unwind_initial_activation_pending_exception); + + // + // Stack layout at this point: + // + // 0 [TOP_IJAVA_FRAME_ABI] <-- R1_SP + // ... + // CALLER [PARENT_IJAVA_FRAME_ABI] + // ... + // CALLER [unextended ABI] + // ... + // + // The CALLER frame has a C2I adapter or is an entry-frame. + // + + // An interpreter frame exists, we may pop the TOP_IJAVA_FRAME and + // turn the caller's PARENT_IJAVA_FRAME back into a TOP_IJAVA_FRAME. + // But, we just pop the current TOP_IJAVA_FRAME and fall through + + __ pop_frame(); + __ ld(R3_ARG1, _top_ijava_frame_abi(lr), R1_SP); + + // + // Stack layout at this point: + // + // CALLER [PARENT_IJAVA_FRAME_ABI] <-- R1_SP + // ... + // CALLER [unextended ABI] + // ... + // + // The CALLER frame has a C2I adapter or is an entry-frame. + // + // Registers alive + // R16_thread + // R3_ARG1 - return address to caller + // + // Registers updated + // R3_ARG1 - address of pending exception + // R4_ARG2 - issuing pc = return address to caller + // LR - address of exception handler stub + // + + // Resize frame to get rid of a potential extension. + __ resize_frame_to_initial_caller(R11_scratch1, R12_scratch2); + + __ mr(R14, R3_ARG1); // R14 := ARG1 + __ mr(R4_ARG2, R3_ARG1); // ARG2 := ARG1 + + // Find the address of the "catch_exception" stub. + __ push_frame_abi112(0, R11_scratch1); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), + R16_thread, + R4_ARG2); + __ pop_frame(); + + // Load continuation address into LR. + __ mtlr(R3_RET); + + // Load address of pending exception and clear it in thread object. + __ ld(R3_ARG1/*R3_RET*/, thread_(pending_exception)); + __ li(R4_ARG2, 0); + __ std(R4_ARG2, thread_(pending_exception)); + + // re-load issuing pc + __ mr(R4_ARG2, R14); + + // Branch to found exception handler. + __ blr(); + + //============================================================================= + // Call a new method. Compute new args and trim the expression stack + // to only what we are currently using and then recurse. + + __ BIND(call_method); + + // + // Registers alive + // R16_thread + // R14_state - address of caller's BytecodeInterpreter + // R1_SP - caller's stack pointer + // + // Registers updated + // R15_prev_state - address of caller's BytecodeInterpreter + // R17_tos - address of caller's tos + // R19_method - callee's Method + // R1_SP - trimmed back + // + + // Very-local scratch registers. + + const Register offset = R21_tmp1; + const Register tmp = R22_tmp2; + const Register self_entry = R23_tmp3; + const Register stub_entry = R24_tmp4; + + const ConditionRegister cr = CCR0; + + // Load the address of the frame manager. + __ load_const(self_entry, &interpreter_frame_manager); + __ ld(self_entry, 0, self_entry); + + // Load BytecodeInterpreter._result._to_call._callee (callee's Method). + __ ld(R19_method, state_(_result._to_call._callee)); + // Load BytecodeInterpreter._stack (outgoing tos). + __ ld(R17_tos, state_(_stack)); + + // Save address of caller's BytecodeInterpreter. + __ mr(R15_prev_state, R14_state); + + // Load the callee's entry point. + // Load BytecodeInterpreter._result._to_call._callee_entry_point. + __ ld(stub_entry, state_(_result._to_call._callee_entry_point)); + + // Check whether stub_entry is equal to self_entry. 
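Side note (illustration only, not part of the patch): the comparison that follows chooses between re-entering the frame manager for an interpreted callee and tail-calling a specialized entry (JNI wrapper or compiled code). A standalone C++ sketch of that decision with hypothetical names:

#include <cstdio>

typedef const void* entry_t;

static const int   frame_manager_anchor = 0;
static const entry_t frame_manager_entry = &frame_manager_anchor;  // stands in for &interpreter_frame_manager

static void re_dispatch()                        { std::puts("interpreted callee: re-enter the dispatch loop"); }
static void call_special(entry_t /*stub_entry*/) { std::puts("native/compiled callee: tail-call its entry");    }

void call_method_dispatch(entry_t callee_entry_point) {
  if (callee_entry_point == frame_manager_entry) {
    re_dispatch();                      // callee is interpreted by this same frame manager
  } else {
    call_special(callee_entry_point);   // adapter for JNI or compiled code
  }
}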
+ __ cmpd(cr, self_entry, stub_entry); + // if (self_entry == stub_entry) + // do a re-dispatch + __ beq(cr, re_dispatch); + // else + // call the specialized entry (adapter for jni or compiled code) + __ BIND(call_special); + + // + // Call the entry generated by `InterpreterGenerator::generate_native_entry'. + // + // Registers alive + // R16_thread + // R15_prev_state - address of caller's BytecodeInterpreter + // R19_method - callee's Method + // R17_tos - address of caller's tos + // R1_SP - caller's stack pointer + // + + // Mark return from specialized entry for generate_native_entry. + guarantee(return_from_native_pc != (address) NULL, "precondition"); + frame_manager_specialized_return = return_from_native_pc; + + // Set sender_SP in case we call interpreter native wrapper which + // will expect it. Compiled code should not care. + __ mr(R21_sender_SP, R1_SP); + + // Do a tail call here, and let the link register point to + // frame_manager_specialized_return which is return_from_native_pc. + __ load_const(tmp, frame_manager_specialized_return); + __ call_stub_and_return_to(stub_entry, tmp /* return_pc=tmp */); + + + //============================================================================= + // + // InterpretMethod triggered OSR compilation of some Java method M + // and now asks to run the compiled code. We call this code the + // `callee'. + // + // This is our current idea on how OSR should look like on PPC64: + // + // While interpreting a Java method M the stack is: + // + // (InterpretMethod (M), IJAVA_FRAME (M), ANY_FRAME, ...). + // + // After having OSR compiled M, `InterpretMethod' returns to the + // frame manager, sending the message `retry_method_osr'. The stack + // is: + // + // (IJAVA_FRAME (M), ANY_FRAME, ...). + // + // The compiler will have generated an `nmethod' suitable for + // continuing execution of M at the bytecode index at which OSR took + // place. So now the frame manager calls the OSR entry. The OSR + // entry sets up a JIT_FRAME for M and continues execution of M with + // initial state determined by the IJAVA_FRAME. + // + // (JIT_FRAME (M), IJAVA_FRAME (M), ANY_FRAME, ...). + // + + __ BIND(retry_method_osr); + { + // + // Registers alive + // R16_thread + // R15_prev_state - address of caller's BytecodeInterpreter + // R14_state - address of callee's BytecodeInterpreter + // R1_SP - callee's SP before call to InterpretMethod + // + // Registers updated + // R17 - pointer to callee's locals array + // (declared via `interpreter_arg_ptr_reg' in the AD file) + // R19_method - callee's Method + // R1_SP - callee's SP (will become SP of OSR adapter frame) + // + + // Provide a debugger breakpoint in the frame manager if breakpoints + // in osr'd methods are requested. +#ifdef COMPILER2 + NOT_PRODUCT( if (OptoBreakpointOSR) { __ illtrap(); } ) +#endif + + // Load callee's pointer to locals array from callee's state. + // __ ld(R17, state_(_locals)); + + // Load osr entry. + __ ld(R12_scratch2, state_(_result._osr._osr_entry)); + + // Load address of temporary osr buffer to arg1. + __ ld(R3_ARG1, state_(_result._osr._osr_buf)); + __ mtctr(R12_scratch2); + + // Load method oop, gc may move it during execution of osr'd method. + __ ld(R22_tmp2, state_(_method)); + // Load message 'call_method'. + __ li(R23_tmp3, BytecodeInterpreter::call_method); + + { + // Pop the IJAVA frame of the method which we are going to call osr'd. 
+ Label no_state, skip_no_state; + __ pop_interpreter_state(/*prev_state_may_be_0=*/true); + __ cmpdi(CCR0, R14_state,0); + __ beq(CCR0, no_state); + // return to interpreter + __ pop_interpreter_frame_to_state(R14_state, R11_scratch1, R12_scratch2, R21_tmp1); + + // Init _result._to_call._callee and tell gc that it contains a valid oop + // by setting _msg to 'call_method'. + __ std(R22_tmp2, state_(_result._to_call._callee)); + // TODO: PPC port: assert(4 == BytecodeInterpreter::sz_msg(), "unexpected field size"); + __ stw(R23_tmp3, state_(_msg)); + + __ load_const(R21_tmp1, frame_manager_specialized_return); + __ b(skip_no_state); + __ bind(no_state); + + // Return to initial caller. + + // Get rid of top frame. + __ pop_frame(); + + // Load return PC from parent frame. + __ ld(R21_tmp1, _parent_ijava_frame_abi(lr), R1_SP); + + // Resize frame to get rid of a potential extension. + __ resize_frame_to_initial_caller(R11_scratch1, R12_scratch2); + + __ bind(skip_no_state); + + // Update LR with return pc. + __ mtlr(R21_tmp1); + } + // Jump to the osr entry point. + __ bctr(); + + } + + //============================================================================= + // Interpreted method "returned" with an exception, pass it on. + // Pass no result, unwind activation and continue/return to + // interpreter/call_stub/c2. + + __ BIND(throwing_exception); + + // Check if this is the initial invocation of the frame manager. If + // so, previous interpreter state in R15_prev_state will be null. + + // New tos of caller is callee's first parameter address, that is + // callee's incoming arguments are popped. + __ ld(R3_RET, state_(_locals)); + + // Check whether this is an initial call. + __ cmpdi(CCR0, R15_prev_state, 0); + // Yes, called from the call stub or from generated code via a c2i frame. + __ beq(CCR0, unwind_initial_activation_pending_exception); + + // Send resume message, interpreter will see the exception first. + + __ li(msg, BytecodeInterpreter::method_resume); + __ b(unwind_recursive_activation); + + + //============================================================================= + // Push the last instruction out to the code buffer. + + { + __ unimplemented("end of InterpreterGenerator::generate_normal_entry", 128); + } + + interpreter_frame_manager = entry; + return interpreter_frame_manager; +} + +// Generate code for various sorts of method entries +// +address AbstractInterpreterGenerator::generate_method_entry(AbstractInterpreter::MethodKind kind) { + address entry_point = NULL; + + switch (kind) { + case Interpreter::zerolocals : break; + case Interpreter::zerolocals_synchronized : break; + case Interpreter::native : // Fall thru + case Interpreter::native_synchronized : entry_point = ((CppInterpreterGenerator*)this)->generate_native_entry(); break; + case Interpreter::empty : break; + case Interpreter::accessor : entry_point = ((InterpreterGenerator*)this)->generate_accessor_entry(); break; + case Interpreter::abstract : entry_point = ((InterpreterGenerator*)this)->generate_abstract_entry(); break; + // These are special interpreter intrinsics which we don't support so far. 
+ case Interpreter::java_lang_math_sin : break; + case Interpreter::java_lang_math_cos : break; + case Interpreter::java_lang_math_tan : break; + case Interpreter::java_lang_math_abs : break; + case Interpreter::java_lang_math_log : break; + case Interpreter::java_lang_math_log10 : break; + case Interpreter::java_lang_math_sqrt : break; + case Interpreter::java_lang_math_pow : break; + case Interpreter::java_lang_math_exp : break; + case Interpreter::java_lang_ref_reference_get: entry_point = ((InterpreterGenerator*)this)->generate_Reference_get_entry(); break; + default : ShouldNotReachHere(); break; + } + + if (entry_point) { + return entry_point; + } + return ((InterpreterGenerator*)this)->generate_normal_entry(); +} + +InterpreterGenerator::InterpreterGenerator(StubQueue* code) + : CppInterpreterGenerator(code) { + generate_all(); // down here so it can be "virtual" +} + +// How much stack a topmost interpreter method activation needs in words. +int AbstractInterpreter::size_top_interpreter_activation(Method* method) { + // Computation is in bytes not words to match layout_activation_impl + // below, but the return is in words. + + // + // 0 [TOP_IJAVA_FRAME_ABI] \ + // alignment (optional) \ | + // [operand stack / Java parameters] > stack | | + // [monitors] (optional) > monitors | | + // [PARENT_IJAVA_FRAME_ABI] \ | | + // [BytecodeInterpreter object] > interpreter \ | | | + // alignment (optional) | round | parent | round | top + // [Java result] (2 slots) > result | | | | + // [Java non-arg locals] \ locals | | | | + // [arg locals] / / / / / + // + + int locals = method->max_locals() * BytesPerWord; + int interpreter = frame::interpreter_frame_cinterpreterstate_size_in_bytes(); + int result = 2 * BytesPerWord; + + int parent = round_to(interpreter + result + locals, 16) + frame::parent_ijava_frame_abi_size; + + int stack = method->max_stack() * BytesPerWord; + int monitors = method->is_synchronized() ? frame::interpreter_frame_monitor_size_in_bytes() : 0; + int top = round_to(parent + monitors + stack, 16) + frame::top_ijava_frame_abi_size; + + return (top / BytesPerWord); +} + +void BytecodeInterpreter::layout_interpreterState(interpreterState to_fill, + frame* caller, + frame* current, + Method* method, + intptr_t* locals, + intptr_t* stack, + intptr_t* stack_base, + intptr_t* monitor_base, + intptr_t* frame_sp, + bool is_top_frame) { + // What about any vtable? + // + to_fill->_thread = JavaThread::current(); + // This gets filled in later but make it something recognizable for now. + to_fill->_bcp = method->code_base(); + to_fill->_locals = locals; + to_fill->_constants = method->constants()->cache(); + to_fill->_method = method; + to_fill->_mdx = NULL; + to_fill->_stack = stack; + + if (is_top_frame && JavaThread::current()->popframe_forcing_deopt_reexecution()) { + to_fill->_msg = deopt_resume2; + } else { + to_fill->_msg = method_resume; + } + to_fill->_result._to_call._bcp_advance = 0; + to_fill->_result._to_call._callee_entry_point = NULL; // doesn't matter to anyone + to_fill->_result._to_call._callee = NULL; // doesn't matter to anyone + to_fill->_prev_link = NULL; + + if (caller->is_interpreted_frame()) { + interpreterState prev = caller->get_interpreterState(); + + // Support MH calls. Make sure the interpreter will return the right address: + // 1. Caller did ordinary interpreted->compiled call call: Set a prev_state + // which makes the CPP interpreter return to frame manager "return_from_interpreted_method" + // entry after finishing execution. + // 2. 
Caller did a MH call: If the caller has a MethodHandleInvoke in it's + // state (invariant: must be the caller of the bottom vframe) we used the + // "call_special" entry to do the call, meaning the arguments have not been + // popped from the stack. Therefore, don't enter a prev state in this case + // in order to return to "return_from_native" frame manager entry which takes + // care of popping arguments. Also, don't overwrite the MH.invoke Method in + // the prev_state in order to be able to figure out the number of arguments to + // pop. + // The parameter method can represent MethodHandle.invokeExact(...). + // The MethodHandleCompiler generates these synthetic Methods, + // including bytecodes, if an invokedynamic call gets inlined. In + // this case we want to return like from any other interpreted + // Java call, so we set _prev_link. + to_fill->_prev_link = prev; + + if (*prev->_bcp == Bytecodes::_invokeinterface || *prev->_bcp == Bytecodes::_invokedynamic) { + prev->_result._to_call._bcp_advance = 5; + } else { + prev->_result._to_call._bcp_advance = 3; + } + } + to_fill->_oop_temp = NULL; + to_fill->_stack_base = stack_base; + // Need +1 here because stack_base points to the word just above the + // first expr stack entry and stack_limit is supposed to point to + // the word just below the last expr stack entry. See + // generate_compute_interpreter_state. + to_fill->_stack_limit = stack_base - (method->max_stack() + 1); + to_fill->_monitor_base = (BasicObjectLock*) monitor_base; + + to_fill->_frame_bottom = frame_sp; + + // PPC64 specific + to_fill->_last_Java_pc = NULL; + to_fill->_last_Java_fp = NULL; + to_fill->_last_Java_sp = frame_sp; +#ifdef ASSERT + to_fill->_self_link = to_fill; + to_fill->_native_fresult = 123456.789; + to_fill->_native_lresult = CONST64(0xdeafcafedeadc0de); +#endif +} + +void BytecodeInterpreter::pd_layout_interpreterState(interpreterState istate, + address last_Java_pc, + intptr_t* last_Java_fp) { + istate->_last_Java_pc = last_Java_pc; + istate->_last_Java_fp = last_Java_fp; +} + +int AbstractInterpreter::layout_activation(Method* method, + int temps, // Number of slots on java expression stack in use. + int popframe_args, + int monitors, // Number of active monitors. + int caller_actual_parameters, + int callee_params,// Number of slots for callee parameters. + int callee_locals,// Number of slots for locals. + frame* caller, + frame* interpreter_frame, + bool is_top_frame, + bool is_bottom_frame) { + + // NOTE this code must exactly mimic what + // InterpreterGenerator::generate_compute_interpreter_state() does + // as far as allocating an interpreter frame. However there is an + // exception. With the C++ based interpreter only the top most frame + // has a full sized expression stack. The 16 byte slop factor is + // both the abi scratch area and a place to hold a result from a + // callee on its way to the callers stack. 
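Side note (illustration only, not part of the patch): the size computation that follows can be read as a single formula. A standalone C++ restatement with placeholder inputs; the concrete ABI and interpreter-state sizes are assumptions here, the real values come from frame_ppc.hpp:

#include <cstdio>

static int round_to(int x, int align) { return (x + align - 1) & ~(align - 1); }

// Bytes needed for a topmost interpreter frame: interpreter state + monitors +
// full expression stack + 2 result slots, rounded to 16, plus the top ABI area.
int top_frame_size_in_bytes(int istate_bytes, int monitor_bytes,
                            int max_stack_slots, int top_abi_bytes) {
  const int word = 8;  // BytesPerWord on PPC64
  return round_to(istate_bytes + monitor_bytes +
                  max_stack_slots * word + 2 * word, 16) + top_abi_bytes;
}

int main() {
  // Placeholder example: 296-byte interpreter state, no monitors,
  // max_stack of 4 slots, 112-byte top ABI.
  std::printf("%d\n", top_frame_size_in_bytes(296, 0, 4, 112));
  return 0;
}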
+ + int monitor_size = frame::interpreter_frame_monitor_size_in_bytes() * monitors; + int frame_size; + int top_frame_size = round_to(frame::interpreter_frame_cinterpreterstate_size_in_bytes() + + monitor_size + + (method->max_stack() *Interpreter::stackElementWords * BytesPerWord) + + 2*BytesPerWord, + frame::alignment_in_bytes) + + frame::top_ijava_frame_abi_size; + if (is_top_frame) { + frame_size = top_frame_size; + } else { + frame_size = round_to(frame::interpreter_frame_cinterpreterstate_size_in_bytes() + + monitor_size + + ((temps - callee_params + callee_locals) * + Interpreter::stackElementWords * BytesPerWord) + + 2*BytesPerWord, + frame::alignment_in_bytes) + + frame::parent_ijava_frame_abi_size; + assert(popframe_args==0, "non-zero for top_frame only"); + } + + // If we actually have a frame to layout we must now fill in all the pieces. + if (interpreter_frame != NULL) { + + intptr_t sp = (intptr_t)interpreter_frame->sp(); + intptr_t fp = *(intptr_t *)sp; + assert(fp == (intptr_t)caller->sp(), "fp must match"); + interpreterState cur_state = + (interpreterState)(fp - frame::interpreter_frame_cinterpreterstate_size_in_bytes()); + + // Now fill in the interpreterState object. + + intptr_t* locals; + if (caller->is_interpreted_frame()) { + // Locals must agree with the caller because it will be used to set the + // caller's tos when we return. + interpreterState prev = caller->get_interpreterState(); + // Calculate start of "locals" for MH calls. For MH calls, the + // current method() (= MH target) and prev->callee() (= + // MH.invoke*()) are different and especially have different + // signatures. To pop the argumentsof the caller, we must use + // the prev->callee()->size_of_arguments() because that's what + // the caller actually pushed. Currently, for synthetic MH + // calls (deoptimized from inlined MH calls), detected by + // is_method_handle_invoke(), we use the callee's arguments + // because here, the caller's and callee's signature match. + if (true /*!caller->is_at_mh_callsite()*/) { + locals = prev->stack() + method->size_of_parameters(); + } else { + // Normal MH call. + locals = prev->stack() + prev->callee()->size_of_parameters(); + } + } else { + bool is_deopted; + locals = (intptr_t*) (fp + ((method->max_locals() - 1) * BytesPerWord) + + frame::parent_ijava_frame_abi_size); + } + + intptr_t* monitor_base = (intptr_t*) cur_state; + intptr_t* stack_base = (intptr_t*) ((intptr_t) monitor_base - monitor_size); + + // Provide pop_frame capability on PPC64, add popframe_args. + // +1 because stack is always prepushed. + intptr_t* stack = (intptr_t*) ((intptr_t) stack_base - (temps + popframe_args + 1) * BytesPerWord); + + BytecodeInterpreter::layout_interpreterState(cur_state, + caller, + interpreter_frame, + method, + locals, + stack, + stack_base, + monitor_base, + (intptr_t*)(((intptr_t)fp)-top_frame_size), + is_top_frame); + + BytecodeInterpreter::pd_layout_interpreterState(cur_state, interpreter_return_address, + interpreter_frame->fp()); + } + return frame_size/BytesPerWord; +} + +#endif // CC_INTERP diff --git a/src/cpu/ppc/vm/cppInterpreter_ppc.hpp b/src/cpu/ppc/vm/cppInterpreter_ppc.hpp new file mode 100644 --- /dev/null +++ b/src/cpu/ppc/vm/cppInterpreter_ppc.hpp @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright 2012, 2013 SAP AG. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_PPC_VM_CPPINTERPRETER_PPC_HPP +#define CPU_PPC_VM_CPPINTERPRETER_PPC_HPP + + protected: + + // Size of interpreter code. Increase if too small. Interpreter will + // fail with a guarantee ("not enough space for interpreter generation"); + // if too small. + // Run with +PrintInterpreter to get the VM to print out the size. + // Max size with JVMTI + + const static int InterpreterCodeSize = 12*K; + +#endif // CPU_PPC_VM_CPPINTERPRETER_PPC_HPP diff --git a/src/cpu/ppc/vm/debug_ppc.cpp b/src/cpu/ppc/vm/debug_ppc.cpp new file mode 100644 --- /dev/null +++ b/src/cpu/ppc/vm/debug_ppc.cpp @@ -0,0 +1,35 @@ +/* + * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright 2012, 2013 SAP AG. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "code/codeCache.hpp" +#include "code/nmethod.hpp" +#include "runtime/frame.hpp" +#include "runtime/init.hpp" +#include "runtime/os.hpp" +#include "utilities/debug.hpp" +#include "utilities/top.hpp" + +void pd_ps(frame f) {} diff --git a/src/cpu/ppc/vm/depChecker_ppc.hpp b/src/cpu/ppc/vm/depChecker_ppc.hpp new file mode 100644 --- /dev/null +++ b/src/cpu/ppc/vm/depChecker_ppc.hpp @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright 2012, 2013 SAP AG. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_PPC_VM_DEPCHECKER_PPC_HPP +#define CPU_PPC_VM_DEPCHECKER_PPC_HPP + +// Nothing to do on ppc64 + +#endif // CPU_PPC_VM_DEPCHECKER_PPC_HPP diff --git a/src/cpu/ppc/vm/disassembler_ppc.hpp b/src/cpu/ppc/vm/disassembler_ppc.hpp new file mode 100644 --- /dev/null +++ b/src/cpu/ppc/vm/disassembler_ppc.hpp @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright 2012, 2013 SAP AG. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_PPC_VM_DISASSEMBLER_PPC_HPP +#define CPU_PPC_VM_DISASSEMBLER_PPC_HPP + + static int pd_instruction_alignment() { + return sizeof(int); + } + + static const char* pd_cpu_opts() { + return "ppc64"; + } + +#endif // CPU_PPC_VM_DISASSEMBLER_PPC_HPP diff --git a/src/cpu/ppc/vm/frame_ppc.cpp b/src/cpu/ppc/vm/frame_ppc.cpp new file mode 100644 --- /dev/null +++ b/src/cpu/ppc/vm/frame_ppc.cpp @@ -0,0 +1,306 @@ +/* + * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright 2012, 2013 SAP AG. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "interpreter/interpreter.hpp" +#include "memory/resourceArea.hpp" +#include "oops/markOop.hpp" +#include "oops/method.hpp" +#include "oops/oop.inline.hpp" +#include "runtime/frame.inline.hpp" +#include "runtime/handles.inline.hpp" +#include "runtime/javaCalls.hpp" +#include "runtime/monitorChunk.hpp" +#include "runtime/signature.hpp" +#include "runtime/stubCodeGenerator.hpp" +#include "runtime/stubRoutines.hpp" +#include "vmreg_ppc.inline.hpp" +#ifdef COMPILER1 +#include "c1/c1_Runtime1.hpp" +#include "runtime/vframeArray.hpp" +#endif + +#ifndef CC_INTERP +#error "CC_INTERP must be defined on PPC64" +#endif + +#ifdef ASSERT +void RegisterMap::check_location_valid() { +} +#endif // ASSERT + +bool frame::safe_for_sender(JavaThread *thread) { + bool safe = false; + address cursp = (address)sp(); + address curfp = (address)fp(); + if ((cursp != NULL && curfp != NULL && + (cursp <= thread->stack_base() && cursp >= thread->stack_base() - thread->stack_size())) && + (curfp <= thread->stack_base() && curfp >= thread->stack_base() - thread->stack_size())) { + safe = true; + } + return safe; +} + +bool frame::is_interpreted_frame() const { + return Interpreter::contains(pc()); +} + +frame frame::sender_for_entry_frame(RegisterMap *map) const { + assert(map != NULL, "map must be set"); + // Java frame called from C; skip all C frames and return top C + // frame of that chunk as the sender. + JavaFrameAnchor* jfa = entry_frame_call_wrapper()->anchor(); + assert(!entry_frame_is_first(), "next Java fp must be non zero"); + assert(jfa->last_Java_sp() > _sp, "must be above this frame on stack"); + map->clear(); + assert(map->include_argument_oops(), "should be set by clear"); + + if (jfa->last_Java_pc() != NULL) { + frame fr(jfa->last_Java_sp(), jfa->last_Java_pc()); + return fr; + } + // Last_java_pc is not set, if we come here from compiled code. The + // constructor retrieves the PC from the stack. + frame fr(jfa->last_Java_sp()); + return fr; +} + +frame frame::sender_for_interpreter_frame(RegisterMap *map) const { + // Pass callers initial_caller_sp as unextended_sp. + return frame(sender_sp(), sender_pc(), (intptr_t*)((parent_ijava_frame_abi *)callers_abi())->initial_caller_sp); +} + +frame frame::sender_for_compiled_frame(RegisterMap *map) const { + assert(map != NULL, "map must be set"); + + // Frame owned by compiler. + address pc = *compiled_sender_pc_addr(_cb); + frame caller(compiled_sender_sp(_cb), pc); + + // Now adjust the map. + + // Get the rest. + if (map->update_map()) { + // Tell GC to use argument oopmaps for some runtime stubs that need it. + map->set_include_argument_oops(_cb->caller_must_gc_arguments(map->thread())); + if (_cb->oop_maps() != NULL) { + OopMapSet::update_register_map(this, map); + } + } + + return caller; +} + +intptr_t* frame::compiled_sender_sp(CodeBlob* cb) const { + return sender_sp(); +} + +address* frame::compiled_sender_pc_addr(CodeBlob* cb) const { + return sender_pc_addr(); +} + +frame frame::sender(RegisterMap* map) const { + // Default is we do have to follow them. The sender_for_xxx will + // update it accordingly. 
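Side note (illustration only, not part of the patch): the bounds test in safe_for_sender above amounts to checking that both sp and fp lie inside the thread's stack, which grows downwards from stack_base. A standalone C++ sketch with hypothetical names:

#include <cstddef>

// The thread stack occupies [stack_base - stack_size, stack_base]; a frame is
// only considered safe if both of its pointers lie inside that range.
static bool within_stack(const char* p, const char* stack_base, size_t stack_size) {
  return p != nullptr && p <= stack_base && p >= stack_base - stack_size;
}

bool frame_safe_for_sender(const char* sp, const char* fp,
                           const char* stack_base, size_t stack_size) {
  return within_stack(sp, stack_base, stack_size) &&
         within_stack(fp, stack_base, stack_size);
}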
+ map->set_include_argument_oops(false); + + if (is_entry_frame()) return sender_for_entry_frame(map); + if (is_interpreted_frame()) return sender_for_interpreter_frame(map); + assert(_cb == CodeCache::find_blob(pc()),"Must be the same"); + + if (_cb != NULL) { + return sender_for_compiled_frame(map); + } + // Must be native-compiled frame, i.e. the marshaling code for native + // methods that exists in the core system. + return frame(sender_sp(), sender_pc()); +} + +void frame::patch_pc(Thread* thread, address pc) { + if (TracePcPatching) { + tty->print_cr("patch_pc at address " PTR_FORMAT " [" PTR_FORMAT " -> " PTR_FORMAT "]", + &((address*) _sp)[-1], ((address*) _sp)[-1], pc); + } + own_abi()->lr = (uint64_t)pc; + _cb = CodeCache::find_blob(pc); + if (_cb != NULL && _cb->is_nmethod() && ((nmethod*)_cb)->is_deopt_pc(_pc)) { + address orig = (((nmethod*)_cb)->get_original_pc(this)); + assert(orig == _pc, "expected original to be stored before patching"); + _deopt_state = is_deoptimized; + // Leave _pc as is. + } else { + _deopt_state = not_deoptimized; + _pc = pc; + } +} + +void frame::pd_gc_epilog() { + if (is_interpreted_frame()) { + // Set constant pool cache entry for interpreter. + Method* m = interpreter_frame_method(); + + *interpreter_frame_cpoolcache_addr() = m->constants()->cache(); + } +} + +bool frame::is_interpreted_frame_valid(JavaThread* thread) const { + // Is there anything to do? + assert(is_interpreted_frame(), "Not an interpreted frame"); + return true; +} + +BasicType frame::interpreter_frame_result(oop* oop_result, jvalue* value_result) { + assert(is_interpreted_frame(), "interpreted frame expected"); + Method* method = interpreter_frame_method(); + BasicType type = method->result_type(); + +#ifdef CC_INTERP + if (method->is_native()) { + // Prior to calling into the runtime to notify the method exit the possible + // result value is saved into the interpreter frame. + interpreterState istate = get_interpreterState(); + address lresult = (address)istate + in_bytes(BytecodeInterpreter::native_lresult_offset()); + address fresult = (address)istate + in_bytes(BytecodeInterpreter::native_fresult_offset()); + + switch (method->result_type()) { + case T_OBJECT: + case T_ARRAY: { + oop* obj_p = *(oop**)lresult; + oop obj = (obj_p == NULL) ? NULL : *obj_p; + assert(obj == NULL || Universe::heap()->is_in(obj), "sanity check"); + *oop_result = obj; + break; + } + // We use std/stfd to store the values. 
+ case T_BOOLEAN : value_result->z = (jboolean) *(unsigned long*)lresult; break; + case T_INT : value_result->i = (jint) *(long*)lresult; break; + case T_CHAR : value_result->c = (jchar) *(unsigned long*)lresult; break; + case T_SHORT : value_result->s = (jshort) *(long*)lresult; break; + case T_BYTE : value_result->z = (jbyte) *(long*)lresult; break; + case T_LONG : value_result->j = (jlong) *(long*)lresult; break; + case T_FLOAT : value_result->f = (jfloat) *(double*)fresult; break; + case T_DOUBLE : value_result->d = (jdouble) *(double*)fresult; break; + case T_VOID : /* Nothing to do */ break; + default : ShouldNotReachHere(); + } + } else { + intptr_t* tos_addr = interpreter_frame_tos_address(); + switch (method->result_type()) { + case T_OBJECT: + case T_ARRAY: { + oop obj = *(oop*)tos_addr; + assert(obj == NULL || Universe::heap()->is_in(obj), "sanity check"); + *oop_result = obj; + } + case T_BOOLEAN : value_result->z = (jboolean) *(jint*)tos_addr; break; + case T_BYTE : value_result->b = (jbyte) *(jint*)tos_addr; break; + case T_CHAR : value_result->c = (jchar) *(jint*)tos_addr; break; + case T_SHORT : value_result->s = (jshort) *(jint*)tos_addr; break; + case T_INT : value_result->i = *(jint*)tos_addr; break; + case T_LONG : value_result->j = *(jlong*)tos_addr; break; + case T_FLOAT : value_result->f = *(jfloat*)tos_addr; break; + case T_DOUBLE : value_result->d = *(jdouble*)tos_addr; break; + case T_VOID : /* Nothing to do */ break; + default : ShouldNotReachHere(); + } + } +#else + Unimplemented(); +#endif + return type; +} + +#ifndef PRODUCT + +void frame::describe_pd(FrameValues& values, int frame_no) { + if (is_interpreted_frame()) { +#ifdef CC_INTERP + interpreterState istate = get_interpreterState(); + values.describe(frame_no, (intptr_t*)istate, "istate"); + values.describe(frame_no, (intptr_t*)&(istate->_thread), " thread"); + values.describe(frame_no, (intptr_t*)&(istate->_bcp), " bcp"); + values.describe(frame_no, (intptr_t*)&(istate->_locals), " locals"); + values.describe(frame_no, (intptr_t*)&(istate->_constants), " constants"); + values.describe(frame_no, (intptr_t*)&(istate->_method), err_msg(" method = %s", istate->_method->name_and_sig_as_C_string())); + values.describe(frame_no, (intptr_t*)&(istate->_mdx), " mdx"); + values.describe(frame_no, (intptr_t*)&(istate->_stack), " stack"); + values.describe(frame_no, (intptr_t*)&(istate->_msg), err_msg(" msg = %s", BytecodeInterpreter::C_msg(istate->_msg))); + values.describe(frame_no, (intptr_t*)&(istate->_result), " result"); + values.describe(frame_no, (intptr_t*)&(istate->_prev_link), " prev_link"); + values.describe(frame_no, (intptr_t*)&(istate->_oop_temp), " oop_temp"); + values.describe(frame_no, (intptr_t*)&(istate->_stack_base), " stack_base"); + values.describe(frame_no, (intptr_t*)&(istate->_stack_limit), " stack_limit"); + values.describe(frame_no, (intptr_t*)&(istate->_monitor_base), " monitor_base"); + values.describe(frame_no, (intptr_t*)&(istate->_frame_bottom), " frame_bottom"); + values.describe(frame_no, (intptr_t*)&(istate->_last_Java_pc), " last_Java_pc"); + values.describe(frame_no, (intptr_t*)&(istate->_last_Java_fp), " last_Java_fp"); + values.describe(frame_no, (intptr_t*)&(istate->_last_Java_sp), " last_Java_sp"); + values.describe(frame_no, (intptr_t*)&(istate->_self_link), " self_link"); + values.describe(frame_no, (intptr_t*)&(istate->_native_fresult), " native_fresult"); + values.describe(frame_no, (intptr_t*)&(istate->_native_lresult), " native_lresult"); +#else + Unimplemented(); 
+#endif + } +} +#endif + +void frame::adjust_unextended_sp() { + // If we are returning to a compiled MethodHandle call site, the + // saved_fp will in fact be a saved value of the unextended SP. The + // simplest way to tell whether we are returning to such a call site + // is as follows: + + if (is_compiled_frame() && false /*is_at_mh_callsite()*/) { // TODO PPC port + // If the sender PC is a deoptimization point, get the original + // PC. For MethodHandle call site the unextended_sp is stored in + // saved_fp. + _unextended_sp = _fp - _cb->frame_size(); + +#ifdef ASSERT + nmethod *sender_nm = _cb->as_nmethod_or_null(); + assert(sender_nm && *_sp == *_unextended_sp, "backlink changed"); + + intptr_t* sp = _unextended_sp; // check if stack can be walked from here + for (int x = 0; x < 5; ++x) { // check up to a couple of backlinks + intptr_t* prev_sp = *(intptr_t**)sp; + if (prev_sp == 0) break; // end of stack + assert(prev_sp>sp, "broken stack"); + sp = prev_sp; + } + + if (sender_nm->is_deopt_mh_entry(_pc)) { // checks for deoptimization + address original_pc = sender_nm->get_original_pc(this); + assert(sender_nm->insts_contains(original_pc), "original PC must be in nmethod"); + assert(sender_nm->is_method_handle_return(original_pc), "must be"); + } +#endif + } +} + +intptr_t *frame::initial_deoptimization_info() { + // unused... but returns fp() to minimize changes introduced by 7087445 + return fp(); +} diff --git a/src/cpu/ppc/vm/frame_ppc.hpp b/src/cpu/ppc/vm/frame_ppc.hpp new file mode 100644 --- /dev/null +++ b/src/cpu/ppc/vm/frame_ppc.hpp @@ -0,0 +1,449 @@ +/* + * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright 2012, 2013 SAP AG. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_PPC_VM_FRAME_PPC_HPP +#define CPU_PPC_VM_FRAME_PPC_HPP + +#include "runtime/synchronizer.hpp" +#include "utilities/top.hpp" + +#ifndef CC_INTERP +#error "CC_INTERP must be defined on PPC64" +#endif + + // C frame layout on PPC-64. + // + // In this figure the stack grows upwards, while memory grows + // downwards. See "64-bit PowerPC ELF ABI Supplement Version 1.7", + // IBM Corp. (2003-10-29) + // (http://math-atlas.sourceforge.net/devel/assembly/PPC-elf64abi-1.7.pdf). + // + // Square brackets denote stack regions possibly larger + // than a single 64 bit slot. + // + // STACK: + // 0 [C_FRAME] <-- SP after prolog (mod 16 = 0) + // [C_FRAME] <-- SP before prolog + // ... + // [C_FRAME] + // + // C_FRAME: + // 0 [ABI_112] + // 112 CARG_9: outgoing arg 9 (arg_1 ... arg_8 via gpr_3 ... 
gpr_{10}) + // ... + // 40+M*8 CARG_M: outgoing arg M (M is the maximum of outgoing args taken over all call sites in the procedure) + // local 1 + // ... + // local N + // spill slot for vector reg (16 bytes aligned) + // ... + // spill slot for vector reg + // alignment (4 or 12 bytes) + // V SR_VRSAVE + // V+4 spill slot for GR + // ... ... + // spill slot for GR + // spill slot for FR + // ... + // spill slot for FR + // + // ABI_48: + // 0 caller's SP + // 8 space for condition register (CR) for next call + // 16 space for link register (LR) for next call + // 24 reserved + // 32 reserved + // 40 space for TOC (=R2) register for next call + // + // ABI_112: + // 0 [ABI_48] + // 48 CARG_1: spill slot for outgoing arg 1. used by next callee. + // ... ... + // 104 CARG_8: spill slot for outgoing arg 8. used by next callee. + // + + public: + + // C frame layout + + enum { + // stack alignment + alignment_in_bytes = 16, + // log_2(16*8 bits) = 7. + log_2_of_alignment_in_bits = 7 + }; + + // ABI_48: + struct abi_48 { + uint64_t callers_sp; + uint64_t cr; //_16 + uint64_t lr; + uint64_t reserved1; //_16 + uint64_t reserved2; + uint64_t toc; //_16 + // nothing to add here! + // aligned to frame::alignment_in_bytes (16) + }; + + enum { + abi_48_size = sizeof(abi_48) + }; + + struct abi_112 : abi_48 { + uint64_t carg_1; + uint64_t carg_2; //_16 + uint64_t carg_3; + uint64_t carg_4; //_16 + uint64_t carg_5; + uint64_t carg_6; //_16 + uint64_t carg_7; + uint64_t carg_8; //_16 + // aligned to frame::alignment_in_bytes (16) + }; + + enum { + abi_112_size = sizeof(abi_112) + }; + + #define _abi(_component) \ + (offset_of(frame::abi_112, _component)) + + struct abi_112_spill : abi_112 { + // additional spill slots + uint64_t spill_ret; + uint64_t spill_fret; //_16 + // aligned to frame::alignment_in_bytes (16) + }; + + enum { + abi_112_spill_size = sizeof(abi_112_spill) + }; + + #define _abi_112_spill(_component) \ + (offset_of(frame::abi_112_spill, _component)) + + // non-volatile GPRs: + + struct spill_nonvolatiles { + uint64_t r14; + uint64_t r15; //_16 + uint64_t r16; + uint64_t r17; //_16 + uint64_t r18; + uint64_t r19; //_16 + uint64_t r20; + uint64_t r21; //_16 + uint64_t r22; + uint64_t r23; //_16 + uint64_t r24; + uint64_t r25; //_16 + uint64_t r26; + uint64_t r27; //_16 + uint64_t r28; + uint64_t r29; //_16 + uint64_t r30; + uint64_t r31; //_16 + + double f14; + double f15; + double f16; + double f17; + double f18; + double f19; + double f20; + double f21; + double f22; + double f23; + double f24; + double f25; + double f26; + double f27; + double f28; + double f29; + double f30; + double f31; + + // aligned to frame::alignment_in_bytes (16) + }; + + enum { + spill_nonvolatiles_size = sizeof(spill_nonvolatiles) + }; + + #define _spill_nonvolatiles_neg(_component) \ + (int)(-frame::spill_nonvolatiles_size + offset_of(frame::spill_nonvolatiles, _component)) + + // Frame layout for the Java interpreter on PPC64. + // + // This frame layout provides a C-like frame for every Java frame. + // + // In these figures the stack grows upwards, while memory grows + // downwards. Square brackets denote regions possibly larger than + // single 64 bit slots. + // + // STACK (no JNI, no compiled code, no library calls, + // interpreter-loop is active): + // 0 [InterpretMethod] + // [TOP_IJAVA_FRAME] + // [PARENT_IJAVA_FRAME] + // ... + // [PARENT_IJAVA_FRAME] + // [ENTRY_FRAME] + // [C_FRAME] + // ... 
+ // [C_FRAME] + // + // TOP_IJAVA_FRAME: + // 0 [TOP_IJAVA_FRAME_ABI] + // alignment (optional) + // [operand stack] + // [monitors] (optional) + // [cInterpreter object] + // result, locals, and arguments are in parent frame! + // + // PARENT_IJAVA_FRAME: + // 0 [PARENT_IJAVA_FRAME_ABI] + // alignment (optional) + // [callee's Java result] + // [callee's locals w/o arguments] + // [outgoing arguments] + // [used part of operand stack w/o arguments] + // [monitors] (optional) + // [cInterpreter object] + // + // ENTRY_FRAME: + // 0 [PARENT_IJAVA_FRAME_ABI] + // alignment (optional) + // [callee's Java result] + // [callee's locals w/o arguments] + // [outgoing arguments] + // [ENTRY_FRAME_LOCALS] + // + // PARENT_IJAVA_FRAME_ABI: + // 0 [ABI_48] + // top_frame_sp + // initial_caller_sp + // + // TOP_IJAVA_FRAME_ABI: + // 0 [PARENT_IJAVA_FRAME_ABI] + // carg_3_unused + // carg_4_unused + // carg_5_unused + // carg_6_unused + // carg_7_unused + // frame_manager_lr + // + + // PARENT_IJAVA_FRAME_ABI + + struct parent_ijava_frame_abi : abi_48 { + // SOE registers. + // C2i adapters spill their top-frame stack-pointer here. + uint64_t top_frame_sp; // carg_1 + // Sp of calling compiled frame before it was resized by the c2i + // adapter or sp of call stub. Does not contain a valid value for + // non-initial frames. + uint64_t initial_caller_sp; // carg_2 + // aligned to frame::alignment_in_bytes (16) + }; + + enum { + parent_ijava_frame_abi_size = sizeof(parent_ijava_frame_abi) + }; + + #define _parent_ijava_frame_abi(_component) \ + (offset_of(frame::parent_ijava_frame_abi, _component)) + + // TOP_IJAVA_FRAME_ABI + + struct top_ijava_frame_abi : parent_ijava_frame_abi { + uint64_t carg_3_unused; // carg_3 + uint64_t card_4_unused; //_16 carg_4 + uint64_t carg_5_unused; // carg_5 + uint64_t carg_6_unused; //_16 carg_6 + uint64_t carg_7_unused; // carg_7 + // Use arg8 for storing frame_manager_lr. The size of + // top_ijava_frame_abi must match abi_112. + uint64_t frame_manager_lr; //_16 carg_8 + // nothing to add here! + // aligned to frame::alignment_in_bytes (16) + }; + + enum { + top_ijava_frame_abi_size = sizeof(top_ijava_frame_abi) + }; + + #define _top_ijava_frame_abi(_component) \ + (offset_of(frame::top_ijava_frame_abi, _component)) + + // ENTRY_FRAME + + struct entry_frame_locals { + uint64_t call_wrapper_address; + uint64_t result_address; //_16 + uint64_t result_type; + uint64_t arguments_tos_address; //_16 + // aligned to frame::alignment_in_bytes (16) + uint64_t r[spill_nonvolatiles_size/sizeof(uint64_t)]; + }; + + enum { + entry_frame_locals_size = sizeof(entry_frame_locals) + }; + + #define _entry_frame_locals_neg(_component) \ + (int)(-frame::entry_frame_locals_size + offset_of(frame::entry_frame_locals, _component)) + + + // Frame layout for JIT generated methods + // + // In these figures the stack grows upwards, while memory grows + // downwards. Square brackets denote regions possibly larger than single + // 64 bit slots. + // + // STACK (interpreted Java calls JIT generated Java): + // [JIT_FRAME] <-- SP (mod 16 = 0) + // [TOP_IJAVA_FRAME] + // ... + // + // JIT_FRAME (is a C frame according to PPC-64 ABI): + // [out_preserve] + // [out_args] + // [spills] + // [pad_1] + // [monitor] (optional) + // ... + // [monitor] (optional) + // [pad_2] + // [in_preserve] added / removed by prolog / epilog + // + + // JIT_ABI (TOP and PARENT) + + struct jit_abi { + uint64_t callers_sp; + uint64_t cr; + uint64_t lr; + uint64_t toc; + // Nothing to add here! 
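+    // (Same slots as abi_48 minus the two reserved doublewords.)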
+ // NOT ALIGNED to frame::alignment_in_bytes (16). + }; + + struct jit_out_preserve : jit_abi { + // Nothing to add here! + }; + + struct jit_in_preserve { + // Nothing to add here! + }; + + enum { + jit_out_preserve_size = sizeof(jit_out_preserve), + jit_in_preserve_size = sizeof(jit_in_preserve) + }; + + struct jit_monitor { + uint64_t monitor[1]; + }; + + enum { + jit_monitor_size = sizeof(jit_monitor), + }; + + private: + + // STACK: + // ... + // [THIS_FRAME] <-- this._sp (stack pointer for this frame) + // [CALLER_FRAME] <-- this.fp() (_sp of caller's frame) + // ... + // + + // frame pointer for this frame + intptr_t* _fp; + + // The frame's stack pointer before it has been extended by a c2i adapter; + // needed by deoptimization + intptr_t* _unextended_sp; + void adjust_unextended_sp(); + + public: + + // Accessors for fields + intptr_t* fp() const { return _fp; } + + // Accessors for ABIs + inline abi_48* own_abi() const { return (abi_48*) _sp; } + inline abi_48* callers_abi() const { return (abi_48*) _fp; } + + private: + + // Find codeblob and set deopt_state. + inline void find_codeblob_and_set_pc_and_deopt_state(address pc); + + public: + + // Constructors + inline frame(intptr_t* sp); + frame(intptr_t* sp, address pc); + inline frame(intptr_t* sp, address pc, intptr_t* unextended_sp); + + private: + + intptr_t* compiled_sender_sp(CodeBlob* cb) const; + address* compiled_sender_pc_addr(CodeBlob* cb) const; + address* sender_pc_addr(void) const; + + public: + +#ifdef CC_INTERP + // Additional interface for interpreter frames: + inline interpreterState get_interpreterState() const; +#endif + + // Size of a monitor in bytes. + static int interpreter_frame_monitor_size_in_bytes(); + + // The size of a cInterpreter object. + static inline int interpreter_frame_cinterpreterstate_size_in_bytes(); + + private: + + // PPC port: permgen stuff + ConstantPoolCache** interpreter_frame_cpoolcache_addr() const; + + public: + + // Additional interface for entry frames: + inline entry_frame_locals* get_entry_frame_locals() const { + return (entry_frame_locals*) (((address) fp()) - entry_frame_locals_size); + } + + enum { + // normal return address is 1 bundle past PC + pc_return_offset = 0 + }; + +#endif // CPU_PPC_VM_FRAME_PPC_HPP diff --git a/src/cpu/ppc/vm/frame_ppc.inline.hpp b/src/cpu/ppc/vm/frame_ppc.inline.hpp new file mode 100644 --- /dev/null +++ b/src/cpu/ppc/vm/frame_ppc.inline.hpp @@ -0,0 +1,239 @@ +/* + * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright 2012, 2013 SAP AG. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_PPC_VM_FRAME_PPC_INLINE_HPP +#define CPU_PPC_VM_FRAME_PPC_INLINE_HPP + +#ifndef CC_INTERP +#error "CC_INTERP must be defined on PPC64" +#endif + +// Inline functions for ppc64 frames: + +// Find codeblob and set deopt_state. +inline void frame::find_codeblob_and_set_pc_and_deopt_state(address pc) { + assert(pc != NULL, "precondition: must have PC"); + + _cb = CodeCache::find_blob(pc); + _pc = pc; // Must be set for get_deopt_original_pc() + + _fp = (intptr_t*)own_abi()->callers_sp; + // Use _fp - frame_size, needs to be done between _cb and _pc initialization + // and get_deopt_original_pc. + adjust_unextended_sp(); + + address original_pc = nmethod::get_deopt_original_pc(this); + if (original_pc != NULL) { + _pc = original_pc; + _deopt_state = is_deoptimized; + } else { + _deopt_state = not_deoptimized; + } + + assert(((uint64_t)_sp & 0xf) == 0, "SP must be 16-byte aligned"); +} + +// Constructors + +// Initialize all fields, _unextended_sp will be adjusted in find_codeblob_and_set_pc_and_deopt_state. +inline frame::frame() : _sp(NULL), _unextended_sp(NULL), _fp(NULL), _cb(NULL), _pc(NULL), _deopt_state(unknown) {} + +inline frame::frame(intptr_t* sp) : _sp(sp), _unextended_sp(sp) { + find_codeblob_and_set_pc_and_deopt_state((address)own_abi()->lr); // also sets _fp and adjusts _unextended_sp +} + +inline frame::frame(intptr_t* sp, address pc) : _sp(sp), _unextended_sp(sp) { + find_codeblob_and_set_pc_and_deopt_state(pc); // also sets _fp and adjusts _unextended_sp +} + +inline frame::frame(intptr_t* sp, address pc, intptr_t* unextended_sp) : _sp(sp), _unextended_sp(unextended_sp) { + find_codeblob_and_set_pc_and_deopt_state(pc); // also sets _fp and adjusts _unextended_sp +} + +// Accessors + +// Return unique id for this frame. The id must have a value where we +// can distinguish identity and younger/older relationship. NULL +// represents an invalid (incomparable) frame. +inline intptr_t* frame::id(void) const { + // Use the _unextended_pc as the frame's ID. Because we have no + // adapters, but resized compiled frames, some of the new code + // (e.g. JVMTI) wouldn't work if we return the (current) SP of the + // frame. + return _unextended_sp; +} + +// Return true if this frame is older (less recent activation) than +// the frame represented by id. +inline bool frame::is_older(intptr_t* id) const { + assert(this->id() != NULL && id != NULL, "NULL frame id"); + // Stack grows towards smaller addresses on ppc64. + return this->id() > id; +} + +inline int frame::frame_size(RegisterMap* map) const { + // Stack grows towards smaller addresses on PPC64: sender is at a higher address. + return sender_sp() - sp(); +} + +// Return the frame's stack pointer before it has been extended by a +// c2i adapter. This is needed by deoptimization for ignoring c2i adapter +// frames. +inline intptr_t* frame::unextended_sp() const { + return _unextended_sp; +} + +// All frames have this field. +inline address frame::sender_pc() const { + return (address)callers_abi()->lr; +} +inline address* frame::sender_pc_addr() const { + return (address*)&(callers_abi()->lr); +} + +// All frames have this field. +inline intptr_t* frame::sender_sp() const { + return (intptr_t*)callers_abi(); +} + +// All frames have this field. 
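+// On PPC64 the link is read from the callers_sp slot of the caller's ABI
+// area, i.e. it is the stack pointer of the caller's caller.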
+inline intptr_t* frame::link() const { + return (intptr_t*)callers_abi()->callers_sp; +} + +inline intptr_t* frame::real_fp() const { + return fp(); +} + +#ifdef CC_INTERP + +inline interpreterState frame::get_interpreterState() const { + return (interpreterState)(((address)callers_abi()) + - frame::interpreter_frame_cinterpreterstate_size_in_bytes()); +} + +inline intptr_t** frame::interpreter_frame_locals_addr() const { + interpreterState istate = get_interpreterState(); + return (intptr_t**)&istate->_locals; +} + +inline intptr_t* frame::interpreter_frame_bcx_addr() const { + interpreterState istate = get_interpreterState(); + return (intptr_t*)&istate->_bcp; +} + +inline intptr_t* frame::interpreter_frame_mdx_addr() const { + interpreterState istate = get_interpreterState(); + return (intptr_t*)&istate->_mdx; +} + +inline intptr_t* frame::interpreter_frame_expression_stack() const { + return (intptr_t*)interpreter_frame_monitor_end() - 1; +} + +inline jint frame::interpreter_frame_expression_stack_direction() { + return -1; +} + +// top of expression stack +inline intptr_t* frame::interpreter_frame_tos_address() const { + interpreterState istate = get_interpreterState(); + return istate->_stack + 1; +} + +inline intptr_t* frame::interpreter_frame_tos_at(jint offset) const { + return &interpreter_frame_tos_address()[offset]; +} + +// monitor elements + +// in keeping with Intel side: end is lower in memory than begin; +// and beginning element is oldest element +// Also begin is one past last monitor. + +inline BasicObjectLock* frame::interpreter_frame_monitor_begin() const { + return get_interpreterState()->monitor_base(); +} + +inline BasicObjectLock* frame::interpreter_frame_monitor_end() const { + return (BasicObjectLock*)get_interpreterState()->stack_base(); +} + +inline int frame::interpreter_frame_cinterpreterstate_size_in_bytes() { + // Size of an interpreter object. Not aligned with frame size. + return round_to(sizeof(BytecodeInterpreter), 8); +} + +inline Method** frame::interpreter_frame_method_addr() const { + interpreterState istate = get_interpreterState(); + return &istate->_method; +} + +// Constant pool cache + +inline ConstantPoolCache** frame::interpreter_frame_cpoolcache_addr() const { + interpreterState istate = get_interpreterState(); + return &istate->_constants; // should really use accessor +} + +inline ConstantPoolCache** frame::interpreter_frame_cache_addr() const { + interpreterState istate = get_interpreterState(); + return &istate->_constants; +} +#endif // CC_INTERP + +inline int frame::interpreter_frame_monitor_size() { + // Number of stack slots for a monitor. + return round_to(BasicObjectLock::size(), // number of stack slots + WordsPerLong); // number of stack slots for a Java long +} + +inline int frame::interpreter_frame_monitor_size_in_bytes() { + return frame::interpreter_frame_monitor_size() * wordSize; +} + +// entry frames + +inline intptr_t* frame::entry_frame_argument_at(int offset) const { + // Since an entry frame always calls the interpreter first, the + // parameters are on the stack and relative to known register in the + // entry frame. 
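+  // arguments_tos_address is the prepushed tos saved in the entry frame
+  // locals, so argument 'offset' lives one slot above it (hence the +1 below).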
+ intptr_t* tos = (intptr_t*)get_entry_frame_locals()->arguments_tos_address; + return &tos[offset + 1]; // prepushed tos +} + +inline JavaCallWrapper* frame::entry_frame_call_wrapper() const { + return (JavaCallWrapper*)get_entry_frame_locals()->call_wrapper_address; +} + +inline oop frame::saved_oop_result(RegisterMap* map) const { + return *((oop*)map->location(R3->as_VMReg())); +} + +inline void frame::set_saved_oop_result(RegisterMap* map, oop obj) { + *((oop*)map->location(R3->as_VMReg())) = obj; +} + +#endif // CPU_PPC_VM_FRAME_PPC_INLINE_HPP diff --git a/src/cpu/ppc/vm/globalDefinitions_ppc.hpp b/src/cpu/ppc/vm/globalDefinitions_ppc.hpp new file mode 100644 --- /dev/null +++ b/src/cpu/ppc/vm/globalDefinitions_ppc.hpp @@ -0,0 +1,34 @@ +/* + * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright 2012, 2013 SAP AG. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_PPC_VM_GLOBALDEFINITIONS_PPC_HPP +#define CPU_PPC_VM_GLOBALDEFINITIONS_PPC_HPP + +// Size of PPC Instructions +const int BytesPerInstWord = 4; + +const int StackAlignmentInBytes = 16; + +#endif // CPU_PPC_VM_GLOBALDEFINITIONS_PPC_HPP diff --git a/src/cpu/ppc/vm/globals_ppc.hpp b/src/cpu/ppc/vm/globals_ppc.hpp new file mode 100644 --- /dev/null +++ b/src/cpu/ppc/vm/globals_ppc.hpp @@ -0,0 +1,116 @@ +/* + * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright 2012, 2013 SAP AG. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef CPU_PPC_VM_GLOBALS_PPC_HPP +#define CPU_PPC_VM_GLOBALS_PPC_HPP + +#include "utilities/globalDefinitions.hpp" +#include "utilities/macros.hpp" + +// Sets the default values for platform dependent flags used by the runtime system. +// (see globals.hpp) + +define_pd_global(bool, ConvertSleepToYield, true); +define_pd_global(bool, ShareVtableStubs, false); // Improves performance markedly for mtrt and compress. +define_pd_global(bool, NeedsDeoptSuspend, false); // Only register window machines need this. + + +define_pd_global(bool, ImplicitNullChecks, true); // Generate code for implicit null checks. +define_pd_global(bool, UncommonNullCast, true); // Uncommon-trap NULLs passed to check cast. + +// Use large code-entry alignment. +define_pd_global(intx, CodeEntryAlignment, 128); +define_pd_global(intx, OptoLoopAlignment, 16); +define_pd_global(intx, InlineFrequencyCount, 100); +define_pd_global(intx, InlineSmallCode, 1500); + +define_pd_global(intx, PreInflateSpin, 10); + +// Flags for template interpreter. +define_pd_global(bool, RewriteBytecodes, true); +define_pd_global(bool, RewriteFrequentPairs, true); + +define_pd_global(bool, UseMembar, false); + +// GC Ergo Flags +define_pd_global(intx, CMSYoungGenPerWorker, 16*M); // Default max size of CMS young gen, per GC worker thread. + + +// Platform dependent flag handling: flags only defined on this platform. +#define ARCH_FLAGS(develop, product, diagnostic, experimental, notproduct) \ + product(uintx, PowerArchitecturePPC64, 0, \ + "CPU Version: x for PowerX. Currently recognizes Power5 to " \ + "Power7. Default is 0. CPUs newer than Power7 will be " \ + "recognized as Power7.") \ + \ + /* Reoptimize code-sequences of calls at runtime, e.g. replace an */ \ + /* indirect call by a direct call. */ \ + product(bool, ReoptimizeCallSequences, true, \ + "Reoptimize code-sequences of calls at runtime.") \ + \ + product(bool, UseLoadInstructionsForStackBangingPPC64, false, \ + "Use load instructions for stack banging.") \ + \ + /* special instructions */ \ + \ + product(bool, UseCountLeadingZerosInstructionsPPC64, true, \ + "Use count leading zeros instructions.") \ + \ + product(bool, UseExtendedLoadAndReserveInstructionsPPC64, false, \ + "Use extended versions of load-and-reserve instructions.") \ + \ + product(bool, UseRotateAndMaskInstructionsPPC64, true, \ + "Use rotate and mask instructions.") \ + \ + product(bool, UseStaticBranchPredictionInCompareAndSwapPPC64, true, \ + "Use static branch prediction hints in CAS operations.") \ + \ + /* Trap based checks. */ \ + /* Trap based checks use the ppc trap instructions to check certain */ \ + /* conditions. This instruction raises a SIGTRAP caught by the */ \ + /* exception handler of the VM. */ \ + product(bool, UseSIGTRAP, false, \ + "Allow trap instructions that make use of SIGTRAP. Use this to " \ + "switch off all optimizations requiring SIGTRAP.") \ + product(bool, TrapBasedICMissChecks, true, \ + "Raise and handle SIGTRAP if inline cache miss detected.") \ + product(bool, TrapBasedNotEntrantChecks, true, \ + "Raise and handle SIGTRAP if calling not entrant or zombie" \ + " method.") \ + product(bool, TrapBasedNullChecks, true, \ + "Generate code for null checks that uses a cmp and trap " \ + "instruction raising SIGTRAP. 
This is only used if an access to" \ + "null (+offset) will not raise a SIGSEGV.") \ + product(bool, TrapBasedRangeChecks, true, \ + "Raise and handle SIGTRAP if array out of bounds check fails.") \ + product(bool, TraceTraps, false, "Trace all traps the signal handler" \ + "handles.") \ + \ + product(bool, ZapMemory, false, "Write 0x0101... to empty memory." \ + " Use this to ease debugging.") \ + + + +#endif // CPU_PPC_VM_GLOBALS_PPC_HPP diff --git a/src/cpu/ppc/vm/icBuffer_ppc.cpp b/src/cpu/ppc/vm/icBuffer_ppc.cpp new file mode 100644 --- /dev/null +++ b/src/cpu/ppc/vm/icBuffer_ppc.cpp @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright 2012, 2013 SAP AG. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/assembler.hpp" +#include "assembler_ppc.inline.hpp" +#include "code/icBuffer.hpp" +#include "gc_interface/collectedHeap.inline.hpp" +#include "interpreter/bytecodes.hpp" +#include "memory/resourceArea.hpp" +#include "nativeInst_ppc.hpp" +#include "oops/oop.inline.hpp" +#include "oops/oop.inline2.hpp" + +#define __ masm. + +int InlineCacheBuffer::ic_stub_code_size() { + return MacroAssembler::load_const_size + MacroAssembler::b64_patchable_size; +} + +void InlineCacheBuffer::assemble_ic_buffer_code(address code_begin, void* cached_value, address entry_point) { + ResourceMark rm; + CodeBuffer code(code_begin, ic_stub_code_size()); + MacroAssembler masm(&code); + // Note: even though the code contains an embedded metadata, we do not need reloc info + // because + // (1) the metadata is old (i.e., doesn't matter for scavenges) + // (2) these ICStubs are removed *before* a GC happens, so the roots disappear. + + // Load the oop ... + __ load_const(R19_method, (address) cached_value, R0); + // ... and jump to entry point. 
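+  // The finished stub thus consists of exactly the two pieces counted in
+  // ic_stub_code_size() above:
+  //   load_const    R19_method, cached_value   (load_const_size bytes)
+  //   b64_patchable entry_point                (b64_patchable_size bytes)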
+ __ b64_patchable((address) entry_point, relocInfo::none); + + __ flush(); +} + +address InlineCacheBuffer::ic_buffer_entry_point(address code_begin) { + NativeMovConstReg* move = nativeMovConstReg_at(code_begin); // creation also verifies the object + NativeJump* jump = nativeJump_at(move->next_instruction_address()); + return jump->jump_destination(); +} + +void* InlineCacheBuffer::ic_buffer_cached_value(address code_begin) { + NativeMovConstReg* move = nativeMovConstReg_at(code_begin); // creation also verifies the object + void* o = (void*)move->data(); + return o; +} + diff --git a/src/cpu/ppc/vm/icache_ppc.cpp b/src/cpu/ppc/vm/icache_ppc.cpp new file mode 100644 --- /dev/null +++ b/src/cpu/ppc/vm/icache_ppc.cpp @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright 2012, 2013 SAP AG. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "assembler_ppc.inline.hpp" +#include "runtime/icache.hpp" + +// Use inline assembler to implement icache flush. +int ppc64_flush_icache(address start, int lines, int magic){ + address end = start + (unsigned int)lines*ICache::line_size; + assert(start <= end, "flush_icache parms"); + + // store modified cache lines from data cache + for (address a=start; amark().set_unlocked(); + // monitor->lock()->set_displaced_header(displaced_header); + // if (Atomic::cmpxchg_ptr(/*ex=*/monitor, /*addr*/obj->mark_addr(), /*cmp*/displaced_header) == displaced_header) { + // // We stored the monitor address into the object's mark word. + // } else if (THREAD->is_lock_owned((address)displaced_header)) + // // Simple recursive case. + // monitor->lock()->set_displaced_header(NULL); + // } else { + // // Slow path. + // InterpreterRuntime::monitorenter(THREAD, monitor); + // } + + const Register displaced_header = R7_ARG5; + const Register object_mark_addr = R8_ARG6; + const Register current_header = R9_ARG7; + const Register tmp = R10_ARG8; + + Label done; + Label slow_case; + + assert_different_registers(displaced_header, object_mark_addr, current_header, tmp); + + + // markOop displaced_header = obj->mark().set_unlocked(); + + // Load markOop from object into displaced_header. + ld(displaced_header, oopDesc::mark_offset_in_bytes(), object); + + if (UseBiasedLocking) { + biased_locking_enter(CCR0, object, displaced_header, tmp, current_header, done, &slow_case); + } + + // Set displaced_header to be (markOop of object | UNLOCK_VALUE). 
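+      // Roughly: displaced_header = obj->mark() | markOopDesc::unlocked_value,
+      // i.e. the value the mark word would have if the object were unlocked.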
+ ori(displaced_header, displaced_header, markOopDesc::unlocked_value); + + + // monitor->lock()->set_displaced_header(displaced_header); + + // Initialize the box (Must happen before we update the object mark!). + std(displaced_header, BasicObjectLock::lock_offset_in_bytes() + + BasicLock::displaced_header_offset_in_bytes(), monitor); + + // if (Atomic::cmpxchg_ptr(/*ex=*/monitor, /*addr*/obj->mark_addr(), /*cmp*/displaced_header) == displaced_header) { + + // Store stack address of the BasicObjectLock (this is monitor) into object. + addi(object_mark_addr, object, oopDesc::mark_offset_in_bytes()); + + // Must fence, otherwise, preceding store(s) may float below cmpxchg. + // CmpxchgX sets CCR0 to cmpX(current, displaced). + fence(); // TODO: replace by MacroAssembler::MemBarRel | MacroAssembler::MemBarAcq ? + cmpxchgd(/*flag=*/CCR0, + /*current_value=*/current_header, + /*compare_value=*/displaced_header, /*exchange_value=*/monitor, + /*where=*/object_mark_addr, + MacroAssembler::MemBarRel | MacroAssembler::MemBarAcq, + MacroAssembler::cmpxchgx_hint_acquire_lock()); + + // If the compare-and-exchange succeeded, then we found an unlocked + // object and we have now locked it. + beq(CCR0, done); + + + // } else if (THREAD->is_lock_owned((address)displaced_header)) + // // Simple recursive case. + // monitor->lock()->set_displaced_header(NULL); + + // We did not see an unlocked object so try the fast recursive case. + + // Check if owner is self by comparing the value in the markOop of object + // (current_header) with the stack pointer. + sub(current_header, current_header, R1_SP); + + assert(os::vm_page_size() > 0xfff, "page size too small - change the constant"); + load_const_optimized(tmp, + (address) (~(os::vm_page_size()-1) | + markOopDesc::lock_mask_in_place)); + + and_(R0/*==0?*/, current_header, tmp); + // If condition is true we are done and hence we can store 0 in the displaced + // header indicating it is a recursive lock. + bne(CCR0, slow_case); + release(); + std(R0/*==0!*/, BasicObjectLock::lock_offset_in_bytes() + + BasicLock::displaced_header_offset_in_bytes(), monitor); + b(done); + + + // } else { + // // Slow path. + // InterpreterRuntime::monitorenter(THREAD, monitor); + + // None of the above fast optimizations worked so we have to get into the + // slow case of monitor enter. + bind(slow_case); + call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), + monitor, /*check_for_exceptions=*/false); + // } + + bind(done); + } +} + +// Unlocks an object. Used in monitorexit bytecode and remove_activation. +// +// Registers alive +// monitor - Address of the BasicObjectLock to be used for locking, +// which must be initialized with the object to lock. +// +// Throw IllegalMonitorException if object is not locked by current thread. +void InterpreterMacroAssembler::unlock_object(Register monitor) { + if (UseHeavyMonitors) { + call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), + monitor, /*check_for_exceptions=*/false); + } else { + + // template code: + // + // if ((displaced_header = monitor->displaced_header()) == NULL) { + // // Recursive unlock. Mark the monitor unlocked by setting the object field to NULL. + // monitor->set_obj(NULL); + // } else if (Atomic::cmpxchg_ptr(displaced_header, obj->mark_addr(), monitor) == monitor) { + // // We swapped the unlocked mark in displaced_header into the object's mark word. + // monitor->set_obj(NULL); + // } else { + // // Slow path. 
+ // InterpreterRuntime::monitorexit(THREAD, monitor); + // } + + const Register object = R7_ARG5; + const Register displaced_header = R8_ARG6; + const Register object_mark_addr = R9_ARG7; + const Register current_header = R10_ARG8; + + Label no_recursive_unlock; + Label slow_case; + Label done; + + assert_different_registers(object, displaced_header, object_mark_addr, current_header); + + if (UseBiasedLocking) { + // The object address from the monitor is in object. + ld(object, BasicObjectLock::obj_offset_in_bytes(), monitor); + assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0"); + biased_locking_exit(CCR0, object, displaced_header, done); + } + + // Test first if we are in the fast recursive case. + ld(displaced_header, BasicObjectLock::lock_offset_in_bytes() + + BasicLock::displaced_header_offset_in_bytes(), monitor); + + // If the displaced header is zero, we have a recursive unlock. + cmpdi(CCR0, displaced_header, 0); + bne(CCR0, no_recursive_unlock); + // Release in recursive unlock is not necessary. + // release(); + std(displaced_header/*==0!*/, BasicObjectLock::obj_offset_in_bytes(), monitor); + b(done); + + bind(no_recursive_unlock); + + // } else if (Atomic::cmpxchg_ptr(displaced_header, obj->mark_addr(), monitor) == monitor) { + // // We swapped the unlocked mark in displaced_header into the object's mark word. + // monitor->set_obj(NULL); + + // If we still have a lightweight lock, unlock the object and be done. + + // The object address from the monitor is in object. + ld(object, BasicObjectLock::obj_offset_in_bytes(), monitor); + addi(object_mark_addr, object, oopDesc::mark_offset_in_bytes()); + + // We have the displaced header in displaced_header. If the lock is still + // lightweight, it will contain the monitor address and we'll store the + // displaced header back into the object's mark word. + // CmpxchgX sets CCR0 to cmpX(current, monitor). + cmpxchgd(/*flag=*/CCR0, + /*current_value=*/current_header, + /*compare_value=*/monitor, /*exchange_value=*/displaced_header, + /*where=*/object_mark_addr, + MacroAssembler::MemBarRel | MacroAssembler::MemBarAcq, + MacroAssembler::cmpxchgx_hint_release_lock()); + bne(CCR0, slow_case); + + // Exchange worked, do monitor->set_obj(NULL). + li(R0, 0); + // Must realease earlier (see cmpxchgd above). + // release(); + std(R0, BasicObjectLock::obj_offset_in_bytes(), monitor); + b(done); + + + // } else { + // // Slow path. + // InterpreterRuntime::monitorexit(THREAD, monitor); + + // The lock has been converted into a heavy lock and hence + // we need to get into the slow case. + bind(slow_case); + call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), + monitor, /*check_for_exceptions=*/false); + // } + + bind(done); + } +} + +void InterpreterMacroAssembler::get_method_counters(Register method, + Register Rcounters, + Label& skip) { + BLOCK_COMMENT("Load and ev. allocate counter object {"); + Label has_counters; + ld(Rcounters, in_bytes(Method::method_counters_offset()), method); + cmpdi(CCR0, Rcounters, 0); + bne(CCR0, has_counters); + call_VM(noreg, CAST_FROM_FN_PTR(address, + InterpreterRuntime::build_method_counters), method, false); + ld(Rcounters, in_bytes(Method::method_counters_offset()), method); + cmpdi(CCR0, Rcounters, 0); + beq(CCR0, skip); // No MethodCounters, OutOfMemory. + BLOCK_COMMENT("} Load and ev. 
allocate counter object"); + + bind(has_counters); +} + +void InterpreterMacroAssembler::increment_invocation_counter(Register Rcounters, Register iv_be_count, Register Rtmp_r0) { + assert(UseCompiler, "incrementing must be useful"); + Register invocation_count = iv_be_count; + Register backedge_count = Rtmp_r0; + int delta = InvocationCounter::count_increment; + + // Load each counter in a register. + // ld(inv_counter, Rtmp); + // ld(be_counter, Rtmp2); + int inv_counter_offset = in_bytes(MethodCounters::invocation_counter_offset() + + InvocationCounter::counter_offset()); + int be_counter_offset = in_bytes(MethodCounters::backedge_counter_offset() + + InvocationCounter::counter_offset()); + + BLOCK_COMMENT("Increment profiling counters {"); + + // Load the backedge counter. + lwz(backedge_count, be_counter_offset, Rcounters); // is unsigned int + // Mask the backedge counter. + Register tmp = invocation_count; + li(tmp, InvocationCounter::count_mask_value); + andr(backedge_count, tmp, backedge_count); // Cannot use andi, need sign extension of count_mask_value. + + // Load the invocation counter. + lwz(invocation_count, inv_counter_offset, Rcounters); // is unsigned int + // Add the delta to the invocation counter and store the result. + addi(invocation_count, invocation_count, delta); + // Store value. + stw(invocation_count, inv_counter_offset, Rcounters); + + // Add invocation counter + backedge counter. + add(iv_be_count, backedge_count, invocation_count); + + // Note that this macro must leave the backedge_count + invocation_count in + // register iv_be_count! + BLOCK_COMMENT("} Increment profiling counters"); +} + +void InterpreterMacroAssembler::verify_oop(Register reg, TosState state) { + if (state == atos) { MacroAssembler::verify_oop(reg); } +} + +// Inline assembly for: +// +// if (thread is in interp_only_mode) { +// InterpreterRuntime::post_method_entry(); +// } +// if (*jvmpi::event_flags_array_at_addr(JVMPI_EVENT_METHOD_ENTRY ) || +// *jvmpi::event_flags_array_at_addr(JVMPI_EVENT_METHOD_ENTRY2) ) { +// SharedRuntime::jvmpi_method_entry(method, receiver); +// } +void InterpreterMacroAssembler::notify_method_entry() { + // JVMTI + // Whenever JVMTI puts a thread in interp_only_mode, method + // entry/exit events are sent for that thread to track stack + // depth. If it is possible to enter interp_only_mode we add + // the code to check if the event should be sent. + if (JvmtiExport::can_post_interpreter_events()) { + Label jvmti_post_done; + + lwz(R0, in_bytes(JavaThread::interp_only_mode_offset()), R16_thread); + cmpwi(CCR0, R0, 0); + beq(CCR0, jvmti_post_done); + call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::post_method_entry), + /*check_exceptions=*/false); + + bind(jvmti_post_done); + } +} + + +// Inline assembly for: +// +// if (thread is in interp_only_mode) { +// // save result +// InterpreterRuntime::post_method_exit(); +// // restore result +// } +// if (*jvmpi::event_flags_array_at_addr(JVMPI_EVENT_METHOD_EXIT)) { +// // save result +// SharedRuntime::jvmpi_method_exit(); +// // restore result +// } +// +// Native methods have their result stored in d_tmp and l_tmp. +// Java methods have their result stored in the expression stack. +void InterpreterMacroAssembler::notify_method_exit(bool is_native_method, TosState state) { + // JVMTI + // Whenever JVMTI puts a thread in interp_only_mode, method + // entry/exit events are sent for that thread to track stack + // depth. 
If it is possible to enter interp_only_mode we add + // the code to check if the event should be sent. + if (JvmtiExport::can_post_interpreter_events()) { + Label jvmti_post_done; + + lwz(R0, in_bytes(JavaThread::interp_only_mode_offset()), R16_thread); + cmpwi(CCR0, R0, 0); + beq(CCR0, jvmti_post_done); + call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::post_method_exit), + /*check_exceptions=*/false); + + bind(jvmti_post_done); + } +} + +// Convert the current TOP_IJAVA_FRAME into a PARENT_IJAVA_FRAME +// (using parent_frame_resize) and push a new interpreter +// TOP_IJAVA_FRAME (using frame_size). +void InterpreterMacroAssembler::push_interpreter_frame(Register top_frame_size, Register parent_frame_resize, + Register tmp1, Register tmp2, Register tmp3, + Register tmp4, Register pc) { + assert_different_registers(top_frame_size, parent_frame_resize, tmp1, tmp2, tmp3, tmp4); + ld(tmp1, _top_ijava_frame_abi(frame_manager_lr), R1_SP); + mr(tmp2/*top_frame_sp*/, R1_SP); + // Move initial_caller_sp. + ld(tmp4, _top_ijava_frame_abi(initial_caller_sp), R1_SP); + neg(parent_frame_resize, parent_frame_resize); + resize_frame(parent_frame_resize/*-parent_frame_resize*/, tmp3); + + // Set LR in new parent frame. + std(tmp1, _abi(lr), R1_SP); + // Set top_frame_sp info for new parent frame. + std(tmp2, _parent_ijava_frame_abi(top_frame_sp), R1_SP); + std(tmp4, _parent_ijava_frame_abi(initial_caller_sp), R1_SP); + + // Push new TOP_IJAVA_FRAME. + push_frame(top_frame_size, tmp2); + + get_PC_trash_LR(tmp3); + std(tmp3, _top_ijava_frame_abi(frame_manager_lr), R1_SP); + // Used for non-initial callers by unextended_sp(). + std(R1_SP, _top_ijava_frame_abi(initial_caller_sp), R1_SP); +} + +// Pop the topmost TOP_IJAVA_FRAME and convert the previous +// PARENT_IJAVA_FRAME back into a TOP_IJAVA_FRAME. +void InterpreterMacroAssembler::pop_interpreter_frame(Register tmp1, Register tmp2, Register tmp3, Register tmp4) { + assert_different_registers(tmp1, tmp2, tmp3, tmp4); + + ld(tmp1/*caller's sp*/, _abi(callers_sp), R1_SP); + ld(tmp3, _abi(lr), tmp1); + + ld(tmp4, _parent_ijava_frame_abi(initial_caller_sp), tmp1); + + ld(tmp2/*caller's caller's sp*/, _abi(callers_sp), tmp1); + // Merge top frame. + std(tmp2, _abi(callers_sp), R1_SP); + + ld(tmp2, _parent_ijava_frame_abi(top_frame_sp), tmp1); + + // Update C stack pointer to caller's top_abi. + resize_frame_absolute(tmp2/*addr*/, tmp1/*tmp*/, tmp2/*tmp*/); + + // Update LR in top_frame. + std(tmp3, _top_ijava_frame_abi(frame_manager_lr), R1_SP); + + std(tmp4, _top_ijava_frame_abi(initial_caller_sp), R1_SP); + + // Store the top-frame stack-pointer for c2i adapters. + std(R1_SP, _top_ijava_frame_abi(top_frame_sp), R1_SP); +} + +#ifdef CC_INTERP +// Turn state's interpreter frame into the current TOP_IJAVA_FRAME. +void InterpreterMacroAssembler::pop_interpreter_frame_to_state(Register state, Register tmp1, Register tmp2, Register tmp3) { + assert_different_registers(R14_state, R15_prev_state, tmp1, tmp2, tmp3); + + if (state == R14_state) { + ld(tmp1/*state's fp*/, state_(_last_Java_fp)); + ld(tmp2/*state's sp*/, state_(_last_Java_sp)); + } else if (state == R15_prev_state) { + ld(tmp1/*state's fp*/, prev_state_(_last_Java_fp)); + ld(tmp2/*state's sp*/, prev_state_(_last_Java_sp)); + } else { + ShouldNotReachHere(); + } + + // Merge top frames. + std(tmp1, _abi(callers_sp), R1_SP); + + // Tmp2 is new SP. + // Tmp1 is parent's SP. + resize_frame_absolute(tmp2/*addr*/, tmp1/*tmp*/, tmp2/*tmp*/); + + // Update LR in top_frame. 
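+  // (For an interpreter frame the return pc lives in the frame_manager_lr
+  // slot of its top_ijava_frame_abi, which is refreshed below.)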
+ // Must be interpreter frame. + get_PC_trash_LR(tmp3); + std(tmp3, _top_ijava_frame_abi(frame_manager_lr), R1_SP); + // Used for non-initial callers by unextended_sp(). + std(R1_SP, _top_ijava_frame_abi(initial_caller_sp), R1_SP); +} +#endif // CC_INTERP + +// Set SP to initial caller's sp, but before fix the back chain. +void InterpreterMacroAssembler::resize_frame_to_initial_caller(Register tmp1, Register tmp2) { + ld(tmp1, _parent_ijava_frame_abi(initial_caller_sp), R1_SP); + ld(tmp2, _parent_ijava_frame_abi(callers_sp), R1_SP); + std(tmp2, _parent_ijava_frame_abi(callers_sp), tmp1); // Fix back chain ... + mr(R1_SP, tmp1); // ... and resize to initial caller. +} + +#ifdef CC_INTERP +// Pop the current interpreter state (without popping the correspoding +// frame) and restore R14_state and R15_prev_state accordingly. +// Use prev_state_may_be_0 to indicate whether prev_state may be 0 +// in order to generate an extra check before retrieving prev_state_(_prev_link). +void InterpreterMacroAssembler::pop_interpreter_state(bool prev_state_may_be_0) +{ + // Move prev_state to state and restore prev_state from state_(_prev_link). + Label prev_state_is_0; + mr(R14_state, R15_prev_state); + + // Don't retrieve /*state==*/prev_state_(_prev_link) + // if /*state==*/prev_state is 0. + if (prev_state_may_be_0) { + cmpdi(CCR0, R15_prev_state, 0); + beq(CCR0, prev_state_is_0); + } + + ld(R15_prev_state, /*state==*/prev_state_(_prev_link)); + bind(prev_state_is_0); +} + +void InterpreterMacroAssembler::restore_prev_state() { + // _prev_link is private, but cInterpreter is a friend. + ld(R15_prev_state, state_(_prev_link)); +} +#endif // CC_INTERP diff --git a/src/cpu/ppc/vm/interp_masm_ppc_64.hpp b/src/cpu/ppc/vm/interp_masm_ppc_64.hpp new file mode 100644 --- /dev/null +++ b/src/cpu/ppc/vm/interp_masm_ppc_64.hpp @@ -0,0 +1,89 @@ +/* + * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright 2012, 2013 SAP AG. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef CPU_PPC_VM_INTERP_MASM_PPC_64_HPP +#define CPU_PPC_VM_INTERP_MASM_PPC_64_HPP + +#include "assembler_ppc.inline.hpp" +#include "interpreter/invocationCounter.hpp" + +// This file specializes the assembler with interpreter-specific macros + + +class InterpreterMacroAssembler: public MacroAssembler { + + public: + InterpreterMacroAssembler(CodeBuffer* code) : MacroAssembler(code) {} + + // Handy address generation macros +#define thread_(field_name) in_bytes(JavaThread::field_name ## _offset()), R16_thread +#define method_(field_name) in_bytes(Method::field_name ## _offset()), R19_method + +#ifdef CC_INTERP +#define state_(field_name) in_bytes(byte_offset_of(BytecodeInterpreter, field_name)), R14_state +#define prev_state_(field_name) in_bytes(byte_offset_of(BytecodeInterpreter, field_name)), R15_prev_state +#endif + + void get_method_counters(Register method, Register Rcounters, Label& skip); + void increment_invocation_counter(Register iv_be_count, Register Rtmp1, Register Rtmp2_r0); + + // Object locking + void lock_object (Register lock_reg, Register obj_reg); + void unlock_object(Register lock_reg); + + // Debugging + void verify_oop(Register reg, TosState state = atos); // only if +VerifyOops && state == atos + + // support for jvmdi/jvmpi + void notify_method_entry(); + void notify_method_exit(bool save_result, TosState state); + + // Convert the current TOP_IJAVA_FRAME into a PARENT_IJAVA_FRAME + // (using parent_frame_resize) and push a new interpreter + // TOP_IJAVA_FRAME (using frame_size). + void push_interpreter_frame(Register top_frame_size, Register parent_frame_resize, + Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register pc=noreg); + + // Pop the topmost TOP_IJAVA_FRAME and convert the previous + // PARENT_IJAVA_FRAME back into a TOP_IJAVA_FRAME. + void pop_interpreter_frame(Register tmp1, Register tmp2, Register tmp3, Register tmp4); + + // Turn state's interpreter frame into the current TOP_IJAVA_FRAME. + void pop_interpreter_frame_to_state(Register state, Register tmp1, Register tmp2, Register tmp3); + + // Set SP to initial caller's sp, but before fix the back chain. + void resize_frame_to_initial_caller(Register tmp1, Register tmp2); + + // Pop the current interpreter state (without popping the + // correspoding frame) and restore R14_state and R15_prev_state + // accordingly. Use prev_state_may_be_0 to indicate whether + // prev_state may be 0 in order to generate an extra check before + // retrieving prev_state_(_prev_link). + void pop_interpreter_state(bool prev_state_may_be_0); + + void restore_prev_state(); +}; + +#endif // CPU_PPC_VM_INTERP_MASM_PPC_64_HPP diff --git a/src/cpu/ppc/vm/interpreterGenerator_ppc.hpp b/src/cpu/ppc/vm/interpreterGenerator_ppc.hpp new file mode 100644 --- /dev/null +++ b/src/cpu/ppc/vm/interpreterGenerator_ppc.hpp @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright 2012, 2013 SAP AG. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_PPC_VM_INTERPRETERGENERATOR_PPC_HPP +#define CPU_PPC_VM_INTERPRETERGENERATOR_PPC_HPP + + friend class AbstractInterpreterGenerator; + + private: + + address generate_abstract_entry(void); + address generate_accessor_entry(void); + address generate_Reference_get_entry(void); + +#endif // CPU_PPC_VM_INTERPRETERGENERATOR_PPC_HPP diff --git a/src/cpu/ppc/vm/interpreterRT_ppc.cpp b/src/cpu/ppc/vm/interpreterRT_ppc.cpp new file mode 100644 --- /dev/null +++ b/src/cpu/ppc/vm/interpreterRT_ppc.cpp @@ -0,0 +1,150 @@ +/* + * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright 2012, 2013 SAP AG. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "interpreter/interpreter.hpp" +#include "interpreter/interpreterRuntime.hpp" +#include "memory/allocation.inline.hpp" +#include "memory/universe.inline.hpp" +#include "oops/method.hpp" +#include "oops/oop.inline.hpp" +#include "runtime/handles.inline.hpp" +#include "runtime/icache.hpp" +#include "runtime/interfaceSupport.hpp" +#include "runtime/signature.hpp" + +#define __ _masm-> + +// Access macros for Java and C arguments. +// The first Java argument is at index -1. +#define locals_j_arg_at(index) (Interpreter::local_offset_in_bytes(index)), R18_locals +// The first C argument is at index 0. +#define sp_c_arg_at(index) ((index)*wordSize + _abi(carg_1)), R1_SP + +// Implementation of SignatureHandlerGenerator + +void InterpreterRuntime::SignatureHandlerGenerator::pass_int() { + Argument jni_arg(jni_offset()); + Register r = jni_arg.is_register() ? jni_arg.as_register() : R0; + + __ lwa(r, locals_j_arg_at(offset())); // sign extension of integer + if (DEBUG_ONLY(true ||) !jni_arg.is_register()) { + __ std(r, sp_c_arg_at(jni_arg.number())); + } +} + +void InterpreterRuntime::SignatureHandlerGenerator::pass_long() { + Argument jni_arg(jni_offset()); + Register r = jni_arg.is_register() ? 
jni_arg.as_register() : R0; + + __ ld(r, locals_j_arg_at(offset()+1)); // long resides in upper slot + if (DEBUG_ONLY(true ||) !jni_arg.is_register()) { + __ std(r, sp_c_arg_at(jni_arg.number())); + } +} + +void InterpreterRuntime::SignatureHandlerGenerator::pass_float() { + FloatRegister fp_reg = (_num_used_fp_arg_regs < 13/*max_fp_register_arguments*/) + ? as_FloatRegister((_num_used_fp_arg_regs++) + F1_ARG1->encoding()) + : F0; + + __ lfs(fp_reg, locals_j_arg_at(offset())); + if (DEBUG_ONLY(true ||) jni_offset() > 8) { + __ stfs(fp_reg, sp_c_arg_at(jni_offset())); + } +} + +void InterpreterRuntime::SignatureHandlerGenerator::pass_double() { + FloatRegister fp_reg = (_num_used_fp_arg_regs < 13/*max_fp_register_arguments*/) + ? as_FloatRegister((_num_used_fp_arg_regs++) + F1_ARG1->encoding()) + : F0; + + __ lfd(fp_reg, locals_j_arg_at(offset()+1)); + if (DEBUG_ONLY(true ||) jni_offset() > 8) { + __ stfd(fp_reg, sp_c_arg_at(jni_offset())); + } +} + +void InterpreterRuntime::SignatureHandlerGenerator::pass_object() { + Argument jni_arg(jni_offset()); + Register r = jni_arg.is_register() ? jni_arg.as_register() : R11_scratch1; + + // The handle for a receiver will never be null. + bool do_NULL_check = offset() != 0 || is_static(); + + Label do_null; + if (do_NULL_check) { + __ ld(R0, locals_j_arg_at(offset())); + __ cmpdi(CCR0, R0, 0); + __ li(r, 0); + __ beq(CCR0, do_null); + } + __ addir(r, locals_j_arg_at(offset())); + __ bind(do_null); + if (DEBUG_ONLY(true ||) !jni_arg.is_register()) { + __ std(r, sp_c_arg_at(jni_arg.number())); + } +} + +void InterpreterRuntime::SignatureHandlerGenerator::generate(uint64_t fingerprint) { + // Emit fd for current codebuffer. Needs patching! + __ emit_fd(); + + // Generate code to handle arguments. + iterate(fingerprint); + + // Return the result handler. + __ load_const(R3_RET, AbstractInterpreter::result_handler(method()->result_type())); + __ blr(); + + __ flush(); +} + +#undef __ + +// Implementation of SignatureHandlerLibrary + +void SignatureHandlerLibrary::pd_set_handler(address handler) { + // patch fd here. + FunctionDescriptor* fd = (FunctionDescriptor*) handler; + + fd->set_entry(handler + (int)sizeof(FunctionDescriptor)); + assert(fd->toc() == (address)0xcafe, "need to adjust TOC here"); +} + + +// Access function to get the signature. +IRT_ENTRY(address, InterpreterRuntime::get_signature(JavaThread* thread, Method* method)) + methodHandle m(thread, method); + assert(m->is_native(), "sanity check"); + Symbol *s = m->signature(); + return (address) s->base(); +IRT_END + +IRT_ENTRY(address, InterpreterRuntime::get_result_handler(JavaThread* thread, Method* method)) + methodHandle m(thread, method); + assert(m->is_native(), "sanity check"); + return AbstractInterpreter::result_handler(m->result_type()); +IRT_END diff --git a/src/cpu/ppc/vm/interpreterRT_ppc.hpp b/src/cpu/ppc/vm/interpreterRT_ppc.hpp new file mode 100644 --- /dev/null +++ b/src/cpu/ppc/vm/interpreterRT_ppc.hpp @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright 2012, 2013 SAP AG. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_PPC_VM_INTERPRETERRT_PPC_HPP +#define CPU_PPC_VM_INTERPRETERRT_PPC_HPP + +#include "memory/allocation.hpp" + +// native method calls + +class SignatureHandlerGenerator: public NativeSignatureIterator { + private: + MacroAssembler* _masm; + // number of already used floating-point argument registers + int _num_used_fp_arg_regs; + + void pass_int(); + void pass_long(); + void pass_double(); + void pass_float(); + void pass_object(); + + public: + // Creation + SignatureHandlerGenerator(methodHandle method, CodeBuffer* buffer) : NativeSignatureIterator(method) { + _masm = new MacroAssembler(buffer); + _num_used_fp_arg_regs = 0; + } + + // Code generation + void generate(uint64_t fingerprint); +}; + +// Support for generate_slow_signature_handler. +static address get_result_handler(JavaThread* thread, Method* method); + +// A function to get the signature. +static address get_signature(JavaThread* thread, Method* method); + +#endif // CPU_PPC_VM_INTERPRETERRT_PPC_HPP diff --git a/src/cpu/ppc/vm/interpreter_ppc.cpp b/src/cpu/ppc/vm/interpreter_ppc.cpp new file mode 100644 --- /dev/null +++ b/src/cpu/ppc/vm/interpreter_ppc.cpp @@ -0,0 +1,736 @@ +/* + * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright 2012, 2013 SAP AG. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "asm/assembler.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "interpreter/bytecodeHistogram.hpp" +#include "interpreter/interpreter.hpp" +#include "interpreter/interpreterGenerator.hpp" +#include "interpreter/interpreterRuntime.hpp" +#include "interpreter/templateTable.hpp" +#include "oops/arrayOop.hpp" +#include "oops/methodData.hpp" +#include "oops/method.hpp" +#include "oops/oop.inline.hpp" +#include "prims/jvmtiExport.hpp" +#include "prims/jvmtiThreadState.hpp" +#include "prims/methodHandles.hpp" +#include "runtime/arguments.hpp" +#include "runtime/deoptimization.hpp" +#include "runtime/frame.inline.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" +#include "runtime/synchronizer.hpp" +#include "runtime/timer.hpp" +#include "runtime/vframeArray.hpp" +#include "utilities/debug.hpp" +#ifdef COMPILER1 +#include "c1/c1_Runtime1.hpp" +#endif + +#ifndef CC_INTERP +#error "CC_INTERP must be defined on PPC" +#endif + +#define __ _masm-> + +#ifdef PRODUCT +#define BLOCK_COMMENT(str) // nothing +#else +#define BLOCK_COMMENT(str) __ block_comment(str) +#endif + +#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") + +int AbstractInterpreter::BasicType_as_index(BasicType type) { + int i = 0; + switch (type) { + case T_BOOLEAN: i = 0; break; + case T_CHAR : i = 1; break; + case T_BYTE : i = 2; break; + case T_SHORT : i = 3; break; + case T_INT : i = 4; break; + case T_LONG : i = 5; break; + case T_VOID : i = 6; break; + case T_FLOAT : i = 7; break; + case T_DOUBLE : i = 8; break; + case T_OBJECT : i = 9; break; + case T_ARRAY : i = 9; break; + default : ShouldNotReachHere(); + } + assert(0 <= i && i < AbstractInterpreter::number_of_result_handlers, "index out of bounds"); + return i; +} + +address AbstractInterpreterGenerator::generate_slow_signature_handler() { + // Slow_signature handler that respects the PPC C calling conventions. + // + // We get called by the native entry code with our output register + // area == 8. First we call InterpreterRuntime::get_result_handler + // to copy the pointer to the signature string temporarily to the + // first C-argument and to return the result_handler in + // R3_RET. Since native_entry will copy the jni-pointer to the + // first C-argument slot later on, it is OK to occupy this slot + // temporarilly. Then we copy the argument list on the java + // expression stack into native varargs format on the native stack + // and load arguments into argument registers. Integer arguments in + // the varargs vector will be sign-extended to 8 bytes. + // + // On entry: + // R3_ARG1 - intptr_t* Address of java argument list in memory. + // R15_prev_state - BytecodeInterpreter* Address of interpreter state for + // this method + // R19_method + // + // On exit (just before return instruction): + // R3_RET - contains the address of the result_handler. + // R4_ARG2 - is not updated for static methods and contains "this" otherwise. + // R5_ARG3-R10_ARG8: - When the (i-2)th Java argument is not of type float or double, + // ARGi contains this argument. Otherwise, ARGi is not updated. + // F1_ARG1-F13_ARG13 - contain the first 13 arguments of type float or double. + + const int LogSizeOfTwoInstructions = 3; + + // FIXME: use Argument:: GL: Argument names different numbers! 
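+  // The 64-bit ELF ABI passes the first 8 integer arguments in R3-R10 and
+  // the first 13 floating-point arguments in F1-F13; R3 is reserved for the
+  // JNIEnv* and R4 for the receiver (or class) handle, leaving the 6 free
+  // integer argument registers counted below.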
+ const int max_fp_register_arguments = 13; + const int max_int_register_arguments = 6; // first 2 are reserved + + const Register arg_java = R21_tmp1; + const Register arg_c = R22_tmp2; + const Register signature = R23_tmp3; // is string + const Register sig_byte = R24_tmp4; + const Register fpcnt = R25_tmp5; + const Register argcnt = R26_tmp6; + const Register intSlot = R27_tmp7; + const Register target_sp = R28_tmp8; + const FloatRegister floatSlot = F0; + + address entry = __ emit_fd(); + + __ save_LR_CR(R0); + __ save_nonvolatile_gprs(R1_SP, _spill_nonvolatiles_neg(r14)); + // We use target_sp for storing arguments in the C frame. + __ mr(target_sp, R1_SP); + __ push_frame_abi112_nonvolatiles(0, R11_scratch1); + + __ mr(arg_java, R3_ARG1); + + __ call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::get_signature), R16_thread, R19_method); + + // Signature is in R3_RET. Signature is callee saved. + __ mr(signature, R3_RET); + + // Reload method, it may have moved. +#ifdef CC_INTERP + __ ld(R19_method, state_(_method)); +#else + __ unimplemented("slow signature handler 1"); +#endif + + // Get the result handler. + __ call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::get_result_handler), R16_thread, R19_method); + + // Reload method, it may have moved. +#ifdef CC_INTERP + __ ld(R19_method, state_(_method)); +#else + __ unimplemented("slow signature handler 2"); +#endif + + { + Label L; + // test if static + // _access_flags._flags must be at offset 0. + // TODO PPC port: requires change in shared code. + //assert(in_bytes(AccessFlags::flags_offset()) == 0, + // "MethodOopDesc._access_flags == MethodOopDesc._access_flags._flags"); + // _access_flags must be a 32 bit value. + assert(sizeof(AccessFlags) == 4, "wrong size"); + __ lwa(R11_scratch1/*access_flags*/, method_(access_flags)); + // testbit with condition register. + __ testbitdi(CCR0, R0, R11_scratch1/*access_flags*/, JVM_ACC_STATIC_BIT); + __ btrue(CCR0, L); + // For non-static functions, pass "this" in R4_ARG2 and copy it + // to 2nd C-arg slot. + // We need to box the Java object here, so we use arg_java + // (address of current Java stack slot) as argument and don't + // dereference it as in case of ints, floats, etc. + __ mr(R4_ARG2, arg_java); + __ addi(arg_java, arg_java, -BytesPerWord); + __ std(R4_ARG2, _abi(carg_2), target_sp); + __ bind(L); + } + + // Will be incremented directly after loop_start. argcnt=0 + // corresponds to 3rd C argument. 
+ __ li(argcnt, -1); + // arg_c points to 3rd C argument + __ addi(arg_c, target_sp, _abi(carg_3)); + // no floating-point args parsed so far + __ li(fpcnt, 0); + + Label move_intSlot_to_ARG, move_floatSlot_to_FARG; + Label loop_start, loop_end; + Label do_int, do_long, do_float, do_double, do_dontreachhere, do_object, do_array, do_boxed; + + // signature points to '(' at entry +#ifdef ASSERT + __ lbz(sig_byte, 0, signature); + __ cmplwi(CCR0, sig_byte, '('); + __ bne(CCR0, do_dontreachhere); +#endif + + __ bind(loop_start); + + __ addi(argcnt, argcnt, 1); + __ lbzu(sig_byte, 1, signature); + + __ cmplwi(CCR0, sig_byte, ')'); // end of signature + __ beq(CCR0, loop_end); + + __ cmplwi(CCR0, sig_byte, 'B'); // byte + __ beq(CCR0, do_int); + + __ cmplwi(CCR0, sig_byte, 'C'); // char + __ beq(CCR0, do_int); + + __ cmplwi(CCR0, sig_byte, 'D'); // double + __ beq(CCR0, do_double); + + __ cmplwi(CCR0, sig_byte, 'F'); // float + __ beq(CCR0, do_float); + + __ cmplwi(CCR0, sig_byte, 'I'); // int + __ beq(CCR0, do_int); + + __ cmplwi(CCR0, sig_byte, 'J'); // long + __ beq(CCR0, do_long); + + __ cmplwi(CCR0, sig_byte, 'S'); // short + __ beq(CCR0, do_int); + + __ cmplwi(CCR0, sig_byte, 'Z'); // boolean + __ beq(CCR0, do_int); + + __ cmplwi(CCR0, sig_byte, 'L'); // object + __ beq(CCR0, do_object); + + __ cmplwi(CCR0, sig_byte, '['); // array + __ beq(CCR0, do_array); + + // __ cmplwi(CCR0, sig_byte, 'V'); // void cannot appear since we do not parse the return type + // __ beq(CCR0, do_void); + + __ bind(do_dontreachhere); + + __ unimplemented("ShouldNotReachHere in slow_signature_handler", 120); + + __ bind(do_array); + + { + Label start_skip, end_skip; + + __ bind(start_skip); + __ lbzu(sig_byte, 1, signature); + __ cmplwi(CCR0, sig_byte, '['); + __ beq(CCR0, start_skip); // skip further brackets + __ cmplwi(CCR0, sig_byte, '9'); + __ bgt(CCR0, end_skip); // no optional size + __ cmplwi(CCR0, sig_byte, '0'); + __ bge(CCR0, start_skip); // skip optional size + __ bind(end_skip); + + __ cmplwi(CCR0, sig_byte, 'L'); + __ beq(CCR0, do_object); // for arrays of objects, the name of the object must be skipped + __ b(do_boxed); // otherwise, go directly to do_boxed + } + + __ bind(do_object); + { + Label L; + __ bind(L); + __ lbzu(sig_byte, 1, signature); + __ cmplwi(CCR0, sig_byte, ';'); + __ bne(CCR0, L); + } + // Need to box the Java object here, so we use arg_java (address of + // current Java stack slot) as argument and don't dereference it as + // in case of ints, floats, etc. 
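#include <stdint.h>

// Standalone sketch (not a real VM helper) of the boxing rule that the
// do_boxed/do_null code below implements: a non-null oop is passed to the
// native callee as the address of the Java stack slot holding it (i.e. as a
// handle), while a null reference is passed as a plain NULL pointer.
static inline void* box_java_object(intptr_t* arg_java_slot) {
  return (*arg_java_slot == 0) ? (void*)0 : (void*)arg_java_slot;
}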
+ Label do_null; + __ bind(do_boxed); + __ ld(R0,0, arg_java); + __ cmpdi(CCR0, R0, 0); + __ li(intSlot,0); + __ beq(CCR0, do_null); + __ mr(intSlot, arg_java); + __ bind(do_null); + __ std(intSlot, 0, arg_c); + __ addi(arg_java, arg_java, -BytesPerWord); + __ addi(arg_c, arg_c, BytesPerWord); + __ cmplwi(CCR0, argcnt, max_int_register_arguments); + __ blt(CCR0, move_intSlot_to_ARG); + __ b(loop_start); + + __ bind(do_int); + __ lwa(intSlot, 0, arg_java); + __ std(intSlot, 0, arg_c); + __ addi(arg_java, arg_java, -BytesPerWord); + __ addi(arg_c, arg_c, BytesPerWord); + __ cmplwi(CCR0, argcnt, max_int_register_arguments); + __ blt(CCR0, move_intSlot_to_ARG); + __ b(loop_start); + + __ bind(do_long); + __ ld(intSlot, -BytesPerWord, arg_java); + __ std(intSlot, 0, arg_c); + __ addi(arg_java, arg_java, - 2 * BytesPerWord); + __ addi(arg_c, arg_c, BytesPerWord); + __ cmplwi(CCR0, argcnt, max_int_register_arguments); + __ blt(CCR0, move_intSlot_to_ARG); + __ b(loop_start); + + __ bind(do_float); + __ lfs(floatSlot, 0, arg_java); +#if defined(LINUX) + __ stfs(floatSlot, 4, arg_c); +#elif defined(AIX) + __ stfs(floatSlot, 0, arg_c); +#else +#error "unknown OS" +#endif + __ addi(arg_java, arg_java, -BytesPerWord); + __ addi(arg_c, arg_c, BytesPerWord); + __ cmplwi(CCR0, fpcnt, max_fp_register_arguments); + __ blt(CCR0, move_floatSlot_to_FARG); + __ b(loop_start); + + __ bind(do_double); + __ lfd(floatSlot, - BytesPerWord, arg_java); + __ stfd(floatSlot, 0, arg_c); + __ addi(arg_java, arg_java, - 2 * BytesPerWord); + __ addi(arg_c, arg_c, BytesPerWord); + __ cmplwi(CCR0, fpcnt, max_fp_register_arguments); + __ blt(CCR0, move_floatSlot_to_FARG); + __ b(loop_start); + + __ bind(loop_end); + + __ pop_frame(); + __ restore_nonvolatile_gprs(R1_SP, _spill_nonvolatiles_neg(r14)); + __ restore_LR_CR(R0); + + __ blr(); + + Label move_int_arg, move_float_arg; + __ bind(move_int_arg); // each case must consist of 2 instructions (otherwise adapt LogSizeOfTwoInstructions) + __ mr(R5_ARG3, intSlot); __ b(loop_start); + __ mr(R6_ARG4, intSlot); __ b(loop_start); + __ mr(R7_ARG5, intSlot); __ b(loop_start); + __ mr(R8_ARG6, intSlot); __ b(loop_start); + __ mr(R9_ARG7, intSlot); __ b(loop_start); + __ mr(R10_ARG8, intSlot); __ b(loop_start); + + __ bind(move_float_arg); // each case must consist of 2 instructions (otherwise adapt LogSizeOfTwoInstructions) + __ fmr(F1_ARG1, floatSlot); __ b(loop_start); + __ fmr(F2_ARG2, floatSlot); __ b(loop_start); + __ fmr(F3_ARG3, floatSlot); __ b(loop_start); + __ fmr(F4_ARG4, floatSlot); __ b(loop_start); + __ fmr(F5_ARG5, floatSlot); __ b(loop_start); + __ fmr(F6_ARG6, floatSlot); __ b(loop_start); + __ fmr(F7_ARG7, floatSlot); __ b(loop_start); + __ fmr(F8_ARG8, floatSlot); __ b(loop_start); + __ fmr(F9_ARG9, floatSlot); __ b(loop_start); + __ fmr(F10_ARG10, floatSlot); __ b(loop_start); + __ fmr(F11_ARG11, floatSlot); __ b(loop_start); + __ fmr(F12_ARG12, floatSlot); __ b(loop_start); + __ fmr(F13_ARG13, floatSlot); __ b(loop_start); + + __ bind(move_intSlot_to_ARG); + __ sldi(R0, argcnt, LogSizeOfTwoInstructions); + __ load_const(R11_scratch1, move_int_arg); // Label must be bound here. + __ add(R11_scratch1, R0, R11_scratch1); + __ mtctr(R11_scratch1/*branch_target*/); + __ bctr(); + __ bind(move_floatSlot_to_FARG); + __ sldi(R0, fpcnt, LogSizeOfTwoInstructions); + __ addi(fpcnt, fpcnt, 1); + __ load_const(R11_scratch1, move_float_arg); // Label must be bound here. 
+ __ add(R11_scratch1, R0, R11_scratch1); + __ mtctr(R11_scratch1/*branch_target*/); + __ bctr(); + + return entry; +} + +address AbstractInterpreterGenerator::generate_result_handler_for(BasicType type) { + // + // Registers alive + // R3_RET + // LR + // + // Registers updated + // R3_RET + // + + Label done; + Label is_false; + + address entry = __ pc(); + + switch (type) { + case T_BOOLEAN: + __ cmpwi(CCR0, R3_RET, 0); + __ beq(CCR0, is_false); + __ li(R3_RET, 1); + __ b(done); + __ bind(is_false); + __ li(R3_RET, 0); + break; + case T_BYTE: + // sign extend 8 bits + __ extsb(R3_RET, R3_RET); + break; + case T_CHAR: + // zero extend 16 bits + __ clrldi(R3_RET, R3_RET, 48); + break; + case T_SHORT: + // sign extend 16 bits + __ extsh(R3_RET, R3_RET); + break; + case T_INT: + // sign extend 32 bits + __ extsw(R3_RET, R3_RET); + break; + case T_LONG: + break; + case T_OBJECT: + // unbox result if not null + __ cmpdi(CCR0, R3_RET, 0); + __ beq(CCR0, done); + __ ld(R3_RET, 0, R3_RET); + __ verify_oop(R3_RET); + break; + case T_FLOAT: + break; + case T_DOUBLE: + break; + case T_VOID: + break; + default: ShouldNotReachHere(); + } + + __ BIND(done); + __ blr(); + + return entry; +} + +// Abstract method entry. +// +address InterpreterGenerator::generate_abstract_entry(void) { + address entry = __ pc(); + + // + // Registers alive + // R16_thread - JavaThread* + // R19_method - callee's methodOop (method to be invoked) + // R1_SP - SP prepared such that caller's outgoing args are near top + // LR - return address to caller + // + // Stack layout at this point: + // + // 0 [TOP_IJAVA_FRAME_ABI] <-- R1_SP + // alignment (optional) + // [outgoing Java arguments] + // ... + // PARENT [PARENT_IJAVA_FRAME_ABI] + // ... + // + + // Can't use call_VM here because we have not set up a new + // interpreter state. Make the call to the vm and make it look like + // our caller set up the JavaFrameAnchor. + __ set_top_ijava_frame_at_SP_as_last_Java_frame(R1_SP, R12_scratch2/*tmp*/); + + // Push a new C frame and save LR. + __ save_LR_CR(R0); + __ push_frame_abi112_nonvolatiles(0, R11_scratch1); + + // This is not a leaf but we have a JavaFrameAnchor now and we will + // check (create) exceptions afterward so this is ok. + __ call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_AbstractMethodError)); + + // Pop the C frame and restore LR. + __ pop_frame(); + __ restore_LR_CR(R0); + + // Reset JavaFrameAnchor from call_VM_leaf above. + __ reset_last_Java_frame(); + + // Return to frame manager, it will handle the pending exception. + __ blr(); + + return entry; +} + +// Call an accessor method (assuming it is resolved, otherwise drop into +// vanilla (slow path) entry. +address InterpreterGenerator::generate_accessor_entry(void) { + if(!UseFastAccessorMethods && (!FLAG_IS_ERGO(UseFastAccessorMethods))) + return NULL; + + Label Ldone, Lslow_path; + + const Register Rthis = R3_ARG1, + Rconst_method = R4_ARG2, + Rcodes = Rconst_method, + Rcpool_cache = R5_ARG3, + Rscratch = R11_scratch1, + Rjvmti_mode = Rscratch, + Roffset = R12_scratch2, + Rflags = R6_ARG4; + + address entry = __ pc(); + + // Check for safepoint: + // Ditch this, real man don't need safepoint checks. + + // Also check for JVMTI mode + // Check for null obj, take slow path if so. 
+#ifdef CC_INTERP + __ ld(Rthis, Interpreter::stackElementSize, R17_tos); +#else + Unimplemented() +#endif + __ lwz(Rjvmti_mode, thread_(interp_only_mode)); + __ cmpdi(CCR1, Rthis, 0); + __ cmpwi(CCR0, Rjvmti_mode, 0); + __ crorc(/*CCR0 eq*/2, /*CCR1 eq*/4+2, /*CCR0 eq*/2); + __ beq(CCR0, Lslow_path); // this==null or jvmti_mode!=0 + + // Do 2 things in parallel: + // 1. Load the index out of the first instruction word, which looks like this: + // <0x2a><0xb4>. + // 2. Load constant pool cache base. + __ ld(Rconst_method, in_bytes(Method::const_offset()), R19_method); + __ ld(Rcpool_cache, in_bytes(ConstMethod::constants_offset()), Rconst_method); + + __ lhz(Rcodes, in_bytes(ConstMethod::codes_offset()) + 2, Rconst_method); // Lower half of 32 bit field. + __ ld(Rcpool_cache, ConstantPool::cache_offset_in_bytes(), Rcpool_cache); + + // Get the const pool entry by means of . + const int codes_shift = exact_log2(in_words(ConstantPoolCacheEntry::size()) * BytesPerWord); + __ slwi(Rscratch, Rcodes, codes_shift); // (codes&0xFFFF)<>ConstantPoolCacheEntry::tos_state_shift)&((1< 0, "referent offset not initialized"); + + if (UseG1GC) { + Label slow_path; + + // Debugging not possible, so can't use __ skip_if_jvmti_mode(slow_path, GR31_SCRATCH); + + // In the G1 code we don't check if we need to reach a safepoint. We + // continue and the thread will safepoint at the next bytecode dispatch. + + // If the receiver is null then it is OK to jump to the slow path. +#ifdef CC_INTERP + __ ld(R3_RET, Interpreter::stackElementSize, R17_tos); // get receiver +#else + Unimplemented(); +#endif + + // Check if receiver == NULL and go the slow path. + __ cmpdi(CCR0, R3_RET, 0); + __ beq(CCR0, slow_path); + + // Load the value of the referent field. + __ load_heap_oop_not_null(R3_RET, referent_offset, R3_RET); + + // Generate the G1 pre-barrier code to log the value of + // the referent field in an SATB buffer. Note with + // these parameters the pre-barrier does not generate + // the load of the previous value. + + // Restore caller sp for c2i case. +#ifdef ASSERT + __ ld(R9_ARG7, 0, R1_SP); + __ ld(R10_ARG8, 0, R21_sender_SP); + __ cmpd(CCR0, R9_ARG7, R10_ARG8); + __ asm_assert_eq("backlink", 0x544); +#endif // ASSERT + __ mr(R1_SP, R21_sender_SP); // Cut the stack back to where the caller started. + + __ g1_write_barrier_pre(noreg, // obj + noreg, // offset + R3_RET, // pre_val + R11_scratch1, // tmp + R12_scratch2, // tmp + true); // needs_frame + + __ blr(); + + // Generate regular method entry. + __ bind(slow_path); + assert(Interpreter::entry_for_kind(Interpreter::zerolocals), "Normal entry must have been generated by now"); + __ load_const_optimized(R11_scratch1, Interpreter::entry_for_kind(Interpreter::zerolocals), R0); + __ mtctr(R11_scratch1); + __ bctr(); + __ flush(); + + return entry; + } else { + return generate_accessor_entry(); + } +} + +void Deoptimization::unwind_callee_save_values(frame* f, vframeArray* vframe_array) { + // This code is sort of the equivalent of C2IAdapter::setup_stack_frame back in + // the days we had adapter frames. When we deoptimize a situation where a + // compiled caller calls a compiled caller will have registers it expects + // to survive the call to the callee. If we deoptimize the callee the only + // way we can restore these registers is to have the oldest interpreter + // frame that we create restore these values. That is what this routine + // will accomplish. 
+ + // At the moment we have modified c2 to not have any callee save registers + // so this problem does not exist and this routine is just a place holder. + + assert(f->is_interpreted_frame(), "must be interpreted"); +} diff --git a/src/cpu/ppc/vm/interpreter_ppc.hpp b/src/cpu/ppc/vm/interpreter_ppc.hpp new file mode 100644 --- /dev/null +++ b/src/cpu/ppc/vm/interpreter_ppc.hpp @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright 2012, 2013 SAP AG. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_PPC_VM_INTERPRETER_PPC_HPP +#define CPU_PPC_VM_INTERPRETER_PPC_HPP + + public: + + // Stack index relative to tos (which points at value) + static int expr_index_at(int i) { + return stackElementWords * i; + } + + // Already negated by c++ interpreter + static int local_index_at(int i) { + assert(i <= 0, "local direction already negated"); + return stackElementWords * i; + } + +#endif // CPU_PPC_VM_INTERPRETER_PPC_PP diff --git a/src/cpu/ppc/vm/javaFrameAnchor_ppc.hpp b/src/cpu/ppc/vm/javaFrameAnchor_ppc.hpp new file mode 100644 --- /dev/null +++ b/src/cpu/ppc/vm/javaFrameAnchor_ppc.hpp @@ -0,0 +1,82 @@ +/* + * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright 2012, 2013 SAP AG. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef CPU_PPC_VM_JAVAFRAMEANCHOR_PPC_HPP +#define CPU_PPC_VM_JAVAFRAMEANCHOR_PPC_HPP + +#ifndef CC_INTERP +#error "CC_INTERP must be defined on PPC64" +#endif + +public: + // Each arch must define reset, save, restore + // These are used by objects that only care about: + // 1 - initializing a new state (thread creation, javaCalls) + // 2 - saving a current state (javaCalls) + // 3 - restoring an old state (javaCalls) + + inline void clear(void) { + // clearing _last_Java_sp must be first + _last_Java_sp = NULL; + // fence? + OrderAccess::release(); + _last_Java_pc = NULL; + } + + inline void set(intptr_t* sp, address pc) { + _last_Java_pc = pc; + OrderAccess::release(); + _last_Java_sp = sp; + } + + void copy(JavaFrameAnchor* src) { + // In order to make sure the transition state is valid for "this". + // We must clear _last_Java_sp before copying the rest of the new data. + // + // Hack Alert: Temporary bugfix for 4717480/4721647 + // To act like previous version (pd_cache_state) don't NULL _last_Java_sp + // unless the value is changing. + if (_last_Java_sp != src->_last_Java_sp) { + _last_Java_sp = NULL; + OrderAccess::release(); + } + _last_Java_pc = src->_last_Java_pc; + // Must be last so profiler will always see valid frame if has_last_frame() is true. + OrderAccess::release(); + _last_Java_sp = src->_last_Java_sp; + } + + // Always walkable. + bool walkable(void) { return true; } + // Never any thing to do since we are always walkable and can find address of return addresses. + void make_walkable(JavaThread* thread) { } + + intptr_t* last_Java_sp(void) const { return _last_Java_sp; } + + address last_Java_pc(void) { return _last_Java_pc; } + + void set_last_Java_sp(intptr_t* sp) { OrderAccess::release(); _last_Java_sp = sp; } + +#endif // CPU_PPC_VM_JAVAFRAMEANCHOR_PPC_HPP diff --git a/src/cpu/ppc/vm/jniFastGetField_ppc.cpp b/src/cpu/ppc/vm/jniFastGetField_ppc.cpp new file mode 100644 --- /dev/null +++ b/src/cpu/ppc/vm/jniFastGetField_ppc.cpp @@ -0,0 +1,75 @@ +/* + * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright 2012, 2013 SAP AG. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "assembler_ppc.inline.hpp" +#include "memory/resourceArea.hpp" +#include "prims/jniFastGetField.hpp" +#include "prims/jvm_misc.hpp" +#include "runtime/safepoint.hpp" + + +address JNI_FastGetField::generate_fast_get_int_field0(BasicType type) { + // we don't have fast jni accessors. 
+ return (address) -1; +} + +address JNI_FastGetField::generate_fast_get_boolean_field() { + return generate_fast_get_int_field0(T_BOOLEAN); +} + +address JNI_FastGetField::generate_fast_get_byte_field() { + return generate_fast_get_int_field0(T_BYTE); +} + +address JNI_FastGetField::generate_fast_get_char_field() { + return generate_fast_get_int_field0(T_CHAR); +} + +address JNI_FastGetField::generate_fast_get_short_field() { + return generate_fast_get_int_field0(T_SHORT); +} + +address JNI_FastGetField::generate_fast_get_int_field() { + return generate_fast_get_int_field0(T_INT); +} + +address JNI_FastGetField::generate_fast_get_long_field() { + // we don't have fast jni accessors. + return (address) -1; +} + +address JNI_FastGetField::generate_fast_get_float_field0(BasicType type) { + // we don't have fast jni accessors. + return (address) -1; +} + +address JNI_FastGetField::generate_fast_get_float_field() { + return generate_fast_get_float_field0(T_FLOAT); +} + +address JNI_FastGetField::generate_fast_get_double_field() { + return generate_fast_get_float_field0(T_DOUBLE); +} diff --git a/src/cpu/ppc/vm/jniTypes_ppc.hpp b/src/cpu/ppc/vm/jniTypes_ppc.hpp new file mode 100644 --- /dev/null +++ b/src/cpu/ppc/vm/jniTypes_ppc.hpp @@ -0,0 +1,110 @@ +/* + * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright 2012, 2013 SAP AG. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_PPC_VM_JNITYPES_PPC_HPP +#define CPU_PPC_VM_JNITYPES_PPC_HPP + +#include "memory/allocation.hpp" +#include "oops/oop.hpp" +#include "prims/jni.h" + +// This file holds platform-dependent routines used to write primitive +// jni types to the array of arguments passed into JavaCalls::call. + +class JNITypes : AllStatic { + // These functions write a java primitive type (in native format) to + // a java stack slot array to be passed as an argument to + // JavaCalls::call. I.e., they are functionally 'push' operations + // if they have a 'pos' formal parameter. Note that jlong's and + // jdouble's are written _in reverse_ of the order in which they + // appear in the interpreter stack. This is because call stubs (see + // stubGenerator_sparc.cpp) reverse the argument list constructed by + // JavaCallArguments (see javaCalls.hpp). + + private: + +#ifndef PPC64 +#error "ppc32 support currently not implemented!!!" +#endif // PPC64 + + public: + // Ints are stored in native format in one JavaCallArgument slot at *to.
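// Usage sketch (illustration only; the 'slots' buffer and values are made up):
// how the put_* routines below fill a JavaCallArguments slot array. A jint
// occupies one slot, while a jlong/jdouble advances 'pos' by two slots but is
// stored into the second slot of its pair -- the reversed order the call stub
// expects:
//
//   intptr_t slots[6]; int pos = 0;
//   put_int(42, slots, pos);            // writes slots[0],            pos becomes 1
//   put_long((jlong)4711, slots, pos);  // writes slots[1 + 1] == slots[2], pos becomes 3
//   put_double(3.14, slots, pos);       // writes slots[1 + 3] == slots[4], pos becomes 5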
+ static inline void put_int(jint from, intptr_t *to) { *(jint *)(to + 0 ) = from; } + static inline void put_int(jint from, intptr_t *to, int& pos) { *(jint *)(to + pos++) = from; } + static inline void put_int(jint *from, intptr_t *to, int& pos) { *(jint *)(to + pos++) = *from; } + + // Longs are stored in native format in one JavaCallArgument slot at + // *(to+1). + static inline void put_long(jlong from, intptr_t *to) { + *(jlong*) (to + 1) = from; + } + + static inline void put_long(jlong from, intptr_t *to, int& pos) { + *(jlong*) (to + 1 + pos) = from; + pos += 2; + } + + static inline void put_long(jlong *from, intptr_t *to, int& pos) { + *(jlong*) (to + 1 + pos) = *from; + pos += 2; + } + + // Oops are stored in native format in one JavaCallArgument slot at *to. + static inline void put_obj(oop from, intptr_t *to) { *(oop *)(to + 0 ) = from; } + static inline void put_obj(oop from, intptr_t *to, int& pos) { *(oop *)(to + pos++) = from; } + static inline void put_obj(oop *from, intptr_t *to, int& pos) { *(oop *)(to + pos++) = *from; } + + // Floats are stored in native format in one JavaCallArgument slot at *to. + static inline void put_float(jfloat from, intptr_t *to) { *(jfloat *)(to + 0 ) = from; } + static inline void put_float(jfloat from, intptr_t *to, int& pos) { *(jfloat *)(to + pos++) = from; } + static inline void put_float(jfloat *from, intptr_t *to, int& pos) { *(jfloat *)(to + pos++) = *from; } + + // Doubles are stored in native word format in one JavaCallArgument + // slot at *(to+1). + static inline void put_double(jdouble from, intptr_t *to) { + *(jdouble*) (to + 1) = from; + } + + static inline void put_double(jdouble from, intptr_t *to, int& pos) { + *(jdouble*) (to + 1 + pos) = from; + pos += 2; + } + + static inline void put_double(jdouble *from, intptr_t *to, int& pos) { + *(jdouble*) (to + 1 + pos) = *from; + pos += 2; + } + + // The get_xxx routines, on the other hand, actually _do_ fetch + // java primitive types from the interpreter stack. + // No need to worry about alignment on Intel. + static inline jint get_int (intptr_t *from) { return *(jint *) from; } + static inline jlong get_long (intptr_t *from) { return *(jlong *) (from + 1); } + static inline oop get_obj (intptr_t *from) { return *(oop *) from; } + static inline jfloat get_float (intptr_t *from) { return *(jfloat *) from; } + static inline jdouble get_double(intptr_t *from) { return *(jdouble *)(from + 1); } +}; + +#endif // CPU_PPC_VM_JNITYPES_PPC_HPP diff --git a/src/cpu/ppc/vm/jni_ppc.h b/src/cpu/ppc/vm/jni_ppc.h new file mode 100644 --- /dev/null +++ b/src/cpu/ppc/vm/jni_ppc.h @@ -0,0 +1,53 @@ +/* + * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright 2012, 2013 SAP AG. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. Oracle designates this + * particular file as subject to the "Classpath" exception as provided + * by Oracle in the LICENSE file that accompanied this code. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +#ifndef CPU_PPC_VM_JNI_PPC_H +#define CPU_PPC_VM_JNI_PPC_H + +// Note: please do not change these without also changing jni_md.h in the JDK +// repository +#ifndef __has_attribute + #define __has_attribute(x) 0 +#endif +#if (defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4) && (__GNUC_MINOR__ > 2))) || __has_attribute(visibility) + #define JNIEXPORT __attribute__((visibility("default"))) + #define JNIIMPORT __attribute__((visibility("default"))) +#else + #define JNIEXPORT + #define JNIIMPORT +#endif + + #define JNICALL + typedef int jint; +#if defined(_LP64) + typedef long jlong; +#else + typedef long long jlong; +#endif + +typedef signed char jbyte; + +#endif // CPU_PPC_VM_JNI_PPC_H diff --git a/src/cpu/ppc/vm/macroAssembler_ppc.cpp b/src/cpu/ppc/vm/macroAssembler_ppc.cpp new file mode 100644 --- /dev/null +++ b/src/cpu/ppc/vm/macroAssembler_ppc.cpp @@ -0,0 +1,3017 @@ +/* + * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright 2012, 2013 SAP AG. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/assembler.hpp" +#include "asm/assembler.inline.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "compiler/disassembler.hpp" +#include "gc_interface/collectedHeap.inline.hpp" +#include "interpreter/interpreter.hpp" +#include "memory/cardTableModRefBS.hpp" +#include "memory/resourceArea.hpp" +#include "prims/methodHandles.hpp" +#include "runtime/biasedLocking.hpp" +#include "runtime/interfaceSupport.hpp" +#include "runtime/objectMonitor.hpp" +#include "runtime/os.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" +#include "utilities/macros.hpp" +#if INCLUDE_ALL_GCS +#include "gc_implementation/g1/g1CollectedHeap.inline.hpp" +#include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp" +#include "gc_implementation/g1/heapRegion.hpp" +#endif // INCLUDE_ALL_GCS + +#ifdef PRODUCT +#define BLOCK_COMMENT(str) // nothing +#else +#define BLOCK_COMMENT(str) block_comment(str) +#endif + +#ifdef ASSERT +// On RISC, there's no benefit to verifying instruction boundaries. 
+bool AbstractAssembler::pd_check_instruction_mark() { return false; } +#endif + +void MacroAssembler::ld_largeoffset_unchecked(Register d, int si31, Register a, int emit_filler_nop) { + assert(Assembler::is_simm(si31, 31) && si31 >= 0, "si31 out of range"); + if (Assembler::is_simm(si31, 16)) { + ld(d, si31, a); + if (emit_filler_nop) nop(); + } else { + const int hi = MacroAssembler::largeoffset_si16_si16_hi(si31); + const int lo = MacroAssembler::largeoffset_si16_si16_lo(si31); + addis(d, a, hi); + ld(d, lo, d); + } +} + +void MacroAssembler::ld_largeoffset(Register d, int si31, Register a, int emit_filler_nop) { + assert_different_registers(d, a); + ld_largeoffset_unchecked(d, si31, a, emit_filler_nop); +} + +void MacroAssembler::load_sized_value(Register dst, RegisterOrConstant offs, Register base, + size_t size_in_bytes, bool is_signed) { + switch (size_in_bytes) { + case 8: ld(dst, offs, base); break; + case 4: is_signed ? lwa(dst, offs, base) : lwz(dst, offs, base); break; + case 2: is_signed ? lha(dst, offs, base) : lhz(dst, offs, base); break; + case 1: lbz(dst, offs, base); if (is_signed) extsb(dst, dst); break; // lba doesn't exist :( + default: ShouldNotReachHere(); + } +} + +void MacroAssembler::store_sized_value(Register dst, RegisterOrConstant offs, Register base, + size_t size_in_bytes) { + switch (size_in_bytes) { + case 8: std(dst, offs, base); break; + case 4: stw(dst, offs, base); break; + case 2: sth(dst, offs, base); break; + case 1: stb(dst, offs, base); break; + default: ShouldNotReachHere(); + } +} + +void MacroAssembler::align(int modulus) { + while (offset() % modulus != 0) nop(); +} + +// Issue instructions that calculate given TOC from global TOC. +void MacroAssembler::calculate_address_from_global_toc(Register dst, address addr, bool hi16, bool lo16, + bool add_relocation, bool emit_dummy_addr) { + int offset = -1; + if (emit_dummy_addr) { + offset = -128; // dummy address + } else if (addr != (address)(intptr_t)-1) { + offset = MacroAssembler::offset_to_global_toc(addr); + } + + if (hi16) { + addis(dst, R29, MacroAssembler::largeoffset_si16_si16_hi(offset)); + } + if (lo16) { + if (add_relocation) { + // Relocate at the addi to avoid confusion with a load from the method's TOC. + relocate(internal_word_Relocation::spec(addr)); + } + addi(dst, dst, MacroAssembler::largeoffset_si16_si16_lo(offset)); + } +} + +int MacroAssembler::patch_calculate_address_from_global_toc_at(address a, address bound, address addr) { + const int offset = MacroAssembler::offset_to_global_toc(addr); + + const address inst2_addr = a; + const int inst2 = *(int *)inst2_addr; + + // The relocation points to the second instruction, the addi, + // and the addi reads and writes the same register dst. + const int dst = inv_rt_field(inst2); + assert(is_addi(inst2) && inv_ra_field(inst2) == dst, "must be addi reading and writing dst"); + + // Now, find the preceding addis which writes to dst. + int inst1 = 0; + address inst1_addr = inst2_addr - BytesPerInstWord; + while (inst1_addr >= bound) { + inst1 = *(int *) inst1_addr; + if (is_addis(inst1) && inv_rt_field(inst1) == dst) { + // Stop, found the addis which writes dst. 
+ break; + } + inst1_addr -= BytesPerInstWord; + } + + assert(is_addis(inst1) && inv_ra_field(inst1) == 29 /* R29 */, "source must be global TOC"); + set_imm((int *)inst1_addr, MacroAssembler::largeoffset_si16_si16_hi(offset)); + set_imm((int *)inst2_addr, MacroAssembler::largeoffset_si16_si16_lo(offset)); + return (int)((intptr_t)addr - (intptr_t)inst1_addr); +} + +address MacroAssembler::get_address_of_calculate_address_from_global_toc_at(address a, address bound) { + const address inst2_addr = a; + const int inst2 = *(int *)inst2_addr; + + // The relocation points to the second instruction, the addi, + // and the addi reads and writes the same register dst. + const int dst = inv_rt_field(inst2); + assert(is_addi(inst2) && inv_ra_field(inst2) == dst, "must be addi reading and writing dst"); + + // Now, find the preceding addis which writes to dst. + int inst1 = 0; + address inst1_addr = inst2_addr - BytesPerInstWord; + while (inst1_addr >= bound) { + inst1 = *(int *) inst1_addr; + if (is_addis(inst1) && inv_rt_field(inst1) == dst) { + // stop, found the addis which writes dst + break; + } + inst1_addr -= BytesPerInstWord; + } + + assert(is_addis(inst1) && inv_ra_field(inst1) == 29 /* R29 */, "source must be global TOC"); + + int offset = (get_imm(inst1_addr, 0) << 16) + get_imm(inst2_addr, 0); + // -1 is a special case + if (offset == -1) { + return (address)(intptr_t)-1; + } else { + return global_toc() + offset; + } +} + +#ifdef _LP64 +// Patch compressed oops or klass constants. +int MacroAssembler::patch_set_narrow_oop(address a, address bound, narrowOop data) { + assert(UseCompressedOops, "Should only patch compressed oops"); + + const address inst2_addr = a; + const int inst2 = *(int *)inst2_addr; + + // The relocation points to the second instruction, the addi, + // and the addi reads and writes the same register dst. + const int dst = inv_rt_field(inst2); + assert(is_addi(inst2) && inv_ra_field(inst2) == dst, "must be addi reading and writing dst"); + // Now, find the preceding addis which writes to dst. + int inst1 = 0; + address inst1_addr = inst2_addr - BytesPerInstWord; + bool inst1_found = false; + while (inst1_addr >= bound) { + inst1 = *(int *)inst1_addr; + if (is_lis(inst1) && inv_rs_field(inst1) == dst) { inst1_found = true; break; } + inst1_addr -= BytesPerInstWord; + } + assert(inst1_found, "inst is not lis"); + + int xc = (data >> 16) & 0xffff; + int xd = (data >> 0) & 0xffff; + + set_imm((int *)inst1_addr,((short)(xc + ((xd & 0x8000) != 0 ? 1 : 0)))); // see enc_load_con_narrow1/2 + set_imm((int *)inst2_addr, (short)(xd)); + return (int)((intptr_t)inst2_addr - (intptr_t)inst1_addr); +} + +// Get compressed oop or klass constant. +narrowOop MacroAssembler::get_narrow_oop(address a, address bound) { + assert(UseCompressedOops, "Should only patch compressed oops"); + + const address inst2_addr = a; + const int inst2 = *(int *)inst2_addr; + + // The relocation points to the second instruction, the addi, + // and the addi reads and writes the same register dst. + const int dst = inv_rt_field(inst2); + assert(is_addi(inst2) && inv_ra_field(inst2) == dst, "must be addi reading and writing dst"); + // Now, find the preceding lis which writes to dst. 
+ int inst1 = 0; + address inst1_addr = inst2_addr - BytesPerInstWord; + bool inst1_found = false; + + while (inst1_addr >= bound) { + inst1 = *(int *) inst1_addr; + if (is_lis(inst1) && inv_rs_field(inst1) == dst) { inst1_found = true; break;} + inst1_addr -= BytesPerInstWord; + } + assert(inst1_found, "inst is not lis"); + + uint xl = ((unsigned int) (get_imm(inst2_addr,0) & 0xffff)); + uint xh = (((((xl & 0x8000) != 0 ? -1 : 0) + get_imm(inst1_addr,0)) & 0xffff) << 16); + return (int) (xl | xh); +} +#endif // _LP64 + +void MacroAssembler::load_const_from_method_toc(Register dst, AddressLiteral& a, Register toc) { + int toc_offset = 0; + // Use RelocationHolder::none for the constant pool entry, otherwise + // we will end up with a failing NativeCall::verify(x) where x is + // the address of the constant pool entry. + // FIXME: We should insert relocation information for oops at the constant + // pool entries instead of inserting it at the loads; patching of a constant + // pool entry should be less expensive. + Unimplemented(); + if (false) { + address oop_address = address_constant((address)a.value(), RelocationHolder::none); + // Relocate at the pc of the load. + relocate(a.rspec()); + toc_offset = (int)(oop_address - code()->consts()->start()); + } + ld_largeoffset_unchecked(dst, toc_offset, toc, true); +} + +bool MacroAssembler::is_load_const_from_method_toc_at(address a) { + const address inst1_addr = a; + const int inst1 = *(int *)inst1_addr; + + // The relocation points to the ld or the addis. + return (is_ld(inst1)) || + (is_addis(inst1) && inv_ra_field(inst1) != 0); +} + +int MacroAssembler::get_offset_of_load_const_from_method_toc_at(address a) { + assert(is_load_const_from_method_toc_at(a), "must be load_const_from_method_toc"); + + const address inst1_addr = a; + const int inst1 = *(int *)inst1_addr; + + if (is_ld(inst1)) { + return inv_d1_field(inst1); + } else if (is_addis(inst1)) { + const int dst = inv_rt_field(inst1); + + // Now, find the succeeding ld which reads and writes to dst. + address inst2_addr = inst1_addr + BytesPerInstWord; + int inst2 = 0; + while (true) { + inst2 = *(int *) inst2_addr; + if (is_ld(inst2) && inv_ra_field(inst2) == dst && inv_rt_field(inst2) == dst) { + // Stop, found the ld which reads and writes dst. + break; + } + inst2_addr += BytesPerInstWord; + } + return (inv_d1_field(inst1) << 16) + inv_d1_field(inst2); + } + ShouldNotReachHere(); + return 0; +} + +// Get the constant from a `load_const' sequence. +long MacroAssembler::get_const(address a) { + assert(is_load_const_at(a), "not a load of a constant"); + const int *p = (const int*) a; + unsigned long x = (((unsigned long) (get_imm(a,0) & 0xffff)) << 48); + if (is_ori(*(p+1))) { + x |= (((unsigned long) (get_imm(a,1) & 0xffff)) << 32); + x |= (((unsigned long) (get_imm(a,3) & 0xffff)) << 16); + x |= (((unsigned long) (get_imm(a,4) & 0xffff))); + } else if (is_lis(*(p+1))) { + x |= (((unsigned long) (get_imm(a,2) & 0xffff)) << 32); + x |= (((unsigned long) (get_imm(a,1) & 0xffff)) << 16); + x |= (((unsigned long) (get_imm(a,3) & 0xffff))); + } else { + ShouldNotReachHere(); + return (long) 0; + } + return (long) x; +} + +// Patch the 64 bit constant of a `load_const' sequence. This is a low +// level procedure. It neither flushes the instruction cache nor is it +// mt safe. 
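// Standalone sketch (not VM code) of the halfword split that get_const() above
// reassembles and patch_const() below rewrites: the 64-bit constant of a
// 'load_const' sequence is carried in four 16-bit immediates taken from bit
// positions 48, 32, 16 and 0 of the value. Which of the five instructions
// carries which halfword depends on whether the sequence is ori- or lis-based,
// hence the two index orders used in get_const/patch_const.
#include <stdint.h>

static inline void split_const64(uint64_t x, uint16_t imm[4]) {
  imm[0] = (uint16_t)(x >> 48);  // highest halfword, loaded by the leading lis
  imm[1] = (uint16_t)(x >> 32);
  imm[2] = (uint16_t)(x >> 16);
  imm[3] = (uint16_t)(x >>  0);  // lowest halfword
}

static inline uint64_t join_const64(const uint16_t imm[4]) {
  return ((uint64_t)imm[0] << 48) | ((uint64_t)imm[1] << 32) |
         ((uint64_t)imm[2] << 16) |  (uint64_t)imm[3];
}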
+void MacroAssembler::patch_const(address a, long x) { + assert(is_load_const_at(a), "not a load of a constant"); + int *p = (int*) a; + if (is_ori(*(p+1))) { + set_imm(0 + p, (x >> 48) & 0xffff); + set_imm(1 + p, (x >> 32) & 0xffff); + set_imm(3 + p, (x >> 16) & 0xffff); + set_imm(4 + p, x & 0xffff); + } else if (is_lis(*(p+1))) { + set_imm(0 + p, (x >> 48) & 0xffff); + set_imm(2 + p, (x >> 32) & 0xffff); + set_imm(1 + p, (x >> 16) & 0xffff); + set_imm(3 + p, x & 0xffff); + } else { + ShouldNotReachHere(); + } +} + +AddressLiteral MacroAssembler::allocate_metadata_address(Metadata* obj) { + assert(oop_recorder() != NULL, "this assembler needs a Recorder"); + int index = oop_recorder()->allocate_metadata_index(obj); + RelocationHolder rspec = metadata_Relocation::spec(index); + return AddressLiteral((address)obj, rspec); +} + +AddressLiteral MacroAssembler::constant_metadata_address(Metadata* obj) { + assert(oop_recorder() != NULL, "this assembler needs a Recorder"); + int index = oop_recorder()->find_index(obj); + RelocationHolder rspec = metadata_Relocation::spec(index); + return AddressLiteral((address)obj, rspec); +} + +AddressLiteral MacroAssembler::allocate_oop_address(jobject obj) { + assert(oop_recorder() != NULL, "this assembler needs an OopRecorder"); + int oop_index = oop_recorder()->allocate_oop_index(obj); + return AddressLiteral(address(obj), oop_Relocation::spec(oop_index)); +} + +AddressLiteral MacroAssembler::constant_oop_address(jobject obj) { + assert(oop_recorder() != NULL, "this assembler needs an OopRecorder"); + int oop_index = oop_recorder()->find_index(obj); + return AddressLiteral(address(obj), oop_Relocation::spec(oop_index)); +} + +RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr, + Register tmp, int offset) { + intptr_t value = *delayed_value_addr; + if (value != 0) { + return RegisterOrConstant(value + offset); + } + + // Load indirectly to solve generation ordering problem. + // static address, no relocation + int simm16_offset = load_const_optimized(tmp, delayed_value_addr, noreg, true); + ld(tmp, simm16_offset, tmp); // must be aligned ((xa & 3) == 0) + + if (offset != 0) { + addi(tmp, tmp, offset); + } + + return RegisterOrConstant(tmp); +} + +#ifndef PRODUCT +void MacroAssembler::pd_print_patched_instruction(address branch) { + Unimplemented(); // TODO: PPC port +} +#endif // ndef PRODUCT + +// Conditional far branch for destinations encodable in 24+2 bits. +void MacroAssembler::bc_far(int boint, int biint, Label& dest, int optimize) { + + // If requested by flag optimize, relocate the bc_far as a + // runtime_call and prepare for optimizing it when the code gets + // relocated. + if (optimize == bc_far_optimize_on_relocate) { + relocate(relocInfo::runtime_call_type); + } + + // variant 2: + // + // b!cxx SKIP + // bxx DEST + // SKIP: + // + + const int opposite_boint = add_bhint_to_boint(opposite_bhint(inv_boint_bhint(boint)), + opposite_bcond(inv_boint_bcond(boint))); + + // We emit two branches. + // First, a conditional branch which jumps around the far branch. 
+ const address not_taken_pc = pc() + 2 * BytesPerInstWord; + const address bc_pc = pc(); + bc(opposite_boint, biint, not_taken_pc); + + const int bc_instr = *(int*)bc_pc; + assert(not_taken_pc == (address)inv_bd_field(bc_instr, (intptr_t)bc_pc), "postcondition"); + assert(opposite_boint == inv_bo_field(bc_instr), "postcondition"); + assert(boint == add_bhint_to_boint(opposite_bhint(inv_boint_bhint(inv_bo_field(bc_instr))), + opposite_bcond(inv_boint_bcond(inv_bo_field(bc_instr)))), + "postcondition"); + assert(biint == inv_bi_field(bc_instr), "postcondition"); + + // Second, an unconditional far branch which jumps to dest. + // Note: target(dest) remembers the current pc (see CodeSection::target) + // and returns the current pc if the label is not bound yet; when + // the label gets bound, the unconditional far branch will be patched. + const address target_pc = target(dest); + const address b_pc = pc(); + b(target_pc); + + assert(not_taken_pc == pc(), "postcondition"); + assert(dest.is_bound() || target_pc == b_pc, "postcondition"); +} + +bool MacroAssembler::is_bc_far_at(address instruction_addr) { + return is_bc_far_variant1_at(instruction_addr) || + is_bc_far_variant2_at(instruction_addr) || + is_bc_far_variant3_at(instruction_addr); +} + +address MacroAssembler::get_dest_of_bc_far_at(address instruction_addr) { + if (is_bc_far_variant1_at(instruction_addr)) { + const address instruction_1_addr = instruction_addr; + const int instruction_1 = *(int*)instruction_1_addr; + return (address)inv_bd_field(instruction_1, (intptr_t)instruction_1_addr); + } else if (is_bc_far_variant2_at(instruction_addr)) { + const address instruction_2_addr = instruction_addr + 4; + return bxx_destination(instruction_2_addr); + } else if (is_bc_far_variant3_at(instruction_addr)) { + return instruction_addr + 8; + } + // variant 4 ??? + ShouldNotReachHere(); + return NULL; +} +void MacroAssembler::set_dest_of_bc_far_at(address instruction_addr, address dest) { + + if (is_bc_far_variant3_at(instruction_addr)) { + // variant 3, far cond branch to the next instruction, already patched to nops: + // + // nop + // endgroup + // SKIP/DEST: + // + return; + } + + // first, extract boint and biint from the current branch + int boint = 0; + int biint = 0; + + ResourceMark rm; + const int code_size = 2 * BytesPerInstWord; + CodeBuffer buf(instruction_addr, code_size); + MacroAssembler masm(&buf); + if (is_bc_far_variant2_at(instruction_addr) && dest == instruction_addr + 8) { + // Far branch to next instruction: Optimize it by patching nops (produce variant 3). + masm.nop(); + masm.endgroup(); + } else { + if (is_bc_far_variant1_at(instruction_addr)) { + // variant 1, the 1st instruction contains the destination address: + // + // bcxx DEST + // endgroup + // + const int instruction_1 = *(int*)(instruction_addr); + boint = inv_bo_field(instruction_1); + biint = inv_bi_field(instruction_1); + } else if (is_bc_far_variant2_at(instruction_addr)) { + // variant 2, the 2nd instruction contains the destination address: + // + // b!cxx SKIP + // bxx DEST + // SKIP: + // + const int instruction_1 = *(int*)(instruction_addr); + boint = add_bhint_to_boint(opposite_bhint(inv_boint_bhint(inv_bo_field(instruction_1))), + opposite_bcond(inv_boint_bcond(inv_bo_field(instruction_1)))); + biint = inv_bi_field(instruction_1); + } else { + // variant 4??? + ShouldNotReachHere(); + } + + // second, set the new branch destination and optimize the code + if (dest != instruction_addr + 4 && // the bc_far is still unbound! 
+ masm.is_within_range_of_bcxx(dest, instruction_addr)) { + // variant 1: + // + // bcxx DEST + // endgroup + // + masm.bc(boint, biint, dest); + masm.endgroup(); + } else { + // variant 2: + // + // b!cxx SKIP + // bxx DEST + // SKIP: + // + const int opposite_boint = add_bhint_to_boint(opposite_bhint(inv_boint_bhint(boint)), + opposite_bcond(inv_boint_bcond(boint))); + const address not_taken_pc = masm.pc() + 2 * BytesPerInstWord; + masm.bc(opposite_boint, biint, not_taken_pc); + masm.b(dest); + } + } + ICache::invalidate_range(instruction_addr, code_size); +} + +// Emit a NOT mt-safe patchable 64 bit absolute call/jump. +void MacroAssembler::bxx64_patchable(address dest, relocInfo::relocType rt, bool link) { + // get current pc + uint64_t start_pc = (uint64_t) pc(); + + const address pc_of_bl = (address) (start_pc + (6*BytesPerInstWord)); // bl is last + const address pc_of_b = (address) (start_pc + (0*BytesPerInstWord)); // b is first + + // relocate here + if (rt != relocInfo::none) { + relocate(rt); + } + + if ( ReoptimizeCallSequences && + (( link && is_within_range_of_b(dest, pc_of_bl)) || + (!link && is_within_range_of_b(dest, pc_of_b)))) { + // variant 2: + // Emit an optimized, pc-relative call/jump. + + if (link) { + // some padding + nop(); + nop(); + nop(); + nop(); + nop(); + nop(); + + // do the call + assert(pc() == pc_of_bl, "just checking"); + bl(dest, relocInfo::none); + } else { + // do the jump + assert(pc() == pc_of_b, "just checking"); + b(dest, relocInfo::none); + + // some padding + nop(); + nop(); + nop(); + nop(); + nop(); + nop(); + } + + // Assert that we can identify the emitted call/jump. + assert(is_bxx64_patchable_variant2_at((address)start_pc, link), + "can't identify emitted call"); + } else { + // variant 1: + + mr(R0, R11); // spill R11 -> R0. + + // Load the destination address into CTR, + // calculate destination relative to global toc. + calculate_address_from_global_toc(R11, dest, true, true, false); + + mtctr(R11); + mr(R11, R0); // spill R11 <- R0. + nop(); + + // do the call/jump + if (link) { + bctrl(); + } else{ + bctr(); + } + // Assert that we can identify the emitted call/jump. + assert(is_bxx64_patchable_variant1b_at((address)start_pc, link), + "can't identify emitted call"); + } + + // Assert that we can identify the emitted call/jump. + assert(is_bxx64_patchable_at((address)start_pc, link), + "can't identify emitted call"); + assert(get_dest_of_bxx64_patchable_at((address)start_pc, link) == dest, + "wrong encoding of dest address"); +} + +// Identify a bxx64_patchable instruction. +bool MacroAssembler::is_bxx64_patchable_at(address instruction_addr, bool link) { + return is_bxx64_patchable_variant1b_at(instruction_addr, link) + //|| is_bxx64_patchable_variant1_at(instruction_addr, link) + || is_bxx64_patchable_variant2_at(instruction_addr, link); +} + +// Does the call64_patchable instruction use a pc-relative encoding of +// the call destination? +bool MacroAssembler::is_bxx64_patchable_pcrelative_at(address instruction_addr, bool link) { + // variant 2 is pc-relative + return is_bxx64_patchable_variant2_at(instruction_addr, link); +} + +// Identify variant 1. +bool MacroAssembler::is_bxx64_patchable_variant1_at(address instruction_addr, bool link) { + unsigned int* instr = (unsigned int*) instruction_addr; + return (link ? is_bctrl(instr[6]) : is_bctr(instr[6])) // bctr[l] + && is_mtctr(instr[5]) // mtctr + && is_load_const_at(instruction_addr); +} + +// Identify variant 1b: load destination relative to global toc. 
+bool MacroAssembler::is_bxx64_patchable_variant1b_at(address instruction_addr, bool link) { + unsigned int* instr = (unsigned int*) instruction_addr; + return (link ? is_bctrl(instr[6]) : is_bctr(instr[6])) // bctr[l] + && is_mtctr(instr[3]) // mtctr + && is_calculate_address_from_global_toc_at(instruction_addr + 2*BytesPerInstWord, instruction_addr); +} + +// Identify variant 2. +bool MacroAssembler::is_bxx64_patchable_variant2_at(address instruction_addr, bool link) { + unsigned int* instr = (unsigned int*) instruction_addr; + if (link) { + return is_bl (instr[6]) // bl dest is last + && is_nop(instr[0]) // nop + && is_nop(instr[1]) // nop + && is_nop(instr[2]) // nop + && is_nop(instr[3]) // nop + && is_nop(instr[4]) // nop + && is_nop(instr[5]); // nop + } else { + return is_b (instr[0]) // b dest is first + && is_nop(instr[1]) // nop + && is_nop(instr[2]) // nop + && is_nop(instr[3]) // nop + && is_nop(instr[4]) // nop + && is_nop(instr[5]) // nop + && is_nop(instr[6]); // nop + } +} + +// Set dest address of a bxx64_patchable instruction. +void MacroAssembler::set_dest_of_bxx64_patchable_at(address instruction_addr, address dest, bool link) { + ResourceMark rm; + int code_size = MacroAssembler::bxx64_patchable_size; + CodeBuffer buf(instruction_addr, code_size); + MacroAssembler masm(&buf); + masm.bxx64_patchable(dest, relocInfo::none, link); + ICache::invalidate_range(instruction_addr, code_size); +} + +// Get dest address of a bxx64_patchable instruction. +address MacroAssembler::get_dest_of_bxx64_patchable_at(address instruction_addr, bool link) { + if (is_bxx64_patchable_variant1_at(instruction_addr, link)) { + return (address) (unsigned long) get_const(instruction_addr); + } else if (is_bxx64_patchable_variant2_at(instruction_addr, link)) { + unsigned int* instr = (unsigned int*) instruction_addr; + if (link) { + const int instr_idx = 6; // bl is last + int branchoffset = branch_destination(instr[instr_idx], 0); + return instruction_addr + branchoffset + instr_idx*BytesPerInstWord; + } else { + const int instr_idx = 0; // b is first + int branchoffset = branch_destination(instr[instr_idx], 0); + return instruction_addr + branchoffset + instr_idx*BytesPerInstWord; + } + // Load dest relative to global toc. 
+ } else if (is_bxx64_patchable_variant1b_at(instruction_addr, link)) { + return get_address_of_calculate_address_from_global_toc_at(instruction_addr + 2*BytesPerInstWord, + instruction_addr); + } else { + ShouldNotReachHere(); + return NULL; + } +} + +// Uses ordering which corresponds to ABI: +// _savegpr0_14: std r14,-144(r1) +// _savegpr0_15: std r15,-136(r1) +// _savegpr0_16: std r16,-128(r1) +void MacroAssembler::save_nonvolatile_gprs(Register dst, int offset) { + std(R14, offset, dst); offset += 8; + std(R15, offset, dst); offset += 8; + std(R16, offset, dst); offset += 8; + std(R17, offset, dst); offset += 8; + std(R18, offset, dst); offset += 8; + std(R19, offset, dst); offset += 8; + std(R20, offset, dst); offset += 8; + std(R21, offset, dst); offset += 8; + std(R22, offset, dst); offset += 8; + std(R23, offset, dst); offset += 8; + std(R24, offset, dst); offset += 8; + std(R25, offset, dst); offset += 8; + std(R26, offset, dst); offset += 8; + std(R27, offset, dst); offset += 8; + std(R28, offset, dst); offset += 8; + std(R29, offset, dst); offset += 8; + std(R30, offset, dst); offset += 8; + std(R31, offset, dst); offset += 8; + + stfd(F14, offset, dst); offset += 8; + stfd(F15, offset, dst); offset += 8; + stfd(F16, offset, dst); offset += 8; + stfd(F17, offset, dst); offset += 8; + stfd(F18, offset, dst); offset += 8; + stfd(F19, offset, dst); offset += 8; + stfd(F20, offset, dst); offset += 8; + stfd(F21, offset, dst); offset += 8; + stfd(F22, offset, dst); offset += 8; + stfd(F23, offset, dst); offset += 8; + stfd(F24, offset, dst); offset += 8; + stfd(F25, offset, dst); offset += 8; + stfd(F26, offset, dst); offset += 8; + stfd(F27, offset, dst); offset += 8; + stfd(F28, offset, dst); offset += 8; + stfd(F29, offset, dst); offset += 8; + stfd(F30, offset, dst); offset += 8; + stfd(F31, offset, dst); +} + +// Uses ordering which corresponds to ABI: +// _restgpr0_14: ld r14,-144(r1) +// _restgpr0_15: ld r15,-136(r1) +// _restgpr0_16: ld r16,-128(r1) +void MacroAssembler::restore_nonvolatile_gprs(Register src, int offset) { + ld(R14, offset, src); offset += 8; + ld(R15, offset, src); offset += 8; + ld(R16, offset, src); offset += 8; + ld(R17, offset, src); offset += 8; + ld(R18, offset, src); offset += 8; + ld(R19, offset, src); offset += 8; + ld(R20, offset, src); offset += 8; + ld(R21, offset, src); offset += 8; + ld(R22, offset, src); offset += 8; + ld(R23, offset, src); offset += 8; + ld(R24, offset, src); offset += 8; + ld(R25, offset, src); offset += 8; + ld(R26, offset, src); offset += 8; + ld(R27, offset, src); offset += 8; + ld(R28, offset, src); offset += 8; + ld(R29, offset, src); offset += 8; + ld(R30, offset, src); offset += 8; + ld(R31, offset, src); offset += 8; + + // FP registers + lfd(F14, offset, src); offset += 8; + lfd(F15, offset, src); offset += 8; + lfd(F16, offset, src); offset += 8; + lfd(F17, offset, src); offset += 8; + lfd(F18, offset, src); offset += 8; + lfd(F19, offset, src); offset += 8; + lfd(F20, offset, src); offset += 8; + lfd(F21, offset, src); offset += 8; + lfd(F22, offset, src); offset += 8; + lfd(F23, offset, src); offset += 8; + lfd(F24, offset, src); offset += 8; + lfd(F25, offset, src); offset += 8; + lfd(F26, offset, src); offset += 8; + lfd(F27, offset, src); offset += 8; + lfd(F28, offset, src); offset += 8; + lfd(F29, offset, src); offset += 8; + lfd(F30, offset, src); offset += 8; + lfd(F31, offset, src); +} + +// For verify_oops. 
+void MacroAssembler::save_volatile_gprs(Register dst, int offset) { + std(R3, offset, dst); offset += 8; + std(R4, offset, dst); offset += 8; + std(R5, offset, dst); offset += 8; + std(R6, offset, dst); offset += 8; + std(R7, offset, dst); offset += 8; + std(R8, offset, dst); offset += 8; + std(R9, offset, dst); offset += 8; + std(R10, offset, dst); offset += 8; + std(R11, offset, dst); offset += 8; + std(R12, offset, dst); +} + +// For verify_oops. +void MacroAssembler::restore_volatile_gprs(Register src, int offset) { + ld(R3, offset, src); offset += 8; + ld(R4, offset, src); offset += 8; + ld(R5, offset, src); offset += 8; + ld(R6, offset, src); offset += 8; + ld(R7, offset, src); offset += 8; + ld(R8, offset, src); offset += 8; + ld(R9, offset, src); offset += 8; + ld(R10, offset, src); offset += 8; + ld(R11, offset, src); offset += 8; + ld(R12, offset, src); +} + +void MacroAssembler::save_LR_CR(Register tmp) { + mfcr(tmp); + std(tmp, _abi(cr), R1_SP); + mflr(tmp); + std(tmp, _abi(lr), R1_SP); + // Tmp must contain lr on exit! (see return_addr and prolog in ppc64.ad) +} + +void MacroAssembler::restore_LR_CR(Register tmp) { + assert(tmp != R1_SP, "must be distinct"); + ld(tmp, _abi(lr), R1_SP); + mtlr(tmp); + ld(tmp, _abi(cr), R1_SP); + mtcr(tmp); +} + +address MacroAssembler::get_PC_trash_LR(Register result) { + Label L; + bl(L); + bind(L); + address lr_pc = pc(); + mflr(result); + return lr_pc; +} + +void MacroAssembler::resize_frame(Register offset, Register tmp) { +#ifdef ASSERT + assert_different_registers(offset, tmp, R1_SP); + andi_(tmp, offset, frame::alignment_in_bytes-1); + asm_assert_eq("resize_frame: unaligned", 0x204); +#endif + + // tmp <- *(SP) + ld(tmp, _abi(callers_sp), R1_SP); + // addr <- SP + offset; + // *(addr) <- tmp; + // SP <- addr + stdux(tmp, R1_SP, offset); +} + +void MacroAssembler::resize_frame(int offset, Register tmp) { + assert(is_simm(offset, 16), "too big an offset"); + assert_different_registers(tmp, R1_SP); + assert((offset & (frame::alignment_in_bytes-1))==0, "resize_frame: unaligned"); + // tmp <- *(SP) + ld(tmp, _abi(callers_sp), R1_SP); + // addr <- SP + offset; + // *(addr) <- tmp; + // SP <- addr + stdu(tmp, offset, R1_SP); +} + +void MacroAssembler::resize_frame_absolute(Register addr, Register tmp1, Register tmp2) { + // (addr == tmp1) || (addr == tmp2) is allowed here! + assert(tmp1 != tmp2, "must be distinct"); + + // compute offset w.r.t. current stack pointer + // tmp_1 <- addr - SP (!) + subf(tmp1, R1_SP, addr); + + // atomically update SP keeping back link. + resize_frame(tmp1/* offset */, tmp2/* tmp */); +} + +void MacroAssembler::push_frame(Register bytes, Register tmp) { +#ifdef ASSERT + assert(bytes != R0, "r0 not allowed here"); + andi_(R0, bytes, frame::alignment_in_bytes-1); + asm_assert_eq("push_frame(Reg, Reg): unaligned", 0x203); +#endif + neg(tmp, bytes); + stdux(R1_SP, R1_SP, tmp); +} + +// Push a frame of size `bytes'. +void MacroAssembler::push_frame(unsigned int bytes, Register tmp) { + long offset = align_addr(bytes, frame::alignment_in_bytes); + if (is_simm(-offset, 16)) { + stdu(R1_SP, -offset, R1_SP); + } else { + load_const(tmp, -offset); + stdux(R1_SP, R1_SP, tmp); + } +} + +// Push a frame of size `bytes' plus abi112 on top. +void MacroAssembler::push_frame_abi112(unsigned int bytes, Register tmp) { + push_frame(bytes + frame::abi_112_size, tmp); +} + +// Setup up a new C frame with a spill area for non-volatile GPRs and +// additional space for local variables. 
+void MacroAssembler::push_frame_abi112_nonvolatiles(unsigned int bytes, + Register tmp) { + push_frame(bytes + frame::abi_112_size + frame::spill_nonvolatiles_size, tmp); +} + +// Pop current C frame. +void MacroAssembler::pop_frame() { + ld(R1_SP, _abi(callers_sp), R1_SP); +} + +// Generic version of a call to C function via a function descriptor +// with variable support for C calling conventions (TOC, ENV, etc.). +// Updates and returns _last_calls_return_pc. +address MacroAssembler::branch_to(Register function_descriptor, bool and_link, bool save_toc_before_call, + bool restore_toc_after_call, bool load_toc_of_callee, bool load_env_of_callee) { + // we emit standard ptrgl glue code here + assert((function_descriptor != R0), "function_descriptor cannot be R0"); + + // retrieve necessary entries from the function descriptor + ld(R0, in_bytes(FunctionDescriptor::entry_offset()), function_descriptor); + mtctr(R0); + + if (load_toc_of_callee) { + ld(R2_TOC, in_bytes(FunctionDescriptor::toc_offset()), function_descriptor); + } + if (load_env_of_callee) { + ld(R11, in_bytes(FunctionDescriptor::env_offset()), function_descriptor); + } else if (load_toc_of_callee) { + li(R11, 0); + } + + // do a call or a branch + if (and_link) { + bctrl(); + } else { + bctr(); + } + _last_calls_return_pc = pc(); + + return _last_calls_return_pc; +} + +// Call a C function via a function descriptor and use full C calling +// conventions. +// We don't use the TOC in generated code, so there is no need to save +// and restore its value. +address MacroAssembler::call_c(Register fd) { + return branch_to(fd, /*and_link=*/true, + /*save toc=*/false, + /*restore toc=*/false, + /*load toc=*/true, + /*load env=*/true); +} + +address MacroAssembler::call_c(const FunctionDescriptor* fd, relocInfo::relocType rt) { + if (rt != relocInfo::none) { + // this call needs to be relocatable + if (!ReoptimizeCallSequences + || (rt != relocInfo::runtime_call_type && rt != relocInfo::none) + || fd == NULL // support code-size estimation + || !fd->is_friend_function() + || fd->entry() == NULL) { + // it's not a friend function as defined by class FunctionDescriptor, + // so do a full call-c here. + load_const(R11, (address)fd, R0); + + bool has_env = (fd != NULL && fd->env() != NULL); + return branch_to(R11, /*and_link=*/true, + /*save toc=*/false, + /*restore toc=*/false, + /*load toc=*/true, + /*load env=*/has_env); + } else { + // It's a friend function. Load the entry point and don't care about + // toc and env. Use an optimizable call instruction, but ensure the + // same code-size as in the case of a non-friend function. + nop(); + nop(); + nop(); + bl64_patchable(fd->entry(), rt); + _last_calls_return_pc = pc(); + return _last_calls_return_pc; + } + } else { + // This call does not need to be relocatable, do more aggressive + // optimizations. + if (!ReoptimizeCallSequences + || !fd->is_friend_function()) { + // It's not a friend function as defined by class FunctionDescriptor, + // so do a full call-c here. + load_const(R11, (address)fd, R0); + return branch_to(R11, /*and_link=*/true, + /*save toc=*/false, + /*restore toc=*/false, + /*load toc=*/true, + /*load env=*/true); + } else { + // it's a friend function, load the entry point and don't care about + // toc and env. + address dest = fd->entry(); + if (is_within_range_of_b(dest, pc())) { + bl(dest); + } else { + bl64_patchable(dest, rt); + } + _last_calls_return_pc = pc(); + return _last_calls_return_pc; + } + } +} + +// Call a C function. 
All constants needed reside in TOC. +// +// Read the address to call from the TOC. +// Read env from TOC, if fd specifies an env. +// Read new TOC from TOC. +address MacroAssembler::call_c_using_toc(const FunctionDescriptor* fd, + relocInfo::relocType rt, Register toc) { + if (!ReoptimizeCallSequences + || (rt != relocInfo::runtime_call_type && rt != relocInfo::none) + || !fd->is_friend_function()) { + // It's not a friend function as defined by class FunctionDescriptor, + // so do a full call-c here. + assert(fd->entry() != NULL, "function must be linked"); + + AddressLiteral fd_entry(fd->entry()); + load_const_from_method_toc(R11, fd_entry, toc); + mtctr(R11); + if (fd->env() == NULL) { + li(R11, 0); + nop(); + } else { + AddressLiteral fd_env(fd->env()); + load_const_from_method_toc(R11, fd_env, toc); + } + AddressLiteral fd_toc(fd->toc()); + load_toc_from_toc(R2_TOC, fd_toc, toc); + // R2_TOC is killed. + bctrl(); + _last_calls_return_pc = pc(); + } else { + // It's a friend function, load the entry point and don't care about + // toc and env. Use an optimizable call instruction, but ensure the + // same code-size as in the case of a non-friend function. + nop(); + bl64_patchable(fd->entry(), rt); + _last_calls_return_pc = pc(); + } + return _last_calls_return_pc; +} + +void MacroAssembler::call_VM_base(Register oop_result, + Register last_java_sp, + address entry_point, + bool check_exceptions) { + BLOCK_COMMENT("call_VM {"); + // Determine last_java_sp register. + if (!last_java_sp->is_valid()) { + last_java_sp = R1_SP; + } + set_top_ijava_frame_at_SP_as_last_Java_frame(last_java_sp, R11_scratch1); + + // ARG1 must hold thread address. + mr(R3_ARG1, R16_thread); + + address return_pc = call_c((FunctionDescriptor*)entry_point, relocInfo::none); + + reset_last_Java_frame(); + + // Check for pending exceptions. + if (check_exceptions) { + // We don't check for exceptions here. + ShouldNotReachHere(); + } + + // Get oop result if there is one and reset the value in the thread. + if (oop_result->is_valid()) { + get_vm_result(oop_result); + } + + _last_calls_return_pc = return_pc; + BLOCK_COMMENT("} call_VM"); +} + +void MacroAssembler::call_VM_leaf_base(address entry_point) { + BLOCK_COMMENT("call_VM_leaf {"); + call_c(CAST_FROM_FN_PTR(FunctionDescriptor*, entry_point), relocInfo::none); + BLOCK_COMMENT("} call_VM_leaf"); +} + +void MacroAssembler::call_VM(Register oop_result, address entry_point, bool check_exceptions) { + call_VM_base(oop_result, noreg, entry_point, check_exceptions); +} + +void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, + bool check_exceptions) { + // R3_ARG1 is reserved for the thread. 
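The comment above is the key to the call_VM argument layout: the JavaThread* always travels in R3_ARG1, so every Java-level argument is shifted one C argument slot to the right (arg_1 into R4_ARG2, arg_2 into R5_ARG3, as the mr_if_needed calls just below show). A hedged sketch of the C-side shape such an entry point has (names are illustrative, not from the patch):

    struct JavaThread; // opaque here
    struct oopDesc;
    typedef oopDesc* oop;

    // call_VM(oop_result, entry_point, arg_1, arg_2) ends up invoking something shaped like:
    //   R3_ARG1 = current thread (inserted by call_VM_base)
    //   R4_ARG2 = arg_1, R5_ARG3 = arg_2
    oop example_vm_entry(JavaThread* thread, oop arg_1, oop arg_2);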
+ mr_if_needed(R4_ARG2, arg_1); + call_VM(oop_result, entry_point, check_exceptions); +} + +void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, Register arg_2, + bool check_exceptions) { + // R3_ARG1 is reserved for the thread + mr_if_needed(R4_ARG2, arg_1); + assert(arg_2 != R4_ARG2, "smashed argument"); + mr_if_needed(R5_ARG3, arg_2); + call_VM(oop_result, entry_point, check_exceptions); +} + +void MacroAssembler::call_VM_leaf(address entry_point) { + call_VM_leaf_base(entry_point); +} + +void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1) { + mr_if_needed(R3_ARG1, arg_1); + call_VM_leaf(entry_point); +} + +void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1, Register arg_2) { + mr_if_needed(R3_ARG1, arg_1); + assert(arg_2 != R3_ARG1, "smashed argument"); + mr_if_needed(R4_ARG2, arg_2); + call_VM_leaf(entry_point); +} + +void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3) { + mr_if_needed(R3_ARG1, arg_1); + assert(arg_2 != R3_ARG1, "smashed argument"); + mr_if_needed(R4_ARG2, arg_2); + assert(arg_3 != R3_ARG1 && arg_3 != R4_ARG2, "smashed argument"); + mr_if_needed(R5_ARG3, arg_3); + call_VM_leaf(entry_point); +} + +// Check whether instruction is a read access to the polling page +// which was emitted by load_from_polling_page(..). +bool MacroAssembler::is_load_from_polling_page(int instruction, void* ucontext, + address* polling_address_ptr) { + if (!is_ld(instruction)) + return false; // It's not a ld. Fail. + + int rt = inv_rt_field(instruction); + int ra = inv_ra_field(instruction); + int ds = inv_ds_field(instruction); + if (!(ds == 0 && ra != 0 && rt == 0)) { + return false; // It's not a ld(r0, X, ra). Fail. + } + + if (!ucontext) { + // Set polling address. + if (polling_address_ptr != NULL) { + *polling_address_ptr = NULL; + } + return true; // No ucontext given. Can't check value of ra. Assume true. + } + +#ifdef LINUX + // Ucontext given. Check that register ra contains the address of + // the safepoing polling page. + ucontext_t* uc = (ucontext_t*) ucontext; + // Set polling address. + address addr = (address)uc->uc_mcontext.regs->gpr[ra] + (ssize_t)ds; + if (polling_address_ptr != NULL) { + *polling_address_ptr = addr; + } + return os::is_poll_address(addr); +#else + // Not on Linux, ucontext must be NULL. 
+ ShouldNotReachHere(); + return false; +#endif +} + +bool MacroAssembler::is_memory_serialization(int instruction, JavaThread* thread, void* ucontext) { +#ifdef LINUX + ucontext_t* uc = (ucontext_t*) ucontext; + + if (is_stwx(instruction) || is_stwux(instruction)) { + int ra = inv_ra_field(instruction); + int rb = inv_rb_field(instruction); + + // look up content of ra and rb in ucontext + address ra_val=(address)uc->uc_mcontext.regs->gpr[ra]; + long rb_val=(long)uc->uc_mcontext.regs->gpr[rb]; + return os::is_memory_serialize_page(thread, ra_val+rb_val); + } else if (is_stw(instruction) || is_stwu(instruction)) { + int ra = inv_ra_field(instruction); + int d1 = inv_d1_field(instruction); + + // look up content of ra in ucontext + address ra_val=(address)uc->uc_mcontext.regs->gpr[ra]; + return os::is_memory_serialize_page(thread, ra_val+d1); + } else { + return false; + } +#else + // workaround not needed on !LINUX :-) + ShouldNotCallThis(); + return false; +#endif +} + +void MacroAssembler::bang_stack_with_offset(int offset) { + // When increasing the stack, the old stack pointer will be written + // to the new top of stack according to the PPC64 abi. + // Therefore, stack banging is not necessary when increasing + // the stack by <= os::vm_page_size() bytes. + // When increasing the stack by a larger amount, this method is + // called repeatedly to bang the intermediate pages. + + // Stack grows down, caller passes positive offset. + assert(offset > 0, "must bang with positive offset"); + + long stdoffset = -offset; + + if (is_simm(stdoffset, 16)) { + // Signed 16 bit offset, a simple std is ok. + if (UseLoadInstructionsForStackBangingPPC64) { + ld(R0, (int)(signed short)stdoffset, R1_SP); + } else { + std(R0,(int)(signed short)stdoffset, R1_SP); + } + } else if (is_simm(stdoffset, 31)) { + const int hi = MacroAssembler::largeoffset_si16_si16_hi(stdoffset); + const int lo = MacroAssembler::largeoffset_si16_si16_lo(stdoffset); + + Register tmp = R11; + addis(tmp, R1_SP, hi); + if (UseLoadInstructionsForStackBangingPPC64) { + ld(R0, lo, tmp); + } else { + std(R0, lo, tmp); + } + } else { + ShouldNotReachHere(); + } +} + +// If instruction is a stack bang of the form +// std R0, x(Ry), (see bang_stack_with_offset()) +// stdu R1_SP, x(R1_SP), (see push_frame(), resize_frame()) +// or stdux R1_SP, Rx, R1_SP (see push_frame(), resize_frame()) +// return the banged address. Otherwise, return 0. +address MacroAssembler::get_stack_bang_address(int instruction, void *ucontext) { +#ifdef LINUX + ucontext_t* uc = (ucontext_t*) ucontext; + int rs = inv_rs_field(instruction); + int ra = inv_ra_field(instruction); + if ( (is_ld(instruction) && rs == 0 && UseLoadInstructionsForStackBangingPPC64) + || (is_std(instruction) && rs == 0 && !UseLoadInstructionsForStackBangingPPC64) + || (is_stdu(instruction) && rs == 1)) { + int ds = inv_ds_field(instruction); + // return banged address + return ds+(address)uc->uc_mcontext.regs->gpr[ra]; + } else if (is_stdux(instruction) && rs == 1) { + int rb = inv_rb_field(instruction); + address sp = (address)uc->uc_mcontext.regs->gpr[1]; + long rb_val = (long)uc->uc_mcontext.regs->gpr[rb]; + return ra != 1 || rb_val >= 0 ? NULL // not a stack bang + : sp + rb_val; // banged address + } + return NULL; // not a stack bang +#else + // workaround not needed on !LINUX :-) + ShouldNotCallThis(); + return NULL; +#endif +} + +// CmpxchgX sets condition register to cmpX(current, compare). 
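Before the compare-and-swap emitters that follow, one note on the stack-banging code above: when an offset does not fit a signed 16-bit displacement, bang_stack_with_offset() splits it into an addis/std pair via largeoffset_si16_si16_hi/lo. A minimal sketch of that decomposition (the helpers below are illustrative re-implementations under that assumption, not the patch's code):

    #include <cassert>
    #include <cstdint>
    #include <initializer_list>

    // Split a signed 31-bit offset so that hi*0x10000 + lo == offset, where lo
    // is interpreted as the signed 16-bit displacement of the std/ld instruction.
    static int32_t lo16(int32_t offset) { return (int16_t)(offset & 0xffff); }
    static int32_t hi16(int32_t offset) { return (offset - lo16(offset)) >> 16; }

    int main() {
      for (int32_t off : { -0x12345678, -0x9000, -4096, 4096, 0x12345678 }) {
        assert((int64_t)hi16(off) * 0x10000 + lo16(off) == off);
      }
      return 0;
    }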
+void MacroAssembler::cmpxchgw(ConditionRegister flag, Register dest_current_value, + Register compare_value, Register exchange_value, + Register addr_base, int semantics, bool cmpxchgx_hint, + Register int_flag_success, bool contention_hint) { + Label retry; + Label failed; + Label done; + + // Save one branch if result is returned via register and + // result register is different from the other ones. + bool use_result_reg = (int_flag_success != noreg); + bool preset_result_reg = (int_flag_success != dest_current_value && int_flag_success != compare_value && + int_flag_success != exchange_value && int_flag_success != addr_base); + + // release/fence semantics + if (semantics & MemBarRel) { + release(); + } + + if (use_result_reg && preset_result_reg) { + li(int_flag_success, 0); // preset (assume cas failed) + } + + // Add simple guard in order to reduce risk of starving under high contention (recommended by IBM). + if (contention_hint) { // Don't try to reserve if cmp fails. + lwz(dest_current_value, 0, addr_base); + cmpw(flag, dest_current_value, compare_value); + bne(flag, failed); + } + + // atomic emulation loop + bind(retry); + + lwarx(dest_current_value, addr_base, cmpxchgx_hint); + cmpw(flag, dest_current_value, compare_value); + if (UseStaticBranchPredictionInCompareAndSwapPPC64) { + bne_predict_not_taken(flag, failed); + } else { + bne( flag, failed); + } + // branch to done => (flag == ne), (dest_current_value != compare_value) + // fall through => (flag == eq), (dest_current_value == compare_value) + + stwcx_(exchange_value, addr_base); + if (UseStaticBranchPredictionInCompareAndSwapPPC64) { + bne_predict_not_taken(CCR0, retry); // StXcx_ sets CCR0. + } else { + bne( CCR0, retry); // StXcx_ sets CCR0. + } + // fall through => (flag == eq), (dest_current_value == compare_value), (swapped) + + // Result in register (must do this at the end because int_flag_success can be the + // same register as one above). + if (use_result_reg) { + li(int_flag_success, 1); + } + + if (semantics & MemBarFenceAfter) { + fence(); + } else if (semantics & MemBarAcq) { + isync(); + } + + if (use_result_reg && !preset_result_reg) { + b(done); + } + + bind(failed); + if (use_result_reg && !preset_result_reg) { + li(int_flag_success, 0); + } + + bind(done); + // (flag == ne) => (dest_current_value != compare_value), (!swapped) + // (flag == eq) => (dest_current_value == compare_value), ( swapped) +} + +// Preforms atomic compare exchange: +// if (compare_value == *addr_base) +// *addr_base = exchange_value +// int_flag_success = 1; +// else +// int_flag_success = 0; +// +// ConditionRegister flag = cmp(compare_value, *addr_base) +// Register dest_current_value = *addr_base +// Register compare_value Used to compare with value in memory +// Register exchange_value Written to memory if compare_value == *addr_base +// Register addr_base The memory location to compareXChange +// Register int_flag_success Set to 1 if exchange_value was written to *addr_base +// +// To avoid the costly compare exchange the value is tested beforehand. +// Several special cases exist to avoid that unnecessary information is generated. +// +void MacroAssembler::cmpxchgd(ConditionRegister flag, + Register dest_current_value, Register compare_value, Register exchange_value, + Register addr_base, int semantics, bool cmpxchgx_hint, + Register int_flag_success, Label* failed_ext, bool contention_hint) { + Label retry; + Label failed_int; + Label& failed = (failed_ext != NULL) ? 
+                                      *failed_ext : failed_int;
+  Label done;
+
+  // Save one branch if result is returned via register and result register is different from the other ones.
+  bool use_result_reg = (int_flag_success!=noreg);
+  bool preset_result_reg = (int_flag_success!=dest_current_value && int_flag_success!=compare_value &&
+                            int_flag_success!=exchange_value && int_flag_success!=addr_base);
+  assert(int_flag_success == noreg || failed_ext == NULL, "cannot have both");
+
+  // release/fence semantics
+  if (semantics & MemBarRel) {
+    release();
+  }
+
+  if (use_result_reg && preset_result_reg) {
+    li(int_flag_success, 0); // preset (assume cas failed)
+  }
+
+  // Add simple guard in order to reduce risk of starving under high contention (recommended by IBM).
+  if (contention_hint) { // Don't try to reserve if cmp fails.
+    ld(dest_current_value, 0, addr_base);
+    cmpd(flag, dest_current_value, compare_value);
+    bne(flag, failed);
+  }
+
+  // atomic emulation loop
+  bind(retry);
+
+  ldarx(dest_current_value, addr_base, cmpxchgx_hint);
+  cmpd(flag, dest_current_value, compare_value);
+  if (UseStaticBranchPredictionInCompareAndSwapPPC64) {
+    bne_predict_not_taken(flag, failed);
+  } else {
+    bne( flag, failed);
+  }
+
+  stdcx_(exchange_value, addr_base);
+  if (UseStaticBranchPredictionInCompareAndSwapPPC64) {
+    bne_predict_not_taken(CCR0, retry); // stXcx_ sets CCR0
+  } else {
+    bne( CCR0, retry); // stXcx_ sets CCR0
+  }
+
+  // result in register (must do this at the end because int_flag_success can be the same register as one above)
+  if (use_result_reg) {
+    li(int_flag_success, 1);
+  }
+
+  // POWER6 doesn't need isync in CAS.
+  // Always emit isync to be on the safe side.
+  if (semantics & MemBarFenceAfter) {
+    fence();
+  } else if (semantics & MemBarAcq) {
+    isync();
+  }
+
+  if (use_result_reg && !preset_result_reg) {
+    b(done);
+  }
+
+  bind(failed_int);
+  if (use_result_reg && !preset_result_reg) {
+    li(int_flag_success, 0);
+  }
+
+  bind(done);
+  // (flag == ne) => (dest_current_value != compare_value), (!swapped)
+  // (flag == eq) => (dest_current_value == compare_value), ( swapped)
+}
+
+// Look up the method for a megamorphic invokeinterface call.
+// The target method is determined by <intf_klass, itable_index>.
+// The receiver klass is in recv_klass.
+// On success, the result will be in method_result, and execution falls through.
+// On failure, execution transfers to the given label.
+void MacroAssembler::lookup_interface_method(Register recv_klass,
+                                             Register intf_klass,
+                                             RegisterOrConstant itable_index,
+                                             Register method_result,
+                                             Register scan_temp,
+                                             Register sethi_temp,
+                                             Label& L_no_such_interface) {
+  assert_different_registers(recv_klass, intf_klass, method_result, scan_temp);
+  assert(itable_index.is_constant() || itable_index.as_register() == method_result,
+         "caller must use same register for non-constant itable index as for method");
+
+  // Compute start of first itableOffsetEntry (which is at the end of the vtable).
+  int vtable_base = InstanceKlass::vtable_start_offset() * wordSize;
+  int itentry_off = itableMethodEntry::method_offset_in_bytes();
+  int logMEsize = exact_log2(itableMethodEntry::size() * wordSize);
+  int scan_step = itableOffsetEntry::size() * wordSize;
+  int log_vte_size= exact_log2(vtableEntry::size() * wordSize);
+
+  lwz(scan_temp, InstanceKlass::vtable_length_offset() * wordSize, recv_klass);
+  // %%% We should store the aligned, prescaled offset in the klassoop.
+  // Then the next several instructions would fold away.
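As an aside before the itable scan below: the cmpxchgw/cmpxchgd emitters above open-code compare-and-swap as a lwarx/ldarx ... stwcx_/stdcx_ retry loop with an optional release() before and isync()/fence() after. As a reading aid only, the observable semantics are close to a C++11 compare_exchange (a hedged sketch, not what the macros emit):

    #include <atomic>
    #include <cstdint>

    // Roughly what cmpxchgd with MemBarRel | MemBarAcq semantics achieves:
    // returns true (int_flag_success = 1) iff *addr_base was swapped; on
    // failure, dest_current_value receives the value found in memory.
    static bool cmpxchgd_sketch(std::atomic<int64_t>& addr_base,
                                int64_t& dest_current_value,
                                int64_t compare_value,
                                int64_t exchange_value) {
      dest_current_value = compare_value;
      return addr_base.compare_exchange_strong(dest_current_value, exchange_value,
                                               std::memory_order_acq_rel,
                                               std::memory_order_acquire);
    }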
+
+  sldi(scan_temp, scan_temp, log_vte_size);
+  addi(scan_temp, scan_temp, vtable_base);
+  add(scan_temp, recv_klass, scan_temp);
+
+  // Adjust recv_klass by scaled itable_index, so we can free itable_index.
+  if (itable_index.is_register()) {
+    Register itable_offset = itable_index.as_register();
+    sldi(itable_offset, itable_offset, logMEsize);
+    if (itentry_off) addi(itable_offset, itable_offset, itentry_off);
+    add(recv_klass, itable_offset, recv_klass);
+  } else {
+    long itable_offset = (long)itable_index.as_constant();
+    load_const_optimized(sethi_temp, (itable_offset<<logMEsize)+itentry_off);
+    add(recv_klass, sethi_temp, recv_klass);
+  }
+
+  // for (scan = klass->itable(); scan->interface() != NULL; scan += scan_step) {
+  //   if (scan->interface() == intf) {
+  //     result = (klass + scan->offset() + itable_index);
+  //   }
+  // }
+  Label search, found_method;
+
+  for (int peel = 1; peel >= 0; peel--) {
+    // %%%% Could load both offset and interface in one ldx, if they were
+    // in the opposite order. This would save a load.
+    ld(method_result, itableOffsetEntry::interface_offset_in_bytes(), scan_temp);
+
+    // Check that this entry is non-null. A null entry means that
+    // the receiver class doesn't implement the interface, and wasn't the
+    // same as when the caller was compiled.
+    cmpd(CCR0, method_result, intf_klass);
+
+    if (peel) {
+      beq(CCR0, found_method);
+    } else {
+      bne(CCR0, search);
+      // (invert the test to fall through to found_method...)
+    }
+
+    if (!peel) break;
+
+    bind(search);
+
+    cmpdi(CCR0, method_result, 0);
+    beq(CCR0, L_no_such_interface);
+    addi(scan_temp, scan_temp, scan_step);
+  }
+
+  bind(found_method);
+
+  // Got a hit.
+  int ito_offset = itableOffsetEntry::offset_offset_in_bytes();
+  lwz(scan_temp, ito_offset, scan_temp);
+  ldx(method_result, scan_temp, recv_klass);
+}
+
+// virtual method calling
+void MacroAssembler::lookup_virtual_method(Register recv_klass,
+                                           RegisterOrConstant vtable_index,
+                                           Register method_result) {
+
+  assert_different_registers(recv_klass, method_result, vtable_index.register_or_noreg());
+
+  const int base = InstanceKlass::vtable_start_offset() * wordSize;
+  assert(vtableEntry::size() * wordSize == wordSize, "adjust the scaling in the code below");
+
+  if (vtable_index.is_register()) {
+    sldi(vtable_index.as_register(), vtable_index.as_register(), LogBytesPerWord);
+    add(recv_klass, vtable_index.as_register(), recv_klass);
+  } else {
+    addi(recv_klass, recv_klass, vtable_index.as_constant() << LogBytesPerWord);
+  }
+  ld(R19_method, base + vtableEntry::method_offset_in_bytes(), recv_klass);
+}
+
+/////////////////////////////////////////// subtype checking ////////////////////////////////////////////
+
+void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass,
+                                                   Register super_klass,
+                                                   Register temp1_reg,
+                                                   Register temp2_reg,
+                                                   Label& L_success,
+                                                   Label& L_failure) {
+
+  const Register check_cache_offset = temp1_reg;
+  const Register cached_super = temp2_reg;
+
+  assert_different_registers(sub_klass, super_klass, check_cache_offset, cached_super);
+
+  int sco_offset = in_bytes(Klass::super_check_offset_offset());
+  int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
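For comparison with the itable scan, lookup_virtual_method above boils down to one scaled load; before continuing with the subtype check below, here is the address arithmetic in C terms (types and names are simplified placeholders, not HotSpot's real layout):

    #include <cstdint>

    // method = *(Method**)((char*)recv_klass + vtable_start + index * wordSize + method_offset)
    static void* vtable_method_sketch(const char* recv_klass,
                                      intptr_t vtable_start_offset_bytes,
                                      intptr_t vtable_index,
                                      intptr_t method_offset_in_entry) {
      const char* entry = recv_klass + vtable_start_offset_bytes
                        + vtable_index * (intptr_t)sizeof(void*); // vtableEntry::size() == 1 word
      return *(void* const*)(entry + method_offset_in_entry);
    }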
+  // If the pointers are equal, we are done (e.g., String[] elements).
+  // This self-check enables sharing of secondary supertype arrays among
+  // non-primary types such as array-of-interface. Otherwise, each such
+  // type would need its own customized SSA.
+  // We move this check to the front of the fast path because many
+  // type checks are in fact trivially successful in this manner,
+  // so we get a nicely predicted branch right at the start of the check.
+  cmpd(CCR0, sub_klass, super_klass);
+  beq(CCR0, L_success);
+
+  // Check the supertype display:
+  lwz(check_cache_offset, sco_offset, super_klass);
+  // The loaded value is the offset from KlassOopDesc.
+
+  ldx(cached_super, check_cache_offset, sub_klass);
+  cmpd(CCR0, cached_super, super_klass);
+  beq(CCR0, L_success);
+
+  // This check has worked decisively for primary supers.
+  // Secondary supers are sought in the super_cache ('super_cache_addr').
+  // (Secondary supers are interfaces and very deeply nested subtypes.)
+  // This works in the same check above because of a tricky aliasing
+  // between the super_cache and the primary super display elements.
+  // (The 'super_check_addr' can address either, as the case requires.)
+  // Note that the cache is updated below if it does not help us find
+  // what we need immediately.
+  // So if it was a primary super, we can just fail immediately.
+  // Otherwise, it's the slow path for us (no success at this point).
+
+  cmpwi(CCR0, check_cache_offset, sc_offset);
+  bne(CCR0, L_failure);
+  // bind(slow_path); // fallthru
+}
+
+void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass,
+                                                   Register super_klass,
+                                                   Register temp1_reg,
+                                                   Register temp2_reg,
+                                                   Label* L_success,
+                                                   Register result_reg) {
+  const Register array_ptr = temp1_reg; // current value from cache array
+  const Register temp = temp2_reg;
+
+  assert_different_registers(sub_klass, super_klass, array_ptr, temp);
+
+  int source_offset = in_bytes(Klass::secondary_supers_offset());
+  int target_offset = in_bytes(Klass::secondary_super_cache_offset());
+
+  int length_offset = Array<Klass*>::length_offset_in_bytes();
+  int base_offset = Array<Klass*>::base_offset_in_bytes();
+
+  Label hit, loop, failure, fallthru;
+
+  ld(array_ptr, source_offset, sub_klass);
+
+  //assert(4 == arrayOopDesc::length_length_in_bytes(), "precondition violated.");
+  lwz(temp, length_offset, array_ptr);
+  cmpwi(CCR0, temp, 0);
+  beq(CCR0, result_reg!=noreg ? failure : fallthru); // length 0
+
+  mtctr(temp); // load ctr
+
+  bind(loop);
+  // Oops in table are NO MORE compressed.
+  ld(temp, base_offset, array_ptr);
+  cmpd(CCR0, temp, super_klass);
+  beq(CCR0, hit);
+  addi(array_ptr, array_ptr, BytesPerWord);
+  bdnz(loop);
+
+  bind(failure);
+  if (result_reg!=noreg) li(result_reg, 1); // load non-zero result (indicates a miss)
+  b(fallthru);
+
+  bind(hit);
+  std(super_klass, target_offset, sub_klass); // save result to cache
+  if (result_reg != noreg) li(result_reg, 0); // load zero result (indicates a hit)
+  if (L_success != NULL) b(*L_success);
+
+  bind(fallthru);
+}
+
+// Try fast path, then go to slow one if not successful
+void MacroAssembler::check_klass_subtype(Register sub_klass,
+                                         Register super_klass,
+                                         Register temp1_reg,
+                                         Register temp2_reg,
+                                         Label& L_success) {
+  Label L_failure;
+  check_klass_subtype_fast_path(sub_klass, super_klass, temp1_reg, temp2_reg, L_success, L_failure);
+  check_klass_subtype_slow_path(sub_klass, super_klass, temp1_reg, temp2_reg, &L_success);
+  bind(L_failure); // Fallthru if not successful.
+}
+
+void MacroAssembler::check_method_handle_type(Register mtype_reg, Register mh_reg,
+                                              Register temp_reg,
+                                              Label& wrong_method_type) {
+  assert_different_registers(mtype_reg, mh_reg, temp_reg);
+  // Compare method type against that of the receiver.
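Taken together, the fast and slow paths above implement HotSpot's usual two-level subtype test: a self check, then the displayed super / secondary-super-cache slot, then a linear scan of the secondary supers that refills the cache. A hedged C-style sketch of that control flow, before the method-handle type check continues below (field names are simplified, not the real Klass layout):

    #include <cstddef>

    struct KlassSketch {                    // illustrative stand-in for Klass
      int           super_check_offset;     // where this type is "displayed" in subtypes
      KlassSketch*  secondary_super_cache;
      int           secondary_supers_len;
      KlassSketch** secondary_supers;
    };

    static bool is_subtype_sketch(KlassSketch* sub, KlassSketch* super,
                                  int secondary_super_cache_offset) {
      if (sub == super) return true;                               // self check
      int sco = super->super_check_offset;
      // The same byte-offset load reads either a primary display slot or the
      // secondary-super cache; this is the "tricky aliasing" the comment mentions.
      KlassSketch* slot = *(KlassSketch**)((char*)sub + sco);
      if (slot == super) return true;                              // fast path hit
      if (sco != secondary_super_cache_offset) return false;       // primary super: decisive miss
      for (int i = 0; i < sub->secondary_supers_len; i++) {        // slow path: linear scan
        if (sub->secondary_supers[i] == super) {
          sub->secondary_super_cache = super;                      // refill the cache
          return true;
        }
      }
      return false;
    }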
+ load_heap_oop_not_null(temp_reg, delayed_value(java_lang_invoke_MethodHandle::type_offset_in_bytes, temp_reg), mh_reg); + cmpd(CCR0, temp_reg, mtype_reg); + bne(CCR0, wrong_method_type); +} + +RegisterOrConstant MacroAssembler::argument_offset(RegisterOrConstant arg_slot, + Register temp_reg, + int extra_slot_offset) { + // cf. TemplateTable::prepare_invoke(), if (load_receiver). + int stackElementSize = Interpreter::stackElementSize; + int offset = extra_slot_offset * stackElementSize; + if (arg_slot.is_constant()) { + offset += arg_slot.as_constant() * stackElementSize; + return offset; + } else { + assert(temp_reg != noreg, "must specify"); + sldi(temp_reg, arg_slot.as_register(), exact_log2(stackElementSize)); + if (offset != 0) + addi(temp_reg, temp_reg, offset); + return temp_reg; + } +} + +void MacroAssembler::biased_locking_enter(ConditionRegister cr_reg, Register obj_reg, + Register mark_reg, Register temp_reg, + Register temp2_reg, Label& done, Label* slow_case) { + assert(UseBiasedLocking, "why call this otherwise?"); + +#ifdef ASSERT + assert_different_registers(obj_reg, mark_reg, temp_reg, temp2_reg); +#endif + + Label cas_label; + + // Branch to done if fast path fails and no slow_case provided. + Label *slow_case_int = (slow_case != NULL) ? slow_case : &done; + + // Biased locking + // See whether the lock is currently biased toward our thread and + // whether the epoch is still valid + // Note that the runtime guarantees sufficient alignment of JavaThread + // pointers to allow age to be placed into low bits + assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, + "biased locking makes assumptions about bit layout"); + + if (PrintBiasedLockingStatistics) { + load_const(temp_reg, (address) BiasedLocking::total_entry_count_addr(), temp2_reg); + lwz(temp2_reg, 0, temp_reg); + addi(temp2_reg, temp2_reg, 1); + stw(temp2_reg, 0, temp_reg); + } + + andi(temp_reg, mark_reg, markOopDesc::biased_lock_mask_in_place); + cmpwi(cr_reg, temp_reg, markOopDesc::biased_lock_pattern); + bne(cr_reg, cas_label); + + load_klass_with_trap_null_check(temp_reg, obj_reg); + + load_const_optimized(temp2_reg, ~((int) markOopDesc::age_mask_in_place)); + ld(temp_reg, in_bytes(Klass::prototype_header_offset()), temp_reg); + orr(temp_reg, R16_thread, temp_reg); + xorr(temp_reg, mark_reg, temp_reg); + andr(temp_reg, temp_reg, temp2_reg); + cmpdi(cr_reg, temp_reg, 0); + if (PrintBiasedLockingStatistics) { + Label l; + bne(cr_reg, l); + load_const(mark_reg, (address) BiasedLocking::biased_lock_entry_count_addr()); + lwz(temp2_reg, 0, mark_reg); + addi(temp2_reg, temp2_reg, 1); + stw(temp2_reg, 0, mark_reg); + // restore mark_reg + ld(mark_reg, oopDesc::mark_offset_in_bytes(), obj_reg); + bind(l); + } + beq(cr_reg, done); + + Label try_revoke_bias; + Label try_rebias; + + // At this point we know that the header has the bias pattern and + // that we are not the bias owner in the current epoch. We need to + // figure out more details about the state of the header in order to + // know what operations can be legally performed on the object's + // header. + + // If the low three bits in the xor result aren't clear, that means + // the prototype header is no longer biased and we have to revoke + // the bias on this object. + andi(temp2_reg, temp_reg, markOopDesc::biased_lock_mask_in_place); + cmpwi(cr_reg, temp2_reg, 0); + bne(cr_reg, try_revoke_bias); + + // Biasing is still enabled for this data type. 
See whether the + // epoch of the current bias is still valid, meaning that the epoch + // bits of the mark word are equal to the epoch bits of the + // prototype header. (Note that the prototype header's epoch bits + // only change at a safepoint.) If not, attempt to rebias the object + // toward the current thread. Note that we must be absolutely sure + // that the current epoch is invalid in order to do this because + // otherwise the manipulations it performs on the mark word are + // illegal. + + int shift_amount = 64 - markOopDesc::epoch_shift; + // rotate epoch bits to right (little) end and set other bits to 0 + // [ big part | epoch | little part ] -> [ 0..0 | epoch ] + rldicl_(temp2_reg, temp_reg, shift_amount, 64 - markOopDesc::epoch_bits); + // branch if epoch bits are != 0, i.e. they differ, because the epoch has been incremented + bne(CCR0, try_rebias); + + // The epoch of the current bias is still valid but we know nothing + // about the owner; it might be set or it might be clear. Try to + // acquire the bias of the object using an atomic operation. If this + // fails we will go in to the runtime to revoke the object's bias. + // Note that we first construct the presumed unbiased header so we + // don't accidentally blow away another thread's valid bias. + andi(mark_reg, mark_reg, (markOopDesc::biased_lock_mask_in_place | + markOopDesc::age_mask_in_place | + markOopDesc::epoch_mask_in_place)); + orr(temp_reg, R16_thread, mark_reg); + + assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0"); + + // CmpxchgX sets cr_reg to cmpX(temp2_reg, mark_reg). + fence(); // TODO: replace by MacroAssembler::MemBarRel | MacroAssembler::MemBarAcq ? + cmpxchgd(/*flag=*/cr_reg, /*current_value=*/temp2_reg, + /*compare_value=*/mark_reg, /*exchange_value=*/temp_reg, + /*where=*/obj_reg, + MacroAssembler::MemBarAcq, + MacroAssembler::cmpxchgx_hint_acquire_lock(), + noreg, slow_case_int); // bail out if failed + + // If the biasing toward our thread failed, this means that + // another thread succeeded in biasing it toward itself and we + // need to revoke that bias. The revocation will occur in the + // interpreter runtime in the slow case. + if (PrintBiasedLockingStatistics) { + load_const(temp_reg, (address) BiasedLocking::anonymously_biased_lock_entry_count_addr(), temp2_reg); + lwz(temp2_reg, 0, temp_reg); + addi(temp2_reg, temp2_reg, 1); + stw(temp2_reg, 0, temp_reg); + } + b(done); + + bind(try_rebias); + // At this point we know the epoch has expired, meaning that the + // current "bias owner", if any, is actually invalid. Under these + // circumstances _only_, we are allowed to use the current header's + // value as the comparison value when doing the cas to acquire the + // bias in the current epoch. In other words, we allow transfer of + // the bias from one thread to another directly in this situation. + andi(temp_reg, mark_reg, markOopDesc::age_mask_in_place); + orr(temp_reg, R16_thread, temp_reg); + load_klass_with_trap_null_check(temp2_reg, obj_reg); + ld(temp2_reg, in_bytes(Klass::prototype_header_offset()), temp2_reg); + orr(temp_reg, temp_reg, temp2_reg); + + assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0"); + + // CmpxchgX sets cr_reg to cmpX(temp2_reg, mark_reg). + fence(); // TODO: replace by MacroAssembler::MemBarRel | MacroAssembler::MemBarAcq ? 
+ cmpxchgd(/*flag=*/cr_reg, /*current_value=*/temp2_reg, + /*compare_value=*/mark_reg, /*exchange_value=*/temp_reg, + /*where=*/obj_reg, + MacroAssembler::MemBarAcq, + MacroAssembler::cmpxchgx_hint_acquire_lock(), + noreg, slow_case_int); // bail out if failed + + // If the biasing toward our thread failed, this means that + // another thread succeeded in biasing it toward itself and we + // need to revoke that bias. The revocation will occur in the + // interpreter runtime in the slow case. + if (PrintBiasedLockingStatistics) { + load_const(temp_reg, (address) BiasedLocking::rebiased_lock_entry_count_addr(), temp2_reg); + lwz(temp2_reg, 0, temp_reg); + addi(temp2_reg, temp2_reg, 1); + stw(temp2_reg, 0, temp_reg); + } + b(done); + + bind(try_revoke_bias); + // The prototype mark in the klass doesn't have the bias bit set any + // more, indicating that objects of this data type are not supposed + // to be biased any more. We are going to try to reset the mark of + // this object to the prototype value and fall through to the + // CAS-based locking scheme. Note that if our CAS fails, it means + // that another thread raced us for the privilege of revoking the + // bias of this particular object, so it's okay to continue in the + // normal locking code. + load_klass_with_trap_null_check(temp_reg, obj_reg); + ld(temp_reg, in_bytes(Klass::prototype_header_offset()), temp_reg); + andi(temp2_reg, mark_reg, markOopDesc::age_mask_in_place); + orr(temp_reg, temp_reg, temp2_reg); + + assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0"); + + // CmpxchgX sets cr_reg to cmpX(temp2_reg, mark_reg). + fence(); // TODO: replace by MacroAssembler::MemBarRel | MacroAssembler::MemBarAcq ? + cmpxchgd(/*flag=*/cr_reg, /*current_value=*/temp2_reg, + /*compare_value=*/mark_reg, /*exchange_value=*/temp_reg, + /*where=*/obj_reg, + MacroAssembler::MemBarAcq, + MacroAssembler::cmpxchgx_hint_acquire_lock()); + + // reload markOop in mark_reg before continuing with lightweight locking + ld(mark_reg, oopDesc::mark_offset_in_bytes(), obj_reg); + + // Fall through to the normal CAS-based lock, because no matter what + // the result of the above CAS, some thread must have succeeded in + // removing the bias bit from the object's header. + if (PrintBiasedLockingStatistics) { + Label l; + bne(cr_reg, l); + load_const(temp_reg, (address) BiasedLocking::revoked_lock_entry_count_addr(), temp2_reg); + lwz(temp2_reg, 0, temp_reg); + addi(temp2_reg, temp2_reg, 1); + stw(temp2_reg, 0, temp_reg); + bind(l); + } + + bind(cas_label); +} + +void MacroAssembler::biased_locking_exit (ConditionRegister cr_reg, Register mark_addr, Register temp_reg, Label& done) { + // Check for biased locking unlock case, which is a no-op + // Note: we do not have to check the thread ID for two reasons. + // First, the interpreter checks for IllegalMonitorStateException at + // a higher level. Second, if the bias was revoked while we held the + // lock, the object could not be rebiased toward another thread, so + // the bias bit would be clear. + + ld(temp_reg, 0, mark_addr); + andi(temp_reg, temp_reg, markOopDesc::biased_lock_mask_in_place); + + cmpwi(cr_reg, temp_reg, markOopDesc::biased_lock_pattern); + beq(cr_reg, done); +} + +// "The box" is the space on the stack where we copy the object mark. 
+void MacroAssembler::compiler_fast_lock_object(ConditionRegister flag, Register oop, Register box, + Register temp, Register displaced_header, Register current_header) { + assert_different_registers(oop, box, temp, displaced_header, current_header); + assert(flag != CCR0, "bad condition register"); + Label cont; + Label object_has_monitor; + Label cas_failed; + + // Load markOop from object into displaced_header. + ld(displaced_header, oopDesc::mark_offset_in_bytes(), oop); + + + // Always do locking in runtime. + if (EmitSync & 0x01) { + cmpdi(flag, oop, 0); // Oop can't be 0 here => always false. + return; + } + + if (UseBiasedLocking) { + biased_locking_enter(flag, oop, displaced_header, temp, current_header, cont); + } + + // Handle existing monitor. + if ((EmitSync & 0x02) == 0) { + // The object has an existing monitor iff (mark & monitor_value) != 0. + andi_(temp, displaced_header, markOopDesc::monitor_value); + bne(CCR0, object_has_monitor); + } + + // Set displaced_header to be (markOop of object | UNLOCK_VALUE). + ori(displaced_header, displaced_header, markOopDesc::unlocked_value); + + // Load Compare Value application register. + + // Initialize the box. (Must happen before we update the object mark!) + std(displaced_header, BasicLock::displaced_header_offset_in_bytes(), box); + + // Must fence, otherwise, preceding store(s) may float below cmpxchg. + // Compare object markOop with mark and if equal exchange scratch1 with object markOop. + // CmpxchgX sets cr_reg to cmpX(current, displaced). + cmpxchgd(/*flag=*/flag, + /*current_value=*/current_header, + /*compare_value=*/displaced_header, + /*exchange_value=*/box, + /*where=*/oop, + MacroAssembler::MemBarRel | MacroAssembler::MemBarAcq, + MacroAssembler::cmpxchgx_hint_acquire_lock(), + noreg, + &cas_failed); + assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0"); + + // If the compare-and-exchange succeeded, then we found an unlocked + // object and we have now locked it. + b(cont); + + bind(cas_failed); + // We did not see an unlocked object so try the fast recursive case. + + // Check if the owner is self by comparing the value in the markOop of object + // (current_header) with the stack pointer. + sub(current_header, current_header, R1_SP); + load_const_optimized(temp, (address) (~(os::vm_page_size()-1) | + markOopDesc::lock_mask_in_place)); + + and_(R0/*==0?*/, current_header, temp); + // If condition is true we are cont and hence we can store 0 as the + // displaced header in the box, which indicates that it is a recursive lock. + mcrf(flag,CCR0); + std(R0/*==0, perhaps*/, BasicLock::displaced_header_offset_in_bytes(), box); + + // Handle existing monitor. + if ((EmitSync & 0x02) == 0) { + b(cont); + + bind(object_has_monitor); + // The object's monitor m is unlocked iff m->owner == NULL, + // otherwise m->owner may contain a thread or a stack address. + // + // Try to CAS m->owner from NULL to current thread. + addi(temp, displaced_header, ObjectMonitor::owner_offset_in_bytes()-markOopDesc::monitor_value); + li(displaced_header, 0); + // CmpxchgX sets flag to cmpX(current, displaced). + cmpxchgd(/*flag=*/flag, + /*current_value=*/current_header, + /*compare_value=*/displaced_header, + /*exchange_value=*/R16_thread, + /*where=*/temp, + MacroAssembler::MemBarRel | MacroAssembler::MemBarAcq, + MacroAssembler::cmpxchgx_hint_acquire_lock()); + + // Store a non-null value into the box. 
+ std(box, BasicLock::displaced_header_offset_in_bytes(), box); + +# ifdef ASSERT + bne(flag, cont); + // We have acquired the monitor, check some invariants. + addi(/*monitor=*/temp, temp, -ObjectMonitor::owner_offset_in_bytes()); + // Invariant 1: _recursions should be 0. + //assert(ObjectMonitor::recursions_size_in_bytes() == 8, "unexpected size"); + asm_assert_mem8_is_zero(ObjectMonitor::recursions_offset_in_bytes(), temp, + "monitor->_recursions should be 0", -1); + // Invariant 2: OwnerIsThread shouldn't be 0. + //assert(ObjectMonitor::OwnerIsThread_size_in_bytes() == 4, "unexpected size"); + //asm_assert_mem4_isnot_zero(ObjectMonitor::OwnerIsThread_offset_in_bytes(), temp, + // "monitor->OwnerIsThread shouldn't be 0", -1); +# endif + } + + bind(cont); + // flag == EQ indicates success + // flag == NE indicates failure +} + +void MacroAssembler::compiler_fast_unlock_object(ConditionRegister flag, Register oop, Register box, + Register temp, Register displaced_header, Register current_header) { + assert_different_registers(oop, box, temp, displaced_header, current_header); + assert(flag != CCR0, "bad condition register"); + Label cont; + Label object_has_monitor; + + // Always do locking in runtime. + if (EmitSync & 0x01) { + cmpdi(flag, oop, 0); // Oop can't be 0 here => always false. + return; + } + + if (UseBiasedLocking) { + biased_locking_exit(flag, oop, current_header, cont); + } + + // Find the lock address and load the displaced header from the stack. + ld(displaced_header, BasicLock::displaced_header_offset_in_bytes(), box); + + // If the displaced header is 0, we have a recursive unlock. + cmpdi(flag, displaced_header, 0); + beq(flag, cont); + + // Handle existing monitor. + if ((EmitSync & 0x02) == 0) { + // The object has an existing monitor iff (mark & monitor_value) != 0. + ld(current_header, oopDesc::mark_offset_in_bytes(), oop); + andi(temp, current_header, markOopDesc::monitor_value); + cmpdi(flag, temp, 0); + bne(flag, object_has_monitor); + } + + + // Check if it is still a light weight lock, this is is true if we see + // the stack address of the basicLock in the markOop of the object. + // Cmpxchg sets flag to cmpd(current_header, box). + cmpxchgd(/*flag=*/flag, + /*current_value=*/current_header, + /*compare_value=*/box, + /*exchange_value=*/displaced_header, + /*where=*/oop, + MacroAssembler::MemBarRel, + MacroAssembler::cmpxchgx_hint_release_lock(), + noreg, + &cont); + + assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0"); + + // Handle existing monitor. + if ((EmitSync & 0x02) == 0) { + b(cont); + + bind(object_has_monitor); + addi(current_header, current_header, -markOopDesc::monitor_value); // monitor + ld(temp, ObjectMonitor::owner_offset_in_bytes(), current_header); + ld(displaced_header, ObjectMonitor::recursions_offset_in_bytes(), current_header); + xorr(temp, R16_thread, temp); // Will be 0 if we are the owner. + orr(temp, temp, displaced_header); // Will be 0 if there are 0 recursions. + cmpdi(flag, temp, 0); + bne(flag, cont); + + ld(temp, ObjectMonitor::EntryList_offset_in_bytes(), current_header); + ld(displaced_header, ObjectMonitor::cxq_offset_in_bytes(), current_header); + orr(temp, temp, displaced_header); // Will be 0 if both are 0. + cmpdi(flag, temp, 0); + bne(flag, cont); + release(); + std(temp, ObjectMonitor::owner_offset_in_bytes(), current_header); + } + + bind(cont); + // flag == EQ indicates success + // flag == NE indicates failure +} + +// Write serialization page so VM thread can do a pseudo remote membar. 
+// We use the current thread pointer to calculate a thread specific +// offset to write to within the page. This minimizes bus traffic +// due to cache line collision. +void MacroAssembler::serialize_memory(Register thread, Register tmp1, Register tmp2) { + srdi(tmp2, thread, os::get_serialize_page_shift_count()); + + int mask = os::vm_page_size() - sizeof(int); + if (Assembler::is_simm(mask, 16)) { + andi(tmp2, tmp2, mask); + } else { + lis(tmp1, (int)((signed short) (mask >> 16))); + ori(tmp1, tmp1, mask & 0x0000ffff); + andr(tmp2, tmp2, tmp1); + } + + load_const(tmp1, (long) os::get_memory_serialize_page()); + release(); + stwx(R0, tmp1, tmp2); +} + + +// GC barrier helper macros + +// Write the card table byte if needed. +void MacroAssembler::card_write_barrier_post(Register Rstore_addr, Register Rnew_val, Register Rtmp) { + CardTableModRefBS* bs = (CardTableModRefBS*) Universe::heap()->barrier_set(); + assert(bs->kind() == BarrierSet::CardTableModRef || + bs->kind() == BarrierSet::CardTableExtension, "wrong barrier"); +#ifdef ASSERT + cmpdi(CCR0, Rnew_val, 0); + asm_assert_ne("null oop not allowed", 0x321); +#endif + card_table_write(bs->byte_map_base, Rtmp, Rstore_addr); +} + +// Write the card table byte. +void MacroAssembler::card_table_write(jbyte* byte_map_base, Register Rtmp, Register Robj) { + assert_different_registers(Robj, Rtmp, R0); + load_const_optimized(Rtmp, (address)byte_map_base, R0); + srdi(Robj, Robj, CardTableModRefBS::card_shift); + li(R0, 0); // dirty + if (UseConcMarkSweepGC) release(); + stbx(R0, Rtmp, Robj); +} + +#ifndef SERIALGC + +// General G1 pre-barrier generator. +// Goal: record the previous value if it is not null. +void MacroAssembler::g1_write_barrier_pre(Register Robj, RegisterOrConstant offset, Register Rpre_val, + Register Rtmp1, Register Rtmp2, bool needs_frame) { + Label runtime, filtered; + + // Is marking active? + if (in_bytes(PtrQueue::byte_width_of_active()) == 4) { + lwz(Rtmp1, in_bytes(JavaThread::satb_mark_queue_offset() + PtrQueue::byte_offset_of_active()), R16_thread); + } else { + guarantee(in_bytes(PtrQueue::byte_width_of_active()) == 1, "Assumption"); + lbz(Rtmp1, in_bytes(JavaThread::satb_mark_queue_offset() + PtrQueue::byte_offset_of_active()), R16_thread); + } + cmpdi(CCR0, Rtmp1, 0); + beq(CCR0, filtered); + + // Do we need to load the previous value? + if (Robj != noreg) { + // Load the previous value... + if (UseCompressedOops) { + lwz(Rpre_val, offset, Robj); + } else { + ld(Rpre_val, offset, Robj); + } + // Previous value has been loaded into Rpre_val. + } + assert(Rpre_val != noreg, "must have a real register"); + + // Is the previous value null? + cmpdi(CCR0, Rpre_val, 0); + beq(CCR0, filtered); + + if (Robj != noreg && UseCompressedOops) { + decode_heap_oop_not_null(Rpre_val); + } + + // OK, it's not filtered, so we'll need to call enqueue. In the normal + // case, pre_val will be a scratch G-reg, but there are some cases in + // which it's an O-reg. In the first case, do a normal call. In the + // latter, do a save here and call the frameless version. + + // Can we store original value in the thread's buffer? + // Is index == 0? + // (The index field is typed as size_t.) + const Register Rbuffer = Rtmp1, Rindex = Rtmp2; + + ld(Rindex, in_bytes(JavaThread::satb_mark_queue_offset() + PtrQueue::byte_offset_of_index()), R16_thread); + cmpdi(CCR0, Rindex, 0); + beq(CCR0, runtime); // If index == 0, goto runtime. 
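The enqueue step just emitted (and the matching one in the post-barrier below) follows the standard PtrQueue discipline: the index counts down in bytes, and index == 0 means "buffer full, take the runtime path". A hedged C sketch of that step, before the buffer-pointer load continues below (structure and names are illustrative):

    #include <cstddef>

    struct PtrQueueSketch {   // illustrative stand-in for the per-thread queue
      size_t index;           // byte index into buf; 0 means full
      void** buf;
    };

    // Returns false when the runtime leaf call (g1_wb_pre / g1_wb_post) is required.
    static bool try_enqueue_sketch(PtrQueueSketch* q, void* value) {
      if (q->index == 0) return false;           // cmpdi/beq(CCR0, runtime)
      q->index -= sizeof(void*);                 // addi(Rindex, Rindex, -wordSize)
      q->buf[q->index / sizeof(void*)] = value;  // stdx(value, Rbuffer, Rindex)
      return true;
    }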
+ ld(Rbuffer, in_bytes(JavaThread::satb_mark_queue_offset() + PtrQueue::byte_offset_of_buf()), R16_thread); + + addi(Rindex, Rindex, -wordSize); // Decrement index. + std(Rindex, in_bytes(JavaThread::satb_mark_queue_offset() + PtrQueue::byte_offset_of_index()), R16_thread); + + // Record the previous value. + stdx(Rpre_val, Rbuffer, Rindex); + b(filtered); + + bind(runtime); + + // VM call need frame to access(write) O register. + if (needs_frame) { + save_LR_CR(Rtmp1); + push_frame_abi112(0, Rtmp2); + } + + if (Rpre_val->is_volatile() && Robj == noreg) mr(R31, Rpre_val); // Save pre_val across C call if it was preloaded. + call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), Rpre_val, R16_thread); + if (Rpre_val->is_volatile() && Robj == noreg) mr(Rpre_val, R31); // restore + + if (needs_frame) { + pop_frame(); + restore_LR_CR(Rtmp1); + } + + bind(filtered); +} + +// General G1 post-barrier generator +// Store cross-region card. +void MacroAssembler::g1_write_barrier_post(Register Rstore_addr, Register Rnew_val, Register Rtmp1, Register Rtmp2, Register Rtmp3, Label *filtered_ext) { + Label runtime, filtered_int; + Label& filtered = (filtered_ext != NULL) ? *filtered_ext : filtered_int; + assert_different_registers(Rstore_addr, Rnew_val, Rtmp1, Rtmp2); + + G1SATBCardTableModRefBS* bs = (G1SATBCardTableModRefBS*) Universe::heap()->barrier_set(); + assert(bs->kind() == BarrierSet::G1SATBCT || + bs->kind() == BarrierSet::G1SATBCTLogging, "wrong barrier"); + + // Does store cross heap regions? + if (G1RSBarrierRegionFilter) { + xorr(Rtmp1, Rstore_addr, Rnew_val); + srdi_(Rtmp1, Rtmp1, HeapRegion::LogOfHRGrainBytes); + beq(CCR0, filtered); + } + + // Crosses regions, storing NULL? +#ifdef ASSERT + cmpdi(CCR0, Rnew_val, 0); + asm_assert_ne("null oop not allowed (G1)", 0x322); // Checked by caller on PPC64, so following branch is obsolete: + //beq(CCR0, filtered); +#endif + + // Storing region crossing non-NULL, is card already dirty? + assert(sizeof(*bs->byte_map_base) == sizeof(jbyte), "adjust this code"); + const Register Rcard_addr = Rtmp1; + Register Rbase = Rtmp2; + load_const_optimized(Rbase, (address)bs->byte_map_base, /*temp*/ Rtmp3); + + srdi(Rcard_addr, Rstore_addr, CardTableModRefBS::card_shift); + + // Get the address of the card. + lbzx(/*card value*/ Rtmp3, Rbase, Rcard_addr); + + assert(CardTableModRefBS::dirty_card_val() == 0, "otherwise check this code"); + cmpwi(CCR0, Rtmp3 /* card value */, 0); + beq(CCR0, filtered); + + // Storing a region crossing, non-NULL oop, card is clean. + // Dirty card and log. + li(Rtmp3, 0); // dirty + //release(); // G1: oops are allowed to get visible after dirty marking. + stbx(Rtmp3, Rbase, Rcard_addr); + + add(Rcard_addr, Rbase, Rcard_addr); // This is the address which needs to get enqueued. + Rbase = noreg; // end of lifetime + + const Register Rqueue_index = Rtmp2, + Rqueue_buf = Rtmp3; + ld(Rqueue_index, in_bytes(JavaThread::dirty_card_queue_offset() + PtrQueue::byte_offset_of_index()), R16_thread); + cmpdi(CCR0, Rqueue_index, 0); + beq(CCR0, runtime); // index == 0 then jump to runtime + ld(Rqueue_buf, in_bytes(JavaThread::dirty_card_queue_offset() + PtrQueue::byte_offset_of_buf()), R16_thread); + + addi(Rqueue_index, Rqueue_index, -wordSize); // decrement index + std(Rqueue_index, in_bytes(JavaThread::dirty_card_queue_offset() + PtrQueue::byte_offset_of_index()), R16_thread); + + stdx(Rcard_addr, Rqueue_buf, Rqueue_index); // store card + b(filtered); + + bind(runtime); + + // Save the live input values. 
+ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), Rcard_addr, R16_thread); + + bind(filtered_int); +} +#endif // SERIALGC + +// Values for last_Java_pc, and last_Java_sp must comply to the rules +// in frame_ppc64.hpp. +void MacroAssembler::set_last_Java_frame(Register last_Java_sp, Register last_Java_pc) { + // Always set last_Java_pc and flags first because once last_Java_sp + // is visible has_last_Java_frame is true and users will look at the + // rest of the fields. (Note: flags should always be zero before we + // get here so doesn't need to be set.) + + // Verify that last_Java_pc was zeroed on return to Java + asm_assert_mem8_is_zero(in_bytes(JavaThread::last_Java_pc_offset()), R16_thread, + "last_Java_pc not zeroed before leaving Java", 0x200); + + // When returning from calling out from Java mode the frame anchor's + // last_Java_pc will always be set to NULL. It is set here so that + // if we are doing a call to native (not VM) that we capture the + // known pc and don't have to rely on the native call having a + // standard frame linkage where we can find the pc. + if (last_Java_pc != noreg) + std(last_Java_pc, in_bytes(JavaThread::last_Java_pc_offset()), R16_thread); + + // set last_Java_sp last + std(last_Java_sp, in_bytes(JavaThread::last_Java_sp_offset()), R16_thread); +} + +void MacroAssembler::reset_last_Java_frame(void) { + asm_assert_mem8_isnot_zero(in_bytes(JavaThread::last_Java_sp_offset()), + R16_thread, "SP was not set, still zero", 0x202); + + BLOCK_COMMENT("reset_last_Java_frame {"); + li(R0, 0); + + // _last_Java_sp = 0 + std(R0, in_bytes(JavaThread::last_Java_sp_offset()), R16_thread); + + // _last_Java_pc = 0 + std(R0, in_bytes(JavaThread::last_Java_pc_offset()), R16_thread); + BLOCK_COMMENT("} reset_last_Java_frame"); +} + +void MacroAssembler::set_top_ijava_frame_at_SP_as_last_Java_frame(Register sp, Register tmp1) { + assert_different_registers(sp, tmp1); + + // sp points to a TOP_IJAVA_FRAME, retrieve frame's PC via + // TOP_IJAVA_FRAME_ABI. + // FIXME: assert that we really have a TOP_IJAVA_FRAME here! 
+#ifdef CC_INTERP + ld(tmp1/*pc*/, _top_ijava_frame_abi(frame_manager_lr), sp); +#else + Unimplemented(); +#endif + + set_last_Java_frame(/*sp=*/sp, /*pc=*/tmp1); +} + +void MacroAssembler::get_vm_result(Register oop_result) { + // Read: + // R16_thread + // R16_thread->in_bytes(JavaThread::vm_result_offset()) + // + // Updated: + // oop_result + // R16_thread->in_bytes(JavaThread::vm_result_offset()) + + ld(oop_result, in_bytes(JavaThread::vm_result_offset()), R16_thread); + li(R0, 0); + std(R0, in_bytes(JavaThread::vm_result_offset()), R16_thread); + + verify_oop(oop_result); +} + +void MacroAssembler::get_vm_result_2(Register metadata_result) { + // Read: + // R16_thread + // R16_thread->in_bytes(JavaThread::vm_result_2_offset()) + // + // Updated: + // metadata_result + // R16_thread->in_bytes(JavaThread::vm_result_2_offset()) + + ld(metadata_result, in_bytes(JavaThread::vm_result_2_offset()), R16_thread); + li(R0, 0); + std(R0, in_bytes(JavaThread::vm_result_2_offset()), R16_thread); +} + + +void MacroAssembler::encode_klass_not_null(Register dst, Register src) { + if (src == noreg) src = dst; + if (Universe::narrow_klass_base() != NULL) { + // heapbased + assert(Universe::narrow_klass_shift() != 0, "sanity"); + sub(dst, src, R30); + srdi(dst, dst, Universe::narrow_klass_shift()); + } else if (Universe::narrow_klass_shift() != 0) { + // zerobased + srdi(dst, src, Universe::narrow_klass_shift()); + } else if (src != dst) { + // unscaled + mr(dst, src); + } +} + +void MacroAssembler::store_klass(Register dst_oop, Register klass, Register ck) { + if (UseCompressedKlassPointers) { + encode_klass_not_null(ck, klass); + stw(ck, oopDesc::klass_offset_in_bytes(), dst_oop); + } else { + std(klass, oopDesc::klass_offset_in_bytes(), dst_oop); + } +} + +void MacroAssembler::decode_klass_not_null(Register dst, Register src) { + if (src == noreg) src = dst; + if (Universe::narrow_klass_base() != NULL) { + // heapbased + assert(Universe::narrow_klass_shift() != 0, "sanity"); + sldi(dst, src, Universe::narrow_klass_shift()); + add(dst, dst, R30); + } else if (Universe::narrow_klass_shift() != 0) { + // zerobased + sldi(dst, src, Universe::narrow_klass_shift()); + } else if (src != dst) { + // unscaled + mr(dst, src); + } +} + +void MacroAssembler::load_klass(Register dst, Register src) { + if (UseCompressedKlassPointers) { + lwz(dst, oopDesc::klass_offset_in_bytes(), src); + // Attention: no null check here! + decode_klass_not_null(dst, dst); + } else { + ld(dst, oopDesc::klass_offset_in_bytes(), src); + } +} + +void MacroAssembler::load_klass_with_trap_null_check(Register dst, Register src) { + if (false NOT_LINUX(|| true) /*!os::zero_page_read_protected()*/) { + if (TrapBasedNullChecks) { + trap_null_check(src); + } + } + load_klass(dst, src); +} + +void MacroAssembler::reinit_heapbase(Register d, Register tmp) { + if (UseCompressedOops || UseCompressedKlassPointers) { + load_const(R30, Universe::narrow_ptrs_base_addr(), tmp); + ld(R30, 0, R30); + } +} + +/////////////////////////////////////////// String intrinsics //////////////////////////////////////////// + +// Search for a single jchar in an jchar[]. +// +// Assumes that result differs from all other registers. +// +// Haystack, needle are the addresses of jchar-arrays. +// NeedleChar is needle[0] if it is known at compile time. +// Haycnt is the length of the haystack. We assume haycnt >=1. +// +// Preserves haystack, haycnt, kills all other registers. +// +// If needle == R0, we search for the constant needleChar. 
+void MacroAssembler::string_indexof_1(Register result, Register haystack, Register haycnt, + Register needle, jchar needleChar, + Register tmp1, Register tmp2) { + + assert_different_registers(result, haystack, haycnt, needle, tmp1, tmp2); + + Label L_InnerLoop, L_FinalCheck, L_Found1, L_Found2, L_Found3, L_NotFound, L_End; + Register needle0 = needle, // Contains needle[0]. + addr = tmp1, + ch1 = tmp2, + ch2 = R0; + +//2 (variable) or 3 (const): + if (needle != R0) lhz(needle0, 0, needle); // Preload needle character, needle has len==1. + dcbtct(haystack, 0x00); // Indicate R/O access to haystack. + + srwi_(tmp2, haycnt, 1); // Shift right by exact_log2(UNROLL_FACTOR). + mr(addr, haystack); + beq(CCR0, L_FinalCheck); + mtctr(tmp2); // Move to count register. +//8: + bind(L_InnerLoop); // Main work horse (2x unrolled search loop). + lhz(ch1, 0, addr); // Load characters from haystack. + lhz(ch2, 2, addr); + (needle != R0) ? cmpw(CCR0, ch1, needle0) : cmplwi(CCR0, ch1, needleChar); + (needle != R0) ? cmpw(CCR1, ch2, needle0) : cmplwi(CCR1, ch2, needleChar); + beq(CCR0, L_Found1); // Did we find the needle? + beq(CCR1, L_Found2); + addi(addr, addr, 4); + bdnz(L_InnerLoop); +//16: + bind(L_FinalCheck); + andi_(R0, haycnt, 1); + beq(CCR0, L_NotFound); + lhz(ch1, 0, addr); // One position left at which we have to compare. + (needle != R0) ? cmpw(CCR1, ch1, needle0) : cmplwi(CCR1, ch1, needleChar); + beq(CCR1, L_Found3); +//21: + bind(L_NotFound); + li(result, -1); // Not found. + b(L_End); + + bind(L_Found2); + addi(addr, addr, 2); +//24: + bind(L_Found1); + bind(L_Found3); // Return index ... + subf(addr, haystack, addr); // relative to haystack, + srdi(result, addr, 1); // in characters. + bind(L_End); +} + + +// Implementation of IndexOf for jchar arrays. +// +// The length of haystack and needle are not constant, i.e. passed in a register. +// +// Preserves registers haystack, needle. +// Kills registers haycnt, needlecnt. +// Assumes that result differs from all other registers. +// Haystack, needle are the addresses of jchar-arrays. +// Haycnt, needlecnt are the lengths of them, respectively. +// +// Needlecntval must be zero or 15-bit unsigned immediate and > 1. +void MacroAssembler::string_indexof(Register result, Register haystack, Register haycnt, + Register needle, ciTypeArray* needle_values, Register needlecnt, int needlecntval, + Register tmp1, Register tmp2, Register tmp3, Register tmp4) { + + // Ensure 0=2, bail out otherwise. + // ************************************************************************************************** + +//1 (variable) or 3 (const): + dcbtct(needle, 0x00); // Indicate R/O access to str1. + dcbtct(haystack, 0x00); // Indicate R/O access to str2. + + // Compute last haystack addr to use if no match gets found. + if (needlecntval == 0) { // variable needlecnt +//3: + subf(ch1, needlecnt, haycnt); // Last character index to compare is haycnt-needlecnt. + addi(addr, haystack, -2); // Accesses use pre-increment. + cmpwi(CCR6, needlecnt, 2); + blt(CCR6, L_TooShort); // Variable needlecnt: handle short needle separately. + slwi(ch1, ch1, 1); // Scale to number of bytes. + lwz(n_start, 0, needle); // Load first 2 characters of needle. + add(last_addr, haystack, ch1); // Point to last address to compare (haystack+2*(haycnt-needlecnt)). + addi(needlecnt, needlecnt, -2); // Rest of needle. 
+ } else { // constant needlecnt + guarantee(needlecntval != 1, "IndexOf with single-character needle must be handled separately"); + assert((needlecntval & 0x7fff) == needlecntval, "wrong immediate"); +//5: + addi(ch1, haycnt, -needlecntval); // Last character index to compare is haycnt-needlecnt. + lwz(n_start, 0, needle); // Load first 2 characters of needle. + addi(addr, haystack, -2); // Accesses use pre-increment. + slwi(ch1, ch1, 1); // Scale to number of bytes. + add(last_addr, haystack, ch1); // Point to last address to compare (haystack+2*(haycnt-needlecnt)). + li(needlecnt, needlecntval-2); // Rest of needle. + } + + // Main Loop (now we have at least 3 characters). +//11: + Label L_OuterLoop, L_InnerLoop, L_FinalCheck, L_Comp1, L_Comp2, L_Comp3; + bind(L_OuterLoop); // Search for 1st 2 characters. + Register addr_diff = tmp4; + subf(addr_diff, addr, last_addr); // Difference between already checked address and last address to check. + addi(addr, addr, 2); // This is the new address we want to use for comparing. + srdi_(ch2, addr_diff, 2); + beq(CCR0, L_FinalCheck); // 2 characters left? + mtctr(ch2); // addr_diff/4 +//16: + bind(L_InnerLoop); // Main work horse (2x unrolled search loop) + lwz(ch1, 0, addr); // Load 2 characters of haystack (ignore alignment). + lwz(ch2, 2, addr); + cmpw(CCR0, ch1, n_start); // Compare 2 characters (1 would be sufficient but try to reduce branches to CompLoop). + cmpw(CCR1, ch2, n_start); + beq(CCR0, L_Comp1); // Did we find the needle start? + beq(CCR1, L_Comp2); + addi(addr, addr, 4); + bdnz(L_InnerLoop); +//24: + bind(L_FinalCheck); + rldicl_(addr_diff, addr_diff, 64-1, 63); // Remaining characters not covered by InnerLoop: (addr_diff>>1)&1. + beq(CCR0, L_NotFound); + lwz(ch1, 0, addr); // One position left at which we have to compare. + cmpw(CCR1, ch1, n_start); + beq(CCR1, L_Comp3); +//29: + bind(L_NotFound); + li(result, -1); // not found + b(L_End); + + + // ************************************************************************************************** + // Special Case: unfortunately, the variable needle case can be called with needlecnt<2 + // ************************************************************************************************** +//31: + if ((needlecntval>>1) !=1 ) { // Const needlecnt is 2 or 3? Reduce code size. + int nopcnt = 5; + if (needlecntval !=0 ) ++nopcnt; // Balance alignment (other case: see below). + if (needlecntval == 0) { // We have to handle these cases separately. + Label L_OneCharLoop; + bind(L_TooShort); + mtctr(haycnt); + lhz(n_start, 0, needle); // First character of needle + bind(L_OneCharLoop); + lhzu(ch1, 2, addr); + cmpw(CCR1, ch1, n_start); + beq(CCR1, L_Found); // Did we find the one character needle? + bdnz(L_OneCharLoop); + li(result, -1); // Not found. + b(L_End); + } // 8 instructions, so no impact on alignment. + for (int x = 0; x < nopcnt; ++x) nop(); + } + + // ************************************************************************************************** + // Regular Case Part II: compare rest of needle (first 2 characters have been compared already) + // ************************************************************************************************** + + // Compare the rest +//36 if needlecntval==0, else 37: + bind(L_Comp2); + addi(addr, addr, 2); // First comparison has failed, 2nd one hit. + bind(L_Comp1); // Addr points to possible needle start. + bind(L_Comp3); // Could have created a copy and use a different return address but saving code size here. 
+ if (needlecntval != 2) { // Const needlecnt==2? + if (needlecntval != 3) { + if (needlecntval == 0) beq(CCR6, L_Found); // Variable needlecnt==2? + Register ind_reg = tmp4; + li(ind_reg, 2*2); // First 2 characters are already compared, use index 2. + mtctr(needlecnt); // Decremented by 2, still > 0. +//40: + Label L_CompLoop; + bind(L_CompLoop); + lhzx(ch2, needle, ind_reg); + lhzx(ch1, addr, ind_reg); + cmpw(CCR1, ch1, ch2); + bne(CCR1, L_OuterLoop); + addi(ind_reg, ind_reg, 2); + bdnz(L_CompLoop); + } else { // No loop required if there's only one needle character left. + lhz(ch2, 2*2, needle); + lhz(ch1, 2*2, addr); + cmpw(CCR1, ch1, ch2); + bne(CCR1, L_OuterLoop); + } + } + // Return index ... +//46: + bind(L_Found); + subf(addr, haystack, addr); // relative to haystack, ... + srdi(result, addr, 1); // in characters. +//48: + bind(L_End); +} + +// Implementation of Compare for jchar arrays. +// +// Kills the registers str1, str2, cnt1, cnt2. +// Kills cr0, ctr. +// Assumes that result differes from the input registers. +void MacroAssembler::string_compare(Register str1_reg, Register str2_reg, Register cnt1_reg, Register cnt2_reg, + Register result_reg, Register tmp_reg) { + assert_different_registers(result_reg, str1_reg, str2_reg, cnt1_reg, cnt2_reg, tmp_reg); + + Label Ldone, Lslow_case, Lslow_loop, Lfast_loop; + Register cnt_diff = R0, + limit_reg = cnt1_reg, + chr1_reg = result_reg, + chr2_reg = cnt2_reg, + addr_diff = str2_reg; + + // Offset 0 should be 32 byte aligned. +//-4: + dcbtct(str1_reg, 0x00); // Indicate R/O access to str1. + dcbtct(str2_reg, 0x00); // Indicate R/O access to str2. +//-2: + // Compute min(cnt1, cnt2) and check if 0 (bail out if we don't need to compare characters). + subf(result_reg, cnt2_reg, cnt1_reg); // difference between cnt1/2 + subf_(addr_diff, str1_reg, str2_reg); // alias? + beq(CCR0, Ldone); // return cnt difference if both ones are identical + srawi(limit_reg, result_reg, 31); // generate signmask (cnt1/2 must be non-negative so cnt_diff can't overflow) + mr(cnt_diff, result_reg); + andr(limit_reg, result_reg, limit_reg); // difference or zero (negative): cnt14 characters for fast loop + andi(limit_reg, tmp_reg, 4-1); // remaining characters + + // Adapt str1_reg str2_reg for the first loop iteration + mtctr(chr2_reg); // (min(cnt1, cnt2)-1)/4 + addi(limit_reg, limit_reg, 4+1); // compare last 5-8 characters in slow_case if mismatch found in fast_loop +//16: + // Compare the rest of the characters + bind(Lfast_loop); + ld(chr1_reg, 0, str1_reg); + ldx(chr2_reg, str1_reg, addr_diff); + cmpd(CCR0, chr2_reg, chr1_reg); + bne(CCR0, Lslow_case); // return chr1_reg + addi(str1_reg, str1_reg, 4*2); + bdnz(Lfast_loop); + addi(limit_reg, limit_reg, -4); // no mismatch found in fast_loop, only 1-4 characters missing +//23: + bind(Lslow_case); + mtctr(limit_reg); +//24: + bind(Lslow_loop); + lhz(chr1_reg, 0, str1_reg); + lhzx(chr2_reg, str1_reg, addr_diff); + subf_(result_reg, chr2_reg, chr1_reg); + bne(CCR0, Ldone); // return chr1_reg + addi(str1_reg, str1_reg, 1*2); + bdnz(Lslow_loop); +//30: + // If strings are equal up to min length, return the length difference. + mr(result_reg, cnt_diff); + nop(); // alignment +//32: + // Otherwise, return the difference between the first mismatched chars. + bind(Ldone); +} + + +// Compare char[] arrays. 
+// +// str1_reg USE only +// str2_reg USE only +// cnt_reg USE_DEF, due to tmp reg shortage +// result_reg DEF only, might compromise USE only registers +void MacroAssembler::char_arrays_equals(Register str1_reg, Register str2_reg, Register cnt_reg, Register result_reg, + Register tmp1_reg, Register tmp2_reg, Register tmp3_reg, Register tmp4_reg, + Register tmp5_reg) { + + // Str1 may be the same register as str2 which can occur e.g. after scalar replacement. + assert_different_registers(result_reg, str1_reg, cnt_reg, tmp1_reg, tmp2_reg, tmp3_reg, tmp4_reg, tmp5_reg); + assert_different_registers(result_reg, str2_reg, cnt_reg, tmp1_reg, tmp2_reg, tmp3_reg, tmp4_reg, tmp5_reg); + + // Offset 0 should be 32 byte aligned. + Label Linit_cbc, Lcbc, Lloop, Ldone_true, Ldone_false; + Register index_reg = tmp5_reg; + Register cbc_iter = tmp4_reg; + +//-1: + dcbtct(str1_reg, 0x00); // Indicate R/O access to str1. + dcbtct(str2_reg, 0x00); // Indicate R/O access to str2. +//1: + andi(cbc_iter, cnt_reg, 4-1); // Remaining iterations after 4 java characters per iteration loop. + li(index_reg, 0); // init + li(result_reg, 0); // assume false + srwi_(tmp2_reg, cnt_reg, exact_log2(4)); // Div: 4 java characters per iteration (main loop). + + cmpwi(CCR1, cbc_iter, 0); // CCR1 = (cbc_iter==0) + beq(CCR0, Linit_cbc); // too short + mtctr(tmp2_reg); +//8: + bind(Lloop); + ldx(tmp1_reg, str1_reg, index_reg); + ldx(tmp2_reg, str2_reg, index_reg); + cmpd(CCR0, tmp1_reg, tmp2_reg); + bne(CCR0, Ldone_false); // Unequal char pair found -> done. + addi(index_reg, index_reg, 4*sizeof(jchar)); + bdnz(Lloop); +//14: + bind(Linit_cbc); + beq(CCR1, Ldone_true); + mtctr(cbc_iter); +//16: + bind(Lcbc); + lhzx(tmp1_reg, str1_reg, index_reg); + lhzx(tmp2_reg, str2_reg, index_reg); + cmpw(CCR0, tmp1_reg, tmp2_reg); + bne(CCR0, Ldone_false); // Unequal char pair found -> done. + addi(index_reg, index_reg, 1*sizeof(jchar)); + bdnz(Lcbc); + nop(); + bind(Ldone_true); + li(result_reg, 1); +//24: + bind(Ldone_false); +} + + +void MacroAssembler::char_arrays_equalsImm(Register str1_reg, Register str2_reg, int cntval, Register result_reg, + Register tmp1_reg, Register tmp2_reg) { + // Str1 may be the same register as str2 which can occur e.g. after scalar replacement. + assert_different_registers(result_reg, str1_reg, tmp1_reg, tmp2_reg); + assert_different_registers(result_reg, str2_reg, tmp1_reg, tmp2_reg); + assert(sizeof(jchar) == 2, "must be"); + assert(cntval >= 0 && ((cntval & 0x7fff) == cntval), "wrong immediate"); + + Label Ldone_false; + + if (cntval < 16) { // short case + if (cntval != 0) li(result_reg, 0); // assume false + + const int num_bytes = cntval*sizeof(jchar); + int index = 0; + for (int next_index; (next_index = index + 8) <= num_bytes; index = next_index) { + ld(tmp1_reg, index, str1_reg); + ld(tmp2_reg, index, str2_reg); + cmpd(CCR0, tmp1_reg, tmp2_reg); + bne(CCR0, Ldone_false); + } + if (cntval & 2) { + lwz(tmp1_reg, index, str1_reg); + lwz(tmp2_reg, index, str2_reg); + cmpw(CCR0, tmp1_reg, tmp2_reg); + bne(CCR0, Ldone_false); + index += 4; + } + if (cntval & 1) { + lhz(tmp1_reg, index, str1_reg); + lhz(tmp2_reg, index, str2_reg); + cmpw(CCR0, tmp1_reg, tmp2_reg); + bne(CCR0, Ldone_false); + } + // fallthrough: true + } else { + Label Lloop; + Register index_reg = tmp1_reg; + const int loopcnt = cntval/4; + assert(loopcnt > 0, "must be"); + // Offset 0 should be 32 byte aligned. + //2: + dcbtct(str1_reg, 0x00); // Indicate R/O access to str1. + dcbtct(str2_reg, 0x00); // Indicate R/O access to str2. 
+ li(tmp2_reg, loopcnt); + li(index_reg, 0); // init + li(result_reg, 0); // assume false + mtctr(tmp2_reg); + //8: + bind(Lloop); + ldx(R0, str1_reg, index_reg); + ldx(tmp2_reg, str2_reg, index_reg); + cmpd(CCR0, R0, tmp2_reg); + bne(CCR0, Ldone_false); // Unequal char pair found -> done. + addi(index_reg, index_reg, 4*sizeof(jchar)); + bdnz(Lloop); + //14: + if (cntval & 2) { + lwzx(R0, str1_reg, index_reg); + lwzx(tmp2_reg, str2_reg, index_reg); + cmpw(CCR0, R0, tmp2_reg); + bne(CCR0, Ldone_false); + if (cntval & 1) addi(index_reg, index_reg, 2*sizeof(jchar)); + } + if (cntval & 1) { + lhzx(R0, str1_reg, index_reg); + lhzx(tmp2_reg, str2_reg, index_reg); + cmpw(CCR0, R0, tmp2_reg); + bne(CCR0, Ldone_false); + } + // fallthru: true + } + li(result_reg, 1); + bind(Ldone_false); +} + + +void MacroAssembler::asm_assert(bool check_equal, const char *msg, int id) { +#ifdef ASSERT + Label ok; + if (check_equal) { + beq(CCR0, ok); + } else { + bne(CCR0, ok); + } + stop(msg, id); + bind(ok); +#endif +} + +void MacroAssembler::asm_assert_mems_zero(bool check_equal, int size, int mem_offset, + Register mem_base, const char* msg, int id) { +#ifdef ASSERT + switch (size) { + case 4: + lwz(R0, mem_offset, mem_base); + cmpwi(CCR0, R0, 0); + break; + case 8: + ld(R0, mem_offset, mem_base); + cmpdi(CCR0, R0, 0); + break; + default: + ShouldNotReachHere(); + } + asm_assert(check_equal, msg, id); +#endif // ASSERT +} + +void MacroAssembler::verify_thread() { + if (VerifyThread) { + unimplemented("'VerifyThread' currently not implemented on PPC"); + } +} + +// READ: oop. KILL: R0. Volatile floats perhaps. +void MacroAssembler::verify_oop(Register oop, const char* msg) { + if (!VerifyOops) { + return; + } + // will be preserved. + Register tmp = R11; + assert(oop != tmp, "precondition"); + unsigned int nbytes_save = 10*8; // 10 volatile gprs + address/* FunctionDescriptor** */fd = + StubRoutines::verify_oop_subroutine_entry_address(); + // save tmp + mr(R0, tmp); + // kill tmp + save_LR_CR(tmp); + push_frame_abi112(nbytes_save, tmp); + // restore tmp + mr(tmp, R0); + save_volatile_gprs(R1_SP, 112); // except R0 + // load FunctionDescriptor** + load_const(tmp, fd); + // load FunctionDescriptor* + ld(tmp, 0, tmp); + mr(R4_ARG2, oop); + load_const(R3_ARG1, (address)msg); + // call destination for its side effect + call_c(tmp); + restore_volatile_gprs(R1_SP, 112); // except R0 + pop_frame(); + // save tmp + mr(R0, tmp); + // kill tmp + restore_LR_CR(tmp); + // restore tmp + mr(tmp, R0); +} + +const char* stop_types[] = { + "stop", + "untested", + "unimplemented", + "shouldnotreachhere" +}; + +static void stop_on_request(int tp, const char* msg) { + tty->print("PPC assembly code requires stop: (%s) %s\n", (void *)stop_types[tp%/*stop_end*/4], msg); + guarantee(false, err_msg("PPC assembly code requires stop: %s", msg)); +} + +// Call a C-function that prints output. +void MacroAssembler::stop(int type, const char* msg, int id) { +#ifndef PRODUCT + block_comment(err_msg("stop: %s %s {", stop_types[type%stop_end], msg)); +#else + block_comment("stop {"); +#endif + + // setup arguments + load_const_optimized(R3_ARG1, type); + load_const_optimized(R4_ARG2, (void *)msg, /*tmp=*/R0); + call_VM_leaf(CAST_FROM_FN_PTR(address, stop_on_request), R3_ARG1, R4_ARG2); + illtrap(); + emit_int32(id); + block_comment("} stop;"); +} + +#ifndef PRODUCT +// Write pattern 0x0101010101010101 in memory region [low-before, high+after]. +// Val, addr are temp registers. +// If low == addr, addr is killed. +// High is preserved. 
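A plain-C++ view of the region the routine below fills, assuming 64-bit words (free-standing helper for illustration only, not part of the patch):

    #include <cstdint>

    // Fills [low - before, high + after] (inclusive, counted in words) with the
    // 0x01 pattern, mirroring what zap_from_to emits in assembly.
    static void zap_region_sketch(intptr_t* low, int before, intptr_t* high, int after) {
      for (intptr_t* p = low - before; p <= high + after; ++p) {
        *p = (intptr_t)0x0101010101010101LL;
      }
    }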
+void MacroAssembler::zap_from_to(Register low, int before, Register high, int after, Register val, Register addr) { + if (!ZapMemory) return; + + assert_different_registers(low, val); + + BLOCK_COMMENT("zap memory region {"); + load_const_optimized(val, 0x0101010101010101); + int size = before + after; + if (low == high && size < 5 && size > 0) { + int offset = -before*BytesPerWord; + for (int i = 0; i < size; ++i) { + std(val, offset, low); + offset += (1*BytesPerWord); + } + } else { + addi(addr, low, -before*BytesPerWord); + assert_different_registers(high, val); + if (after) addi(high, high, after * BytesPerWord); + Label loop; + bind(loop); + std(val, 0, addr); + addi(addr, addr, 8); + cmpd(CCR6, addr, high); + ble(CCR6, loop); + if (after) addi(high, high, -after * BytesPerWord); // Correct back to old value. + } + BLOCK_COMMENT("} zap memory region"); +} + +#endif // !PRODUCT diff --git a/src/cpu/ppc/vm/macroAssembler_ppc.hpp b/src/cpu/ppc/vm/macroAssembler_ppc.hpp new file mode 100644 --- /dev/null +++ b/src/cpu/ppc/vm/macroAssembler_ppc.hpp @@ -0,0 +1,658 @@ +/* + * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright 2012, 2013 SAP AG. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_PPC_VM_MACROASSEMBLER_PPC_HPP +#define CPU_PPC_VM_MACROASSEMBLER_PPC_HPP + +#include "asm/assembler.hpp" + +// MacroAssembler extends Assembler by a few frequently used macros. + +class ciTypeArray; + +class MacroAssembler: public Assembler { + public: + MacroAssembler(CodeBuffer* code) : Assembler(code) {} + + // + // Optimized instruction emitters + // + + inline static int largeoffset_si16_si16_hi(int si31) { return (si31 + (1<<15)) >> 16; } + inline static int largeoffset_si16_si16_lo(int si31) { return si31 - (((si31 + (1<<15)) >> 16) << 16); } + + // load d = *[a+si31] + // Emits several instructions if the offset is not encodable in one instruction. + void ld_largeoffset_unchecked(Register d, int si31, Register a, int emit_filler_nop); + void ld_largeoffset (Register d, int si31, Register a, int emit_filler_nop); + inline static bool is_ld_largeoffset(address a); + inline static int get_ld_largeoffset_offset(address a); + + inline void round_to(Register r, int modulus); + + // Load/store with type given by parameter. 
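The rounding in largeoffset_si16_si16_hi/lo above is easiest to see with a concrete value; the following self-contained check mirrors the two helpers (illustrative copy, not part of the patch):

    #include <cassert>

    static int hi16_sketch(int si31) { return (si31 + (1 << 15)) >> 16; }
    static int lo16_sketch(int si31) { return si31 - (hi16_sketch(si31) << 16); }

    int main() {
      // Low half above 0x7fff: the high part is rounded up and the low part
      // becomes a negative simm16, so an addis/ld pair still reconstructs the offset.
      int off = 0x1234ABCD;
      assert(hi16_sketch(off) == 0x1235);
      assert(lo16_sketch(off) == -0x5433);
      assert((hi16_sketch(off) << 16) + lo16_sketch(off) == off);
      return 0;
    }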
+ void load_sized_value( Register dst, RegisterOrConstant offs, Register base, size_t size_in_bytes, bool is_signed); + void store_sized_value(Register dst, RegisterOrConstant offs, Register base, size_t size_in_bytes); + + // Move register if destination register and target register are different + inline void mr_if_needed(Register rd, Register rs); + + // nop padding + void align(int modulus); + + // + // Constants, loading constants, TOC support + // + + // Address of the global TOC. + inline static address global_toc(); + // Offset of given address to the global TOC. + inline static int offset_to_global_toc(const address addr); + + // Address of TOC of the current method. + inline address method_toc(); + // Offset of given address to TOC of the current method. + inline int offset_to_method_toc(const address addr); + + // Global TOC. + void calculate_address_from_global_toc(Register dst, address addr, + bool hi16 = true, bool lo16 = true, + bool add_relocation = true, bool emit_dummy_addr = false); + inline void calculate_address_from_global_toc_hi16only(Register dst, address addr) { + calculate_address_from_global_toc(dst, addr, true, false); + }; + inline void calculate_address_from_global_toc_lo16only(Register dst, address addr) { + calculate_address_from_global_toc(dst, addr, false, true); + }; + + inline static bool is_calculate_address_from_global_toc_at(address a, address bound); + static int patch_calculate_address_from_global_toc_at(address a, address addr, address bound); + static address get_address_of_calculate_address_from_global_toc_at(address a, address addr); + +#ifdef _LP64 + // Patch narrow oop constant. + inline static bool is_set_narrow_oop(address a, address bound); + static int patch_set_narrow_oop(address a, address bound, narrowOop data); + static narrowOop get_narrow_oop(address a, address bound); +#endif + + inline static bool is_load_const_at(address a); + + // Emits an oop const to the constant pool, loads the constant, and + // sets a relocation info with address current_pc. + void load_const_from_method_toc(Register dst, AddressLiteral& a, Register toc); + void load_toc_from_toc(Register dst, AddressLiteral& a, Register toc) { + assert(dst == R2_TOC, "base register must be TOC"); + load_const_from_method_toc(dst, a, toc); + } + + static bool is_load_const_from_method_toc_at(address a); + static int get_offset_of_load_const_from_method_toc_at(address a); + + // Get the 64 bit constant from a `load_const' sequence. + static long get_const(address load_const); + + // Patch the 64 bit constant of a `load_const' sequence. This is a + // low level procedure. It neither flushes the instruction cache nor + // is it atomic. + static void patch_const(address load_const, long x); + + // Metadata in code that we have to keep track of. + AddressLiteral allocate_metadata_address(Metadata* obj); // allocate_index + AddressLiteral constant_metadata_address(Metadata* obj); // find_index + // Oops used directly in compiled code are stored in the constant pool, + // and loaded from there. + // Allocate new entry for oop in constant pool. Generate relocation. + AddressLiteral allocate_oop_address(jobject obj); + // Find oop obj in constant pool. Return relocation with it's index. + AddressLiteral constant_oop_address(jobject obj); + + // Find oop in constant pool and emit instructions to load it. + // Uses constant_oop_address. + inline void set_oop_constant(jobject obj, Register d); + // Same as load_address. 
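get_const and patch_const above operate on the full 64-bit immediate of a load_const sequence; how such a constant splits into the four 16-bit pieces of the lis/ori/sldi/oris/ori shape that is_load_const_at recognizes can be checked in plain C++ (illustrative only):

    #include <cassert>
    #include <cstdint>

    int main() {
      uint64_t c = 0x123456789ABCDEF0ULL;
      uint16_t hi_hi = (uint16_t)(c >> 48);   // lis  d, hi_hi
      uint16_t hi_lo = (uint16_t)(c >> 32);   // ori  d, d, hi_lo
                                              // sldi d, d, 32
      uint16_t lo_hi = (uint16_t)(c >> 16);   // oris d, d, lo_hi
      uint16_t lo_lo = (uint16_t) c;          // ori  d, d, lo_lo
      uint64_t rebuilt = ((((uint64_t)hi_hi << 16) | hi_lo) << 32)
                       | ((uint64_t)lo_hi << 16) | lo_lo;
      assert(rebuilt == c);   // lis sign-extends, but sldi shifts those bits out again
      return 0;
    }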
+ inline void set_oop (AddressLiteral obj_addr, Register d); + + // Read runtime constant: Issue load if constant not yet established, + // else use real constant. + virtual RegisterOrConstant delayed_value_impl(intptr_t* delayed_value_addr, + Register tmp, + int offset); + + // + // branch, jump + // + + inline void pd_patch_instruction(address branch, address target); + NOT_PRODUCT(static void pd_print_patched_instruction(address branch);) + + // Conditional far branch for destinations encodable in 24+2 bits. + // Same interface as bc, e.g. no inverse boint-field. + enum { + bc_far_optimize_not = 0, + bc_far_optimize_on_relocate = 1 + }; + // optimize: flag for telling the conditional far branch to optimize + // itself when relocated. + void bc_far(int boint, int biint, Label& dest, int optimize); + // Relocation of conditional far branches. + static bool is_bc_far_at(address instruction_addr); + static address get_dest_of_bc_far_at(address instruction_addr); + static void set_dest_of_bc_far_at(address instruction_addr, address dest); + private: + static bool inline is_bc_far_variant1_at(address instruction_addr); + static bool inline is_bc_far_variant2_at(address instruction_addr); + static bool inline is_bc_far_variant3_at(address instruction_addr); + public: + + // Convenience bc_far versions. + inline void blt_far(ConditionRegister crx, Label& L, int optimize); + inline void bgt_far(ConditionRegister crx, Label& L, int optimize); + inline void beq_far(ConditionRegister crx, Label& L, int optimize); + inline void bso_far(ConditionRegister crx, Label& L, int optimize); + inline void bge_far(ConditionRegister crx, Label& L, int optimize); + inline void ble_far(ConditionRegister crx, Label& L, int optimize); + inline void bne_far(ConditionRegister crx, Label& L, int optimize); + inline void bns_far(ConditionRegister crx, Label& L, int optimize); + + // Emit, identify and patch a NOT mt-safe patchable 64 bit absolute call/jump. + private: + enum { + bxx64_patchable_instruction_count = (2/*load_codecache_const*/ + 3/*5load_const*/ + 1/*mtctr*/ + 1/*bctrl*/), + bxx64_patchable_size = bxx64_patchable_instruction_count * BytesPerInstWord, + bxx64_patchable_ret_addr_offset = bxx64_patchable_size + }; + void bxx64_patchable(address target, relocInfo::relocType rt, bool link); + static bool is_bxx64_patchable_at( address instruction_addr, bool link); + // Does the instruction use a pc-relative encoding of the destination? + static bool is_bxx64_patchable_pcrelative_at( address instruction_addr, bool link); + static bool is_bxx64_patchable_variant1_at( address instruction_addr, bool link); + // Load destination relative to global toc. 
+ static bool is_bxx64_patchable_variant1b_at( address instruction_addr, bool link); + static bool is_bxx64_patchable_variant2_at( address instruction_addr, bool link); + static void set_dest_of_bxx64_patchable_at( address instruction_addr, address target, bool link); + static address get_dest_of_bxx64_patchable_at(address instruction_addr, bool link); + + public: + // call + enum { + bl64_patchable_instruction_count = bxx64_patchable_instruction_count, + bl64_patchable_size = bxx64_patchable_size, + bl64_patchable_ret_addr_offset = bxx64_patchable_ret_addr_offset + }; + inline void bl64_patchable(address target, relocInfo::relocType rt) { + bxx64_patchable(target, rt, /*link=*/true); + } + inline static bool is_bl64_patchable_at(address instruction_addr) { + return is_bxx64_patchable_at(instruction_addr, /*link=*/true); + } + inline static bool is_bl64_patchable_pcrelative_at(address instruction_addr) { + return is_bxx64_patchable_pcrelative_at(instruction_addr, /*link=*/true); + } + inline static void set_dest_of_bl64_patchable_at(address instruction_addr, address target) { + set_dest_of_bxx64_patchable_at(instruction_addr, target, /*link=*/true); + } + inline static address get_dest_of_bl64_patchable_at(address instruction_addr) { + return get_dest_of_bxx64_patchable_at(instruction_addr, /*link=*/true); + } + // jump + enum { + b64_patchable_instruction_count = bxx64_patchable_instruction_count, + b64_patchable_size = bxx64_patchable_size, + }; + inline void b64_patchable(address target, relocInfo::relocType rt) { + bxx64_patchable(target, rt, /*link=*/false); + } + inline static bool is_b64_patchable_at(address instruction_addr) { + return is_bxx64_patchable_at(instruction_addr, /*link=*/false); + } + inline static bool is_b64_patchable_pcrelative_at(address instruction_addr) { + return is_bxx64_patchable_pcrelative_at(instruction_addr, /*link=*/false); + } + inline static void set_dest_of_b64_patchable_at(address instruction_addr, address target) { + set_dest_of_bxx64_patchable_at(instruction_addr, target, /*link=*/false); + } + inline static address get_dest_of_b64_patchable_at(address instruction_addr) { + return get_dest_of_bxx64_patchable_at(instruction_addr, /*link=*/false); + } + + // + // Support for frame handling + // + + // some ABI-related functions + void save_nonvolatile_gprs( Register dst_base, int offset); + void restore_nonvolatile_gprs(Register src_base, int offset); + void save_volatile_gprs( Register dst_base, int offset); + void restore_volatile_gprs(Register src_base, int offset); + void save_LR_CR( Register tmp); // tmp contains LR on return. + void restore_LR_CR(Register tmp); + + // Get current PC using bl-next-instruction trick. + address get_PC_trash_LR(Register result); + + // Resize current frame either relatively wrt to current SP or absolute. + void resize_frame(Register offset, Register tmp); + void resize_frame(int offset, Register tmp); + void resize_frame_absolute(Register addr, Register tmp1, Register tmp2); + + // Push a frame of size bytes. + void push_frame(Register bytes, Register tmp); + + // Push a frame of size `bytes'. No abi space provided. + void push_frame(unsigned int bytes, Register tmp); + + // Push a frame of size `bytes' plus abi112 on top. 
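For a usage reference, verify_oop earlier in this patch brackets its C call with exactly these frame helpers:

    // From MacroAssembler::verify_oop above (abridged):
    save_LR_CR(tmp);                      // keep LR/CR across the call
    push_frame_abi112(nbytes_save, tmp);  // new frame including the ABI header
    // ... save volatile GPRs, call_c(...), restore volatile GPRs ...
    pop_frame();
    restore_LR_CR(tmp);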
+ void push_frame_abi112(unsigned int bytes, Register tmp); + + // Setup up a new C frame with a spill area for non-volatile GPRs and additional + // space for local variables + void push_frame_abi112_nonvolatiles(unsigned int bytes, Register tmp); + + // pop current C frame + void pop_frame(); + + // + // Calls + // + + private: + address _last_calls_return_pc; + + // Generic version of a call to C function via a function descriptor + // with variable support for C calling conventions (TOC, ENV, etc.). + // updates and returns _last_calls_return_pc. + address branch_to(Register function_descriptor, bool and_link, bool save_toc_before_call, + bool restore_toc_after_call, bool load_toc_of_callee, bool load_env_of_callee); + + public: + + // Get the pc where the last call will return to. returns _last_calls_return_pc. + inline address last_calls_return_pc(); + + // Call a C function via a function descriptor and use full C + // calling conventions. Updates and returns _last_calls_return_pc. + address call_c(Register function_descriptor); + address call_c(const FunctionDescriptor* function_descriptor, relocInfo::relocType rt); + address call_c_using_toc(const FunctionDescriptor* function_descriptor, relocInfo::relocType rt, + Register toc); + + protected: + + // It is imperative that all calls into the VM are handled via the + // call_VM macros. They make sure that the stack linkage is setup + // correctly. call_VM's correspond to ENTRY/ENTRY_X entry points + // while call_VM_leaf's correspond to LEAF entry points. + // + // This is the base routine called by the different versions of + // call_VM. The interpreter may customize this version by overriding + // it for its purposes (e.g., to save/restore additional registers + // when doing a VM call). + // + // If no last_java_sp is specified (noreg) then SP will be used instead. + virtual void call_VM_base( + // where an oop-result ends up if any; use noreg otherwise + Register oop_result, + // to set up last_Java_frame in stubs; use noreg otherwise + Register last_java_sp, + // the entry point + address entry_point, + // flag which indicates if exception should be checked + bool check_exception=true + ); + + // Support for VM calls. This is the base routine called by the + // different versions of call_VM_leaf. The interpreter may customize + // this version by overriding it for its purposes (e.g., to + // save/restore additional registers when doing a VM call). + void call_VM_leaf_base(address entry_point); + + public: + // Call into the VM. + // Passes the thread pointer (in R3_ARG1) as a prepended argument. + // Makes sure oop return values are visible to the GC. + void call_VM(Register oop_result, address entry_point, bool check_exceptions = true); + void call_VM(Register oop_result, address entry_point, Register arg_1, bool check_exceptions = true); + void call_VM(Register oop_result, address entry_point, Register arg_1, Register arg_2, bool check_exceptions = true); + void call_VM_leaf(address entry_point); + void call_VM_leaf(address entry_point, Register arg_1); + void call_VM_leaf(address entry_point, Register arg_1, Register arg_2); + void call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3); + + // Call a stub function via a function descriptor, but don't save + // TOC before call, don't setup TOC and ENV for call, and don't + // restore TOC after call. Updates and returns _last_calls_return_pc. 
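As a usage reference, MacroAssembler::stop earlier in this patch calls a leaf entry point like this:

    // From MacroAssembler::stop above:
    load_const_optimized(R3_ARG1, type);
    load_const_optimized(R4_ARG2, (void *)msg, /*tmp=*/R0);
    call_VM_leaf(CAST_FROM_FN_PTR(address, stop_on_request), R3_ARG1, R4_ARG2);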
+ inline address call_stub(Register function_entry); + inline void call_stub_and_return_to(Register function_entry, Register return_pc); + + // + // Java utilities + // + + // Read from the polling page, its address is already in a register. + inline void load_from_polling_page(Register polling_page_address, int offset = 0); + // Check whether instruction is a read access to the polling page + // which was emitted by load_from_polling_page(..). + static bool is_load_from_polling_page(int instruction, void* ucontext/*may be NULL*/, + address* polling_address_ptr = NULL); + + // Check whether instruction is a write access to the memory + // serialization page realized by one of the instructions stw, stwu, + // stwx, or stwux. + static bool is_memory_serialization(int instruction, JavaThread* thread, void* ucontext); + + // Support for NULL-checks + // + // Generates code that causes a NULL OS exception if the content of reg is NULL. + // If the accessed location is M[reg + offset] and the offset is known, provide the + // offset. No explicit code generation is needed if the offset is within a certain + // range (0 <= offset <= page_size). + + // Stack overflow checking + void bang_stack_with_offset(int offset); + + // If instruction is a stack bang of the form ld, stdu, or + // stdux, return the banged address. Otherwise, return 0. + static address get_stack_bang_address(int instruction, void* ucontext); + + // Atomics + // CmpxchgX sets condition register to cmpX(current, compare). + // (flag == ne) => (dest_current_value != compare_value), (!swapped) + // (flag == eq) => (dest_current_value == compare_value), ( swapped) + static inline bool cmpxchgx_hint_acquire_lock() { return true; } + // The stxcx will probably not be succeeded by a releasing store. + static inline bool cmpxchgx_hint_release_lock() { return false; } + static inline bool cmpxchgx_hint_atomic_update() { return false; } + + // Cmpxchg semantics + enum { + MemBarNone = 0, + MemBarRel = 1, + MemBarAcq = 2, + MemBarFenceAfter = 4 // use powers of 2 + }; + void cmpxchgw(ConditionRegister flag, + Register dest_current_value, Register compare_value, Register exchange_value, Register addr_base, + int semantics, bool cmpxchgx_hint = false, + Register int_flag_success = noreg, bool contention_hint = false); + void cmpxchgd(ConditionRegister flag, + Register dest_current_value, Register compare_value, Register exchange_value, Register addr_base, + int semantics, bool cmpxchgx_hint = false, + Register int_flag_success = noreg, Label* failed = NULL, bool contention_hint = false); + + // interface method calling + void lookup_interface_method(Register recv_klass, + Register intf_klass, + RegisterOrConstant itable_index, + Register method_result, + Register temp_reg, Register temp2_reg, + Label& no_such_interface); + + // virtual method calling + void lookup_virtual_method(Register recv_klass, + RegisterOrConstant vtable_index, + Register method_result); + + // Test sub_klass against super_klass, with fast and slow paths. + + // The fast path produces a tri-state answer: yes / no / maybe-slow. + // One of the three labels can be NULL, meaning take the fall-through. + // If super_check_offset is -1, the value is loaded up from super_klass. + // No registers are killed, except temp_reg and temp2_reg. + // If super_check_offset is not -1, temp2_reg is not used and can be noreg. 
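A hypothetical acquire-CAS built on the cmpxchg interface declared a few lines above could look as follows; the registers are placeholders and the call is not taken from this patch:

    // Illustrative only: compare-and-swap a 64-bit word with acquire semantics,
    // leaving the optional parameters at their declared defaults.
    cmpxchgd(CCR0,
             /*dest_current_value=*/R5, /*compare_value=*/R6,
             /*exchange_value=*/R7,     /*addr_base=*/R8,
             MacroAssembler::MemBarAcq,
             MacroAssembler::cmpxchgx_hint_acquire_lock());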
+ void check_klass_subtype_fast_path(Register sub_klass, + Register super_klass, + Register temp1_reg, + Register temp2_reg, + Label& L_success, + Label& L_failure); + + // The rest of the type check; must be wired to a corresponding fast path. + // It does not repeat the fast path logic, so don't use it standalone. + // The temp_reg can be noreg, if no temps are available. + // It can also be sub_klass or super_klass, meaning it's OK to kill that one. + // Updates the sub's secondary super cache as necessary. + void check_klass_subtype_slow_path(Register sub_klass, + Register super_klass, + Register temp1_reg, + Register temp2_reg, + Label* L_success = NULL, + Register result_reg = noreg); + + // Simplified, combined version, good for typical uses. + // Falls through on failure. + void check_klass_subtype(Register sub_klass, + Register super_klass, + Register temp1_reg, + Register temp2_reg, + Label& L_success); + + // Method handle support (JSR 292). + void check_method_handle_type(Register mtype_reg, Register mh_reg, Register temp_reg, Label& wrong_method_type); + + RegisterOrConstant argument_offset(RegisterOrConstant arg_slot, Register temp_reg, int extra_slot_offset = 0); + + // Biased locking support + // Upon entry,obj_reg must contain the target object, and mark_reg + // must contain the target object's header. + // Destroys mark_reg if an attempt is made to bias an anonymously + // biased lock. In this case a failure will go either to the slow + // case or fall through with the notEqual condition code set with + // the expectation that the slow case in the runtime will be called. + // In the fall-through case where the CAS-based lock is done, + // mark_reg is not destroyed. + void biased_locking_enter(ConditionRegister cr_reg, Register obj_reg, Register mark_reg, Register temp_reg, + Register temp2_reg, Label& done, Label* slow_case = NULL); + // Upon entry, the base register of mark_addr must contain the oop. + // Destroys temp_reg. + // If allow_delay_slot_filling is set to true, the next instruction + // emitted after this one will go in an annulled delay slot if the + // biased locking exit case failed. + void biased_locking_exit(ConditionRegister cr_reg, Register mark_addr, Register temp_reg, Label& done); + + void compiler_fast_lock_object( ConditionRegister flag, Register oop, Register box, Register tmp1, Register tmp2, Register tmp3); + void compiler_fast_unlock_object(ConditionRegister flag, Register oop, Register box, Register tmp1, Register tmp2, Register tmp3); + + // Support for serializing memory accesses between threads + void serialize_memory(Register thread, Register tmp1, Register tmp2); + + // GC barrier support. + void card_write_barrier_post(Register Rstore_addr, Register Rnew_val, Register Rtmp); + void card_table_write(jbyte* byte_map_base, Register Rtmp, Register Robj); + +#ifndef SERIALGC + // General G1 pre-barrier generator. + void g1_write_barrier_pre(Register Robj, RegisterOrConstant offset, Register Rpre_val, + Register Rtmp1, Register Rtmp2, bool needs_frame = false); + // General G1 post-barrier generator + void g1_write_barrier_post(Register Rstore_addr, Register Rnew_val, Register Rtmp1, + Register Rtmp2, Register Rtmp3, Label *filtered_ext = NULL); +#endif // SERIALGC + + // Support for managing the JavaThread pointer (i.e.; the reference to + // thread-local information). + + // Support for last Java frame (but use call_VM instead where possible): + // access R16_thread->last_Java_sp. 
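Conceptually, the card-table post barrier declared above (card_write_barrier_post / card_table_write) dirties the card covering the store address; in plain C++ this is roughly the following sketch. The 512-byte card size and the dirty value 0 are HotSpot conventions assumed here, not spelled out in this patch:

    #include <cstdint>

    typedef int8_t jbyte_sketch;   // stand-in for jbyte

    // byte_map_base is pre-biased so that indexing with the raw address works.
    static inline void post_barrier_sketch(volatile jbyte_sketch* byte_map_base,
                                           void* store_addr) {
      byte_map_base[(uintptr_t)store_addr >> 9] = 0;   // 0 == dirty card (assumed)
    }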
+ void set_last_Java_frame(Register last_java_sp, Register last_Java_pc); + void reset_last_Java_frame(void); + void set_top_ijava_frame_at_SP_as_last_Java_frame(Register sp, Register tmp1); + + // Read vm result from thread: oop_result = R16_thread->result; + void get_vm_result (Register oop_result); + void get_vm_result_2(Register metadata_result); + + static bool needs_explicit_null_check(intptr_t offset); + + // Trap-instruction-based checks. + // Range checks can be distinguished from zero checks as they check 32 bit, + // zero checks all 64 bits (tw, td). + inline void trap_null_check(Register a, trap_to_bits cmp = traptoEqual); + static bool is_trap_null_check(int x) { + return is_tdi(x, traptoEqual, -1/*any reg*/, 0) || + is_tdi(x, traptoGreaterThanUnsigned, -1/*any reg*/, 0); + } + + inline void trap_zombie_not_entrant(); + static bool is_trap_zombie_not_entrant(int x) { return is_tdi(x, traptoUnconditional, 0/*reg 0*/, 1); } + + inline void trap_should_not_reach_here(); + static bool is_trap_should_not_reach_here(int x) { return is_tdi(x, traptoUnconditional, 0/*reg 0*/, 2); } + + inline void trap_ic_miss_check(Register a, Register b); + static bool is_trap_ic_miss_check(int x) { + return is_td(x, traptoGreaterThanUnsigned | traptoLessThanUnsigned, -1/*any reg*/, -1/*any reg*/); + } + + // Implicit or explicit null check, jumps to static address exception_entry. + inline void null_check_throw(Register a, int offset, Register temp_reg, address exception_entry); + + // Check accessed object for null. Use SIGTRAP-based null checks on AIX. + inline void ld_with_trap_null_check(Register d, int si16, Register s1); + // Variant for heap OOPs including decompression of compressed OOPs. + inline void load_heap_oop_with_trap_null_check(Register d, RegisterOrConstant offs, Register s1); + + // Load heap oop and decompress. Loaded oop may not be null. + inline void load_heap_oop_not_null(Register d, RegisterOrConstant offs, Register s1 = noreg); + + // Null allowed. + inline void load_heap_oop(Register d, RegisterOrConstant offs, Register s1 = noreg); + + // Encode/decode heap oop. Oop may not be null, else en/decoding goes wrong. + inline void encode_heap_oop_not_null(Register d); + inline void decode_heap_oop_not_null(Register d); + + // Null allowed. + inline void decode_heap_oop(Register d); + + // Load/Store klass oop from klass field. Compress. + void load_klass(Register dst, Register src); + void load_klass_with_trap_null_check(Register dst, Register src); + void store_klass(Register dst_oop, Register klass, Register tmp = R0); + void decode_klass_not_null(Register dst, Register src = noreg); + void encode_klass_not_null(Register dst, Register src = noreg); + + // Load common heap base into register. + void reinit_heapbase(Register d, Register tmp = noreg); + + // SIGTRAP-based range checks for arrays. 
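A typical SIGTRAP-based array bounds check built from the helpers declared just below is a single unsigned compare-and-trap (register names are placeholders):

    // Trap if index >=u length; the unsigned compare also catches negative
    // indices, so one instruction covers both ends of the range.
    trap_range_check_ge(Rindex, Rlength);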
+ inline void trap_range_check_l(Register a, Register b); + inline void trap_range_check_l(Register a, int si16); + static bool is_trap_range_check_l(int x) { + return (is_tw (x, traptoLessThanUnsigned, -1/*any reg*/, -1/*any reg*/) || + is_twi(x, traptoLessThanUnsigned, -1/*any reg*/) ); + } + inline void trap_range_check_le(Register a, int si16); + static bool is_trap_range_check_le(int x) { + return is_twi(x, traptoEqual | traptoLessThanUnsigned, -1/*any reg*/); + } + inline void trap_range_check_g(Register a, int si16); + static bool is_trap_range_check_g(int x) { + return is_twi(x, traptoGreaterThanUnsigned, -1/*any reg*/); + } + inline void trap_range_check_ge(Register a, Register b); + inline void trap_range_check_ge(Register a, int si16); + static bool is_trap_range_check_ge(int x) { + return (is_tw (x, traptoEqual | traptoGreaterThanUnsigned, -1/*any reg*/, -1/*any reg*/) || + is_twi(x, traptoEqual | traptoGreaterThanUnsigned, -1/*any reg*/) ); + } + static bool is_trap_range_check(int x) { + return is_trap_range_check_l(x) || is_trap_range_check_le(x) || + is_trap_range_check_g(x) || is_trap_range_check_ge(x); + } + + // Needle of length 1. + void string_indexof_1(Register result, Register haystack, Register haycnt, + Register needle, jchar needleChar, + Register tmp1, Register tmp2); + // General indexof, eventually with constant needle length. + void string_indexof(Register result, Register haystack, Register haycnt, + Register needle, ciTypeArray* needle_values, Register needlecnt, int needlecntval, + Register tmp1, Register tmp2, Register tmp3, Register tmp4); + void string_compare(Register str1_reg, Register str2_reg, Register cnt1_reg, Register cnt2_reg, + Register result_reg, Register tmp_reg); + void char_arrays_equals(Register str1_reg, Register str2_reg, Register cnt_reg, Register result_reg, + Register tmp1_reg, Register tmp2_reg, Register tmp3_reg, Register tmp4_reg, + Register tmp5_reg); + void char_arrays_equalsImm(Register str1_reg, Register str2_reg, int cntval, Register result_reg, + Register tmp1_reg, Register tmp2_reg); + + // + // Debugging + // + + // assert on cr0 + void asm_assert(bool check_equal, const char* msg, int id); + void asm_assert_eq(const char* msg, int id) { asm_assert(true, msg, id); } + void asm_assert_ne(const char* msg, int id) { asm_assert(false, msg, id); } + + private: + void asm_assert_mems_zero(bool check_equal, int size, int mem_offset, Register mem_base, + const char* msg, int id); + + public: + + void asm_assert_mem8_is_zero(int mem_offset, Register mem_base, const char* msg, int id) { + asm_assert_mems_zero(true, 8, mem_offset, mem_base, msg, id); + } + void asm_assert_mem8_isnot_zero(int mem_offset, Register mem_base, const char* msg, int id) { + asm_assert_mems_zero(false, 8, mem_offset, mem_base, msg, id); + } + + // Verify R16_thread contents. + void verify_thread(); + + // Emit code to verify that reg contains a valid oop if +VerifyOops is set. + void verify_oop(Register reg, const char* s = "broken oop"); + + // TODO: verify method and klass metadata (compare against vptr?) 
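For a usage reference, set_last_Java_frame and reset_last_Java_frame earlier in this patch check the frame anchor with these helpers:

    // From MacroAssembler::set_last_Java_frame above:
    asm_assert_mem8_is_zero(in_bytes(JavaThread::last_Java_pc_offset()), R16_thread,
                            "last_Java_pc not zeroed before leaving Java", 0x200);
    // ... and from reset_last_Java_frame:
    asm_assert_mem8_isnot_zero(in_bytes(JavaThread::last_Java_sp_offset()),
                               R16_thread, "SP was not set, still zero", 0x202);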
+ void _verify_method_ptr(Register reg, const char * msg, const char * file, int line) {} + void _verify_klass_ptr(Register reg, const char * msg, const char * file, int line){} + +#define verify_method_ptr(reg) _verify_method_ptr(reg, "broken method " #reg, __FILE__, __LINE__) +#define verify_klass_ptr(reg) _verify_klass_ptr(reg, "broken klass " #reg, __FILE__, __LINE__) + + private: + + enum { + stop_stop = 0, + stop_untested = 1, + stop_unimplemented = 2, + stop_shouldnotreachhere = 3, + stop_end = 4 + }; + void stop(int type, const char* msg, int id); + + public: + // Prints msg, dumps registers and stops execution. + void stop (const char* msg = "", int id = 0) { stop(stop_stop, msg, id); } + void untested (const char* msg = "", int id = 0) { stop(stop_untested, msg, id); } + void unimplemented(const char* msg = "", int id = 0) { stop(stop_unimplemented, msg, id); } + void should_not_reach_here() { stop(stop_shouldnotreachhere, "", -1); } + + void zap_from_to(Register low, int before, Register high, int after, Register val, Register addr) PRODUCT_RETURN; +}; + +#endif // CPU_PPC_VM_MACROASSEMBLER_PPC_HPP diff --git a/src/cpu/ppc/vm/macroAssembler_ppc.inline.hpp b/src/cpu/ppc/vm/macroAssembler_ppc.inline.hpp new file mode 100644 --- /dev/null +++ b/src/cpu/ppc/vm/macroAssembler_ppc.inline.hpp @@ -0,0 +1,382 @@ +/* + * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright 2012, 2013 SAP AG. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef CPU_PPC_VM_MACROASSEMBLER_PPC_INLINE_HPP +#define CPU_PPC_VM_MACROASSEMBLER_PPC_INLINE_HPP + +#include "asm/assembler.inline.hpp" +#include "asm/macroAssembler.hpp" +#include "asm/codeBuffer.hpp" +#include "code/codeCache.hpp" + +inline bool MacroAssembler::is_ld_largeoffset(address a) { + const int inst1 = *(int *)a; + const int inst2 = *(int *)(a+4); + return (is_ld(inst1)) || + (is_addis(inst1) && is_ld(inst2) && inv_ra_field(inst2) == inv_rt_field(inst1)); +} + +inline int MacroAssembler::get_ld_largeoffset_offset(address a) { + assert(MacroAssembler::is_ld_largeoffset(a), "must be ld with large offset"); + + const int inst1 = *(int *)a; + if (is_ld(inst1)) { + return inv_d1_field(inst1); + } else { + const int inst2 = *(int *)(a+4); + return (inv_d1_field(inst1) << 16) + inv_d1_field(inst2); + } +} + +inline void MacroAssembler::round_to(Register r, int modulus) { + assert(is_power_of_2_long((jlong)modulus), "must be power of 2"); + addi(r, r, modulus-1); + clrrdi(r, r, log2_long((jlong)modulus)); +} + +// Move register if destination register and target register are different. +inline void MacroAssembler::mr_if_needed(Register rd, Register rs) { + if(rs !=rd) mr(rd, rs); +} + +// Address of the global TOC. +inline address MacroAssembler::global_toc() { + return CodeCache::low_bound(); +} + +// Offset of given address to the global TOC. +inline int MacroAssembler::offset_to_global_toc(const address addr) { + intptr_t offset = (intptr_t)addr - (intptr_t)MacroAssembler::global_toc(); + assert(Assembler::is_simm((long)offset, 31) && offset >= 0, "must be in range"); + return (int)offset; +} + +// Address of current method's TOC. +inline address MacroAssembler::method_toc() { + return code()->consts()->start(); +} + +// Offset of given address to current method's TOC. +inline int MacroAssembler::offset_to_method_toc(address addr) { + intptr_t offset = (intptr_t)addr - (intptr_t)method_toc(); + assert(is_simm((long)offset, 31) && offset >= 0, "must be in range"); + return (int)offset; +} + +inline bool MacroAssembler::is_calculate_address_from_global_toc_at(address a, address bound) { + const address inst2_addr = a; + const int inst2 = *(int *) a; + + // The relocation points to the second instruction, the addi. + if (!is_addi(inst2)) return false; + + // The addi reads and writes the same register dst. + const int dst = inv_rt_field(inst2); + if (inv_ra_field(inst2) != dst) return false; + + // Now, find the preceding addis which writes to dst. + int inst1 = 0; + address inst1_addr = inst2_addr - BytesPerInstWord; + while (inst1_addr >= bound) { + inst1 = *(int *) inst1_addr; + if (is_addis(inst1) && inv_rt_field(inst1) == dst) { + // stop, found the addis which writes dst + break; + } + inst1_addr -= BytesPerInstWord; + } + + if (!(inst1 == 0 || inv_ra_field(inst1) == 29 /* R29 */)) return false; + return is_addis(inst1); +} + +#ifdef _LP64 +// Detect narrow oop constants. +inline bool MacroAssembler::is_set_narrow_oop(address a, address bound) { + const address inst2_addr = a; + const int inst2 = *(int *)a; + + // The relocation points to the second instruction, the addi. + if (!is_addi(inst2)) return false; + + // The addi reads and writes the same register dst. + const int dst = inv_rt_field(inst2); + if (inv_ra_field(inst2) != dst) return false; + + // Now, find the preceding addis which writes to dst. 
+ int inst1 = 0; + address inst1_addr = inst2_addr - BytesPerInstWord; + while (inst1_addr >= bound) { + inst1 = *(int *) inst1_addr; + if (is_lis(inst1) && inv_rs_field(inst1) == dst) return true; + inst1_addr -= BytesPerInstWord; + } + return false; +} +#endif + + +inline bool MacroAssembler::is_load_const_at(address a) { + const int* p_inst = (int *) a; + bool b = is_lis(*p_inst++); + if (is_ori(*p_inst)) { + p_inst++; + b = b && is_rldicr(*p_inst++); // TODO: could be made more precise: `sldi'! + b = b && is_oris(*p_inst++); + b = b && is_ori(*p_inst); + } else if (is_lis(*p_inst)) { + p_inst++; + b = b && is_ori(*p_inst++); + b = b && is_ori(*p_inst); + // TODO: could enhance reliability by adding is_insrdi + } else return false; + return b; +} + +inline void MacroAssembler::set_oop_constant(jobject obj, Register d) { + set_oop(constant_oop_address(obj), d); +} + +inline void MacroAssembler::set_oop(AddressLiteral obj_addr, Register d) { + assert(obj_addr.rspec().type() == relocInfo::oop_type, "must be an oop reloc"); + load_const(d, obj_addr); +} + +inline void MacroAssembler::pd_patch_instruction(address branch, address target) { + jint& stub_inst = *(jint*) branch; + stub_inst = patched_branch(target - branch, stub_inst, 0); +} + +// Relocation of conditional far branches. +inline bool MacroAssembler::is_bc_far_variant1_at(address instruction_addr) { + // Variant 1, the 1st instruction contains the destination address: + // + // bcxx DEST + // endgroup + // + const int instruction_1 = *(int*)(instruction_addr); + const int instruction_2 = *(int*)(instruction_addr + 4); + return is_bcxx(instruction_1) && + (inv_bd_field(instruction_1, (intptr_t)instruction_addr) != (intptr_t)(instruction_addr + 2*4)) && + is_endgroup(instruction_2); +} + +// Relocation of conditional far branches. 
+inline bool MacroAssembler::is_bc_far_variant2_at(address instruction_addr) { + // Variant 2, the 2nd instruction contains the destination address: + // + // b!cxx SKIP + // bxx DEST + // SKIP: + // + const int instruction_1 = *(int*)(instruction_addr); + const int instruction_2 = *(int*)(instruction_addr + 4); + return is_bcxx(instruction_1) && + (inv_bd_field(instruction_1, (intptr_t)instruction_addr) == (intptr_t)(instruction_addr + 2*4)) && + is_bxx(instruction_2); +} + +// Relocation for conditional branches +inline bool MacroAssembler::is_bc_far_variant3_at(address instruction_addr) { + // Variant 3, far cond branch to the next instruction, already patched to nops: + // + // nop + // endgroup + // SKIP/DEST: + // + const int instruction_1 = *(int*)(instruction_addr); + const int instruction_2 = *(int*)(instruction_addr + 4); + return is_nop(instruction_1) && + is_endgroup(instruction_2); +} + + +// Convenience bc_far versions +inline void MacroAssembler::blt_far(ConditionRegister crx, Label& L, int optimize) { MacroAssembler::bc_far(bcondCRbiIs1, bi0(crx, less), L, optimize); } +inline void MacroAssembler::bgt_far(ConditionRegister crx, Label& L, int optimize) { MacroAssembler::bc_far(bcondCRbiIs1, bi0(crx, greater), L, optimize); } +inline void MacroAssembler::beq_far(ConditionRegister crx, Label& L, int optimize) { MacroAssembler::bc_far(bcondCRbiIs1, bi0(crx, equal), L, optimize); } +inline void MacroAssembler::bso_far(ConditionRegister crx, Label& L, int optimize) { MacroAssembler::bc_far(bcondCRbiIs1, bi0(crx, summary_overflow), L, optimize); } +inline void MacroAssembler::bge_far(ConditionRegister crx, Label& L, int optimize) { MacroAssembler::bc_far(bcondCRbiIs0, bi0(crx, less), L, optimize); } +inline void MacroAssembler::ble_far(ConditionRegister crx, Label& L, int optimize) { MacroAssembler::bc_far(bcondCRbiIs0, bi0(crx, greater), L, optimize); } +inline void MacroAssembler::bne_far(ConditionRegister crx, Label& L, int optimize) { MacroAssembler::bc_far(bcondCRbiIs0, bi0(crx, equal), L, optimize); } +inline void MacroAssembler::bns_far(ConditionRegister crx, Label& L, int optimize) { MacroAssembler::bc_far(bcondCRbiIs0, bi0(crx, summary_overflow), L, optimize); } + +inline address MacroAssembler::call_stub(Register function_entry) { + mtctr(function_entry); + bctrl(); + return pc(); +} + +inline void MacroAssembler::call_stub_and_return_to(Register function_entry, Register return_pc) { + assert_different_registers(function_entry, return_pc); + mtlr(return_pc); + mtctr(function_entry); + bctr(); +} + +// Get the pc where the last emitted call will return to. +inline address MacroAssembler::last_calls_return_pc() { + return _last_calls_return_pc; +} + +// Read from the polling page, its address is already in a register. +inline void MacroAssembler::load_from_polling_page(Register polling_page_address, int offset) { + ld(R0, offset, polling_page_address); +} + +// Trap-instruction-based checks. + +inline void MacroAssembler::trap_null_check(Register a, trap_to_bits cmp) { + assert(TrapBasedNullChecks, "sanity"); + tdi(cmp, a/*reg a*/, 0); +} +inline void MacroAssembler::trap_zombie_not_entrant() { + tdi(traptoUnconditional, 0/*reg 0*/, 1); +} +inline void MacroAssembler::trap_should_not_reach_here() { + tdi_unchecked(traptoUnconditional, 0/*reg 0*/, 2); +} + +inline void MacroAssembler::trap_ic_miss_check(Register a, Register b) { + td(traptoGreaterThanUnsigned | traptoLessThanUnsigned, a, b); +} + +// Do an explicit null check if access to a+offset will not raise a SIGSEGV. 
+// Either issue a trap instruction that raises SIGTRAP, or do a compare that +// branches to exception_entry. +// No support for compressed oops (base page of heap). Does not distinguish +// loads and stores. +inline void MacroAssembler::null_check_throw(Register a, int offset, Register temp_reg, address exception_entry) { + if (!ImplicitNullChecks || needs_explicit_null_check(offset) NOT_LINUX(|| true) /*!os::zero_page_read_protected()*/) { + if (TrapBasedNullChecks) { + assert(UseSIGTRAP, "sanity"); + trap_null_check(a); + } else { + Label ok; + cmpdi(CCR0, a, 0); + bne(CCR0, ok); + load_const_optimized(temp_reg, exception_entry); + mtctr(temp_reg); + bctr(); + bind(ok); + } + } +} + +inline void MacroAssembler::ld_with_trap_null_check(Register d, int si16, Register s1) { + if ( NOT_LINUX(true) LINUX_ONLY(false)/*!os::zero_page_read_protected()*/) { + if (TrapBasedNullChecks) { + trap_null_check(s1); + } + } + ld(d, si16, s1); +} + +// Attention: No null check for loaded uncompressed OOP. Can be used for loading klass field. +inline void MacroAssembler::load_heap_oop_with_trap_null_check(Register d, RegisterOrConstant si16, + Register s1) { + if ( NOT_LINUX(true)LINUX_ONLY(false) /*!os::zero_page_read_protected()*/) { + if (TrapBasedNullChecks) { + trap_null_check(s1); + } + } + load_heap_oop_not_null(d, si16, s1); +} + +inline void MacroAssembler::load_heap_oop_not_null(Register d, RegisterOrConstant offs, Register s1) { + if (UseCompressedOops) { + lwz(d, offs, s1); + // Attention: no null check here! + decode_heap_oop_not_null(d); + } else { + ld(d, offs, s1); + } +} + +inline void MacroAssembler::load_heap_oop(Register d, RegisterOrConstant offs, Register s1) { + if (UseCompressedOops) { + lwz(d, offs, s1); + decode_heap_oop(d); + } else { + ld(d, offs, s1); + } +} + +inline void MacroAssembler::encode_heap_oop_not_null(Register d) { + if (Universe::narrow_oop_base() != NULL) { + sub(d, d, R30); + } + if (Universe::narrow_oop_shift() != 0) { + srdi(d, d, LogMinObjAlignmentInBytes); + } +} + +inline void MacroAssembler::decode_heap_oop_not_null(Register d) { + if (Universe::narrow_oop_shift() != 0) { + assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); + sldi(d, d, LogMinObjAlignmentInBytes); + } + if (Universe::narrow_oop_base() != NULL) { + add(d, d, R30); + } +} + +inline void MacroAssembler::decode_heap_oop(Register d) { + Label isNull; + if (Universe::narrow_oop_base() != NULL) { + cmpwi(CCR0, d, 0); + beq(CCR0, isNull); + } + if (Universe::narrow_oop_shift() != 0) { + assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); + sldi(d, d, LogMinObjAlignmentInBytes); + } + if (Universe::narrow_oop_base() != NULL) { + add(d, d, R30); + } + bind(isNull); +} + +// SIGTRAP-based range checks for arrays. 
+inline void MacroAssembler::trap_range_check_l(Register a, Register b) { + tw (traptoLessThanUnsigned, a/*reg a*/, b/*reg b*/); +} +inline void MacroAssembler::trap_range_check_l(Register a, int si16) { + twi(traptoLessThanUnsigned, a/*reg a*/, si16); +} +inline void MacroAssembler::trap_range_check_le(Register a, int si16) { + twi(traptoEqual | traptoLessThanUnsigned, a/*reg a*/, si16); +} +inline void MacroAssembler::trap_range_check_g(Register a, int si16) { + twi(traptoGreaterThanUnsigned, a/*reg a*/, si16); +} +inline void MacroAssembler::trap_range_check_ge(Register a, Register b) { + tw (traptoEqual | traptoGreaterThanUnsigned, a/*reg a*/, b/*reg b*/); +} +inline void MacroAssembler::trap_range_check_ge(Register a, int si16) { + twi(traptoEqual | traptoGreaterThanUnsigned, a/*reg a*/, si16); +} + +#endif // CPU_PPC_VM_MACROASSEMBLER_PPC_INLINE_HPP diff --git a/src/cpu/ppc/vm/metaspaceShared_ppc.cpp b/src/cpu/ppc/vm/metaspaceShared_ppc.cpp new file mode 100644 --- /dev/null +++ b/src/cpu/ppc/vm/metaspaceShared_ppc.cpp @@ -0,0 +1,61 @@ +/* + * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright 2012, 2013 SAP AG. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "asm/codeBuffer.hpp" +#include "memory/metaspaceShared.hpp" + +// Generate the self-patching vtable method: +// +// This method will be called (as any other Klass virtual method) with +// the Klass itself as the first argument. Example: +// +// oop obj; +// int size = obj->klass()->klass_part()->oop_size(this); +// +// for which the virtual method call is Klass::oop_size(); +// +// The dummy method is called with the Klass object as the first +// operand, and an object as the second argument. +// + +//===================================================================== + +// All of the dummy methods in the vtable are essentially identical, +// differing only by an ordinal constant, and they bear no relationship +// to the original method which the caller intended. Also, there needs +// to be 'vtbl_list_size' instances of the vtable in order to +// differentiate between the 'vtbl_list_size' original Klass objects.
+ +void MetaspaceShared::generate_vtable_methods(void** vtbl_list, + void** vtable, + char** md_top, + char* md_end, + char** mc_top, + char* mc_end) { + Unimplemented(); +} + diff --git a/src/cpu/ppc/vm/methodHandles_ppc.cpp b/src/cpu/ppc/vm/methodHandles_ppc.cpp new file mode 100644 --- /dev/null +++ b/src/cpu/ppc/vm/methodHandles_ppc.cpp @@ -0,0 +1,540 @@ +/* + * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright 2012, 2013 SAP AG. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "interpreter/interpreter.hpp" +#include "memory/allocation.inline.hpp" +#include "prims/methodHandles.hpp" + +#define __ _masm-> + +#ifdef PRODUCT +#define BLOCK_COMMENT(str) // nothing +#define STOP(error) stop(error) +#else +#define BLOCK_COMMENT(str) __ block_comment(str) +#define STOP(error) block_comment(error); __ stop(error) +#endif + +#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") + +// Workaround for C++ overloading nastiness on '0' for RegisterOrConstant. 
+inline static RegisterOrConstant constant(int value) { + return RegisterOrConstant(value); +} + +void MethodHandles::load_klass_from_Class(MacroAssembler* _masm, Register klass_reg, Register temp_reg, Register temp2_reg) { + if (VerifyMethodHandles) + verify_klass(_masm, klass_reg, SystemDictionary::WK_KLASS_ENUM_NAME(java_lang_Class), temp_reg, temp2_reg, + "MH argument is a Class"); + __ ld(klass_reg, java_lang_Class::klass_offset_in_bytes(), klass_reg); +} + +#ifdef ASSERT +static int check_nonzero(const char* xname, int x) { + assert(x != 0, err_msg("%s should be nonzero", xname)); + return x; +} +#define NONZERO(x) check_nonzero(#x, x) +#else //ASSERT +#define NONZERO(x) (x) +#endif //ASSERT + +#ifdef ASSERT +void MethodHandles::verify_klass(MacroAssembler* _masm, + Register obj_reg, SystemDictionary::WKID klass_id, + Register temp_reg, Register temp2_reg, + const char* error_message) { + Klass** klass_addr = SystemDictionary::well_known_klass_addr(klass_id); + KlassHandle klass = SystemDictionary::well_known_klass(klass_id); + Label L_ok, L_bad; + BLOCK_COMMENT("verify_klass {"); + __ verify_oop(obj_reg); + __ cmpdi(CCR0, obj_reg, 0); + __ beq(CCR0, L_bad); + __ load_klass(temp_reg, obj_reg); + __ load_const_optimized(temp2_reg, (address) klass_addr); + __ ld(temp2_reg, 0, temp2_reg); + __ cmpd(CCR0, temp_reg, temp2_reg); + __ beq(CCR0, L_ok); + __ ld(temp_reg, klass->super_check_offset(), temp_reg); + __ cmpd(CCR0, temp_reg, temp2_reg); + __ beq(CCR0, L_ok); + __ BIND(L_bad); + __ stop(error_message); + __ BIND(L_ok); + BLOCK_COMMENT("} verify_klass"); +} + +void MethodHandles::verify_ref_kind(MacroAssembler* _masm, int ref_kind, Register member_reg, Register temp) { + Label L; + BLOCK_COMMENT("verify_ref_kind {"); + __ load_sized_value(temp, NONZERO(java_lang_invoke_MemberName::flags_offset_in_bytes()), member_reg, + sizeof(u4), /*is_signed*/ false); + // assert(sizeof(u4) == sizeof(java.lang.invoke.MemberName.flags), ""); + __ srwi( temp, temp, java_lang_invoke_MemberName::MN_REFERENCE_KIND_SHIFT); + __ andi(temp, temp, java_lang_invoke_MemberName::MN_REFERENCE_KIND_MASK); + __ cmpwi(CCR1, temp, ref_kind); + __ beq(CCR1, L); + { char* buf = NEW_C_HEAP_ARRAY(char, 100, mtInternal); + jio_snprintf(buf, 100, "verify_ref_kind expected %x", ref_kind); + if (ref_kind == JVM_REF_invokeVirtual || + ref_kind == JVM_REF_invokeSpecial) + // could do this for all ref_kinds, but would explode assembly code size + trace_method_handle(_masm, buf); + __ stop(buf); + } + BLOCK_COMMENT("} verify_ref_kind"); + __ BIND(L); +} + +#endif // ASSERT + +void MethodHandles::jump_from_method_handle(MacroAssembler* _masm, Register method, Register target, Register temp, + bool for_compiler_entry) { + assert(method == R19_method, "interpreter calling convention"); + assert_different_registers(method, target, temp); + + if (!for_compiler_entry && JvmtiExport::can_post_interpreter_events()) { + Label run_compiled_code; + // JVMTI events, such as single-stepping, are implemented partly by avoiding running + // compiled code in threads for which the event is enabled. Check here for + // interp_only_mode if these events CAN be enabled. + __ verify_thread(); + __ lwz(temp, in_bytes(JavaThread::interp_only_mode_offset()), R16_thread); + __ cmplwi(CCR0, temp, 0); + __ beq(CCR0, run_compiled_code); + __ ld(target, in_bytes(Method::interpreter_entry_offset()), R19_method); + __ mtctr(target); + __ bctr(); + __ BIND(run_compiled_code); + } + + const ByteSize entry_offset = for_compiler_entry ? 
Method::from_compiled_offset() : + Method::from_interpreted_offset(); + __ ld(target, in_bytes(entry_offset), R19_method); + __ mtctr(target); + __ bctr(); +} + + +void MethodHandles::jump_to_lambda_form(MacroAssembler* _masm, + Register recv, Register method_temp, + Register temp2, Register temp3, + bool for_compiler_entry) { + BLOCK_COMMENT("jump_to_lambda_form {"); + // This is the initial entry point of a lazy method handle. + // After type checking, it picks up the invoker from the LambdaForm. + assert_different_registers(recv, method_temp, temp2); // temp3 is only passed on + assert(method_temp == R19_method, "required register for loading method"); + + // Load the invoker, as MH -> MH.form -> LF.vmentry + __ verify_oop(recv); + __ load_heap_oop_not_null(method_temp, NONZERO(java_lang_invoke_MethodHandle::form_offset_in_bytes()), recv); + __ verify_oop(method_temp); + __ load_heap_oop_not_null(method_temp, NONZERO(java_lang_invoke_LambdaForm::vmentry_offset_in_bytes()), method_temp); + __ verify_oop(method_temp); + // the following assumes that a Method* is normally compressed in the vmtarget field: + __ ld(method_temp, NONZERO(java_lang_invoke_MemberName::vmtarget_offset_in_bytes()), method_temp); + + if (VerifyMethodHandles && !for_compiler_entry) { + // make sure recv is already on stack + __ ld(temp2, in_bytes(Method::const_offset()), method_temp); + __ load_sized_value(temp2, in_bytes(ConstMethod::size_of_parameters_offset()), temp2, + sizeof(u2), /*is_signed*/ false); + // assert(sizeof(u2) == sizeof(ConstMethod::_size_of_parameters), ""); + Label L; + __ ld(temp2, __ argument_offset(temp2, temp2, 0), R17_tos); + __ cmpd(CCR1, temp2, recv); + __ beq(CCR1, L); + __ stop("receiver not on stack"); + __ BIND(L); + } + + jump_from_method_handle(_masm, method_temp, temp2, temp3, for_compiler_entry); + BLOCK_COMMENT("} jump_to_lambda_form"); +} + + + +// Code generation +address MethodHandles::generate_method_handle_interpreter_entry(MacroAssembler* _masm, + vmIntrinsics::ID iid) { + const bool not_for_compiler_entry = false; // this is the interpreter entry + assert(is_signature_polymorphic(iid), "expected invoke iid"); + if (iid == vmIntrinsics::_invokeGeneric || + iid == vmIntrinsics::_compiledLambdaForm) { + // Perhaps surprisingly, the symbolic references visible to Java are not directly used. + // They are linked to Java-generated adapters via MethodHandleNatives.linkMethod. + // They all allow an appendix argument. + __ stop("Should not reach here"); // empty stubs make SG sick + return NULL; + } + + Register argbase = R17_tos; // parameter (preserved) + Register argslot = R3; + Register temp1 = R6; + Register param_size = R7; + + // here's where control starts out: + __ align(CodeEntryAlignment); + address entry_point = __ pc(); + + if (VerifyMethodHandles) { + Label L; + BLOCK_COMMENT("verify_intrinsic_id {"); + __ load_sized_value(temp1, Method::intrinsic_id_offset_in_bytes(), R19_method, + sizeof(u1), /*is_signed*/ false); + // assert(sizeof(u1) == sizeof(Method::_intrinsic_id), ""); + __ cmpwi(CCR1, temp1, (int) iid); + __ beq(CCR1, L); + if (iid == vmIntrinsics::_linkToVirtual || + iid == vmIntrinsics::_linkToSpecial) { + // could do this for all kinds, but would explode assembly code size + trace_method_handle(_masm, "bad Method*:intrinsic_id"); + } + __ stop("bad Method*::intrinsic_id"); + __ BIND(L); + BLOCK_COMMENT("} verify_intrinsic_id"); + } + + // First task: Find out how big the argument list is. 
+ int ref_kind = signature_polymorphic_intrinsic_ref_kind(iid); + assert(ref_kind != 0 || iid == vmIntrinsics::_invokeBasic, "must be _invokeBasic or a linkTo intrinsic"); + if (ref_kind == 0 || MethodHandles::ref_kind_has_receiver(ref_kind)) { + __ ld(param_size, in_bytes(Method::const_offset()), R19_method); + __ load_sized_value(param_size, in_bytes(ConstMethod::size_of_parameters_offset()), param_size, + sizeof(u2), /*is_signed*/ false); + // assert(sizeof(u2) == sizeof(ConstMethod::_size_of_parameters), ""); + } else { + DEBUG_ONLY(param_size = noreg); + } + + Register tmp_mh = noreg; + if (!is_signature_polymorphic_static(iid)) { + __ ld(tmp_mh = temp1, __ argument_offset(param_size, param_size, 0), argbase); + DEBUG_ONLY(param_size = noreg); + } + + if (TraceMethodHandles) { + if (tmp_mh != noreg) + __ mr(R23_method_handle, tmp_mh); // make stub happy + trace_method_handle_interpreter_entry(_masm, iid); + } + + if (iid == vmIntrinsics::_invokeBasic) { + generate_method_handle_dispatch(_masm, iid, tmp_mh, noreg, not_for_compiler_entry); + + } else { + // Adjust argument list by popping the trailing MemberName argument. + Register tmp_recv = noreg; + if (MethodHandles::ref_kind_has_receiver(ref_kind)) { + // Load the receiver (not the MH; the actual MemberName's receiver) up from the interpreter stack. + __ ld(tmp_recv = temp1, __ argument_offset(param_size, param_size, 0), argbase); + DEBUG_ONLY(param_size = noreg); + } + Register R19_member = R19_method; // MemberName ptr; incoming method ptr is dead now + __ ld(R19_member, RegisterOrConstant((intptr_t)8), argbase); + __ add(argbase, Interpreter::stackElementSize, argbase); + generate_method_handle_dispatch(_masm, iid, tmp_recv, R19_member, not_for_compiler_entry); + } + + return entry_point; +} + +void MethodHandles::generate_method_handle_dispatch(MacroAssembler* _masm, + vmIntrinsics::ID iid, + Register receiver_reg, + Register member_reg, + bool for_compiler_entry) { + assert(is_signature_polymorphic(iid), "expected invoke iid"); + Register temp1 = (for_compiler_entry ? R21_tmp1 : R7); + Register temp2 = (for_compiler_entry ? R22_tmp2 : R8); + Register temp3 = (for_compiler_entry ? R23_tmp3 : R9); + Register temp4 = (for_compiler_entry ? R24_tmp4 : R10); + if (receiver_reg != noreg) assert_different_registers(temp1, temp2, temp3, temp4, receiver_reg); + if (member_reg != noreg) assert_different_registers(temp1, temp2, temp3, temp4, member_reg); + + if (iid == vmIntrinsics::_invokeBasic) { + // indirect through MH.form.vmentry.vmtarget + jump_to_lambda_form(_masm, receiver_reg, R19_method, temp1, temp2, for_compiler_entry); + } else { + // The method is a member invoker used by direct method handles. + if (VerifyMethodHandles) { + // make sure the trailing argument really is a MemberName (caller responsibility) + verify_klass(_masm, member_reg, SystemDictionary::WK_KLASS_ENUM_NAME(MemberName_klass), + temp1, temp2, + "MemberName required for invokeVirtual etc."); + } + + Register temp1_recv_klass = temp1; + if (iid != vmIntrinsics::_linkToStatic) { + __ verify_oop(receiver_reg); + if (iid == vmIntrinsics::_linkToSpecial) { + // Don't actually load the klass; just null-check the receiver. 
+ __ null_check_throw(receiver_reg, 0, temp1, StubRoutines::throw_NullPointerException_at_call_entry()); + } else { + // load receiver klass itself + __ null_check_throw(receiver_reg, oopDesc::klass_offset_in_bytes(), + temp1, StubRoutines::throw_NullPointerException_at_call_entry()); + __ load_klass(temp1_recv_klass, receiver_reg); + __ verify_klass_ptr(temp1_recv_klass); + } + BLOCK_COMMENT("check_receiver {"); + // The receiver for the MemberName must be in receiver_reg. + // Check the receiver against the MemberName.clazz + if (VerifyMethodHandles && iid == vmIntrinsics::_linkToSpecial) { + // Did not load it above... + __ load_klass(temp1_recv_klass, receiver_reg); + __ verify_klass_ptr(temp1_recv_klass); + } + if (VerifyMethodHandles && iid != vmIntrinsics::_linkToInterface) { + Label L_ok; + Register temp2_defc = temp2; + __ load_heap_oop_not_null(temp2_defc, NONZERO(java_lang_invoke_MemberName::clazz_offset_in_bytes()), member_reg); + load_klass_from_Class(_masm, temp2_defc, temp3, temp4); + __ verify_klass_ptr(temp2_defc); + __ check_klass_subtype(temp1_recv_klass, temp2_defc, temp3, temp4, L_ok); + // If we get here, the type check failed! + __ stop("receiver class disagrees with MemberName.clazz"); + __ BIND(L_ok); + } + BLOCK_COMMENT("} check_receiver"); + } + if (iid == vmIntrinsics::_linkToSpecial || + iid == vmIntrinsics::_linkToStatic) { + DEBUG_ONLY(temp1_recv_klass = noreg); // these guys didn't load the recv_klass + } + + // Live registers at this point: + // member_reg - MemberName that was the trailing argument + // temp1_recv_klass - klass of stacked receiver, if needed + // O5_savedSP - interpreter linkage (if interpreted) + // O0..O5 - compiler arguments (if compiled) + + Label L_incompatible_class_change_error; + switch (iid) { + case vmIntrinsics::_linkToSpecial: + if (VerifyMethodHandles) { + verify_ref_kind(_masm, JVM_REF_invokeSpecial, member_reg, temp2); + } + __ ld(R19_method, NONZERO(java_lang_invoke_MemberName::vmtarget_offset_in_bytes()), member_reg); + break; + + case vmIntrinsics::_linkToStatic: + if (VerifyMethodHandles) { + verify_ref_kind(_masm, JVM_REF_invokeStatic, member_reg, temp2); + } + __ ld(R19_method, NONZERO(java_lang_invoke_MemberName::vmtarget_offset_in_bytes()), member_reg); + break; + + case vmIntrinsics::_linkToVirtual: + { + // same as TemplateTable::invokevirtual, + // minus the CP setup and profiling: + + if (VerifyMethodHandles) { + verify_ref_kind(_masm, JVM_REF_invokeVirtual, member_reg, temp2); + } + + // pick out the vtable index from the MemberName, and then we can discard it: + Register temp2_index = temp2; + __ ld(temp2_index, NONZERO(java_lang_invoke_MemberName::vmindex_offset_in_bytes()), member_reg); + + if (VerifyMethodHandles) { + Label L_index_ok; + __ cmpdi(CCR1, temp2_index, 0); + __ bge(CCR1, L_index_ok); + __ stop("no virtual index"); + __ BIND(L_index_ok); + } + + // Note: The verifier invariants allow us to ignore MemberName.clazz and vmtarget + // at this point. And VerifyMethodHandles has already checked clazz, if needed. 
+ + // get target Method* & entry point + __ lookup_virtual_method(temp1_recv_klass, temp2_index, R19_method); + break; + } + + case vmIntrinsics::_linkToInterface: + { + // same as TemplateTable::invokeinterface + // (minus the CP setup and profiling, with different argument motion) + if (VerifyMethodHandles) { + verify_ref_kind(_masm, JVM_REF_invokeInterface, member_reg, temp2); + } + + Register temp2_intf = temp2; + __ load_heap_oop_not_null(temp2_intf, NONZERO(java_lang_invoke_MemberName::clazz_offset_in_bytes()), member_reg); + load_klass_from_Class(_masm, temp2_intf, temp3, temp4); + __ verify_klass_ptr(temp2_intf); + + Register vtable_index = R19_method; + __ ld(vtable_index, NONZERO(java_lang_invoke_MemberName::vmindex_offset_in_bytes()), member_reg); + if (VerifyMethodHandles) { + Label L_index_ok; + __ cmpdi(CCR1, vtable_index, 0); + __ bge(CCR1, L_index_ok); + __ stop("invalid vtable index for MH.invokeInterface"); + __ BIND(L_index_ok); + } + + // given intf, index, and recv klass, dispatch to the implementation method + __ lookup_interface_method(temp1_recv_klass, temp2_intf, + // note: next two args must be the same: + vtable_index, R19_method, + temp3, temp4, + L_incompatible_class_change_error); + break; + } + + default: + fatal(err_msg_res("unexpected intrinsic %d: %s", iid, vmIntrinsics::name_at(iid))); + break; + } + + // Live at this point: + // R19_method + // O5_savedSP (if interpreted) + + // After figuring out which concrete method to call, jump into it. + // Note that this works in the interpreter with no data motion. + // But the compiled version will require that rcx_recv be shifted out. + __ verify_method_ptr(R19_method); + jump_from_method_handle(_masm, R19_method, temp1, temp2, for_compiler_entry); + + if (iid == vmIntrinsics::_linkToInterface) { + __ BIND(L_incompatible_class_change_error); + __ load_const_optimized(temp1, StubRoutines::throw_IncompatibleClassChangeError_entry()); + __ mtctr(temp1); + __ bctr(); + } + } +} + +#ifndef PRODUCT +void trace_method_handle_stub(const char* adaptername, + oopDesc* mh, + intptr_t* entry_sp, + intptr_t* saved_regs) { + + bool has_mh = (strstr(adaptername, "/static") == NULL && + strstr(adaptername, "linkTo") == NULL); // static linkers don't have MH + const char* mh_reg_name = has_mh ? "R23_method_handle" : "G23"; + tty->print_cr("MH %s %s="INTPTR_FORMAT " sp=" INTPTR_FORMAT, + adaptername, mh_reg_name, (intptr_t) mh, entry_sp); + + if (Verbose) { + tty->print_cr("Registers:"); + const int abi_offset = frame::abi_112_size / 8; + for (int i = R3->encoding(); i <= R13->encoding(); i++) { + Register r = as_Register(i); + int count = i - R3->encoding(); + // The registers are stored in reverse order on the stack (by save_volatile_gprs(R1_SP, abi_112_size)). + tty->print("%3s=" PTR_FORMAT, r->name(), saved_regs[abi_offset + count]); + if ((count + 1) % 4 == 0) { + tty->cr(); + } else { + tty->print(", "); + } + } + tty->cr(); + + { + // dumping last frame with frame::describe + + JavaThread* p = JavaThread::active(); + + ResourceMark rm; + PRESERVE_EXCEPTION_MARK; // may not be needed by safer and unexpensive here + FrameValues values; + + // Note: We want to allow trace_method_handle from any call site. + // While trace_method_handle creates a frame, it may be entered + // without a PC on the stack top (e.g. not just after a call). + // Walking that frame could lead to failures due to that invalid PC. 
+ // => carefully detect that frame when doing the stack walking + + // Current C frame + frame cur_frame = os::current_frame(); + + // Robust search of trace_calling_frame (independent of inlining). + // Assumes saved_regs comes from a pusha in the trace_calling_frame. + assert(cur_frame.sp() < saved_regs, "registers not saved on stack ?"); + frame trace_calling_frame = os::get_sender_for_C_frame(&cur_frame); + while (trace_calling_frame.fp() < saved_regs) { + trace_calling_frame = os::get_sender_for_C_frame(&trace_calling_frame); + } + + // safely create a frame and call frame::describe + intptr_t *dump_sp = trace_calling_frame.sender_sp(); + + frame dump_frame = frame(dump_sp); + dump_frame.describe(values, 1); + + values.describe(-1, saved_regs, "raw top of stack"); + + tty->print_cr("Stack layout:"); + values.print(p); + } + + if (has_mh && mh->is_oop()) { + mh->print(); + if (java_lang_invoke_MethodHandle::is_instance(mh)) { + if (java_lang_invoke_MethodHandle::form_offset_in_bytes() != 0) + java_lang_invoke_MethodHandle::form(mh)->print(); + } + } + } +} + +void MethodHandles::trace_method_handle(MacroAssembler* _masm, const char* adaptername) { + if (!TraceMethodHandles) return; + + BLOCK_COMMENT("trace_method_handle {"); + + int nbytes_save = 10 * 8; // 10 volatile gprs + __ save_LR_CR(R0); + __ mr(R0, R1_SP); // saved_sp + assert(Assembler::is_simm(-nbytes_save, 16), "Overwriting R0"); + // push_frame_abi112 only uses R0 if nbytes_save is wider than 16 bits + __ push_frame_abi112(nbytes_save, R0); + __ save_volatile_gprs(R1_SP, frame::abi_112_size); // Except R0. + + __ load_const(R3_ARG1, (address)adaptername); + __ mr(R4_ARG2, R23_method_handle); + __ mr(R5_ARG3, R0); // saved_sp + __ mr(R6_ARG4, R1_SP); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, trace_method_handle_stub)); + + __ restore_volatile_gprs(R1_SP, 112); // except R0 + __ pop_frame(); + __ restore_LR_CR(R0); + + BLOCK_COMMENT("} trace_method_handle"); +} +#endif // PRODUCT diff --git a/src/cpu/ppc/vm/methodHandles_ppc.hpp b/src/cpu/ppc/vm/methodHandles_ppc.hpp new file mode 100644 --- /dev/null +++ b/src/cpu/ppc/vm/methodHandles_ppc.hpp @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright 2012, 2013 SAP AG. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +// Platform-specific definitions for method handles. +// These definitions are inlined into class MethodHandles. + +// Adapters +//static unsigned int adapter_code_size() { +// return 32*K DEBUG_ONLY(+ 16*K) + (TraceMethodHandles ?
16*K : 0) + (VerifyMethodHandles ? 32*K : 0); +//} +enum /* platform_dependent_constants */ { + adapter_code_size = NOT_LP64(16000 DEBUG_ONLY(+ 25000)) LP64_ONLY(32000 DEBUG_ONLY(+ 150000)) +}; + +// Additional helper methods for MethodHandles code generation: +public: + static void load_klass_from_Class(MacroAssembler* _masm, Register klass_reg, Register temp_reg, Register temp2_reg); + + static void verify_klass(MacroAssembler* _masm, + Register obj_reg, SystemDictionary::WKID klass_id, + Register temp_reg, Register temp2_reg, + const char* error_message = "wrong klass") NOT_DEBUG_RETURN; + + static void verify_method_handle(MacroAssembler* _masm, Register mh_reg, + Register temp_reg, Register temp2_reg) { + Unimplemented(); + } + + static void verify_ref_kind(MacroAssembler* _masm, int ref_kind, Register member_reg, Register temp) NOT_DEBUG_RETURN; + + // Similar to InterpreterMacroAssembler::jump_from_interpreted. + // Takes care of special dispatch from single stepping too. + static void jump_from_method_handle(MacroAssembler* _masm, Register method, + Register temp, Register temp2, + bool for_compiler_entry); + + static void jump_to_lambda_form(MacroAssembler* _masm, + Register recv, Register method_temp, + Register temp2, Register temp3, + bool for_compiler_entry); diff --git a/src/cpu/ppc/vm/nativeInst_ppc.cpp b/src/cpu/ppc/vm/nativeInst_ppc.cpp new file mode 100644 --- /dev/null +++ b/src/cpu/ppc/vm/nativeInst_ppc.cpp @@ -0,0 +1,382 @@ +/* + * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright 2012, 2013 SAP AG. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "memory/resourceArea.hpp" +#include "nativeInst_ppc.hpp" +#include "oops/oop.inline.hpp" +#include "runtime/handles.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" +#include "utilities/ostream.hpp" +#ifdef COMPILER1 +#include "c1/c1_Runtime1.hpp" +#endif + +// We use an illtrap for marking a method as not_entrant or zombie iff !UseSIGTRAP +// Work around a C++ compiler bug which changes 'this' +bool NativeInstruction::is_sigill_zombie_not_entrant_at(address addr) { + assert(!UseSIGTRAP, "precondition"); + if (*(int*)addr != 0 /*illtrap*/) return false; + CodeBlob* cb = CodeCache::find_blob_unsafe(addr); + if (cb == NULL || !cb->is_nmethod()) return false; + nmethod *nm = (nmethod *)cb; + // This method is not_entrant or zombie iff the illtrap instruction is + // located at the verified entry point. 
+ return nm->verified_entry_point() == addr; +} + +#ifdef ASSERT +void NativeInstruction::verify() { + // Make sure code pattern is actually an instruction address. + address addr = addr_at(0); + if (addr == 0 || ((intptr_t)addr & 3) != 0) { + fatal("not an instruction address"); + } +} +#endif // ASSERT + +// Extract call destination from a NativeCall. The call might use a trampoline stub. +address NativeCall::destination() const { + address addr = (address)this; + address destination = Assembler::bxx_destination(addr); + + // Do we use a trampoline stub for this call? + CodeBlob* cb = CodeCache::find_blob_unsafe(addr); // Else we get assertion if nmethod is zombie. + assert(cb && cb->is_nmethod(), "sanity"); + nmethod *nm = (nmethod *)cb; + if (nm->stub_contains(destination) && is_NativeCallTrampolineStub_at(destination)) { + // Yes we do, so get the destination from the trampoline stub. + const address trampoline_stub_addr = destination; + destination = NativeCallTrampolineStub_at(trampoline_stub_addr)->destination(); + } + + return destination; +} + +// Similar to replace_mt_safe, but just changes the destination. The +// important thing is that free-running threads are able to execute this +// call instruction at all times. Thus, the displacement field must be +// instruction-word-aligned. +// +// Used in the runtime linkage of calls; see class CompiledIC. +// +// Add parameter assert_lock to switch off assertion +// during code generation, where no patching lock is needed. +void NativeCall::set_destination_mt_safe(address dest, bool assert_lock) { + assert(!assert_lock || + (Patching_lock->is_locked() || SafepointSynchronize::is_at_safepoint()), + "concurrent code patching"); + + ResourceMark rm; + int code_size = 1 * BytesPerInstWord; + address addr_call = addr_at(0); + assert(MacroAssembler::is_bl(*(int*)addr_call), "unexpected code at call-site"); + + CodeBuffer cb(addr_call, code_size + 1); + MacroAssembler* a = new MacroAssembler(&cb); + + // Patch the call. + if (ReoptimizeCallSequences && + a->is_within_range_of_b(dest, addr_call)) { + a->bl(dest); + } else { + address trampoline_stub_addr = get_trampoline(); + + // We did not find a trampoline stub because the current codeblob + // does not provide this information. The branch will be patched + // later during a final fixup, when all necessary information is + // available. + if (trampoline_stub_addr == 0) + return; + + // Patch the constant in the call's trampoline stub. + NativeCallTrampolineStub_at(trampoline_stub_addr)->set_destination(dest); + + a->bl(trampoline_stub_addr); + } + ICache::invalidate_range(addr_call, code_size); +} + +address NativeCall::get_trampoline() { + address call_addr = addr_at(0); + + CodeBlob *code = CodeCache::find_blob(call_addr); + assert(code != NULL, "Could not find the containing code blob"); + + // There are no relocations available when the code gets relocated + // because of CodeBuffer expansion. + if (code->relocation_size() == 0) + return NULL; + + address bl_destination = Assembler::bxx_destination(call_addr); + if (code->content_contains(bl_destination) && + is_NativeCallTrampolineStub_at(bl_destination)) + return bl_destination; + + // If the codeBlob is not a nmethod, this is because we get here from the + // CodeBlob constructor, which is called within the nmethod constructor. 
+ return trampoline_stub_Relocation::get_trampoline_for(call_addr, (nmethod*)code); +} + +#ifdef ASSERT +void NativeCall::verify() { + address addr = addr_at(0); + + if (!NativeCall::is_call_at(addr)) { + tty->print_cr("not a NativeCall at " PTR_FORMAT, addr); + // TODO: PPC port: Disassembler::decode(addr - 20, addr + 20, tty); + fatal(err_msg("not a NativeCall at " PTR_FORMAT, addr)); + } +} +#endif // ASSERT + +#ifdef ASSERT +void NativeFarCall::verify() { + address addr = addr_at(0); + + NativeInstruction::verify(); + if (!NativeFarCall::is_far_call_at(addr)) { + tty->print_cr("not a NativeFarCall at " PTR_FORMAT, addr); + // TODO: PPC port: Disassembler::decode(addr, 20, 20, tty); + fatal(err_msg("not a NativeFarCall at " PTR_FORMAT, addr)); + } +} +#endif // ASSERT + +address NativeMovConstReg::next_instruction_address() const { +#ifdef ASSERT + CodeBlob* nm = CodeCache::find_blob(instruction_address()); + assert(!MacroAssembler::is_set_narrow_oop(addr_at(0), nm->content_begin()), "Should not patch narrow oop here"); +#endif + + if (MacroAssembler::is_load_const_from_method_toc_at(addr_at(0))) { + return addr_at(load_const_from_method_toc_instruction_size); + } else { + return addr_at(load_const_instruction_size); + } +} + +intptr_t NativeMovConstReg::data() const { + address addr = addr_at(0); + CodeBlob* cb = CodeCache::find_blob_unsafe(addr); + + if (MacroAssembler::is_load_const_at(addr)) { + return MacroAssembler::get_const(addr); + } else if (MacroAssembler::is_set_narrow_oop(addr, cb->content_begin())) { + narrowOop no = (narrowOop)MacroAssembler::get_narrow_oop(addr, cb->content_begin()); + return (intptr_t)oopDesc::decode_heap_oop(no); + } else { + assert(MacroAssembler::is_load_const_from_method_toc_at(addr), "must be load_const_from_pool"); + + address ctable = cb->content_begin(); + int offset = MacroAssembler::get_offset_of_load_const_from_method_toc_at(addr); + return *(intptr_t *)(ctable + offset); + } +} + +address NativeMovConstReg::set_data_plain(intptr_t data, CodeBlob *cb) { + address addr = instruction_address(); + address next_address = NULL; + if (!cb) cb = CodeCache::find_blob(addr); + + if (cb != NULL && MacroAssembler::is_load_const_from_method_toc_at(addr)) { + // A load from the method's TOC (ctable). + assert(cb->is_nmethod(), "must be nmethod"); + const address ctable = cb->content_begin(); + const int toc_offset = MacroAssembler::get_offset_of_load_const_from_method_toc_at(addr); + *(intptr_t *)(ctable + toc_offset) = data; + next_address = addr + BytesPerInstWord; + } else if (cb != NULL && + MacroAssembler::is_calculate_address_from_global_toc_at(addr, cb->content_begin())) { + // A calculation relative to the global TOC. + const int invalidated_range = + MacroAssembler::patch_calculate_address_from_global_toc_at(addr, cb->content_begin(), + (address)data); + const address start = invalidated_range < 0 ? addr + invalidated_range : addr; + // FIXME: + const int range = invalidated_range < 0 ? 4 - invalidated_range : 8; + ICache::invalidate_range(start, range); + next_address = addr + 1 * BytesPerInstWord; + } else if (MacroAssembler::is_load_const_at(addr)) { + // A normal 5 instruction load_const code sequence. + // This is not mt safe, ok in methods like CodeBuffer::copy_code(). + MacroAssembler::patch_const(addr, (long)data); + ICache::invalidate_range(addr, load_const_instruction_size); + next_address = addr + 5 * BytesPerInstWord; + } else if (MacroAssembler::is_bl(* (int*) addr)) { + // A single branch-and-link instruction. 
+ ResourceMark rm; + const int code_size = 1 * BytesPerInstWord; + CodeBuffer cb(addr, code_size + 1); + MacroAssembler* a = new MacroAssembler(&cb); + a->bl((address) data); + ICache::invalidate_range(addr, code_size); + next_address = addr + code_size; + } else { + ShouldNotReachHere(); + } + + return next_address; +} + +void NativeMovConstReg::set_data(intptr_t data) { + // Store the value into the instruction stream. + CodeBlob *cb = CodeCache::find_blob(instruction_address()); + address next_address = set_data_plain(data, cb); + + // Also store the value into an oop_Relocation cell, if any. + if (cb && cb->is_nmethod()) { + RelocIterator iter((nmethod *) cb, instruction_address(), next_address); + oop* oop_addr = NULL; + Metadata** metadata_addr = NULL; + while (iter.next()) { + if (iter.type() == relocInfo::oop_type) { + oop_Relocation *r = iter.oop_reloc(); + if (oop_addr == NULL) { + oop_addr = r->oop_addr(); + *oop_addr = (oop)data; + } else { + assert(oop_addr == r->oop_addr(), "must be only one set-oop here") ; + } + } + if (iter.type() == relocInfo::metadata_type) { + metadata_Relocation *r = iter.metadata_reloc(); + if (metadata_addr == NULL) { + metadata_addr = r->metadata_addr(); + *metadata_addr = (Metadata*)data; + } else { + assert(metadata_addr == r->metadata_addr(), "must be only one set-metadata here"); + } + } + } + } +} + +void NativeMovConstReg::set_narrow_oop(narrowOop data, CodeBlob *code /* = NULL */) { + address addr = addr_at(0); + CodeBlob* cb = (code) ? code : CodeCache::find_blob(instruction_address()); + const int invalidated_range = + MacroAssembler::patch_set_narrow_oop(addr, cb->content_begin(), (long)data); + const address start = invalidated_range < 0 ? addr + invalidated_range : addr; + // FIXME: + const int range = invalidated_range < 0 ? 4 - invalidated_range : 8; + ICache::invalidate_range(start, range); +} + +// Do not use an assertion here. Let clients decide whether they only +// want this when assertions are enabled. +#ifdef ASSERT +void NativeMovConstReg::verify() { + address addr = addr_at(0); + CodeBlob* cb = CodeCache::find_blob_unsafe(addr); // find_nmethod() asserts if nmethod is zombie. + if (! MacroAssembler::is_load_const_at(addr) && + ! MacroAssembler::is_load_const_from_method_toc_at(addr) && + ! (cb != NULL && MacroAssembler::is_calculate_address_from_global_toc_at(addr, cb->content_begin())) && + ! (cb != NULL && MacroAssembler::is_set_narrow_oop(addr, cb->content_begin())) && + ! MacroAssembler::is_bl(*((int*) addr))) { + tty->print_cr("not a NativeMovConstReg at " PTR_FORMAT, addr); + // TODO: PPC port Disassembler::decode(addr, 20, 20, tty); + fatal(err_msg("not a NativeMovConstReg at " PTR_FORMAT, addr)); + } +} +#endif // ASSERT + +void NativeJump::patch_verified_entry(address entry, address verified_entry, address dest) { + ResourceMark rm; + int code_size = 1 * BytesPerInstWord; + CodeBuffer cb(verified_entry, code_size + 1); + MacroAssembler* a = new MacroAssembler(&cb); +#ifdef COMPILER2 + assert(dest == SharedRuntime::get_handle_wrong_method_stub(), "expected fixed destination of patch"); +#endif + // Patch this nmethod atomically. Always use illtrap/trap in debug build. + if (DEBUG_ONLY(false &&) a->is_within_range_of_b(dest, a->pc())) { + a->b(dest); + } else { + // The signal handler will continue at dest=OptoRuntime::handle_wrong_method_stub(). + if (TrapBasedNotEntrantChecks) { + // We use a special trap for marking a method as not_entrant or zombie. 
+ a->trap_zombie_not_entrant(); + } else { + // We use an illtrap for marking a method as not_entrant or zombie. + a->illtrap(); + } + } + ICache::invalidate_range(verified_entry, code_size); +} + +#ifdef ASSERT +void NativeJump::verify() { + address addr = addr_at(0); + + NativeInstruction::verify(); + if (!NativeJump::is_jump_at(addr)) { + tty->print_cr("not a NativeJump at " PTR_FORMAT, addr); + // TODO: PPC port: Disassembler::decode(addr, 20, 20, tty); + fatal(err_msg("not a NativeJump at " PTR_FORMAT, addr)); + } +} +#endif // ASSERT + +//------------------------------------------------------------------- + +// Call trampoline stubs. +// +// Layout and instructions of a call trampoline stub: +// 0: load the TOC (part 1) +// 4: load the TOC (part 2) +// 8: load the call target from the constant pool (part 1) +// [12: load the call target from the constant pool (part 2, optional)] +// ..: branch via CTR +// + +address NativeCallTrampolineStub::encoded_destination_addr() const { + address instruction_addr = addr_at(2 * BytesPerInstWord); + assert(MacroAssembler::is_ld_largeoffset(instruction_addr), + "must be a ld with large offset (from the constant pool)"); + + return instruction_addr; +} + +address NativeCallTrampolineStub::destination() const { + CodeBlob* cb = CodeCache::find_blob(addr_at(0)); + address ctable = cb->content_begin(); + + return *(address*)(ctable + destination_toc_offset()); +} + +int NativeCallTrampolineStub::destination_toc_offset() const { + return MacroAssembler::get_ld_largeoffset_offset(encoded_destination_addr()); +} + +void NativeCallTrampolineStub::set_destination(address new_destination) { + CodeBlob* cb = CodeCache::find_blob(addr_at(0)); + address ctable = cb->content_begin(); + + *(address*)(ctable + destination_toc_offset()) = new_destination; +} + diff --git a/src/cpu/ppc/vm/nativeInst_ppc.hpp b/src/cpu/ppc/vm/nativeInst_ppc.hpp new file mode 100644 --- /dev/null +++ b/src/cpu/ppc/vm/nativeInst_ppc.hpp @@ -0,0 +1,397 @@ +/* + * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright 2012, 2013 SAP AG. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef CPU_PPC_VM_NATIVEINST_PPC_HPP +#define CPU_PPC_VM_NATIVEINST_PPC_HPP + +#include "asm/assembler.hpp" +#include "asm/macroAssembler.hpp" +#include "memory/allocation.hpp" +#include "runtime/icache.hpp" +#include "runtime/os.hpp" +#include "utilities/top.hpp" + +// We have interfaces for the following instructions: +// +// - NativeInstruction +// - NativeCall +// - NativeFarCall +// - NativeMovConstReg +// - NativeJump +// - NativeIllegalInstruction +// - NativeConditionalFarBranch +// - NativeCallTrampolineStub + +// The base class for different kinds of native instruction abstractions. +// It provides the primitive operations to manipulate code relative to this. +class NativeInstruction VALUE_OBJ_CLASS_SPEC { + friend class Relocation; + + public: + bool is_sigtrap_ic_miss_check() { + assert(UseSIGTRAP, "precondition"); + return MacroAssembler::is_trap_ic_miss_check(long_at(0)); + } + + bool is_sigtrap_null_check() { + assert(UseSIGTRAP && TrapBasedNullChecks, "precondition"); + return MacroAssembler::is_trap_null_check(long_at(0)); + } + + // We use a special trap for marking a method as not_entrant or zombie + // iff UseSIGTRAP. + bool is_sigtrap_zombie_not_entrant() { + assert(UseSIGTRAP, "precondition"); + return MacroAssembler::is_trap_zombie_not_entrant(long_at(0)); + } + + // We use an illtrap for marking a method as not_entrant or zombie + // iff !UseSIGTRAP. + bool is_sigill_zombie_not_entrant() { + assert(!UseSIGTRAP, "precondition"); + // Work around a C++ compiler bug which changes 'this'. + return NativeInstruction::is_sigill_zombie_not_entrant_at(addr_at(0)); + } + static bool is_sigill_zombie_not_entrant_at(address addr); + + // SIGTRAP-based implicit range checks + bool is_sigtrap_range_check() { + assert(UseSIGTRAP && TrapBasedRangeChecks, "precondition"); + return MacroAssembler::is_trap_range_check(long_at(0)); + } + + // 'should not reach here'. + bool is_sigtrap_should_not_reach_here() { + return MacroAssembler::is_trap_should_not_reach_here(long_at(0)); + } + + bool is_safepoint_poll() { + // Is the current instruction a POTENTIAL read access to the polling page? + // The current arguments of the instruction are not checked! + return MacroAssembler::is_load_from_polling_page(long_at(0), NULL); + } + + bool is_memory_serialization(JavaThread *thread, void *ucontext) { + // Is the current instruction a write access of thread to the + // memory serialization page? + return MacroAssembler::is_memory_serialization(long_at(0), thread, ucontext); + } + + address get_stack_bang_address(void *ucontext) { + // If long_at(0) is not a stack bang, return 0. Otherwise, return + // banged address. + return MacroAssembler::get_stack_bang_address(long_at(0), ucontext); + } + + protected: + address addr_at(int offset) const { return address(this) + offset; } + int long_at(int offset) const { return *(int*)addr_at(offset); } + + public: + void verify() NOT_DEBUG_RETURN; +}; + +inline NativeInstruction* nativeInstruction_at(address address) { + NativeInstruction* inst = (NativeInstruction*)address; + inst->verify(); + return inst; +} + +// The NativeCall is an abstraction for accessing/manipulating call +// instructions. It is used to manipulate inline caches, primitive & +// dll calls, etc. +// +// Sparc distinguishes `NativeCall' and `NativeFarCall'. On PPC64, +// at present, we provide a single class `NativeCall' representing the +// sequence `load_const, mtctr, bctrl' or the sequence 'ld_from_toc, +// mtctr, bctrl'. 
+class NativeCall: public NativeInstruction { + public: + + enum specific_constants { + load_const_instruction_size = 28, + load_const_from_method_toc_instruction_size = 16, + instruction_size = 16 // Used in shared code for calls with reloc_info. + }; + + static bool is_call_at(address a) { + return Assembler::is_bl(*(int*)(a)); + } + + static bool is_call_before(address return_address) { + return NativeCall::is_call_at(return_address - 4); + } + + address instruction_address() const { + return addr_at(0); + } + + address next_instruction_address() const { + // We have only bl. + assert(MacroAssembler::is_bl(*(int*)instruction_address()), "Should be bl instruction!"); + return addr_at(4); + } + + address return_address() const { + return next_instruction_address(); + } + + address destination() const; + + // The parameter assert_lock disables the assertion during code generation. + void set_destination_mt_safe(address dest, bool assert_lock = true); + + address get_trampoline(); + + void verify_alignment() {} // do nothing on ppc + void verify() NOT_DEBUG_RETURN; +}; + +inline NativeCall* nativeCall_at(address instr) { + NativeCall* call = (NativeCall*)instr; + call->verify(); + return call; +} + +inline NativeCall* nativeCall_before(address return_address) { + NativeCall* call = NULL; + if (MacroAssembler::is_bl(*(int*)(return_address - 4))) + call = (NativeCall*)(return_address - 4); + call->verify(); + return call; +} + +// The NativeFarCall is an abstraction for accessing/manipulating native +// call-anywhere instructions. +// Used to call native methods which may be loaded anywhere in the address +// space, possibly out of reach of a call instruction. +class NativeFarCall: public NativeInstruction { + public: + // We use MacroAssembler::bl64_patchable() for implementing a + // call-anywhere instruction. + + // Checks whether instr points at a NativeFarCall instruction. + static bool is_far_call_at(address instr) { + return MacroAssembler::is_bl64_patchable_at(instr); + } + + // Does the NativeFarCall implementation use a pc-relative encoding + // of the call destination? + // Used when relocating code. + bool is_pcrelative() { + assert(MacroAssembler::is_bl64_patchable_at((address)this), + "unexpected call type"); + return MacroAssembler::is_bl64_patchable_pcrelative_at((address)this); + } + + // Returns the NativeFarCall's destination. + address destination() const { + assert(MacroAssembler::is_bl64_patchable_at((address)this), + "unexpected call type"); + return MacroAssembler::get_dest_of_bl64_patchable_at((address)this); + } + + // Sets the NativeCall's destination, not necessarily mt-safe. + // Used when relocating code. + void set_destination(address dest) { + // Set new destination (implementation of call may change here). + assert(MacroAssembler::is_bl64_patchable_at((address)this), + "unexpected call type"); + MacroAssembler::set_dest_of_bl64_patchable_at((address)this, dest); + } + + void verify() NOT_DEBUG_RETURN; +}; + +// Instantiates a NativeFarCall object starting at the given instruction +// address and returns the NativeFarCall object. +inline NativeFarCall* nativeFarCall_at(address instr) { + NativeFarCall* call = (NativeFarCall*)instr; + call->verify(); + return call; +} + +// An interface for accessing/manipulating native set_oop imm, reg instructions. +// (used to manipulate inlined data references, etc.) 
+class NativeMovConstReg: public NativeInstruction { + public: + + enum specific_constants { + load_const_instruction_size = 20, + load_const_from_method_toc_instruction_size = 8, + instruction_size = 8 // Used in shared code for calls with reloc_info. + }; + + address instruction_address() const { + return addr_at(0); + } + + address next_instruction_address() const; + + // (The [set_]data accessor respects oop_type relocs also.) + intptr_t data() const; + + // Patch the code stream. + address set_data_plain(intptr_t x, CodeBlob *code); + // Patch the code stream and oop pool. + void set_data(intptr_t x); + + // Patch narrow oop constants. Use this also for narrow klass. + void set_narrow_oop(narrowOop data, CodeBlob *code = NULL); + + void verify() NOT_DEBUG_RETURN; +}; + +inline NativeMovConstReg* nativeMovConstReg_at(address address) { + NativeMovConstReg* test = (NativeMovConstReg*)address; + test->verify(); + return test; +} + +// The NativeJump is an abstraction for accessing/manipulating native +// jump-anywhere instructions. +class NativeJump: public NativeInstruction { + public: + // We use MacroAssembler::b64_patchable() for implementing a + // jump-anywhere instruction. + + enum specific_constants { + instruction_size = MacroAssembler::b64_patchable_size + }; + + // Checks whether instr points at a NativeJump instruction. + static bool is_jump_at(address instr) { + return MacroAssembler::is_b64_patchable_at(instr) + || ( MacroAssembler::is_load_const_from_method_toc_at(instr) + && Assembler::is_mtctr(*(int*)(instr + 2 * 4)) + && Assembler::is_bctr(*(int*)(instr + 3 * 4))); + } + + // Does the NativeJump implementation use a pc-relative encoding + // of the call destination? + // Used when relocating code or patching jumps. + bool is_pcrelative() { + return MacroAssembler::is_b64_patchable_pcrelative_at((address)this); + } + + // Returns the NativeJump's destination. + address jump_destination() const { + if (MacroAssembler::is_b64_patchable_at((address)this)) { + return MacroAssembler::get_dest_of_b64_patchable_at((address)this); + } else if (MacroAssembler::is_load_const_from_method_toc_at((address)this) + && Assembler::is_mtctr(*(int*)((address)this + 2 * 4)) + && Assembler::is_bctr(*(int*)((address)this + 3 * 4))) { + return (address)((NativeMovConstReg *)this)->data(); + } else { + ShouldNotReachHere(); + return NULL; + } + } + + // Sets the NativeJump's destination, not necessarily mt-safe. + // Used when relocating code or patching jumps. + void set_jump_destination(address dest) { + // Set new destination (implementation of call may change here). + if (MacroAssembler::is_b64_patchable_at((address)this)) { + MacroAssembler::set_dest_of_b64_patchable_at((address)this, dest); + } else if (MacroAssembler::is_load_const_from_method_toc_at((address)this) + && Assembler::is_mtctr(*(int*)((address)this + 2 * 4)) + && Assembler::is_bctr(*(int*)((address)this + 3 * 4))) { + ((NativeMovConstReg *)this)->set_data((intptr_t)dest); + } else { + ShouldNotReachHere(); + } + } + + // MT-safe insertion of native jump at verified method entry + static void patch_verified_entry(address entry, address verified_entry, address dest); + + void verify() NOT_DEBUG_RETURN; + + static void check_verified_entry_alignment(address entry, address verified_entry) { + // We just patch one instruction on ppc64, so the jump doesn't have to + // be aligned. Nothing to do here. + } +}; + +// Instantiates a NativeJump object starting at the given instruction +// address and returns the NativeJump object. 
+inline NativeJump* nativeJump_at(address instr) { + NativeJump* call = (NativeJump*)instr; + call->verify(); + return call; +} + +// NativeConditionalFarBranch is an abstraction for accessing/manipulating +// conditional far branches. +class NativeConditionalFarBranch : public NativeInstruction { + public: + + static bool is_conditional_far_branch_at(address instr) { + return MacroAssembler::is_bc_far_at(instr); + } + + address branch_destination() const { + return MacroAssembler::get_dest_of_bc_far_at((address)this); + } + + void set_branch_destination(address dest) { + MacroAssembler::set_dest_of_bc_far_at((address)this, dest); + } +}; + +inline NativeConditionalFarBranch* NativeConditionalFarBranch_at(address address) { + assert(NativeConditionalFarBranch::is_conditional_far_branch_at(address), + "must be a conditional far branch"); + return (NativeConditionalFarBranch*)address; +} + +// Call trampoline stubs. +class NativeCallTrampolineStub : public NativeInstruction { + private: + + address encoded_destination_addr() const; + + public: + + address destination() const; + int destination_toc_offset() const; + + void set_destination(address new_destination); +}; + + +inline bool is_NativeCallTrampolineStub_at(address address) { + int first_instr = *(int*)address; + return Assembler::is_addis(first_instr) && + (Register)(intptr_t)Assembler::inv_rt_field(first_instr) == R12_scratch2; +} + +inline NativeCallTrampolineStub* NativeCallTrampolineStub_at(address address) { + assert(is_NativeCallTrampolineStub_at(address), "no call trampoline found"); + return (NativeCallTrampolineStub*)address; +} + +#endif // CPU_PPC_VM_NATIVEINST_PPC_HPP diff --git a/src/cpu/ppc/vm/registerMap_ppc.hpp b/src/cpu/ppc/vm/registerMap_ppc.hpp new file mode 100644 --- /dev/null +++ b/src/cpu/ppc/vm/registerMap_ppc.hpp @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright 2012, 2013 SAP AG. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_PPC_VM_REGISTERMAP_PPC_HPP +#define CPU_PPC_VM_REGISTERMAP_PPC_HPP + +// machine-dependent implementation for register maps + friend class frame; + + private: + // This is the hook for finding a register in a "well-known" location, + // such as a register block of a predetermined format. + // Since there is none, we just return NULL. + // See registerMap_sparc.hpp for an example of grabbing registers + // from register save areas of a standard layout.
+ address pd_location(VMReg reg) const { return NULL; } + + // no PD state to clear or copy: + void pd_clear() {} + void pd_initialize() {} + void pd_initialize_from(const RegisterMap* map) {} + +#endif // CPU_PPC_VM_REGISTERMAP_PPC_HPP diff --git a/src/cpu/ppc/vm/register_definitions_ppc.cpp b/src/cpu/ppc/vm/register_definitions_ppc.cpp new file mode 100644 --- /dev/null +++ b/src/cpu/ppc/vm/register_definitions_ppc.cpp @@ -0,0 +1,42 @@ +/* + * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright 2012, 2013 SAP AG. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +// make sure the defines don't screw up the declarations later on in this file +#define DONT_USE_REGISTER_DEFINES + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "asm/register.hpp" +#include "register_ppc.hpp" +#ifdef TARGET_ARCH_MODEL_32 +# include "interp_masm_32.hpp" +#endif +#ifdef TARGET_ARCH_MODEL_ppc_64 +# include "interp_masm_ppc_64.hpp" +#endif + +REGISTER_DEFINITION(Register, noreg); + +REGISTER_DEFINITION(FloatRegister, fnoreg); diff --git a/src/cpu/ppc/vm/register_ppc.cpp b/src/cpu/ppc/vm/register_ppc.cpp new file mode 100644 --- /dev/null +++ b/src/cpu/ppc/vm/register_ppc.cpp @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright 2012, 2013 SAP AG. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "register_ppc.hpp" + +const int ConcreteRegisterImpl::max_gpr = RegisterImpl::number_of_registers * 2; +const int ConcreteRegisterImpl::max_fpr = ConcreteRegisterImpl::max_gpr + + FloatRegisterImpl::number_of_registers * 2; +const int ConcreteRegisterImpl::max_cnd = ConcreteRegisterImpl::max_fpr + + ConditionRegisterImpl::number_of_registers; + +const char* RegisterImpl::name() const { + const char* names[number_of_registers] = { + "R0", "R1", "R2", "R3", "R4", "R5", "R6", "R7", + "R8", "R9", "R10", "R11", "R12", "R13", "R14", "R15", + "R16", "R17", "R18", "R19", "R20", "R21", "R22", "R23", + "R24", "R25", "R26", "R27", "R28", "R29", "R30", "R31" + }; + return is_valid() ? names[encoding()] : "noreg"; +} + +const char* ConditionRegisterImpl::name() const { + const char* names[number_of_registers] = { + "CCR0", "CCR1", "CCR2", "CCR3", "CCR4", "CCR5", "CCR6", "CCR7" + }; + return is_valid() ? names[encoding()] : "cnoreg"; +} + +const char* FloatRegisterImpl::name() const { + const char* names[number_of_registers] = { + "F0", "F1", "F2", "F3", "F4", "F5", "F6", "F7", + "F8", "F9", "F10", "F11", "F12", "F13", "F14", "F15", + "F16", "F17", "F18", "F19", "F20", "F21", "F22", "F23", + "F24", "F25", "F26", "F27", "F28", "F29", "F30", "F31" + }; + return is_valid() ? names[encoding()] : "fnoreg"; +} + +const char* SpecialRegisterImpl::name() const { + const char* names[number_of_registers] = { + "SR_XER", "SR_LR", "SR_CTR", "SR_VRSAVE", "SR_SPEFSCR", "SR_PPR" + }; + return is_valid() ? names[encoding()] : "snoreg"; +} + +const char* VectorRegisterImpl::name() const { + const char* names[number_of_registers] = { + "VR0", "VR1", "VR2", "VR3", "VR4", "VR5", "VR6", "VR7", + "VR8", "VR9", "VR10", "VR11", "VR12", "VR13", "VR14", "VR15", + "VR16", "VR17", "VR18", "VR19", "VR20", "VR21", "VR22", "VR23", + "VR24", "VR25", "VR26", "VR27", "VR28", "VR29", "VR30", "VR31" + }; + return is_valid() ? names[encoding()] : "vnoreg"; +} diff --git a/src/cpu/ppc/vm/register_ppc.hpp b/src/cpu/ppc/vm/register_ppc.hpp new file mode 100644 --- /dev/null +++ b/src/cpu/ppc/vm/register_ppc.hpp @@ -0,0 +1,635 @@ +/* + * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright 2012, 2013 SAP AG. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions.
+ * + */ + +#ifndef CPU_PPC_VM_REGISTER_PPC_HPP +#define CPU_PPC_VM_REGISTER_PPC_HPP + +#include "asm/register.hpp" +#include "vm_version_ppc.hpp" + +// forward declaration +class Address; +class VMRegImpl; +typedef VMRegImpl* VMReg; + +// PPC64 registers +// +// See "64-bit PowerPC ELF ABI Supplement 1.7", IBM Corp. (2003-10-29). +// (http://math-atlas.sourceforge.net/devel/assembly/PPC-elf64abi-1.7.pdf) +// +// r0 Register used in function prologs (volatile) +// r1 Stack pointer (nonvolatile) +// r2 TOC pointer (volatile) +// r3 Parameter and return value (volatile) +// r4-r10 Function parameters (volatile) +// r11 Register used in calls by pointer and as an environment pointer for languages which require one (volatile) +// r12 Register used for exception handling and glink code (volatile) +// r13 Reserved for use as system thread ID +// r14-r31 Local variables (nonvolatile) +// +// f0 Scratch register (volatile) +// f1-f4 Floating point parameters and return value (volatile) +// f5-f13 Floating point parameters (volatile) +// f14-f31 Floating point values (nonvolatile) +// +// LR Link register for return address (volatile) +// CTR Loop counter (volatile) +// XER Fixed point exception register (volatile) +// FPSCR Floating point status and control register (volatile) +// +// CR0-CR1 Condition code fields (volatile) +// CR2-CCR4 Condition code fields (nonvolatile) +// CCR5-CCR7 Condition code fields (volatile) +// +// ---------------------------------------------- +// On processors with the VMX feature: +// v0-v1 Volatile scratch registers +// v2-v13 Volatile vector parameters registers +// v14-v19 Volatile scratch registers +// v20-v31 Non-volatile registers +// vrsave Non-volatile 32-bit register + + +// Use Register as shortcut +class RegisterImpl; +typedef RegisterImpl* Register; + +inline Register as_Register(int encoding) { + assert(encoding >= 0 && encoding < 32, "bad register encoding"); + return (Register)(intptr_t)encoding; +} + +// The implementation of integer registers for the Power architecture +class RegisterImpl: public AbstractRegisterImpl { + public: + enum { + number_of_registers = 32 + }; + + // general construction + inline friend Register as_Register(int encoding); + + // accessors + int encoding() const { assert(is_valid(), "invalid register"); return value(); } + VMReg as_VMReg(); + Register successor() const { return as_Register(encoding() + 1); } + + // testers + bool is_valid() const { return ( 0 <= (value()&0x7F) && (value()&0x7F) < number_of_registers); } + bool is_volatile() const { return ( 0 <= (value()&0x7F) && (value()&0x7F) <= 13 ); } + bool is_nonvolatile() const { return (14 <= (value()&0x7F) && (value()&0x7F) <= 31 ); } + + const char* name() const; +}; + +// The integer registers of the PPC architecture +CONSTANT_REGISTER_DECLARATION(Register, noreg, (-1)); + +CONSTANT_REGISTER_DECLARATION(Register, R0, (0)); +CONSTANT_REGISTER_DECLARATION(Register, R1, (1)); +CONSTANT_REGISTER_DECLARATION(Register, R2, (2)); +CONSTANT_REGISTER_DECLARATION(Register, R3, (3)); +CONSTANT_REGISTER_DECLARATION(Register, R4, (4)); +CONSTANT_REGISTER_DECLARATION(Register, R5, (5)); +CONSTANT_REGISTER_DECLARATION(Register, R6, (6)); +CONSTANT_REGISTER_DECLARATION(Register, R7, (7)); +CONSTANT_REGISTER_DECLARATION(Register, R8, (8)); +CONSTANT_REGISTER_DECLARATION(Register, R9, (9)); +CONSTANT_REGISTER_DECLARATION(Register, R10, (10)); +CONSTANT_REGISTER_DECLARATION(Register, R11, (11)); +CONSTANT_REGISTER_DECLARATION(Register, R12, (12)); 
+CONSTANT_REGISTER_DECLARATION(Register, R13, (13)); +CONSTANT_REGISTER_DECLARATION(Register, R14, (14)); +CONSTANT_REGISTER_DECLARATION(Register, R15, (15)); +CONSTANT_REGISTER_DECLARATION(Register, R16, (16)); +CONSTANT_REGISTER_DECLARATION(Register, R17, (17)); +CONSTANT_REGISTER_DECLARATION(Register, R18, (18)); +CONSTANT_REGISTER_DECLARATION(Register, R19, (19)); +CONSTANT_REGISTER_DECLARATION(Register, R20, (20)); +CONSTANT_REGISTER_DECLARATION(Register, R21, (21)); +CONSTANT_REGISTER_DECLARATION(Register, R22, (22)); +CONSTANT_REGISTER_DECLARATION(Register, R23, (23)); +CONSTANT_REGISTER_DECLARATION(Register, R24, (24)); +CONSTANT_REGISTER_DECLARATION(Register, R25, (25)); +CONSTANT_REGISTER_DECLARATION(Register, R26, (26)); +CONSTANT_REGISTER_DECLARATION(Register, R27, (27)); +CONSTANT_REGISTER_DECLARATION(Register, R28, (28)); +CONSTANT_REGISTER_DECLARATION(Register, R29, (29)); +CONSTANT_REGISTER_DECLARATION(Register, R30, (30)); +CONSTANT_REGISTER_DECLARATION(Register, R31, (31)); + + +// +// Because Power has many registers, #define'ing values for them is +// beneficial in code size and is worth the cost of some of the +// dangers of defines. If a particular file has a problem with these +// defines then it's possible to turn them off in that file by +// defining DONT_USE_REGISTER_DEFINES. register_definitions_ppc.cpp +// does that so that it's able to provide real definitions of these +// registers for use in debuggers and such. +// + +#ifndef DONT_USE_REGISTER_DEFINES +#define noreg ((Register)(noreg_RegisterEnumValue)) + +#define R0 ((Register)(R0_RegisterEnumValue)) +#define R1 ((Register)(R1_RegisterEnumValue)) +#define R2 ((Register)(R2_RegisterEnumValue)) +#define R3 ((Register)(R3_RegisterEnumValue)) +#define R4 ((Register)(R4_RegisterEnumValue)) +#define R5 ((Register)(R5_RegisterEnumValue)) +#define R6 ((Register)(R6_RegisterEnumValue)) +#define R7 ((Register)(R7_RegisterEnumValue)) +#define R8 ((Register)(R8_RegisterEnumValue)) +#define R9 ((Register)(R9_RegisterEnumValue)) +#define R10 ((Register)(R10_RegisterEnumValue)) +#define R11 ((Register)(R11_RegisterEnumValue)) +#define R12 ((Register)(R12_RegisterEnumValue)) +#define R13 ((Register)(R13_RegisterEnumValue)) +#define R14 ((Register)(R14_RegisterEnumValue)) +#define R15 ((Register)(R15_RegisterEnumValue)) +#define R16 ((Register)(R16_RegisterEnumValue)) +#define R17 ((Register)(R17_RegisterEnumValue)) +#define R18 ((Register)(R18_RegisterEnumValue)) +#define R19 ((Register)(R19_RegisterEnumValue)) +#define R20 ((Register)(R20_RegisterEnumValue)) +#define R21 ((Register)(R21_RegisterEnumValue)) +#define R22 ((Register)(R22_RegisterEnumValue)) +#define R23 ((Register)(R23_RegisterEnumValue)) +#define R24 ((Register)(R24_RegisterEnumValue)) +#define R25 ((Register)(R25_RegisterEnumValue)) +#define R26 ((Register)(R26_RegisterEnumValue)) +#define R27 ((Register)(R27_RegisterEnumValue)) +#define R28 ((Register)(R28_RegisterEnumValue)) +#define R29 ((Register)(R29_RegisterEnumValue)) +#define R30 ((Register)(R30_RegisterEnumValue)) +#define R31 ((Register)(R31_RegisterEnumValue)) +#endif + +// Use ConditionRegister as shortcut +class ConditionRegisterImpl; +typedef ConditionRegisterImpl* ConditionRegister; + +inline ConditionRegister as_ConditionRegister(int encoding) { + assert(encoding >= 0 && encoding < 8, "bad condition register encoding"); + return (ConditionRegister)(intptr_t)encoding; +} + +// The implementation of condition register(s) for the PPC architecture +class ConditionRegisterImpl: public
AbstractRegisterImpl { + public: + enum { + number_of_registers = 8 + }; + + // construction. + inline friend ConditionRegister as_ConditionRegister(int encoding); + + // accessors + int encoding() const { assert(is_valid(), "invalid register"); return value(); } + VMReg as_VMReg(); + + // testers + bool is_valid() const { return (0 <= value() && value() < number_of_registers); } + bool is_nonvolatile() const { return (2 <= (value()&0x7F) && (value()&0x7F) <= 4 ); } + + const char* name() const; +}; + +// The (parts of the) condition register(s) of the PPC architecture +// sys/ioctl.h on AIX defines CR0-CR3, so I name these CCR. +CONSTANT_REGISTER_DECLARATION(ConditionRegister, CCR0, (0)); +CONSTANT_REGISTER_DECLARATION(ConditionRegister, CCR1, (1)); +CONSTANT_REGISTER_DECLARATION(ConditionRegister, CCR2, (2)); +CONSTANT_REGISTER_DECLARATION(ConditionRegister, CCR3, (3)); +CONSTANT_REGISTER_DECLARATION(ConditionRegister, CCR4, (4)); +CONSTANT_REGISTER_DECLARATION(ConditionRegister, CCR5, (5)); +CONSTANT_REGISTER_DECLARATION(ConditionRegister, CCR6, (6)); +CONSTANT_REGISTER_DECLARATION(ConditionRegister, CCR7, (7)); + +#ifndef DONT_USE_REGISTER_DEFINES + +#define CCR0 ((ConditionRegister)(CCR0_ConditionRegisterEnumValue)) +#define CCR1 ((ConditionRegister)(CCR1_ConditionRegisterEnumValue)) +#define CCR2 ((ConditionRegister)(CCR2_ConditionRegisterEnumValue)) +#define CCR3 ((ConditionRegister)(CCR3_ConditionRegisterEnumValue)) +#define CCR4 ((ConditionRegister)(CCR4_ConditionRegisterEnumValue)) +#define CCR5 ((ConditionRegister)(CCR5_ConditionRegisterEnumValue)) +#define CCR6 ((ConditionRegister)(CCR6_ConditionRegisterEnumValue)) +#define CCR7 ((ConditionRegister)(CCR7_ConditionRegisterEnumValue)) + +#endif // DONT_USE_REGISTER_DEFINES + + +// Use FloatRegister as shortcut +class FloatRegisterImpl; +typedef FloatRegisterImpl* FloatRegister; + +inline FloatRegister as_FloatRegister(int encoding) { + assert(encoding >= 0 && encoding < 32, "bad float register encoding"); + return (FloatRegister)(intptr_t)encoding; +} + +// The implementation of float registers for the PPC architecture +class FloatRegisterImpl: public AbstractRegisterImpl { + public: + enum { + number_of_registers = 32 + }; + + // construction + inline friend FloatRegister as_FloatRegister(int encoding); + + // accessors + int encoding() const { assert(is_valid(), "invalid register"); return value(); } + VMReg as_VMReg(); + FloatRegister successor() const { return as_FloatRegister(encoding() + 1); } + + // testers + bool is_valid() const { return (0 <= value() && value() < number_of_registers); } + + const char* name() const; +}; + +// The float registers of the PPC architecture +CONSTANT_REGISTER_DECLARATION(FloatRegister, fnoreg, (-1)); + +CONSTANT_REGISTER_DECLARATION(FloatRegister, F0, ( 0)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, F1, ( 1)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, F2, ( 2)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, F3, ( 3)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, F4, ( 4)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, F5, ( 5)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, F6, ( 6)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, F7, ( 7)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, F8, ( 8)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, F9, ( 9)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, F10, (10)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, F11, (11)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, F12, (12)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, F13, (13)); 
+CONSTANT_REGISTER_DECLARATION(FloatRegister, F14, (14)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, F15, (15)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, F16, (16)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, F17, (17)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, F18, (18)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, F19, (19)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, F20, (20)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, F21, (21)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, F22, (22)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, F23, (23)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, F24, (24)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, F25, (25)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, F26, (26)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, F27, (27)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, F28, (28)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, F29, (29)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, F30, (30)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, F31, (31)); + +#ifndef DONT_USE_REGISTER_DEFINES +#define fnoreg ((FloatRegister)(fnoreg_FloatRegisterEnumValue)) +#define F0 ((FloatRegister)( F0_FloatRegisterEnumValue)) +#define F1 ((FloatRegister)( F1_FloatRegisterEnumValue)) +#define F2 ((FloatRegister)( F2_FloatRegisterEnumValue)) +#define F3 ((FloatRegister)( F3_FloatRegisterEnumValue)) +#define F4 ((FloatRegister)( F4_FloatRegisterEnumValue)) +#define F5 ((FloatRegister)( F5_FloatRegisterEnumValue)) +#define F6 ((FloatRegister)( F6_FloatRegisterEnumValue)) +#define F7 ((FloatRegister)( F7_FloatRegisterEnumValue)) +#define F8 ((FloatRegister)( F8_FloatRegisterEnumValue)) +#define F9 ((FloatRegister)( F9_FloatRegisterEnumValue)) +#define F10 ((FloatRegister)( F10_FloatRegisterEnumValue)) +#define F11 ((FloatRegister)( F11_FloatRegisterEnumValue)) +#define F12 ((FloatRegister)( F12_FloatRegisterEnumValue)) +#define F13 ((FloatRegister)( F13_FloatRegisterEnumValue)) +#define F14 ((FloatRegister)( F14_FloatRegisterEnumValue)) +#define F15 ((FloatRegister)( F15_FloatRegisterEnumValue)) +#define F16 ((FloatRegister)( F16_FloatRegisterEnumValue)) +#define F17 ((FloatRegister)( F17_FloatRegisterEnumValue)) +#define F18 ((FloatRegister)( F18_FloatRegisterEnumValue)) +#define F19 ((FloatRegister)( F19_FloatRegisterEnumValue)) +#define F20 ((FloatRegister)( F20_FloatRegisterEnumValue)) +#define F21 ((FloatRegister)( F21_FloatRegisterEnumValue)) +#define F22 ((FloatRegister)( F22_FloatRegisterEnumValue)) +#define F23 ((FloatRegister)( F23_FloatRegisterEnumValue)) +#define F24 ((FloatRegister)( F24_FloatRegisterEnumValue)) +#define F25 ((FloatRegister)( F25_FloatRegisterEnumValue)) +#define F26 ((FloatRegister)( F26_FloatRegisterEnumValue)) +#define F27 ((FloatRegister)( F27_FloatRegisterEnumValue)) +#define F28 ((FloatRegister)( F28_FloatRegisterEnumValue)) +#define F29 ((FloatRegister)( F29_FloatRegisterEnumValue)) +#define F30 ((FloatRegister)( F30_FloatRegisterEnumValue)) +#define F31 ((FloatRegister)( F31_FloatRegisterEnumValue)) +#endif // DONT_USE_REGISTER_DEFINES + +// Use SpecialRegister as shortcut +class SpecialRegisterImpl; +typedef SpecialRegisterImpl* SpecialRegister; + +inline SpecialRegister as_SpecialRegister(int encoding) { + return (SpecialRegister)(intptr_t)encoding; +} + +// The implementation of special registers for the Power architecture (LR, CTR and friends) +class SpecialRegisterImpl: public AbstractRegisterImpl { + public: + enum { + number_of_registers = 6 + }; + + // construction + inline friend 
SpecialRegister as_SpecialRegister(int encoding); + + // accessors + int encoding() const { assert(is_valid(), "invalid register"); return value(); } + VMReg as_VMReg(); + + // testers + bool is_valid() const { return 0 <= value() && value() < number_of_registers; } + + const char* name() const; +}; + +// The special registers of the PPC architecture +CONSTANT_REGISTER_DECLARATION(SpecialRegister, SR_XER, (0)); +CONSTANT_REGISTER_DECLARATION(SpecialRegister, SR_LR, (1)); +CONSTANT_REGISTER_DECLARATION(SpecialRegister, SR_CTR, (2)); +CONSTANT_REGISTER_DECLARATION(SpecialRegister, SR_VRSAVE, (3)); +CONSTANT_REGISTER_DECLARATION(SpecialRegister, SR_SPEFSCR, (4)); +CONSTANT_REGISTER_DECLARATION(SpecialRegister, SR_PPR, (5)); + +#ifndef DONT_USE_REGISTER_DEFINES +#define SR_XER ((SpecialRegister)(SR_XER_SpecialRegisterEnumValue)) +#define SR_LR ((SpecialRegister)(SR_LR_SpecialRegisterEnumValue)) +#define SR_CTR ((SpecialRegister)(SR_CTR_SpecialRegisterEnumValue)) +#define SR_VRSAVE ((SpecialRegister)(SR_VRSAVE_SpecialRegisterEnumValue)) +#define SR_SPEFSCR ((SpecialRegister)(SR_SPEFSCR_SpecialRegisterEnumValue)) +#define SR_PPR ((SpecialRegister)(SR_PPR_SpecialRegisterEnumValue)) +#endif // DONT_USE_REGISTER_DEFINES + + +// Use VectorRegister as shortcut +class VectorRegisterImpl; +typedef VectorRegisterImpl* VectorRegister; + +inline VectorRegister as_VectorRegister(int encoding) { + return (VectorRegister)(intptr_t)encoding; +} + +// The implementation of vector registers for the Power architecture +class VectorRegisterImpl: public AbstractRegisterImpl { + public: + enum { + number_of_registers = 32 + }; + + // construction + inline friend VectorRegister as_VectorRegister(int encoding); + + // accessors + int encoding() const { assert(is_valid(), "invalid register"); return value(); } + + // testers + bool is_valid() const { return 0 <= value() && value() < number_of_registers; } + + const char* name() const; +}; + +// The Vector registers of the Power architecture + +CONSTANT_REGISTER_DECLARATION(VectorRegister, vnoreg, (-1)); + +CONSTANT_REGISTER_DECLARATION(VectorRegister, VR0, ( 0)); +CONSTANT_REGISTER_DECLARATION(VectorRegister, VR1, ( 1)); +CONSTANT_REGISTER_DECLARATION(VectorRegister, VR2, ( 2)); +CONSTANT_REGISTER_DECLARATION(VectorRegister, VR3, ( 3)); +CONSTANT_REGISTER_DECLARATION(VectorRegister, VR4, ( 4)); +CONSTANT_REGISTER_DECLARATION(VectorRegister, VR5, ( 5)); +CONSTANT_REGISTER_DECLARATION(VectorRegister, VR6, ( 6)); +CONSTANT_REGISTER_DECLARATION(VectorRegister, VR7, ( 7)); +CONSTANT_REGISTER_DECLARATION(VectorRegister, VR8, ( 8)); +CONSTANT_REGISTER_DECLARATION(VectorRegister, VR9, ( 9)); +CONSTANT_REGISTER_DECLARATION(VectorRegister, VR10, (10)); +CONSTANT_REGISTER_DECLARATION(VectorRegister, VR11, (11)); +CONSTANT_REGISTER_DECLARATION(VectorRegister, VR12, (12)); +CONSTANT_REGISTER_DECLARATION(VectorRegister, VR13, (13)); +CONSTANT_REGISTER_DECLARATION(VectorRegister, VR14, (14)); +CONSTANT_REGISTER_DECLARATION(VectorRegister, VR15, (15)); +CONSTANT_REGISTER_DECLARATION(VectorRegister, VR16, (16)); +CONSTANT_REGISTER_DECLARATION(VectorRegister, VR17, (17)); +CONSTANT_REGISTER_DECLARATION(VectorRegister, VR18, (18)); +CONSTANT_REGISTER_DECLARATION(VectorRegister, VR19, (19)); +CONSTANT_REGISTER_DECLARATION(VectorRegister, VR20, (20)); +CONSTANT_REGISTER_DECLARATION(VectorRegister, VR21, (21)); +CONSTANT_REGISTER_DECLARATION(VectorRegister, VR22, (22)); +CONSTANT_REGISTER_DECLARATION(VectorRegister, VR23, (23)); +CONSTANT_REGISTER_DECLARATION(VectorRegister, VR24, (24)); 
+CONSTANT_REGISTER_DECLARATION(VectorRegister, VR25, (25)); +CONSTANT_REGISTER_DECLARATION(VectorRegister, VR26, (26)); +CONSTANT_REGISTER_DECLARATION(VectorRegister, VR27, (27)); +CONSTANT_REGISTER_DECLARATION(VectorRegister, VR28, (28)); +CONSTANT_REGISTER_DECLARATION(VectorRegister, VR29, (29)); +CONSTANT_REGISTER_DECLARATION(VectorRegister, VR30, (30)); +CONSTANT_REGISTER_DECLARATION(VectorRegister, VR31, (31)); + +#ifndef DONT_USE_REGISTER_DEFINES +#define vnoreg ((VectorRegister)(vnoreg_VectorRegisterEnumValue)) +#define VR0 ((VectorRegister)( VR0_VectorRegisterEnumValue)) +#define VR1 ((VectorRegister)( VR1_VectorRegisterEnumValue)) +#define VR2 ((VectorRegister)( VR2_VectorRegisterEnumValue)) +#define VR3 ((VectorRegister)( VR3_VectorRegisterEnumValue)) +#define VR4 ((VectorRegister)( VR4_VectorRegisterEnumValue)) +#define VR5 ((VectorRegister)( VR5_VectorRegisterEnumValue)) +#define VR6 ((VectorRegister)( VR6_VectorRegisterEnumValue)) +#define VR7 ((VectorRegister)( VR7_VectorRegisterEnumValue)) +#define VR8 ((VectorRegister)( VR8_VectorRegisterEnumValue)) +#define VR9 ((VectorRegister)( VR9_VectorRegisterEnumValue)) +#define VR10 ((VectorRegister)( VR10_VectorRegisterEnumValue)) +#define VR11 ((VectorRegister)( VR11_VectorRegisterEnumValue)) +#define VR12 ((VectorRegister)( VR12_VectorRegisterEnumValue)) +#define VR13 ((VectorRegister)( VR13_VectorRegisterEnumValue)) +#define VR14 ((VectorRegister)( VR14_VectorRegisterEnumValue)) +#define VR15 ((VectorRegister)( VR15_VectorRegisterEnumValue)) +#define VR16 ((VectorRegister)( VR16_VectorRegisterEnumValue)) +#define VR17 ((VectorRegister)( VR17_VectorRegisterEnumValue)) +#define VR18 ((VectorRegister)( VR18_VectorRegisterEnumValue)) +#define VR19 ((VectorRegister)( VR19_VectorRegisterEnumValue)) +#define VR20 ((VectorRegister)( VR20_VectorRegisterEnumValue)) +#define VR21 ((VectorRegister)( VR21_VectorRegisterEnumValue)) +#define VR22 ((VectorRegister)( VR22_VectorRegisterEnumValue)) +#define VR23 ((VectorRegister)( VR23_VectorRegisterEnumValue)) +#define VR24 ((VectorRegister)( VR24_VectorRegisterEnumValue)) +#define VR25 ((VectorRegister)( VR25_VectorRegisterEnumValue)) +#define VR26 ((VectorRegister)( VR26_VectorRegisterEnumValue)) +#define VR27 ((VectorRegister)( VR27_VectorRegisterEnumValue)) +#define VR28 ((VectorRegister)( VR28_VectorRegisterEnumValue)) +#define VR29 ((VectorRegister)( VR29_VectorRegisterEnumValue)) +#define VR30 ((VectorRegister)( VR30_VectorRegisterEnumValue)) +#define VR31 ((VectorRegister)( VR31_VectorRegisterEnumValue)) +#endif // DONT_USE_REGISTER_DEFINES + + +// Maximum number of incoming arguments that can be passed in i registers. +const int PPC_ARGS_IN_REGS_NUM = 8; + + +// Need to know the total number of registers of all sorts for SharedInfo. +// Define a class that exports it. +class ConcreteRegisterImpl : public AbstractRegisterImpl { + public: + enum { + // This number must be large enough to cover REG_COUNT (defined by c2) registers. + // There is no requirement that any ordering here matches any ordering c2 gives + // it's optoregs. 
+ number_of_registers = + ( RegisterImpl::number_of_registers + + FloatRegisterImpl::number_of_registers ) + * 2 // register halves + + ConditionRegisterImpl::number_of_registers // condition code registers + + SpecialRegisterImpl::number_of_registers // special registers + + VectorRegisterImpl::number_of_registers // vector registers + }; + + static const int max_gpr; + static const int max_fpr; + static const int max_cnd; +}; + +// Common register declarations used in assembler code. +REGISTER_DECLARATION(Register, R0_SCRATCH, R0); // volatile +REGISTER_DECLARATION(Register, R1_SP, R1); // non-volatile +REGISTER_DECLARATION(Register, R2_TOC, R2); // volatile +REGISTER_DECLARATION(Register, R3_RET, R3); // volatile +REGISTER_DECLARATION(Register, R3_ARG1, R3); // volatile +REGISTER_DECLARATION(Register, R4_ARG2, R4); // volatile +REGISTER_DECLARATION(Register, R5_ARG3, R5); // volatile +REGISTER_DECLARATION(Register, R6_ARG4, R6); // volatile +REGISTER_DECLARATION(Register, R7_ARG5, R7); // volatile +REGISTER_DECLARATION(Register, R8_ARG6, R8); // volatile +REGISTER_DECLARATION(Register, R9_ARG7, R9); // volatile +REGISTER_DECLARATION(Register, R10_ARG8, R10); // volatile +REGISTER_DECLARATION(FloatRegister, FO_SCRATCH, F0); // volatile +REGISTER_DECLARATION(FloatRegister, F1_RET, F1); // volatile +REGISTER_DECLARATION(FloatRegister, F1_ARG1, F1); // volatile +REGISTER_DECLARATION(FloatRegister, F2_ARG2, F2); // volatile +REGISTER_DECLARATION(FloatRegister, F3_ARG3, F3); // volatile +REGISTER_DECLARATION(FloatRegister, F4_ARG4, F4); // volatile +REGISTER_DECLARATION(FloatRegister, F5_ARG5, F5); // volatile +REGISTER_DECLARATION(FloatRegister, F6_ARG6, F6); // volatile +REGISTER_DECLARATION(FloatRegister, F7_ARG7, F7); // volatile +REGISTER_DECLARATION(FloatRegister, F8_ARG8, F8); // volatile +REGISTER_DECLARATION(FloatRegister, F9_ARG9, F9); // volatile +REGISTER_DECLARATION(FloatRegister, F10_ARG10, F10); // volatile +REGISTER_DECLARATION(FloatRegister, F11_ARG11, F11); // volatile +REGISTER_DECLARATION(FloatRegister, F12_ARG12, F12); // volatile +REGISTER_DECLARATION(FloatRegister, F13_ARG13, F13); // volatile + +#ifndef DONT_USE_REGISTER_DEFINES +#define R0_SCRATCH AS_REGISTER(Register, R0) +#define R1_SP AS_REGISTER(Register, R1) +#define R2_TOC AS_REGISTER(Register, R2) +#define R3_RET AS_REGISTER(Register, R3) +#define R3_ARG1 AS_REGISTER(Register, R3) +#define R4_ARG2 AS_REGISTER(Register, R4) +#define R5_ARG3 AS_REGISTER(Register, R5) +#define R6_ARG4 AS_REGISTER(Register, R6) +#define R7_ARG5 AS_REGISTER(Register, R7) +#define R8_ARG6 AS_REGISTER(Register, R8) +#define R9_ARG7 AS_REGISTER(Register, R9) +#define R10_ARG8 AS_REGISTER(Register, R10) +#define FO_SCRATCH AS_REGISTER(FloatRegister, F0) +#define F1_RET AS_REGISTER(FloatRegister, F1) +#define F1_ARG1 AS_REGISTER(FloatRegister, F1) +#define F2_ARG2 AS_REGISTER(FloatRegister, F2) +#define F3_ARG3 AS_REGISTER(FloatRegister, F3) +#define F4_ARG4 AS_REGISTER(FloatRegister, F4) +#define F5_ARG5 AS_REGISTER(FloatRegister, F5) +#define F6_ARG6 AS_REGISTER(FloatRegister, F6) +#define F7_ARG7 AS_REGISTER(FloatRegister, F7) +#define F8_ARG8 AS_REGISTER(FloatRegister, F8) +#define F9_ARG9 AS_REGISTER(FloatRegister, F9) +#define F10_ARG10 AS_REGISTER(FloatRegister, F10) +#define F11_ARG11 AS_REGISTER(FloatRegister, F11) +#define F12_ARG12 AS_REGISTER(FloatRegister, F12) +#define F13_ARG13 AS_REGISTER(FloatRegister, F13) +#endif + +// Register declarations to be used in frame manager assembly code. 
+// Use only non-volatile registers in order to keep values across C-calls. +REGISTER_DECLARATION(Register, R14_state, R14); // address of new cInterpreter. +REGISTER_DECLARATION(Register, R15_prev_state, R15); // address of old cInterpreter +REGISTER_DECLARATION(Register, R16_thread, R16); // address of current thread +REGISTER_DECLARATION(Register, R17_tos, R17); // address of Java tos (prepushed). +REGISTER_DECLARATION(Register, R18_locals, R18); // address of first param slot (receiver). +REGISTER_DECLARATION(Register, R19_method, R19); // address of current method +#ifndef DONT_USE_REGISTER_DEFINES +#define R14_state AS_REGISTER(Register, R14) +#define R15_prev_state AS_REGISTER(Register, R15) +#define R16_thread AS_REGISTER(Register, R16) +#define R17_tos AS_REGISTER(Register, R17) +#define R18_locals AS_REGISTER(Register, R18) +#define R19_method AS_REGISTER(Register, R19) +#define R21_sender_SP AS_REGISTER(Register, R21) +#define R23_method_handle AS_REGISTER(Register, R23) +#endif + +// Temporary registers to be used within frame manager. We can use +// the non-volatiles because the call stub has saved them. +// Use only non-volatile registers in order to keep values across C-calls. +REGISTER_DECLARATION(Register, R21_tmp1, R21); +REGISTER_DECLARATION(Register, R22_tmp2, R22); +REGISTER_DECLARATION(Register, R23_tmp3, R23); +REGISTER_DECLARATION(Register, R24_tmp4, R24); +REGISTER_DECLARATION(Register, R25_tmp5, R25); +REGISTER_DECLARATION(Register, R26_tmp6, R26); +REGISTER_DECLARATION(Register, R27_tmp7, R27); +REGISTER_DECLARATION(Register, R28_tmp8, R28); +REGISTER_DECLARATION(Register, R29_tmp9, R29); +REGISTER_DECLARATION(Register, R30_polling_page, R30); +#ifndef DONT_USE_REGISTER_DEFINES +#define R21_tmp1 AS_REGISTER(Register, R21) +#define R22_tmp2 AS_REGISTER(Register, R22) +#define R23_tmp3 AS_REGISTER(Register, R23) +#define R24_tmp4 AS_REGISTER(Register, R24) +#define R25_tmp5 AS_REGISTER(Register, R25) +#define R26_tmp6 AS_REGISTER(Register, R26) +#define R27_tmp7 AS_REGISTER(Register, R27) +#define R28_tmp8 AS_REGISTER(Register, R28) +#define R29_tmp9 AS_REGISTER(Register, R29) +#define R30_polling_page AS_REGISTER(Register, R30) + +#define CCR4_is_synced AS_REGISTER(ConditionRegister, CCR4) +#endif + +// Scratch registers are volatile. +REGISTER_DECLARATION(Register, R11_scratch1, R11); +REGISTER_DECLARATION(Register, R12_scratch2, R12); +#ifndef DONT_USE_REGISTER_DEFINES +#define R11_scratch1 AS_REGISTER(Register, R11) +#define R12_scratch2 AS_REGISTER(Register, R12) +#endif + +#endif // CPU_PPC_VM_REGISTER_PPC_HPP diff --git a/src/cpu/ppc/vm/relocInfo_ppc.cpp b/src/cpu/ppc/vm/relocInfo_ppc.cpp new file mode 100644 --- /dev/null +++ b/src/cpu/ppc/vm/relocInfo_ppc.cpp @@ -0,0 +1,133 @@ +/* + * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright 2012, 2013 SAP AG. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/assembler.inline.hpp" +#include "assembler_ppc.inline.hpp" +#include "code/relocInfo.hpp" +#include "nativeInst_ppc.hpp" +#include "oops/oop.inline.hpp" +#include "runtime/safepoint.hpp" + +void Relocation::pd_set_data_value(address x, intptr_t o, bool verify_only) { + bool copy_back_to_oop_pool = true; // TODO: PPC port + // The following comment is from the declaration of DataRelocation: + // + // "The "o" (displacement) argument is relevant only to split relocations + // on RISC machines. In some CPUs (SPARC), the set-hi and set-lo ins'ns + // can encode more than 32 bits between them. This allows compilers to + // share set-hi instructions between addresses that differ by a small + // offset (e.g., different static variables in the same class). + // On such machines, the "x" argument to set_value on all set-lo + // instructions must be the same as the "x" argument for the + // corresponding set-hi instructions. The "o" arguments for the + // set-hi instructions are ignored, and must not affect the high-half + // immediate constant. The "o" arguments for the set-lo instructions are + // added into the low-half immediate constant, and must not overflow it." + // + // Currently we don't support splitting of relocations, so o must be + // zero: + assert(o == 0, "tried to split relocations"); + + if (!verify_only) { + if (format() != 1) { + nativeMovConstReg_at(addr())->set_data_plain(((intptr_t)x), code()); + } else { + assert(type() == relocInfo::oop_type || type() == relocInfo::metadata_type, + "how to encode else?"); + narrowOop no = (type() == relocInfo::oop_type) ? + oopDesc::encode_heap_oop((oop)x) : oopDesc::encode_klass((Klass*)x); + nativeMovConstReg_at(addr())->set_narrow_oop(no, code()); + } + } else { + assert((address) (nativeMovConstReg_at(addr())->data()) == x, "data must match"); + } +} + +address Relocation::pd_call_destination(address orig_addr) { + intptr_t adj = 0; + address inst_loc = addr(); + + if (orig_addr != NULL) { + // We just moved this call instruction from orig_addr to addr(). + // This means its target will appear to have grown by addr() - orig_addr. + adj = -(inst_loc - orig_addr); + } + if (NativeFarCall::is_far_call_at(inst_loc)) { + NativeFarCall* call = nativeFarCall_at(inst_loc); + return call->destination() + (intptr_t)(call->is_pcrelative() ? adj : 0); + } else if (NativeJump::is_jump_at(inst_loc)) { + NativeJump* jump = nativeJump_at(inst_loc); + return jump->jump_destination() + (intptr_t)(jump->is_pcrelative() ? adj : 0); + } else if (NativeConditionalFarBranch::is_conditional_far_branch_at(inst_loc)) { + NativeConditionalFarBranch* branch = NativeConditionalFarBranch_at(inst_loc); + return branch->branch_destination(); + } else { + // There are two instructions at the beginning of a stub, therefore we + // load at orig_addr + 8. 
+ orig_addr = nativeCall_at(inst_loc)->get_trampoline(); + if (orig_addr == NULL) { + return (address) -1; + } else { + return (address) nativeMovConstReg_at(orig_addr + 8)->data(); + } + } +} + +void Relocation::pd_set_call_destination(address x) { + address inst_loc = addr(); + + if (NativeFarCall::is_far_call_at(inst_loc)) { + NativeFarCall* call = nativeFarCall_at(inst_loc); + call->set_destination(x); + } else if (NativeJump::is_jump_at(inst_loc)) { + NativeJump* jump= nativeJump_at(inst_loc); + jump->set_jump_destination(x); + } else if (NativeConditionalFarBranch::is_conditional_far_branch_at(inst_loc)) { + NativeConditionalFarBranch* branch = NativeConditionalFarBranch_at(inst_loc); + branch->set_branch_destination(x); + } else { + NativeCall* call = nativeCall_at(inst_loc); + call->set_destination_mt_safe(x, false); + } +} + +address* Relocation::pd_address_in_code() { + ShouldNotReachHere(); + return 0; +} + +address Relocation::pd_get_address_from_code() { + return (address)(nativeMovConstReg_at(addr())->data()); +} + +void poll_Relocation::fix_relocation_after_move(const CodeBuffer* src, CodeBuffer* dest) { +} + +void poll_return_Relocation::fix_relocation_after_move(const CodeBuffer* src, CodeBuffer* dest) { +} + +void metadata_Relocation::pd_fix_value(address x) { +} diff --git a/src/cpu/ppc/vm/relocInfo_ppc.hpp b/src/cpu/ppc/vm/relocInfo_ppc.hpp new file mode 100644 --- /dev/null +++ b/src/cpu/ppc/vm/relocInfo_ppc.hpp @@ -0,0 +1,46 @@ +/* + * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright 2012, 2013 SAP AG. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_PPC_VM_RELOCINFO_PPC_HPP +#define CPU_PPC_VM_RELOCINFO_PPC_HPP + + // machine-dependent parts of class relocInfo + private: + enum { + // Since Power instructions are whole words, + // the two low-order offset bits can always be discarded. + offset_unit = 4, + + // There is no need for format bits; the instructions are + // sufficiently self-identifying. +#ifndef _LP64 + format_width = 0 +#else + // Except narrow oops in 64-bits VM. + format_width = 1 +#endif + }; + +#endif // CPU_PPC_VM_RELOCINFO_PPC_HPP diff --git a/src/cpu/ppc/vm/sharedRuntime_ppc.cpp b/src/cpu/ppc/vm/sharedRuntime_ppc.cpp new file mode 100644 --- /dev/null +++ b/src/cpu/ppc/vm/sharedRuntime_ppc.cpp @@ -0,0 +1,3209 @@ +/* + * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright 2012, 2013 SAP AG. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "code/debugInfoRec.hpp" +#include "code/icBuffer.hpp" +#include "code/vtableStubs.hpp" +#include "interpreter/interpreter.hpp" +#include "oops/compiledICHolder.hpp" +#include "prims/jvmtiRedefineClassesTrace.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/vframeArray.hpp" +#include "vmreg_ppc.inline.hpp" +#ifdef COMPILER1 +#include "c1/c1_Runtime1.hpp" +#endif +#ifdef COMPILER2 +#include "opto/runtime.hpp" +#endif + +#define __ masm-> + +#ifdef PRODUCT +#define BLOCK_COMMENT(str) // nothing +#else +#define BLOCK_COMMENT(str) __ block_comment(str) +#endif + +#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") + + +// Used by generate_deopt_blob. Defined in .ad file. +extern uint size_deopt_handler(); + + +class RegisterSaver { + // Used for saving volatile registers. + public: + + // Support different return pc locations. + enum ReturnPCLocation { + return_pc_is_lr, + return_pc_is_r4, + return_pc_is_thread_saved_exception_pc + }; + + static OopMap* push_frame_abi112_and_save_live_registers(MacroAssembler* masm, + int* out_frame_size_in_bytes, + bool generate_oop_map, + int return_pc_adjustment, + ReturnPCLocation return_pc_location); + static void restore_live_registers_and_pop_frame(MacroAssembler* masm, + int frame_size_in_bytes, + bool restore_ctr); + + static void push_frame_and_save_argument_registers(MacroAssembler* masm, + Register r_temp, + int frame_size, + int total_args, + const VMRegPair *regs, const VMRegPair *regs2 = NULL); + static void restore_argument_registers_and_pop_frame(MacroAssembler*masm, + int frame_size, + int total_args, + const VMRegPair *regs, const VMRegPair *regs2 = NULL); + + // During deoptimization only the result registers need to be restored + // all the other values have already been extracted. 
+ static void restore_result_registers(MacroAssembler* masm, int frame_size_in_bytes); + + // Constants and data structures: + + typedef enum { + int_reg = 0, + float_reg = 1, + special_reg = 2 + } RegisterType; + + typedef enum { + reg_size = 8, + half_reg_size = reg_size / 2, + } RegisterConstants; + + typedef struct { + RegisterType reg_type; + int reg_num; + VMReg vmreg; + } LiveRegType; +}; + + +#define RegisterSaver_LiveSpecialReg(regname) \ + { RegisterSaver::special_reg, regname->encoding(), regname->as_VMReg() } + +#define RegisterSaver_LiveIntReg(regname) \ + { RegisterSaver::int_reg, regname->encoding(), regname->as_VMReg() } + +#define RegisterSaver_LiveFloatReg(regname) \ + { RegisterSaver::float_reg, regname->encoding(), regname->as_VMReg() } + +static const RegisterSaver::LiveRegType RegisterSaver_LiveRegs[] = { + // Live registers which get spilled to the stack. Register + // positions in this array correspond directly to the stack layout. + + // + // live special registers: + // + RegisterSaver_LiveSpecialReg(SR_CTR), + // + // live float registers: + // + RegisterSaver_LiveFloatReg( F0 ), + RegisterSaver_LiveFloatReg( F1 ), + RegisterSaver_LiveFloatReg( F2 ), + RegisterSaver_LiveFloatReg( F3 ), + RegisterSaver_LiveFloatReg( F4 ), + RegisterSaver_LiveFloatReg( F5 ), + RegisterSaver_LiveFloatReg( F6 ), + RegisterSaver_LiveFloatReg( F7 ), + RegisterSaver_LiveFloatReg( F8 ), + RegisterSaver_LiveFloatReg( F9 ), + RegisterSaver_LiveFloatReg( F10 ), + RegisterSaver_LiveFloatReg( F11 ), + RegisterSaver_LiveFloatReg( F12 ), + RegisterSaver_LiveFloatReg( F13 ), + RegisterSaver_LiveFloatReg( F14 ), + RegisterSaver_LiveFloatReg( F15 ), + RegisterSaver_LiveFloatReg( F16 ), + RegisterSaver_LiveFloatReg( F17 ), + RegisterSaver_LiveFloatReg( F18 ), + RegisterSaver_LiveFloatReg( F19 ), + RegisterSaver_LiveFloatReg( F20 ), + RegisterSaver_LiveFloatReg( F21 ), + RegisterSaver_LiveFloatReg( F22 ), + RegisterSaver_LiveFloatReg( F23 ), + RegisterSaver_LiveFloatReg( F24 ), + RegisterSaver_LiveFloatReg( F25 ), + RegisterSaver_LiveFloatReg( F26 ), + RegisterSaver_LiveFloatReg( F27 ), + RegisterSaver_LiveFloatReg( F28 ), + RegisterSaver_LiveFloatReg( F29 ), + RegisterSaver_LiveFloatReg( F30 ), + RegisterSaver_LiveFloatReg( F31 ), + // + // live integer registers: + // + RegisterSaver_LiveIntReg( R0 ), + //RegisterSaver_LiveIntReg( R1 ), // stack pointer + RegisterSaver_LiveIntReg( R2 ), + RegisterSaver_LiveIntReg( R3 ), + RegisterSaver_LiveIntReg( R4 ), + RegisterSaver_LiveIntReg( R5 ), + RegisterSaver_LiveIntReg( R6 ), + RegisterSaver_LiveIntReg( R7 ), + RegisterSaver_LiveIntReg( R8 ), + RegisterSaver_LiveIntReg( R9 ), + RegisterSaver_LiveIntReg( R10 ), + RegisterSaver_LiveIntReg( R11 ), + RegisterSaver_LiveIntReg( R12 ), + //RegisterSaver_LiveIntReg( R13 ), // system thread id + RegisterSaver_LiveIntReg( R14 ), + RegisterSaver_LiveIntReg( R15 ), + RegisterSaver_LiveIntReg( R16 ), + RegisterSaver_LiveIntReg( R17 ), + RegisterSaver_LiveIntReg( R18 ), + RegisterSaver_LiveIntReg( R19 ), + RegisterSaver_LiveIntReg( R20 ), + RegisterSaver_LiveIntReg( R21 ), + RegisterSaver_LiveIntReg( R22 ), + RegisterSaver_LiveIntReg( R23 ), + RegisterSaver_LiveIntReg( R24 ), + RegisterSaver_LiveIntReg( R25 ), + RegisterSaver_LiveIntReg( R26 ), + RegisterSaver_LiveIntReg( R27 ), + RegisterSaver_LiveIntReg( R28 ), + RegisterSaver_LiveIntReg( R29 ), + RegisterSaver_LiveIntReg( R31 ), + RegisterSaver_LiveIntReg( R30 ), // r30 must be the last register +}; + +OopMap* 
RegisterSaver::push_frame_abi112_and_save_live_registers(MacroAssembler* masm, + int* out_frame_size_in_bytes, + bool generate_oop_map, + int return_pc_adjustment, + ReturnPCLocation return_pc_location) { + // Push an abi112-frame and store all registers which may be live. + // If requested, create an OopMap: Record volatile registers as + // callee-save values in an OopMap so their save locations will be + // propagated to the RegisterMap of the caller frame during + // StackFrameStream construction (needed for deoptimization; see + // compiledVFrame::create_stack_value). + // If return_pc_adjustment != 0 adjust the return pc by return_pc_adjustment. + + int i; + int offset; + + // calculate frame size + const int regstosave_num = sizeof(RegisterSaver_LiveRegs) / + sizeof(RegisterSaver::LiveRegType); + const int register_save_size = regstosave_num * reg_size; + const int frame_size_in_bytes = round_to(register_save_size, frame::alignment_in_bytes) + + frame::abi_112_size; + *out_frame_size_in_bytes = frame_size_in_bytes; + const int frame_size_in_slots = frame_size_in_bytes / sizeof(jint); + const int register_save_offset = frame_size_in_bytes - register_save_size; + + // OopMap frame size is in c2 stack slots (sizeof(jint)) not bytes or words. + OopMap* map = generate_oop_map ? new OopMap(frame_size_in_slots, 0) : NULL; + + BLOCK_COMMENT("push_frame_abi112_and_save_live_registers {"); + + // Save r30 in the last slot of the not yet pushed frame so that we + // can use it as scratch reg. + __ std(R30, -reg_size, R1_SP); + assert(-reg_size == register_save_offset - frame_size_in_bytes + ((regstosave_num-1)*reg_size), + "consistency check"); + + // save the flags + // Do the save_LR_CR by hand and adjust the return pc if requested. + __ mfcr(R30); + __ std(R30, _abi(cr), R1_SP); + switch (return_pc_location) { + case return_pc_is_lr: __ mflr(R30); break; + case return_pc_is_r4: __ mr(R30, R4); break; + case return_pc_is_thread_saved_exception_pc: + __ ld(R30, thread_(saved_exception_pc)); break; + default: ShouldNotReachHere(); + } + if (return_pc_adjustment != 0) + __ addi(R30, R30, return_pc_adjustment); + __ std(R30, _abi(lr), R1_SP); + + // push a new frame + __ push_frame(frame_size_in_bytes, R30); + + // save all registers (ints and floats) + offset = register_save_offset; + for (int i = 0; i < regstosave_num; i++) { + int reg_num = RegisterSaver_LiveRegs[i].reg_num; + int reg_type = RegisterSaver_LiveRegs[i].reg_type; + + switch (reg_type) { + case RegisterSaver::int_reg: { + if (reg_num != 30) { // We spilled R30 right at the beginning. + __ std(as_Register(reg_num), offset, R1_SP); + } + break; + } + case RegisterSaver::float_reg: { + __ stfd(as_FloatRegister(reg_num), offset, R1_SP); + break; + } + case RegisterSaver::special_reg: { + if (reg_num == SR_CTR_SpecialRegisterEnumValue) { + __ mfctr(R30); + __ std(R30, offset, R1_SP); + } else { + Unimplemented(); + } + break; + } + default: + ShouldNotReachHere(); + } + + if (generate_oop_map) { + map->set_callee_saved(VMRegImpl::stack2reg(offset>>2), + RegisterSaver_LiveRegs[i].vmreg); + map->set_callee_saved(VMRegImpl::stack2reg((offset + half_reg_size)>>2), + RegisterSaver_LiveRegs[i].vmreg->next()); + } + offset += reg_size; + } + + BLOCK_COMMENT("} push_frame_abi112_and_save_live_registers"); + + // And we're done. + return map; +} + + +// Pop the current frame and restore all the registers that we +// saved.
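+// Frame layout produced above (sketch; offsets relative to the new R1_SP):
+//
+//   [0 .. abi_112_size)                     C ABI_112 area
+//   [abi_112_size .. register_save_offset)  alignment padding, if any
+//   [register_save_offset .. frame_size)    save area, one reg_size slot per
+//                                           entry of RegisterSaver_LiveRegs:
+//                                           CTR, F0..F31, R0, R2..R12,
+//                                           R14..R29, R31 and finally R30
+//                                           (spilled before the frame push).
+//
+// The restore code below walks the same table with the same offsets.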
+void RegisterSaver::restore_live_registers_and_pop_frame(MacroAssembler* masm, + int frame_size_in_bytes, + bool restore_ctr) { + int i; + int offset; + const int regstosave_num = sizeof(RegisterSaver_LiveRegs) / + sizeof(RegisterSaver::LiveRegType); + const int register_save_size = regstosave_num * reg_size; + const int register_save_offset = frame_size_in_bytes - register_save_size; + + BLOCK_COMMENT("restore_live_registers_and_pop_frame {"); + + // restore all registers (ints and floats) + offset = register_save_offset; + for (int i = 0; i < regstosave_num; i++) { + int reg_num = RegisterSaver_LiveRegs[i].reg_num; + int reg_type = RegisterSaver_LiveRegs[i].reg_type; + + switch (reg_type) { + case RegisterSaver::int_reg: { + if (reg_num != 30) // R30 restored at the end, it's the tmp reg! + __ ld(as_Register(reg_num), offset, R1_SP); + break; + } + case RegisterSaver::float_reg: { + __ lfd(as_FloatRegister(reg_num), offset, R1_SP); + break; + } + case RegisterSaver::special_reg: { + if (reg_num == SR_CTR_SpecialRegisterEnumValue) { + if (restore_ctr) { // Nothing to do here if ctr already contains the next address. + __ ld(R30, offset, R1_SP); + __ mtctr(R30); + } + } else { + Unimplemented(); + } + break; + } + default: + ShouldNotReachHere(); + } + offset += reg_size; + } + + // pop the frame + __ pop_frame(); + + // restore the flags + __ restore_LR_CR(R30); + + // restore scratch register's value + __ ld(R30, -reg_size, R1_SP); + + BLOCK_COMMENT("} restore_live_registers_and_pop_frame"); +} + +void RegisterSaver::push_frame_and_save_argument_registers(MacroAssembler* masm, Register r_temp, + int frame_size,int total_args, const VMRegPair *regs, + const VMRegPair *regs2) { + __ push_frame(frame_size, r_temp); + int st_off = frame_size - wordSize; + for (int i = 0; i < total_args; i++) { + VMReg r_1 = regs[i].first(); + VMReg r_2 = regs[i].second(); + if (!r_1->is_valid()) { + assert(!r_2->is_valid(), ""); + continue; + } + if (r_1->is_Register()) { + Register r = r_1->as_Register(); + __ std(r, st_off, R1_SP); + st_off -= wordSize; + } else if (r_1->is_FloatRegister()) { + FloatRegister f = r_1->as_FloatRegister(); + __ stfd(f, st_off, R1_SP); + st_off -= wordSize; + } + } + if (regs2 != NULL) { + for (int i = 0; i < total_args; i++) { + VMReg r_1 = regs2[i].first(); + VMReg r_2 = regs2[i].second(); + if (!r_1->is_valid()) { + assert(!r_2->is_valid(), ""); + continue; + } + if (r_1->is_Register()) { + Register r = r_1->as_Register(); + __ std(r, st_off, R1_SP); + st_off -= wordSize; + } else if (r_1->is_FloatRegister()) { + FloatRegister f = r_1->as_FloatRegister(); + __ stfd(f, st_off, R1_SP); + st_off -= wordSize; + } + } + } +} + +void RegisterSaver::restore_argument_registers_and_pop_frame(MacroAssembler*masm, int frame_size, + int total_args, const VMRegPair *regs, + const VMRegPair *regs2) { + int st_off = frame_size - wordSize; + for (int i = 0; i < total_args; i++) { + VMReg r_1 = regs[i].first(); + VMReg r_2 = regs[i].second(); + if (r_1->is_Register()) { + Register r = r_1->as_Register(); + __ ld(r, st_off, R1_SP); + st_off -= wordSize; + } else if (r_1->is_FloatRegister()) { + FloatRegister f = r_1->as_FloatRegister(); + __ lfd(f, st_off, R1_SP); + st_off -= wordSize; + } + } + if (regs2 != NULL) + for (int i = 0; i < total_args; i++) { + VMReg r_1 = regs2[i].first(); + VMReg r_2 = regs2[i].second(); + if (r_1->is_Register()) { + Register r = r_1->as_Register(); + __ ld(r, st_off, R1_SP); + st_off -= wordSize; + } else if (r_1->is_FloatRegister()) { + FloatRegister f = 
r_1->as_FloatRegister(); + __ lfd(f, st_off, R1_SP); + st_off -= wordSize; + } + } + __ pop_frame(); +} + +// Restore the registers that might be holding a result. +void RegisterSaver::restore_result_registers(MacroAssembler* masm, int frame_size_in_bytes) { + int i; + int offset; + const int regstosave_num = sizeof(RegisterSaver_LiveRegs) / + sizeof(RegisterSaver::LiveRegType); + const int register_save_size = regstosave_num * reg_size; + const int register_save_offset = frame_size_in_bytes - register_save_size; + + // restore all result registers (ints and floats) + offset = register_save_offset; + for (int i = 0; i < regstosave_num; i++) { + int reg_num = RegisterSaver_LiveRegs[i].reg_num; + int reg_type = RegisterSaver_LiveRegs[i].reg_type; + switch (reg_type) { + case RegisterSaver::int_reg: { + if (as_Register(reg_num)==R3_RET) // int result_reg + __ ld(as_Register(reg_num), offset, R1_SP); + break; + } + case RegisterSaver::float_reg: { + if (as_FloatRegister(reg_num)==F1_RET) // float result_reg + __ lfd(as_FloatRegister(reg_num), offset, R1_SP); + break; + } + case RegisterSaver::special_reg: { + // Special registers don't hold a result. + break; + } + default: + ShouldNotReachHere(); + } + offset += reg_size; + } +} + +// Is vector's size (in bytes) bigger than a size saved by default? +bool SharedRuntime::is_wide_vector(int size) { + ResourceMark rm; + // Note, MaxVectorSize == 8 on PPC64. + assert(size <= 8, err_msg_res("%d bytes vectors are not supported", size)); + return size > 8; +} +#ifdef COMPILER2 +static int reg2slot(VMReg r) { + return r->reg2stack() + SharedRuntime::out_preserve_stack_slots(); +} + +static int reg2offset(VMReg r) { + return (r->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size; +} +#endif + +// --------------------------------------------------------------------------- +// Read the array of BasicTypes from a signature, and compute where the +// arguments should go. Values in the VMRegPair regs array refer to 4-byte +// quantities. Values less than VMRegImpl::stack0 are registers, those above +// refer to 4-byte stack slots. All stack slots are based off of the stack pointer +// as framesizes are fixed. +// VMRegImpl::stack0 refers to the first slot 0(sp). +// and VMRegImpl::stack0+1 refers to the memory word 4-bytes higher. Register +// up to RegisterImpl::number_of_registers) are the 64-bit +// integer registers. + +// Note: the INPUTS in sig_bt are in units of Java argument words, which are +// either 32-bit or 64-bit depending on the build. The OUTPUTS are in 32-bit +// units regardless of build. Of course for i486 there is no 64 bit build + +// The Java calling convention is a "shifted" version of the C ABI. +// By skipping the first C ABI register we can call non-static jni methods +// with small numbers of arguments without having to shuffle the arguments +// at all. Since we control the java ABI we ought to at least get some +// advantage out of it. 
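+// For illustration, a hypothetical signature (Object, int, long, double)
+// is assigned by java_calling_convention() below as follows (assuming no
+// arguments spill to the stack):
+//
+//   Object -> R3   (first integer/oop argument register)
+//   int    -> R4
+//   long   -> R5   (one 64-bit register; the trailing T_VOID half is skipped)
+//   double -> F1   (float registers are counted independently)
+//
+// Only when the 8 integer or 13 float argument registers are exhausted are
+// 4-byte stack slots handed out, with longs and doubles aligned to 2 slots.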
+ +const VMReg java_iarg_reg[8] = { + R3->as_VMReg(), + R4->as_VMReg(), + R5->as_VMReg(), + R6->as_VMReg(), + R7->as_VMReg(), + R8->as_VMReg(), + R9->as_VMReg(), + R10->as_VMReg() +}; + +const VMReg java_farg_reg[13] = { + F1->as_VMReg(), + F2->as_VMReg(), + F3->as_VMReg(), + F4->as_VMReg(), + F5->as_VMReg(), + F6->as_VMReg(), + F7->as_VMReg(), + F8->as_VMReg(), + F9->as_VMReg(), + F10->as_VMReg(), + F11->as_VMReg(), + F12->as_VMReg(), + F13->as_VMReg() +}; + +const int num_java_iarg_registers = sizeof(java_iarg_reg) / sizeof(java_iarg_reg[0]); +const int num_java_farg_registers = sizeof(java_farg_reg) / sizeof(java_farg_reg[0]); + +int SharedRuntime::java_calling_convention(const BasicType *sig_bt, + VMRegPair *regs, + int total_args_passed, + int is_outgoing) { + // C2c calling conventions for compiled-compiled calls. + // Put 8 ints/longs into registers _AND_ 13 float/doubles into + // registers _AND_ put the rest on the stack. + + const int inc_stk_for_intfloat = 1; // 1 slots for ints and floats + const int inc_stk_for_longdouble = 2; // 2 slots for longs and doubles + + int i; + VMReg reg; + int stk = 0; + int ireg = 0; + int freg = 0; + + // We put the first 8 arguments into registers and the rest on the + // stack, float arguments are already in their argument registers + // due to c2c calling conventions (see calling_convention). + for (int i = 0; i < total_args_passed; ++i) { + switch(sig_bt[i]) { + case T_BOOLEAN: + case T_CHAR: + case T_BYTE: + case T_SHORT: + case T_INT: + if (ireg < num_java_iarg_registers) { + // Put int/ptr in register + reg = java_iarg_reg[ireg]; + ++ireg; + } else { + // Put int/ptr on stack. + reg = VMRegImpl::stack2reg(stk); + stk += inc_stk_for_intfloat; + } + regs[i].set1(reg); + break; + case T_LONG: + assert(sig_bt[i+1] == T_VOID, "expecting half"); + if (ireg < num_java_iarg_registers) { + // Put long in register. + reg = java_iarg_reg[ireg]; + ++ireg; + } else { + // Put long on stack. They must be aligned to 2 slots. + if (stk & 0x1) ++stk; + reg = VMRegImpl::stack2reg(stk); + stk += inc_stk_for_longdouble; + } + regs[i].set2(reg); + break; + case T_OBJECT: + case T_ARRAY: + case T_ADDRESS: + if (ireg < num_java_iarg_registers) { + // Put ptr in register. + reg = java_iarg_reg[ireg]; + ++ireg; + } else { + // Put ptr on stack. Objects must be aligned to 2 slots too, + // because "64-bit pointers record oop-ishness on 2 aligned + // adjacent registers." (see OopFlow::build_oop_map). + if (stk & 0x1) ++stk; + reg = VMRegImpl::stack2reg(stk); + stk += inc_stk_for_longdouble; + } + regs[i].set2(reg); + break; + case T_FLOAT: + if (freg < num_java_farg_registers) { + // Put float in register. + reg = java_farg_reg[freg]; + ++freg; + } else { + // Put float on stack. + reg = VMRegImpl::stack2reg(stk); + stk += inc_stk_for_intfloat; + } + regs[i].set1(reg); + break; + case T_DOUBLE: + assert(sig_bt[i+1] == T_VOID, "expecting half"); + if (freg < num_java_farg_registers) { + // Put double in register. + reg = java_farg_reg[freg]; + ++freg; + } else { + // Put double on stack. They must be aligned to 2 slots. + if (stk & 0x1) ++stk; + reg = VMRegImpl::stack2reg(stk); + stk += inc_stk_for_longdouble; + } + regs[i].set2(reg); + break; + case T_VOID: + // Do not count halves. + regs[i].set_bad(); + break; + default: + ShouldNotReachHere(); + } + } + return round_to(stk, 2); +} + +#ifdef COMPILER2 +// Calling convention for calling C code. 
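Editor's sketch (not part of the changeset) of the C convention the function below implements, assuming the iarg_reg / farg_reg tables it defines; argument indices refer to the promoted C arguments, so every int has already become a long.

// Hypothetical C signature with one long, one double and seven floats:
//   long   (arg 0)     -> R3
//   double (arg 1)     -> F1        (its GPR slot, R4, is left undefined)
//   floats (args 2..7) -> F2..F7    (their GPR slots are likewise skipped)
//   float  (arg 8)     -> a stack slot in regs[] AND, because FP argument
//                         registers remain, F8 in regs2[] -- the dual entry
//                         guarded by "must pass float in register and stack slot"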
+int SharedRuntime::c_calling_convention(const BasicType *sig_bt, + VMRegPair *regs, + VMRegPair *regs2, + int total_args_passed) { + // Calling conventions for C runtime calls and calls to JNI native methods. + // + // PPC64 convention: Hoist the first 8 int/ptr/long's in the first 8 + // int regs, leaving int regs undefined if the arg is flt/dbl. Hoist + // the first 13 flt/dbl's in the first 13 fp regs but additionally + // copy flt/dbl to the stack if they are beyond the 8th argument. + + const VMReg iarg_reg[8] = { + R3->as_VMReg(), + R4->as_VMReg(), + R5->as_VMReg(), + R6->as_VMReg(), + R7->as_VMReg(), + R8->as_VMReg(), + R9->as_VMReg(), + R10->as_VMReg() + }; + + const VMReg farg_reg[13] = { + F1->as_VMReg(), + F2->as_VMReg(), + F3->as_VMReg(), + F4->as_VMReg(), + F5->as_VMReg(), + F6->as_VMReg(), + F7->as_VMReg(), + F8->as_VMReg(), + F9->as_VMReg(), + F10->as_VMReg(), + F11->as_VMReg(), + F12->as_VMReg(), + F13->as_VMReg() + }; + + const int num_iarg_registers = sizeof(iarg_reg) / sizeof(iarg_reg[0]); + const int num_farg_registers = sizeof(farg_reg) / sizeof(farg_reg[0]); + + // The first 8 arguments are not passed on the stack. + const int num_args_in_regs = 8; +#define put_arg_in_reg(arg) ((arg) < num_args_in_regs) + + // Check calling conventions consistency. + assert(num_iarg_registers == num_args_in_regs + && num_iarg_registers == 8 + && num_farg_registers == 13, + "consistency"); + + // `Stk' counts stack slots. Due to alignment, 32 bit values occupy + // 2 such slots, like 64 bit values do. + const int inc_stk_for_intfloat = 2; // 2 slots for ints and floats + const int inc_stk_for_longdouble = 2; // 2 slots for longs and doubles + + int ill_i = 0; + int ill_t = 0; + int i; + VMReg reg; + // Leave room for C-compatible ABI_112. + int stk = (frame::abi_112_size - frame::jit_out_preserve_size) / VMRegImpl::stack_slot_size; + int arg = 0; + int freg = 0; + + // Avoid passing C arguments in the wrong stack slots. + assert((SharedRuntime::out_preserve_stack_slots() + stk) * VMRegImpl::stack_slot_size == 112, + "passing C arguments in wrong stack slots"); + + // We fill-out regs AND regs2 if an argument must be passed in a + // register AND in a stack slot. If regs2 is NULL in such a + // situation, we bail-out with a fatal error. + for (int i = 0; i < total_args_passed; ++i, ++arg) { + // Initialize regs2 to BAD. + if (regs2 != NULL) regs2[i].set_bad(); + + switch(sig_bt[i]) { + case T_BOOLEAN: + case T_CHAR: + case T_BYTE: + case T_SHORT: + case T_INT: + // We must cast ints to longs and use full 64 bit stack slots + // here. We do the cast in GraphKit::gen_stub() and just guard + // here against loosing that change. + Unimplemented(); // TODO: PPC port + /* + assert(SharedRuntime::c_calling_convention_requires_ints_as_longs(), + "argument of type int should be promoted to type long"); + */ + guarantee(i > 0 && sig_bt[i-1] == T_LONG, + "argument of type (bt) should have been promoted to type (T_LONG,bt) for bt in " + "{T_BOOLEAN, T_CHAR, T_BYTE, T_SHORT, T_INT}"); + // Do not count halves. + regs[i].set_bad(); + --arg; + break; + case T_LONG: + guarantee(sig_bt[i+1] == T_VOID || + sig_bt[i+1] == T_BOOLEAN || sig_bt[i+1] == T_CHAR || + sig_bt[i+1] == T_BYTE || sig_bt[i+1] == T_SHORT || + sig_bt[i+1] == T_INT, + "expecting type (T_LONG,half) or type (T_LONG,bt) with bt in {T_BOOLEAN, T_CHAR, T_BYTE, T_SHORT, T_INT}"); + case T_OBJECT: + case T_ARRAY: + case T_ADDRESS: + case T_METADATA: + // Oops are already boxed if required (JNI). 
+ if (put_arg_in_reg(arg)) { + reg = iarg_reg[arg]; + } else { + reg = VMRegImpl::stack2reg(stk); + stk += inc_stk_for_longdouble; + } + regs[i].set2(reg); + break; + case T_FLOAT: + if (put_arg_in_reg(arg)) { + reg = farg_reg[freg]; + } else { + // Put float on stack +# if defined(LINUX) + reg = VMRegImpl::stack2reg(stk+1); +# elif defined(AIX) + reg = VMRegImpl::stack2reg(stk); +# else +# error "unknown OS" +# endif + stk += inc_stk_for_intfloat; + } + + if (freg < num_farg_registers) { + // There are still some float argument registers left. Put the + // float in a register if not already done. + if (reg != farg_reg[freg]) { + guarantee(regs2 != NULL, "must pass float in register and stack slot"); + VMReg reg2 = farg_reg[freg]; + regs2[i].set1(reg2); + } + ++freg; + } + + regs[i].set1(reg); + break; + case T_DOUBLE: + assert(sig_bt[i+1] == T_VOID, "expecting half"); + if (put_arg_in_reg(arg)) { + reg = farg_reg[freg]; + } else { + // Put double on stack. + reg = VMRegImpl::stack2reg(stk); + stk += inc_stk_for_longdouble; + } + + if (freg < num_farg_registers) { + // There are still some float argument registers left. Put the + // float in a register if not already done. + if (reg != farg_reg[freg]) { + guarantee(regs2 != NULL, "must pass float in register and stack slot"); + VMReg reg2 = farg_reg[freg]; + regs2[i].set2(reg2); + } + ++freg; + } + + regs[i].set2(reg); + break; + case T_VOID: + // Do not count halves. + regs[i].set_bad(); + --arg; + break; + default: + ShouldNotReachHere(); + } + } + + return round_to(stk, 2); +} +#endif // COMPILER2 + +static address gen_c2i_adapter(MacroAssembler *masm, + int total_args_passed, + int comp_args_on_stack, + const BasicType *sig_bt, + const VMRegPair *regs, + Label& call_interpreter, + const Register& ientry) { + + address c2i_entrypoint; + + const Register sender_SP = R21_sender_SP; // == R21_tmp1 + const Register code = R22_tmp2; + //const Register ientry = R23_tmp3; + const Register value_regs[] = { R24_tmp4, R25_tmp5, R26_tmp6 }; + const int num_value_regs = sizeof(value_regs) / sizeof(Register); + int value_regs_index = 0; + + const Register return_pc = R27_tmp7; + const Register tmp = R28_tmp8; + + assert_different_registers(sender_SP, code, ientry, return_pc, tmp); + + // Adapter needs TOP_IJAVA_FRAME_ABI. + const int adapter_size = frame::top_ijava_frame_abi_size + + round_to(total_args_passed * wordSize, frame::alignment_in_bytes); + + + // regular (verified) c2i entry point + c2i_entrypoint = __ pc(); + + // Does compiled code exists? If yes, patch the caller's callsite. + __ ld(code, method_(code)); + __ cmpdi(CCR0, code, 0); + __ ld(ientry, method_(interpreter_entry)); // preloaded + __ beq(CCR0, call_interpreter); + + + // Patch caller's callsite, method_(code) was not NULL which means that + // compiled code exists. + __ mflr(return_pc); + __ std(return_pc, _abi(lr), R1_SP); + RegisterSaver::push_frame_and_save_argument_registers(masm, tmp, adapter_size, total_args_passed, regs); + + __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite), R19_method, return_pc); + + RegisterSaver::restore_argument_registers_and_pop_frame(masm, adapter_size, total_args_passed, regs); + __ ld(return_pc, _abi(lr), R1_SP); + __ ld(ientry, method_(interpreter_entry)); // preloaded + __ mtlr(return_pc); + + + // call the interpreter + __ BIND(call_interpreter); + __ mtctr(ientry); + + // Get a copy of the current SP for loading caller's arguments. + __ mr(sender_SP, R1_SP); + + // Add space for the adapter. 
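 // (Editor's note, not part of the changeset: resize_frame with a negative
 // delta grows the top frame by adapter_size bytes, so the compiled-convention
 // arguments can be written into the interpreter's expected layout at st_off
 // relative to the new R1_SP.)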
+ __ resize_frame(-adapter_size, R12_scratch2); + + int st_off = adapter_size - wordSize; + + // Write the args into the outgoing interpreter space. + for (int i = 0; i < total_args_passed; i++) { + VMReg r_1 = regs[i].first(); + VMReg r_2 = regs[i].second(); + if (!r_1->is_valid()) { + assert(!r_2->is_valid(), ""); + continue; + } + if (r_1->is_stack()) { + Register tmp_reg = value_regs[value_regs_index]; + value_regs_index = (value_regs_index + 1) % num_value_regs; + // The calling convention produces OptoRegs that ignore the out + // preserve area (JIT's ABI). We must account for it here. + int ld_off = (r_1->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size; + if (!r_2->is_valid()) { + __ lwz(tmp_reg, ld_off, sender_SP); + } else { + __ ld(tmp_reg, ld_off, sender_SP); + } + // Pretend stack targets were loaded into tmp_reg. + r_1 = tmp_reg->as_VMReg(); + } + + if (r_1->is_Register()) { + Register r = r_1->as_Register(); + if (!r_2->is_valid()) { + __ stw(r, st_off, R1_SP); + st_off-=wordSize; + } else { + // Longs are given 2 64-bit slots in the interpreter, but the + // data is passed in only 1 slot. + if (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) { + DEBUG_ONLY( __ li(tmp, 0); __ std(tmp, st_off, R1_SP); ) + st_off-=wordSize; + } + __ std(r, st_off, R1_SP); + st_off-=wordSize; + } + } else { + assert(r_1->is_FloatRegister(), ""); + FloatRegister f = r_1->as_FloatRegister(); + if (!r_2->is_valid()) { + __ stfs(f, st_off, R1_SP); + st_off-=wordSize; + } else { + // In 64bit, doubles are given 2 64-bit slots in the interpreter, but the + // data is passed in only 1 slot. + // One of these should get known junk... + DEBUG_ONLY( __ li(tmp, 0); __ std(tmp, st_off, R1_SP); ) + st_off-=wordSize; + __ stfd(f, st_off, R1_SP); + st_off-=wordSize; + } + } + } + + // Jump to the interpreter just as if interpreter was doing it. + + // load TOS + __ addi(R17_tos, R1_SP, st_off); + + // Frame_manager expects initial_caller_sp (= SP without resize by c2i) in R21_tmp1. + assert(sender_SP == R21_sender_SP, "passing initial caller's SP in wrong register"); + __ bctr(); + + return c2i_entrypoint; +} + +static void gen_i2c_adapter(MacroAssembler *masm, + int total_args_passed, + int comp_args_on_stack, + const BasicType *sig_bt, + const VMRegPair *regs) { + + // Load method's entry-point from methodOop. + __ ld(R12_scratch2, in_bytes(Method::from_compiled_offset()), R19_method); + __ mtctr(R12_scratch2); + + // We will only enter here from an interpreted frame and never from after + // passing thru a c2i. Azul allowed this but we do not. If we lose the + // race and use a c2i we will remain interpreted for the race loser(s). + // This removes all sorts of headaches on the x86 side and also eliminates + // the possibility of having c2i -> i2c -> c2i -> ... endless transitions. + + // Note: r13 contains the senderSP on entry. We must preserve it since + // we may do a i2c -> c2i transition if we lose a race where compiled + // code goes non-entrant while we get args ready. + // In addition we use r13 to locate all the interpreter args as + // we must align the stack to 16 bytes on an i2c entry else we + // lose alignment we expect in all compiled code and register + // save code can segv when fxsave instructions find improperly + // aligned stack pointer. 
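 // Editor's sketch, not part of the changeset: each Java argument sits in a
 // 64-bit interpreter slot reached through R17_tos (longs and doubles occupy
 // two slots with the data in only one), so the shuffle below walks ld_offset
 // downward, loads ints with lwz, oops and longs with ld, floats with lfs/lfd,
 // and stages any value whose compiled-code home is a stack slot through the
 // value_regs scratch registers before storing it with stw/std.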
+ + const Register ld_ptr = R17_tos; + const Register value_regs[] = { R22_tmp2, R23_tmp3, R24_tmp4, R25_tmp5, R26_tmp6 }; + const int num_value_regs = sizeof(value_regs) / sizeof(Register); + int value_regs_index = 0; + + int ld_offset = total_args_passed*wordSize; + + // Cut-out for having no stack args. Since up to 2 int/oop args are passed + // in registers, we will occasionally have no stack args. + int comp_words_on_stack = 0; + if (comp_args_on_stack) { + // Sig words on the stack are greater-than VMRegImpl::stack0. Those in + // registers are below. By subtracting stack0, we either get a negative + // number (all values in registers) or the maximum stack slot accessed. + + // Convert 4-byte c2 stack slots to words. + comp_words_on_stack = round_to(comp_args_on_stack*VMRegImpl::stack_slot_size, wordSize)>>LogBytesPerWord; + // Round up to miminum stack alignment, in wordSize. + comp_words_on_stack = round_to(comp_words_on_stack, 2); + __ resize_frame(-comp_words_on_stack * wordSize, R11_scratch1); + } + + // Now generate the shuffle code. Pick up all register args and move the + // rest through register value=Z_R12. + BLOCK_COMMENT("Shuffle arguments"); + for (int i = 0; i < total_args_passed; i++) { + if (sig_bt[i] == T_VOID) { + assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half"); + continue; + } + + // Pick up 0, 1 or 2 words from ld_ptr. + assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(), + "scrambled load targets?"); + VMReg r_1 = regs[i].first(); + VMReg r_2 = regs[i].second(); + if (!r_1->is_valid()) { + assert(!r_2->is_valid(), ""); + continue; + } + if (r_1->is_FloatRegister()) { + if (!r_2->is_valid()) { + __ lfs(r_1->as_FloatRegister(), ld_offset, ld_ptr); + ld_offset-=wordSize; + } else { + // Skip the unused interpreter slot. + __ lfd(r_1->as_FloatRegister(), ld_offset-wordSize, ld_ptr); + ld_offset-=2*wordSize; + } + } else { + Register r; + if (r_1->is_stack()) { + // Must do a memory to memory move thru "value". + r = value_regs[value_regs_index]; + value_regs_index = (value_regs_index + 1) % num_value_regs; + } else { + r = r_1->as_Register(); + } + if (!r_2->is_valid()) { + // Not sure we need to do this but it shouldn't hurt. + if (sig_bt[i] == T_OBJECT || sig_bt[i] == T_ADDRESS || sig_bt[i] == T_ARRAY) { + __ ld(r, ld_offset, ld_ptr); + ld_offset-=wordSize; + } else { + __ lwz(r, ld_offset, ld_ptr); + ld_offset-=wordSize; + } + } else { + // In 64bit, longs are given 2 64-bit slots in the interpreter, but the + // data is passed in only 1 slot. + if (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) { + ld_offset-=wordSize; + } + __ ld(r, ld_offset, ld_ptr); + ld_offset-=wordSize; + } + + if (r_1->is_stack()) { + // Now store value where the compiler expects it + int st_off = (r_1->reg2stack() + SharedRuntime::out_preserve_stack_slots())*VMRegImpl::stack_slot_size; + + if (sig_bt[i] == T_INT || sig_bt[i] == T_FLOAT ||sig_bt[i] == T_BOOLEAN || + sig_bt[i] == T_SHORT || sig_bt[i] == T_CHAR || sig_bt[i] == T_BYTE) { + __ stw(r, st_off, R1_SP); + } else { + __ std(r, st_off, R1_SP); + } + } + } + } + + BLOCK_COMMENT("Store method oop"); + // Store method oop into thread->callee_target. + // We might end up in handle_wrong_method if the callee is + // deoptimized as we race thru here. 
If that happens we don't want + // to take a safepoint because the caller frame will look + // interpreted and arguments are now "compiled" so it is much better + // to make this transition invisible to the stack walking + // code. Unfortunately if we try and find the callee by normal means + // a safepoint is possible. So we stash the desired callee in the + // thread and the vm will find there should this case occur. + __ std(R19_method, thread_(callee_target)); + + // Jump to the compiled code just as if compiled code was doing it. + __ bctr(); +} + +AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm, + int total_args_passed, + int comp_args_on_stack, + const BasicType *sig_bt, + const VMRegPair *regs, + AdapterFingerPrint* fingerprint) { + address i2c_entry; + address c2i_unverified_entry; + address c2i_entry; + + + // entry: i2c + + __ align(CodeEntryAlignment); + i2c_entry = __ pc(); + gen_i2c_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs); + + + // entry: c2i unverified + + __ align(CodeEntryAlignment); + BLOCK_COMMENT("c2i unverified entry"); + c2i_unverified_entry = __ pc(); + + // inline_cache contains a compiledICHolder + const Register ic = R19_method; + const Register ic_klass = R11_scratch1; + const Register receiver_klass = R12_scratch2; + const Register code = R21_tmp1; + const Register ientry = R23_tmp3; + + assert_different_registers(ic, ic_klass, receiver_klass, R3_ARG1, code, ientry); + assert(R11_scratch1 == R11, "need prologue scratch register"); + + Label call_interpreter; + + assert(!MacroAssembler::needs_explicit_null_check(oopDesc::klass_offset_in_bytes()), + "klass offset should reach into any page"); + // Check for NULL argument if we don't have implicit null checks. + if (!ImplicitNullChecks NOT_LINUX(|| true) /*!os::zero_page_read_protected()*/) { + if (TrapBasedNullChecks) { + __ trap_null_check(R3_ARG1); + } else { + Label valid; + __ cmpdi(CCR0, R3_ARG1, 0); + __ bne_predict_taken(CCR0, valid); + // We have a null argument, branch to ic_miss_stub. + __ b64_patchable((address)SharedRuntime::get_ic_miss_stub(), + relocInfo::runtime_call_type); + __ BIND(valid); + } + } + // Assume argument is not NULL, load klass from receiver. + __ load_klass(receiver_klass, R3_ARG1); + + __ ld(ic_klass, CompiledICHolder::holder_klass_offset(), ic); + + if (TrapBasedICMissChecks) { + __ trap_ic_miss_check(receiver_klass, ic_klass); + } else { + Label valid; + __ cmpd(CCR0, receiver_klass, ic_klass); + __ beq_predict_taken(CCR0, valid); + // We have an unexpected klass, branch to ic_miss_stub. + __ b64_patchable((address)SharedRuntime::get_ic_miss_stub(), + relocInfo::runtime_call_type); + __ BIND(valid); + } + + // Argument is valid and klass is as expected, continue. + + // Extract method from inline cache, verified entry point needs it. + __ ld(R19_method, CompiledICHolder::holder_method_offset(), ic); + assert(R19_method == ic, "the inline cache register is dead here"); + + __ ld(code, method_(code)); + __ cmpdi(CCR0, code, 0); + __ ld(ientry, method_(interpreter_entry)); // preloaded + __ beq_predict_taken(CCR0, call_interpreter); + + // Branch to ic_miss_stub. 
+ __ b64_patchable((address)SharedRuntime::get_ic_miss_stub(), + relocInfo::runtime_call_type); + + // entry: c2i + + c2i_entry = gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, call_interpreter, ientry); + + return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry); +} + +#ifdef COMPILER2 +// An oop arg. Must pass a handle not the oop itself. +static void object_move(MacroAssembler* masm, + int frame_size_in_slots, + OopMap* oop_map, int oop_handle_offset, + bool is_receiver, int* receiver_offset, + VMRegPair src, VMRegPair dst, + Register r_caller_sp, Register r_temp_1, Register r_temp_2) { + assert(!is_receiver || (is_receiver && (*receiver_offset == -1)), + "receiver has already been moved"); + + // We must pass a handle. First figure out the location we use as a handle. + + if (src.first()->is_stack()) { + // stack to stack or reg + + const Register r_handle = dst.first()->is_stack() ? r_temp_1 : dst.first()->as_Register(); + Label skip; + const int oop_slot_in_callers_frame = reg2slot(src.first()); + + guarantee(!is_receiver, "expecting receiver in register"); + oop_map->set_oop(VMRegImpl::stack2reg(oop_slot_in_callers_frame + frame_size_in_slots)); + + __ addi(r_handle, r_caller_sp, reg2offset(src.first())); + __ ld( r_temp_2, reg2offset(src.first()), r_caller_sp); + __ cmpdi(CCR0, r_temp_2, 0); + __ bne(CCR0, skip); + // Use a NULL handle if oop is NULL. + __ li(r_handle, 0); + __ bind(skip); + + if (dst.first()->is_stack()) { + // stack to stack + __ std(r_handle, reg2offset(dst.first()), R1_SP); + } else { + // stack to reg + // Nothing to do, r_handle is already the dst register. + } + } else { + // reg to stack or reg + const Register r_oop = src.first()->as_Register(); + const Register r_handle = dst.first()->is_stack() ? r_temp_1 : dst.first()->as_Register(); + const int oop_slot = (r_oop->encoding()-R3_ARG1->encoding()) * VMRegImpl::slots_per_word + + oop_handle_offset; // in slots + const int oop_offset = oop_slot * VMRegImpl::stack_slot_size; + Label skip; + + if (is_receiver) { + *receiver_offset = oop_offset; + } + oop_map->set_oop(VMRegImpl::stack2reg(oop_slot)); + + __ std( r_oop, oop_offset, R1_SP); + __ addi(r_handle, R1_SP, oop_offset); + + __ cmpdi(CCR0, r_oop, 0); + __ bne(CCR0, skip); + // Use a NULL handle if oop is NULL. + __ li(r_handle, 0); + __ bind(skip); + + if (dst.first()->is_stack()) { + // reg to stack + __ std(r_handle, reg2offset(dst.first()), R1_SP); + } else { + // reg to reg + // Nothing to do, r_handle is already the dst register. 
+ } + } +} + +static void int_move(MacroAssembler*masm, + VMRegPair src, VMRegPair dst, + Register r_caller_sp, Register r_temp) { + assert(src.first()->is_valid() && src.second() == src.first()->next(), "incoming must be long-int"); + assert(dst.first()->is_valid() && dst.second() == dst.first()->next(), "outgoing must be long"); + + if (src.first()->is_stack()) { + if (dst.first()->is_stack()) { + // stack to stack + __ lwa(r_temp, reg2offset(src.first()), r_caller_sp); + __ std(r_temp, reg2offset(dst.first()), R1_SP); + } else { + // stack to reg + __ lwa(dst.first()->as_Register(), reg2offset(src.first()), r_caller_sp); + } + } else if (dst.first()->is_stack()) { + // reg to stack + __ extsw(r_temp, src.first()->as_Register()); + __ std(r_temp, reg2offset(dst.first()), R1_SP); + } else { + // reg to reg + __ extsw(dst.first()->as_Register(), src.first()->as_Register()); + } +} + +static void long_move(MacroAssembler*masm, + VMRegPair src, VMRegPair dst, + Register r_caller_sp, Register r_temp) { + assert(src.first()->is_valid() && src.second() == src.first()->next(), "incoming must be long"); + assert(dst.first()->is_valid() && dst.second() == dst.first()->next(), "outgoing must be long"); + + if (src.first()->is_stack()) { + if (dst.first()->is_stack()) { + // stack to stack + __ ld( r_temp, reg2offset(src.first()), r_caller_sp); + __ std(r_temp, reg2offset(dst.first()), R1_SP); + } else { + // stack to reg + __ ld(dst.first()->as_Register(), reg2offset(src.first()), r_caller_sp); + } + } else if (dst.first()->is_stack()) { + // reg to stack + __ std(src.first()->as_Register(), reg2offset(dst.first()), R1_SP); + } else { + // reg to reg + if (dst.first()->as_Register() != src.first()->as_Register()) + __ mr(dst.first()->as_Register(), src.first()->as_Register()); + } +} + +static void float_move(MacroAssembler*masm, + VMRegPair src, VMRegPair dst, + Register r_caller_sp, Register r_temp) { + assert(src.first()->is_valid() && !src.second()->is_valid(), "incoming must be float"); + assert(dst.first()->is_valid() && !dst.second()->is_valid(), "outgoing must be float"); + + if (src.first()->is_stack()) { + if (dst.first()->is_stack()) { + // stack to stack + __ lwz(r_temp, reg2offset(src.first()), r_caller_sp); + __ stw(r_temp, reg2offset(dst.first()), R1_SP); + } else { + // stack to reg + __ lfs(dst.first()->as_FloatRegister(), reg2offset(src.first()), r_caller_sp); + } + } else if (dst.first()->is_stack()) { + // reg to stack + __ stfs(src.first()->as_FloatRegister(), reg2offset(dst.first()), R1_SP); + } else { + // reg to reg + if (dst.first()->as_FloatRegister() != src.first()->as_FloatRegister()) + __ fmr(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister()); + } +} + +static void double_move(MacroAssembler*masm, + VMRegPair src, VMRegPair dst, + Register r_caller_sp, Register r_temp) { + assert(src.first()->is_valid() && src.second() == src.first()->next(), "incoming must be double"); + assert(dst.first()->is_valid() && dst.second() == dst.first()->next(), "outgoing must be double"); + + if (src.first()->is_stack()) { + if (dst.first()->is_stack()) { + // stack to stack + __ ld( r_temp, reg2offset(src.first()), r_caller_sp); + __ std(r_temp, reg2offset(dst.first()), R1_SP); + } else { + // stack to reg + __ lfd(dst.first()->as_FloatRegister(), reg2offset(src.first()), r_caller_sp); + } + } else if (dst.first()->is_stack()) { + // reg to stack + __ stfd(src.first()->as_FloatRegister(), reg2offset(dst.first()), R1_SP); + } else { + // reg to reg + if 
(dst.first()->as_FloatRegister() != src.first()->as_FloatRegister()) + __ fmr(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister()); + } +} + +void SharedRuntime::save_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) { + switch (ret_type) { + case T_BOOLEAN: + case T_CHAR: + case T_BYTE: + case T_SHORT: + case T_INT: + __ stw (R3_RET, frame_slots*VMRegImpl::stack_slot_size, R1_SP); + break; + case T_ARRAY: + case T_OBJECT: + case T_LONG: + __ std (R3_RET, frame_slots*VMRegImpl::stack_slot_size, R1_SP); + break; + case T_FLOAT: + __ stfs(F1_RET, frame_slots*VMRegImpl::stack_slot_size, R1_SP); + break; + case T_DOUBLE: + __ stfd(F1_RET, frame_slots*VMRegImpl::stack_slot_size, R1_SP); + break; + case T_VOID: + break; + default: + ShouldNotReachHere(); + break; + } +} + +void SharedRuntime::restore_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) { + switch (ret_type) { + case T_BOOLEAN: + case T_CHAR: + case T_BYTE: + case T_SHORT: + case T_INT: + __ lwz(R3_RET, frame_slots*VMRegImpl::stack_slot_size, R1_SP); + break; + case T_ARRAY: + case T_OBJECT: + case T_LONG: + __ ld (R3_RET, frame_slots*VMRegImpl::stack_slot_size, R1_SP); + break; + case T_FLOAT: + __ lfs(F1_RET, frame_slots*VMRegImpl::stack_slot_size, R1_SP); + break; + case T_DOUBLE: + __ lfd(F1_RET, frame_slots*VMRegImpl::stack_slot_size, R1_SP); + break; + case T_VOID: + break; + default: + ShouldNotReachHere(); + break; + } +} + +static void save_or_restore_arguments(MacroAssembler* masm, + const int stack_slots, + const int total_in_args, + const int arg_save_area, + OopMap* map, + VMRegPair* in_regs, + BasicType* in_sig_bt) { + // If map is non-NULL then the code should store the values, + // otherwise it should load them. + int slot = arg_save_area; + // Save down double word first. + for (int i = 0; i < total_in_args; i++) { + if (in_regs[i].first()->is_FloatRegister() && in_sig_bt[i] == T_DOUBLE) { + int offset = slot * VMRegImpl::stack_slot_size; + slot += VMRegImpl::slots_per_word; + assert(slot <= stack_slots, "overflow (after DOUBLE stack slot)"); + if (map != NULL) { + __ stfd(in_regs[i].first()->as_FloatRegister(), offset, R1_SP); + } else { + __ lfd(in_regs[i].first()->as_FloatRegister(), offset, R1_SP); + } + } else if (in_regs[i].first()->is_Register() && + (in_sig_bt[i] == T_LONG || in_sig_bt[i] == T_ARRAY)) { + int offset = slot * VMRegImpl::stack_slot_size; + if (map != NULL) { + __ std(in_regs[i].first()->as_Register(), offset, R1_SP); + if (in_sig_bt[i] == T_ARRAY) { + map->set_oop(VMRegImpl::stack2reg(slot)); + } + } else { + __ ld(in_regs[i].first()->as_Register(), offset, R1_SP); + } + slot += VMRegImpl::slots_per_word; + assert(slot <= stack_slots, "overflow (after LONG/ARRAY stack slot)"); + } + } + // Save or restore single word registers. + for (int i = 0; i < total_in_args; i++) { + // PPC64: pass ints as longs: must only deal with floats here. 
+ if (in_regs[i].first()->is_FloatRegister()) { + if (in_sig_bt[i] == T_FLOAT) { + int offset = slot * VMRegImpl::stack_slot_size; + slot++; + assert(slot <= stack_slots, "overflow (after FLOAT stack slot)"); + if (map != NULL) { + __ stfs(in_regs[i].first()->as_FloatRegister(), offset, R1_SP); + } else { + __ lfs(in_regs[i].first()->as_FloatRegister(), offset, R1_SP); + } + } + } else if (in_regs[i].first()->is_stack()) { + if (in_sig_bt[i] == T_ARRAY && map != NULL) { + int offset_in_older_frame = in_regs[i].first()->reg2stack() + SharedRuntime::out_preserve_stack_slots(); + map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + stack_slots)); + } + } + } +} + +// Check GC_locker::needs_gc and enter the runtime if it's true. This +// keeps a new JNI critical region from starting until a GC has been +// forced. Save down any oops in registers and describe them in an +// OopMap. +static void check_needs_gc_for_critical_native(MacroAssembler* masm, + const int stack_slots, + const int total_in_args, + const int arg_save_area, + OopMapSet* oop_maps, + VMRegPair* in_regs, + BasicType* in_sig_bt, + Register tmp_reg ) { + __ block_comment("check GC_locker::needs_gc"); + Label cont; + __ lbz(tmp_reg, (RegisterOrConstant)(intptr_t)GC_locker::needs_gc_address()); + __ cmplwi(CCR0, tmp_reg, 0); + __ beq(CCR0, cont); + + // Save down any values that are live in registers and call into the + // runtime to halt for a GC. + OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/); + save_or_restore_arguments(masm, stack_slots, total_in_args, + arg_save_area, map, in_regs, in_sig_bt); + + __ mr(R3_ARG1, R16_thread); + __ set_last_Java_frame(R1_SP, noreg); + + __ block_comment("block_for_jni_critical"); + address entry_point = CAST_FROM_FN_PTR(address, SharedRuntime::block_for_jni_critical); + __ call_c(CAST_FROM_FN_PTR(FunctionDescriptor*, entry_point), relocInfo::runtime_call_type); + address start = __ pc() - __ offset(), + calls_return_pc = __ last_calls_return_pc(); + oop_maps->add_gc_map(calls_return_pc - start, map); + + __ reset_last_Java_frame(); + + // Reload all the register arguments. + save_or_restore_arguments(masm, stack_slots, total_in_args, + arg_save_area, NULL, in_regs, in_sig_bt); + + __ BIND(cont); + +#ifdef ASSERT + if (StressCriticalJNINatives) { + // Stress register saving. + OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/); + save_or_restore_arguments(masm, stack_slots, total_in_args, + arg_save_area, map, in_regs, in_sig_bt); + // Destroy argument registers. 
+ for (int i = 0; i < total_in_args; i++) { + if (in_regs[i].first()->is_Register()) { + const Register reg = in_regs[i].first()->as_Register(); + __ neg(reg, reg); + } else if (in_regs[i].first()->is_FloatRegister()) { + __ fneg(in_regs[i].first()->as_FloatRegister(), in_regs[i].first()->as_FloatRegister()); + } + } + + save_or_restore_arguments(masm, stack_slots, total_in_args, + arg_save_area, NULL, in_regs, in_sig_bt); + } +#endif +} + +static void move_ptr(MacroAssembler* masm, VMRegPair src, VMRegPair dst, Register r_caller_sp, Register r_temp) { + if (src.first()->is_stack()) { + if (dst.first()->is_stack()) { + // stack to stack + __ ld(r_temp, reg2offset(src.first()), r_caller_sp); + __ std(r_temp, reg2offset(dst.first()), R1_SP); + } else { + // stack to reg + __ ld(dst.first()->as_Register(), reg2offset(src.first()), r_caller_sp); + } + } else if (dst.first()->is_stack()) { + // reg to stack + __ std(src.first()->as_Register(), reg2offset(dst.first()), R1_SP); + } else { + if (dst.first() != src.first()) { + __ mr(dst.first()->as_Register(), src.first()->as_Register()); + } + } +} + +// Unpack an array argument into a pointer to the body and the length +// if the array is non-null, otherwise pass 0 for both. +static void unpack_array_argument(MacroAssembler* masm, VMRegPair reg, BasicType in_elem_type, + VMRegPair body_arg, VMRegPair length_arg, Register r_caller_sp, + Register tmp_reg, Register tmp2_reg) { + assert(!body_arg.first()->is_Register() || body_arg.first()->as_Register() != tmp_reg, + "possible collision"); + assert(!length_arg.first()->is_Register() || length_arg.first()->as_Register() != tmp_reg, + "possible collision"); + + // Pass the length, ptr pair. + Label set_out_args; + VMRegPair tmp, tmp2; + tmp.set_ptr(tmp_reg->as_VMReg()); + tmp2.set_ptr(tmp2_reg->as_VMReg()); + if (reg.first()->is_stack()) { + // Load the arg up from the stack. + move_ptr(masm, reg, tmp, r_caller_sp, /*unused*/ R0); + reg = tmp; + } + __ li(tmp2_reg, 0); // Pass zeros if Array=null. + if (tmp_reg != reg.first()->as_Register()) __ li(tmp_reg, 0); + __ cmpdi(CCR0, reg.first()->as_Register(), 0); + __ beq(CCR0, set_out_args); + __ lwa(tmp2_reg, arrayOopDesc::length_offset_in_bytes(), reg.first()->as_Register()); + __ addi(tmp_reg, reg.first()->as_Register(), arrayOopDesc::base_offset_in_bytes(in_elem_type)); + __ bind(set_out_args); + move_ptr(masm, tmp, body_arg, r_caller_sp, /*unused*/ R0); + move_ptr(masm, tmp2, length_arg, r_caller_sp, /*unused*/ R0); // Same as move32_64 on PPC64. 
+} + +static void verify_oop_args(MacroAssembler* masm, + methodHandle method, + const BasicType* sig_bt, + const VMRegPair* regs) { + Register temp_reg = R19_method; // not part of any compiled calling seq + if (VerifyOops) { + for (int i = 0; i < method->size_of_parameters(); i++) { + if (sig_bt[i] == T_OBJECT || + sig_bt[i] == T_ARRAY) { + VMReg r = regs[i].first(); + assert(r->is_valid(), "bad oop arg"); + if (r->is_stack()) { + __ ld(temp_reg, reg2offset(r), R1_SP); + __ verify_oop(temp_reg); + } else { + __ verify_oop(r->as_Register()); + } + } + } + } +} + +static void gen_special_dispatch(MacroAssembler* masm, + methodHandle method, + const BasicType* sig_bt, + const VMRegPair* regs) { + verify_oop_args(masm, method, sig_bt, regs); + vmIntrinsics::ID iid = method->intrinsic_id(); + + // Now write the args into the outgoing interpreter space + bool has_receiver = false; + Register receiver_reg = noreg; + int member_arg_pos = -1; + Register member_reg = noreg; + int ref_kind = MethodHandles::signature_polymorphic_intrinsic_ref_kind(iid); + if (ref_kind != 0) { + member_arg_pos = method->size_of_parameters() - 1; // trailing MemberName argument + member_reg = R19_method; // known to be free at this point + has_receiver = MethodHandles::ref_kind_has_receiver(ref_kind); + } else if (iid == vmIntrinsics::_invokeBasic) { + has_receiver = true; + } else { + fatal(err_msg_res("unexpected intrinsic id %d", iid)); + } + + if (member_reg != noreg) { + // Load the member_arg into register, if necessary. + SharedRuntime::check_member_name_argument_is_last_argument(method, sig_bt, regs); + VMReg r = regs[member_arg_pos].first(); + if (r->is_stack()) { + __ ld(member_reg, reg2offset(r), R1_SP); + } else { + // no data motion is needed + member_reg = r->as_Register(); + } + } + + if (has_receiver) { + // Make sure the receiver is loaded into a register. + assert(method->size_of_parameters() > 0, "oob"); + assert(sig_bt[0] == T_OBJECT, "receiver argument must be an object"); + VMReg r = regs[0].first(); + assert(r->is_valid(), "bad receiver arg"); + if (r->is_stack()) { + // Porting note: This assumes that compiled calling conventions always + // pass the receiver oop in a register. If this is not true on some + // platform, pick a temp and load the receiver from stack. + fatal("receiver always in a register"); + receiver_reg = R11_scratch1; // TODO (hs24): is R11_scratch1 really free at this point? + __ ld(receiver_reg, reg2offset(r), R1_SP); + } else { + // no data motion is needed + receiver_reg = r->as_Register(); + } + } + + // Figure out which address we are really jumping to: + MethodHandles::generate_method_handle_dispatch(masm, iid, + receiver_reg, member_reg, /*for_compiler_entry:*/ true); +} + +#endif // COMPILER2 + +// --------------------------------------------------------------------------- +// Generate a native wrapper for a given method. The method takes arguments +// in the Java compiled code convention, marshals them to the native +// convention (handlizes oops, etc), transitions to native, makes the call, +// returns to java state (possibly blocking), unhandlizes any result and +// returns. +// +// Critical native functions are a shorthand for the use of +// GetPrimtiveArrayCritical and disallow the use of any other JNI +// functions. The wrapper is expected to unpack the arguments before +// passing them to the callee and perform checks before and after the +// native call to ensure that they GC_locker +// lock_critical/unlock_critical semantics are followed. 
Some other +// parts of JNI setup are skipped like the tear down of the JNI handle +// block and the check for pending exceptions it's impossible for them +// to be thrown. +// +// They are roughly structured like this: +// if (GC_locker::needs_gc()) +// SharedRuntime::block_for_jni_critical(); +// tranistion to thread_in_native +// unpack arrray arguments and call native entry point +// check for safepoint in progress +// check if any thread suspend flags are set +// call into JVM and possible unlock the JNI critical +// if a GC was suppressed while in the critical native. +// transition back to thread_in_Java +// return to caller +// +nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm, + methodHandle method, + int compile_id, + BasicType *in_sig_bt, + VMRegPair *in_regs, + BasicType ret_type) { +#ifdef COMPILER2 + if (method->is_method_handle_intrinsic()) { + vmIntrinsics::ID iid = method->intrinsic_id(); + intptr_t start = (intptr_t)__ pc(); + int vep_offset = ((intptr_t)__ pc()) - start; + gen_special_dispatch(masm, + method, + in_sig_bt, + in_regs); + int frame_complete = ((intptr_t)__ pc()) - start; // not complete, period + __ flush(); + int stack_slots = SharedRuntime::out_preserve_stack_slots(); // no out slots at all, actually + return nmethod::new_native_nmethod(method, + compile_id, + masm->code(), + vep_offset, + frame_complete, + stack_slots / VMRegImpl::slots_per_word, + in_ByteSize(-1), + in_ByteSize(-1), + (OopMapSet*)NULL); + } + + bool is_critical_native = true; + address native_func = method->critical_native_function(); + if (native_func == NULL) { + native_func = method->native_function(); + is_critical_native = false; + } + assert(native_func != NULL, "must have function"); + + // First, create signature for outgoing C call + // -------------------------------------------------------------------------- + + int total_in_args = method->size_of_parameters(); + // We have received a description of where all the java args are located + // on entry to the wrapper. We need to convert these args to where + // the jni function will expect them. To figure out where they go + // we convert the java signature to a C signature by inserting + // the hidden arguments as arg[0] and possibly arg[1] (static method) + // + // Additionally, on ppc64 we must convert integers to longs in the C + // signature. We do this in advance in order to have no trouble with + // indexes into the bt-arrays. + // So convert the signature and registers now, and adjust the total number + // of in-arguments accordingly. + int i2l_argcnt = convert_ints_to_longints_argcnt(total_in_args, in_sig_bt); // PPC64: pass ints as longs. + + // Calculate the total number of C arguments and create arrays for the + // signature and the outgoing registers. + // On ppc64, we have two arrays for the outgoing registers, because + // some floating-point arguments must be passed in registers _and_ + // in stack locations. + bool method_is_static = method->is_static(); + int total_c_args = i2l_argcnt; + + if (!is_critical_native) { + int n_hidden_args = method_is_static ? 2 : 1; + total_c_args += n_hidden_args; + } else { + // No JNIEnv*, no this*, but unpacked arrays (base+length). 
+ for (int i = 0; i < total_in_args; i++) { + if (in_sig_bt[i] == T_ARRAY) { + total_c_args += 2; // PPC64: T_LONG, T_INT, T_ADDRESS (see convert_ints_to_longints and c_calling_convention) + } + } + } + + BasicType *out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args); + VMRegPair *out_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args); + VMRegPair *out_regs2 = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args); + BasicType* in_elem_bt = NULL; + + // Create the signature for the C call: + // 1) add the JNIEnv* + // 2) add the class if the method is static + // 3) copy the rest of the incoming signature (shifted by the number of + // hidden arguments). + + int argc = 0; + if (!is_critical_native) { + convert_ints_to_longints(i2l_argcnt, total_in_args, in_sig_bt, in_regs); // PPC64: pass ints as longs. + + out_sig_bt[argc++] = T_ADDRESS; + if (method->is_static()) { + out_sig_bt[argc++] = T_OBJECT; + } + + for (int i = 0; i < total_in_args ; i++ ) { + out_sig_bt[argc++] = in_sig_bt[i]; + } + } else { + Thread* THREAD = Thread::current(); + in_elem_bt = NEW_RESOURCE_ARRAY(BasicType, i2l_argcnt); + SignatureStream ss(method->signature()); + int o = 0; + for (int i = 0; i < total_in_args ; i++, o++) { + if (in_sig_bt[i] == T_ARRAY) { + // Arrays are passed as int, elem* pair + Symbol* atype = ss.as_symbol(CHECK_NULL); + const char* at = atype->as_C_string(); + if (strlen(at) == 2) { + assert(at[0] == '[', "must be"); + switch (at[1]) { + case 'B': in_elem_bt[o] = T_BYTE; break; + case 'C': in_elem_bt[o] = T_CHAR; break; + case 'D': in_elem_bt[o] = T_DOUBLE; break; + case 'F': in_elem_bt[o] = T_FLOAT; break; + case 'I': in_elem_bt[o] = T_INT; break; + case 'J': in_elem_bt[o] = T_LONG; break; + case 'S': in_elem_bt[o] = T_SHORT; break; + case 'Z': in_elem_bt[o] = T_BOOLEAN; break; + default: ShouldNotReachHere(); + } + } + } else { + in_elem_bt[o] = T_VOID; + switch(in_sig_bt[i]) { // PPC64: pass ints as longs. + case T_BOOLEAN: + case T_CHAR: + case T_BYTE: + case T_SHORT: + case T_INT: in_elem_bt[++o] = T_VOID; break; + default: break; + } + } + if (in_sig_bt[i] != T_VOID) { + assert(in_sig_bt[i] == ss.type(), "must match"); + ss.next(); + } + } + assert(i2l_argcnt==o, "must match"); + + convert_ints_to_longints(i2l_argcnt, total_in_args, in_sig_bt, in_regs); // PPC64: pass ints as longs. + + for (int i = 0; i < total_in_args ; i++ ) { + if (in_sig_bt[i] == T_ARRAY) { + // Arrays are passed as int, elem* pair. + out_sig_bt[argc++] = T_LONG; // PPC64: pass ints as longs. + out_sig_bt[argc++] = T_INT; + out_sig_bt[argc++] = T_ADDRESS; + } else { + out_sig_bt[argc++] = in_sig_bt[i]; + } + } + } + + + // Compute the wrapper's frame size. + // -------------------------------------------------------------------------- + + // Now figure out where the args must be stored and how much stack space + // they require. + // + // Compute framesize for the wrapper. We need to handlize all oops in + // incoming registers. + // + // Calculate the total number of stack slots we will need: + // 1) abi requirements + // 2) outgoing arguments + // 3) space for inbound oop handle area + // 4) space for handlizing a klass if static method + // 5) space for a lock if synchronized method + // 6) workspace for saving return values, int <-> float reg moves, etc. 
+ // 7) alignment + // + // Layout of the native wrapper frame: + // (stack grows upwards, memory grows downwards) + // + // NW [ABI_112] <-- 1) R1_SP + // [outgoing arguments] <-- 2) R1_SP + out_arg_slot_offset + // [oopHandle area] <-- 3) R1_SP + oop_handle_offset (save area for critical natives) + // klass <-- 4) R1_SP + klass_offset + // lock <-- 5) R1_SP + lock_offset + // [workspace] <-- 6) R1_SP + workspace_offset + // [alignment] (optional) <-- 7) + // caller [JIT_TOP_ABI_48] <-- r_callers_sp + // + // - *_slot_offset Indicates offset from SP in number of stack slots. + // - *_offset Indicates offset from SP in bytes. + + int stack_slots = c_calling_convention(out_sig_bt, out_regs, out_regs2, total_c_args) // 1+2) + + SharedRuntime::out_preserve_stack_slots(); // See c_calling_convention. + + // Now the space for the inbound oop handle area. + int total_save_slots = num_java_iarg_registers * VMRegImpl::slots_per_word; + if (is_critical_native) { + // Critical natives may have to call out so they need a save area + // for register arguments. + int double_slots = 0; + int single_slots = 0; + for (int i = 0; i < total_in_args; i++) { + if (in_regs[i].first()->is_Register()) { + const Register reg = in_regs[i].first()->as_Register(); + switch (in_sig_bt[i]) { + case T_BOOLEAN: + case T_BYTE: + case T_SHORT: + case T_CHAR: + case T_INT: /*single_slots++;*/ break; // PPC64: pass ints as longs. + case T_ARRAY: + case T_LONG: double_slots++; break; + default: ShouldNotReachHere(); + } + } else if (in_regs[i].first()->is_FloatRegister()) { + switch (in_sig_bt[i]) { + case T_FLOAT: single_slots++; break; + case T_DOUBLE: double_slots++; break; + default: ShouldNotReachHere(); + } + } + } + total_save_slots = double_slots * 2 + round_to(single_slots, 2); // round to even + } + + int oop_handle_slot_offset = stack_slots; + stack_slots += total_save_slots; // 3) + + int klass_slot_offset = 0; + int klass_offset = -1; + if (method_is_static && !is_critical_native) { // 4) + klass_slot_offset = stack_slots; + klass_offset = klass_slot_offset * VMRegImpl::stack_slot_size; + stack_slots += VMRegImpl::slots_per_word; + } + + int lock_slot_offset = 0; + int lock_offset = -1; + if (method->is_synchronized()) { // 5) + lock_slot_offset = stack_slots; + lock_offset = lock_slot_offset * VMRegImpl::stack_slot_size; + stack_slots += VMRegImpl::slots_per_word; + } + + int workspace_slot_offset = stack_slots; // 6) + stack_slots += 2; + + // Now compute actual number of stack words we need. + // Rounding to make stack properly aligned. + stack_slots = round_to(stack_slots, // 7) + frame::alignment_in_bytes / VMRegImpl::stack_slot_size); + int frame_size_in_bytes = stack_slots * VMRegImpl::stack_slot_size; + + + // Now we can start generating code. + // -------------------------------------------------------------------------- + + intptr_t start_pc = (intptr_t)__ pc(); + intptr_t vep_start_pc; + intptr_t frame_done_pc; + intptr_t oopmap_pc; + + Label ic_miss; + Label handle_pending_exception; + + Register r_callers_sp = R21; + Register r_temp_1 = R22; + Register r_temp_2 = R23; + Register r_temp_3 = R24; + Register r_temp_4 = R25; + Register r_temp_5 = R26; + Register r_temp_6 = R27; + Register r_return_pc = R28; + + Register r_carg1_jnienv = noreg; + Register r_carg2_classorobject = noreg; + if (!is_critical_native) { + r_carg1_jnienv = out_regs[0].first()->as_Register(); + r_carg2_classorobject = out_regs[1].first()->as_Register(); + } + + + // Generate the Unverified Entry Point (UEP). 
+ // -------------------------------------------------------------------------- + assert(start_pc == (intptr_t)__ pc(), "uep must be at start"); + + // Check ic: object class == cached class? + if (!method_is_static) { + Register ic = as_Register(Matcher::inline_cache_reg_encode()); + Register receiver_klass = r_temp_1; + + __ cmpdi(CCR0, R3_ARG1, 0); + __ beq(CCR0, ic_miss); + __ verify_oop(R3_ARG1); + __ load_klass(receiver_klass, R3_ARG1); + + __ cmpd(CCR0, receiver_klass, ic); + __ bne(CCR0, ic_miss); + } + + + // Generate the Verified Entry Point (VEP). + // -------------------------------------------------------------------------- + vep_start_pc = (intptr_t)__ pc(); + + __ save_LR_CR(r_temp_1); + __ generate_stack_overflow_check(frame_size_in_bytes); // Check before creating frame. + __ mr(r_callers_sp, R1_SP); // Remember frame pointer. + __ push_frame(frame_size_in_bytes, r_temp_1); // Push the c2n adapter's frame. + frame_done_pc = (intptr_t)__ pc(); + + // Native nmethod wrappers never take possesion of the oop arguments. + // So the caller will gc the arguments. + // The only thing we need an oopMap for is if the call is static. + // + // An OopMap for lock (and class if static), and one for the VM call itself. + OopMapSet *oop_maps = new OopMapSet(); + OopMap *oop_map = new OopMap(stack_slots * 2, 0 /* arg_slots*/); + + if (is_critical_native) { + check_needs_gc_for_critical_native(masm, stack_slots, total_in_args, oop_handle_slot_offset, oop_maps, in_regs, in_sig_bt, r_temp_1); + } + + // Move arguments from register/stack to register/stack. + // -------------------------------------------------------------------------- + // + // We immediately shuffle the arguments so that for any vm call we have + // to make from here on out (sync slow path, jvmti, etc.) we will have + // captured the oops from our caller and have a valid oopMap for them. + // + // Natives require 1 or 2 extra arguments over the normal ones: the JNIEnv* + // (derived from JavaThread* which is in R16_thread) and, if static, + // the class mirror instead of a receiver. This pretty much guarantees that + // register layout will not match. We ignore these extra arguments during + // the shuffle. The shuffle is described by the two calling convention + // vectors we have in our possession. We simply walk the java vector to + // get the source locations and the c vector to get the destinations. + + // Record sp-based slot for receiver on stack for non-static methods. + int receiver_offset = -1; + + // We move the arguments backward because the floating point registers + // destination will always be to a register with a greater or equal + // register number or the stack. 
+ // in is the index of the incoming Java arguments + // out is the index of the outgoing C arguments + +#ifdef ASSERT + bool reg_destroyed[RegisterImpl::number_of_registers]; + bool freg_destroyed[FloatRegisterImpl::number_of_registers]; + for (int r = 0 ; r < RegisterImpl::number_of_registers ; r++) { + reg_destroyed[r] = false; + } + for (int f = 0 ; f < FloatRegisterImpl::number_of_registers ; f++) { + freg_destroyed[f] = false; + } +#endif // ASSERT + + for (int in = total_in_args - 1, out = total_c_args - 1; in >= 0 ; in--, out--) { + +#ifdef ASSERT + if (in_regs[in].first()->is_Register()) { + assert(!reg_destroyed[in_regs[in].first()->as_Register()->encoding()], "ack!"); + } else if (in_regs[in].first()->is_FloatRegister()) { + assert(!freg_destroyed[in_regs[in].first()->as_FloatRegister()->encoding()], "ack!"); + } + if (out_regs[out].first()->is_Register()) { + reg_destroyed[out_regs[out].first()->as_Register()->encoding()] = true; + } else if (out_regs[out].first()->is_FloatRegister()) { + freg_destroyed[out_regs[out].first()->as_FloatRegister()->encoding()] = true; + } + if (out_regs2[out].first()->is_Register()) { + reg_destroyed[out_regs2[out].first()->as_Register()->encoding()] = true; + } else if (out_regs2[out].first()->is_FloatRegister()) { + freg_destroyed[out_regs2[out].first()->as_FloatRegister()->encoding()] = true; + } +#endif // ASSERT + + switch (in_sig_bt[in]) { + case T_BOOLEAN: + case T_CHAR: + case T_BYTE: + case T_SHORT: + case T_INT: + guarantee(in > 0 && in_sig_bt[in-1] == T_LONG, + "expecting type (T_LONG,bt) for bt in {T_BOOLEAN, T_CHAR, T_BYTE, T_SHORT, T_INT}"); + break; + case T_LONG: + if (in_sig_bt[in+1] == T_VOID) { + long_move(masm, in_regs[in], out_regs[out], r_callers_sp, r_temp_1); + } else { + guarantee(in_sig_bt[in+1] == T_BOOLEAN || in_sig_bt[in+1] == T_CHAR || + in_sig_bt[in+1] == T_BYTE || in_sig_bt[in+1] == T_SHORT || + in_sig_bt[in+1] == T_INT, + "expecting type (T_LONG,bt) for bt in {T_BOOLEAN, T_CHAR, T_BYTE, T_SHORT, T_INT}"); + int_move(masm, in_regs[in], out_regs[out], r_callers_sp, r_temp_1); + } + break; + case T_ARRAY: + if (is_critical_native) { + int body_arg = out; + out -= 2; // Point to length arg. PPC64: pass ints as longs. + unpack_array_argument(masm, in_regs[in], in_elem_bt[in], out_regs[body_arg], out_regs[out], + r_callers_sp, r_temp_1, r_temp_2); + break; + } + case T_OBJECT: + assert(!is_critical_native, "no oop arguments"); + object_move(masm, stack_slots, + oop_map, oop_handle_slot_offset, + ((in == 0) && (!method_is_static)), &receiver_offset, + in_regs[in], out_regs[out], + r_callers_sp, r_temp_1, r_temp_2); + break; + case T_VOID: + break; + case T_FLOAT: + float_move(masm, in_regs[in], out_regs[out], r_callers_sp, r_temp_1); + if (out_regs2[out].first()->is_valid()) { + float_move(masm, in_regs[in], out_regs2[out], r_callers_sp, r_temp_1); + } + break; + case T_DOUBLE: + double_move(masm, in_regs[in], out_regs[out], r_callers_sp, r_temp_1); + if (out_regs2[out].first()->is_valid()) { + double_move(masm, in_regs[in], out_regs2[out], r_callers_sp, r_temp_1); + } + break; + case T_ADDRESS: + fatal("found type (T_ADDRESS) in java args"); + break; + default: + ShouldNotReachHere(); + break; + } + } + + // Pre-load a static method's oop into ARG2. + // Used both by locking code and the normal JNI call code. + if (method_is_static && !is_critical_native) { + __ set_oop_constant(JNIHandles::make_local(method->method_holder()->java_mirror()), + r_carg2_classorobject); + + // Now handlize the static class mirror in carg2. 
It's known not-null. + __ std(r_carg2_classorobject, klass_offset, R1_SP); + oop_map->set_oop(VMRegImpl::stack2reg(klass_slot_offset)); + __ addi(r_carg2_classorobject, R1_SP, klass_offset); + } + + // Get JNIEnv* which is first argument to native. + if (!is_critical_native) { + __ addi(r_carg1_jnienv, R16_thread, in_bytes(JavaThread::jni_environment_offset())); + } + + // NOTE: + // + // We have all of the arguments setup at this point. + // We MUST NOT touch any outgoing regs from this point on. + // So if we must call out we must push a new frame. + + // Get current pc for oopmap, and load it patchable relative to global toc. + oopmap_pc = (intptr_t) __ pc(); + __ calculate_address_from_global_toc(r_return_pc, (address)oopmap_pc, true, true, true, true); + + // We use the same pc/oopMap repeatedly when we call out. + oop_maps->add_gc_map(oopmap_pc - start_pc, oop_map); + + // r_return_pc now has the pc loaded that we will use when we finally call + // to native. + + // Make sure that thread is non-volatile; it crosses a bunch of VM calls below. + assert(R16_thread->is_nonvolatile(), "thread must be in non-volatile register"); + + +# if 0 + // DTrace method entry +# endif + + // Lock a synchronized method. + // -------------------------------------------------------------------------- + + if (method->is_synchronized()) { + assert(!is_critical_native, "unhandled"); + ConditionRegister r_flag = CCR1; + Register r_oop = r_temp_4; + const Register r_box = r_temp_5; + Label done, locked; + + // Load the oop for the object or class. r_carg2_classorobject contains + // either the handlized oop from the incoming arguments or the handlized + // class mirror (if the method is static). + __ ld(r_oop, 0, r_carg2_classorobject); + + // Get the lock box slot's address. + __ addi(r_box, R1_SP, lock_offset); + +# ifdef ASSERT + if (UseBiasedLocking) { + // Making the box point to itself will make it clear it went unused + // but also be obviously invalid. + __ std(r_box, 0, r_box); + } +# endif // ASSERT + + // Try fastpath for locking. + // fast_lock kills r_temp_1, r_temp_2, r_temp_3. + __ compiler_fast_lock_object(r_flag, r_oop, r_box, r_temp_1, r_temp_2, r_temp_3); + __ beq(r_flag, locked); + + // None of the above fast optimizations worked so we have to get into the + // slow case of monitor enter. Inline a special case of call_VM that + // disallows any pending_exception. + + // Save argument registers and leave room for C-compatible ABI_112. + int frame_size = frame::abi_112_size + + round_to(total_c_args * wordSize, frame::alignment_in_bytes); + __ mr(R11_scratch1, R1_SP); + RegisterSaver::push_frame_and_save_argument_registers(masm, R12_scratch2, frame_size, total_c_args, out_regs, out_regs2); + + // Do the call. + __ set_last_Java_frame(R11_scratch1, r_return_pc); + assert(r_return_pc->is_nonvolatile(), "expecting return pc to be in non-volatile register"); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_locking_C), r_oop, r_box, R16_thread); + __ reset_last_Java_frame(); + + RegisterSaver::restore_argument_registers_and_pop_frame(masm, frame_size, total_c_args, out_regs, out_regs2); + + __ asm_assert_mem8_is_zero(thread_(pending_exception), + "no pending exception allowed on exit from SharedRuntime::complete_monitor_locking_C", 0); + + __ bind(locked); + } + + + // Publish thread state + // -------------------------------------------------------------------------- + + // Use that pc we placed in r_return_pc a while back as the current frame anchor. 
+ __ set_last_Java_frame(R1_SP, r_return_pc); + + // Transition from _thread_in_Java to _thread_in_native. + __ li(R0, _thread_in_native); + __ release(); + // TODO: PPC port assert(4 == JavaThread::sz_thread_state(), "unexpected field size"); + __ stw(R0, thread_(thread_state)); + if (UseMembar) { + __ fence(); + } + + + // The JNI call + // -------------------------------------------------------------------------- + + FunctionDescriptor* fd_native_method = (FunctionDescriptor*) native_func; + __ call_c(fd_native_method, relocInfo::runtime_call_type); + + + // Now, we are back from the native code. + + + // Unpack the native result. + // -------------------------------------------------------------------------- + + // For int-types, we do any needed sign-extension required. + // Care must be taken that the return values (R3_RET and F1_RET) + // will survive any VM calls for blocking or unlocking. + // An OOP result (handle) is done specially in the slow-path code. + + switch (ret_type) { + case T_VOID: break; // Nothing to do! + case T_FLOAT: break; // Got it where we want it (unless slow-path). + case T_DOUBLE: break; // Got it where we want it (unless slow-path). + case T_LONG: break; // Got it where we want it (unless slow-path). + case T_OBJECT: break; // Really a handle. + // Cannot de-handlize until after reclaiming jvm_lock. + case T_ARRAY: break; + + case T_BOOLEAN: { // 0 -> false(0); !0 -> true(1) + Label skip_modify; + __ cmpwi(CCR0, R3_RET, 0); + __ beq(CCR0, skip_modify); + __ li(R3_RET, 1); + __ bind(skip_modify); + break; + } + case T_BYTE: { // sign extension + __ extsb(R3_RET, R3_RET); + break; + } + case T_CHAR: { // unsigned result + __ andi(R3_RET, R3_RET, 0xffff); + break; + } + case T_SHORT: { // sign extension + __ extsh(R3_RET, R3_RET); + break; + } + case T_INT: // nothing to do + break; + default: + ShouldNotReachHere(); + break; + } + + + // Publish thread state + // -------------------------------------------------------------------------- + + // Switch thread to "native transition" state before reading the + // synchronization state. This additional state is necessary because reading + // and testing the synchronization state is not atomic w.r.t. GC, as this + // scenario demonstrates: + // - Java thread A, in _thread_in_native state, loads _not_synchronized + // and is preempted. + // - VM thread changes sync state to synchronizing and suspends threads + // for GC. + // - Thread A is resumed to finish this native method, but doesn't block + // here since it didn't see any synchronization in progress, and escapes. + + // Transition from _thread_in_native to _thread_in_native_trans. + __ li(R0, _thread_in_native_trans); + __ release(); + // TODO: PPC port assert(4 == JavaThread::sz_thread_state(), "unexpected field size"); + __ stw(R0, thread_(thread_state)); + + + // Must we block? + // -------------------------------------------------------------------------- + + // Block, if necessary, before resuming in _thread_in_Java state. + // In order for GC to work, don't clear the last_Java_sp until after blocking. + Label after_transition; + { + Label no_block, sync; + + if (os::is_MP()) { + if (UseMembar) { + // Force this write out before the read below. + __ fence(); + } else { + // Write serialization page so VM thread can do a pseudo remote membar. + // We use the current thread pointer to calculate a thread specific + // offset to write to within the page. This minimizes bus traffic + // due to cache line collision. 
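// The sign/zero-extension switch above, restated as plain C++ so the intended
// result normalization is easy to check.  The descriptor characters and the
// int64_t register model are illustrative assumptions, not VM types.

#include <cstdint>
#include <cstdio>

int64_t normalize_int_result(int64_t raw, char kind) {
  switch (kind) {
    case 'Z': return raw != 0 ? 1 : 0;   // T_BOOLEAN: canonicalize to 0/1
    case 'B': return (int8_t)raw;        // T_BYTE:  sign-extend low 8 bits (extsb)
    case 'C': return (uint16_t)raw;      // T_CHAR:  zero-extend low 16 bits (andi 0xffff)
    case 'S': return (int16_t)raw;       // T_SHORT: sign-extend low 16 bits (extsh)
    default:  return raw;                // T_INT and wider: leave as-is
  }
}

int main() {
  printf("%lld %lld %lld\n",
         (long long)normalize_int_result(42, 'Z'),        // 1
         (long long)normalize_int_result(0xFF, 'B'),      // -1
         (long long)normalize_int_result(0x1FFFF, 'C'));  // 65535
  return 0;
}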
+ __ serialize_memory(R16_thread, r_temp_4, r_temp_5); + } + } + + Register sync_state_addr = r_temp_4; + Register sync_state = r_temp_5; + Register suspend_flags = r_temp_6; + + __ load_const(sync_state_addr, SafepointSynchronize::address_of_state(), /*temp*/ sync_state); + + // TODO: PPC port assert(4 == SafepointSynchronize::sz_state(), "unexpected field size"); + __ lwz(sync_state, 0, sync_state_addr); + + // TODO: PPC port assert(4 == Thread::sz_suspend_flags(), "unexpected field size"); + __ lwz(suspend_flags, thread_(suspend_flags)); + + __ acquire(); + + Label do_safepoint; + // No synchronization in progress nor yet synchronized. + __ cmpwi(CCR0, sync_state, SafepointSynchronize::_not_synchronized); + // Not suspended. + __ cmpwi(CCR1, suspend_flags, 0); + + __ bne(CCR0, sync); + __ beq(CCR1, no_block); + + // Block. Save any potential method result value before the operation and + // use a leaf call to leave the last_Java_frame setup undisturbed. Doing this + // lets us share the oopMap we used when we went native rather than create + // a distinct one for this pc. + __ bind(sync); + + address entry_point = is_critical_native + ? CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans_and_transition) + : CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans); + save_native_result(masm, ret_type, workspace_slot_offset); + __ call_VM_leaf(entry_point, R16_thread); + restore_native_result(masm, ret_type, workspace_slot_offset); + + if (is_critical_native) { + __ b(after_transition); // No thread state transition here. + } + __ bind(no_block); + } + + // Publish thread state. + // -------------------------------------------------------------------------- + + // Thread state is thread_in_native_trans. Any safepoint blocking has + // already happened so we can now change state to _thread_in_Java. + + // Transition from _thread_in_native_trans to _thread_in_Java. + __ li(R0, _thread_in_Java); + __ release(); + // TODO: PPC port assert(4 == JavaThread::sz_thread_state(), "unexpected field size"); + __ stw(R0, thread_(thread_state)); + if (UseMembar) { + __ fence(); + } + __ bind(after_transition); + + // Reguard any pages if necessary. + // -------------------------------------------------------------------------- + + Label no_reguard; + __ lwz(r_temp_1, thread_(stack_guard_state)); + __ cmpwi(CCR0, r_temp_1, JavaThread::stack_guard_yellow_disabled); + __ bne(CCR0, no_reguard); + + save_native_result(masm, ret_type, workspace_slot_offset); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages)); + restore_native_result(masm, ret_type, workspace_slot_offset); + + __ bind(no_reguard); + + + // Unlock + // -------------------------------------------------------------------------- + + if (method->is_synchronized()) { + + ConditionRegister r_flag = CCR1; + const Register r_oop = r_temp_4; + const Register r_box = r_temp_5; + const Register r_exception = r_temp_6; + Label done; + + // Get oop and address of lock object box. + if (method_is_static) { + assert(klass_offset != -1, ""); + __ ld(r_oop, klass_offset, R1_SP); + } else { + assert(receiver_offset != -1, ""); + __ ld(r_oop, receiver_offset, R1_SP); + } + __ addi(r_box, R1_SP, lock_offset); + + // Try fastpath for unlocking. + __ compiler_fast_unlock_object(r_flag, r_oop, r_box, r_temp_1, r_temp_2, r_temp_3); + __ beq(r_flag, done); + + // Save and restore any potential method result value around the unlocking operation. 
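// The safepoint/suspend check above in one line of C++.  Enum values and field
// names are illustrative stand-ins; the point is the combined condition that
// decides between falling through to no_block and taking the slow path.

#include <cstdio>

enum SyncState { _not_synchronized = 0, _synchronizing, _synchronized };  // assumed values

bool must_block(SyncState safepoint_state, unsigned suspend_flags) {
  // Block if a safepoint is pending/active OR a suspend request is set.
  return safepoint_state != _not_synchronized || suspend_flags != 0;
}

int main() {
  printf("%d %d %d\n",
         must_block(_not_synchronized, 0),   // 0: continue to _thread_in_Java
         must_block(_synchronizing,    0),   // 1: safepoint in progress
         must_block(_not_synchronized, 4));  // 1: suspend flag set
  return 0;
}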
+ save_native_result(masm, ret_type, workspace_slot_offset); + + // Must save pending exception around the slow-path VM call. Since it's a + // leaf call, the pending exception (if any) can be kept in a register. + __ ld(r_exception, thread_(pending_exception)); + assert(r_exception->is_nonvolatile(), "exception register must be non-volatile"); + __ li(R0, 0); + __ std(R0, thread_(pending_exception)); + + // Slow case of monitor enter. + // Inline a special case of call_VM that disallows any pending_exception. + __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C), r_oop, r_box); + + __ asm_assert_mem8_is_zero(thread_(pending_exception), + "no pending exception allowed on exit from SharedRuntime::complete_monitor_unlocking_C", 0); + + restore_native_result(masm, ret_type, workspace_slot_offset); + + // Check_forward_pending_exception jump to forward_exception if any pending + // exception is set. The forward_exception routine expects to see the + // exception in pending_exception and not in a register. Kind of clumsy, + // since all folks who branch to forward_exception must have tested + // pending_exception first and hence have it in a register already. + __ std(r_exception, thread_(pending_exception)); + + __ bind(done); + } + +# if 0 + // DTrace method exit +# endif + + // Clear "last Java frame" SP and PC. + // -------------------------------------------------------------------------- + + __ reset_last_Java_frame(); + + // Unpack oop result. + // -------------------------------------------------------------------------- + + if (ret_type == T_OBJECT || ret_type == T_ARRAY) { + Label skip_unboxing; + __ cmpdi(CCR0, R3_RET, 0); + __ beq(CCR0, skip_unboxing); + __ ld(R3_RET, 0, R3_RET); + __ bind(skip_unboxing); + __ verify_oop(R3_RET); + } + + + // Reset handle block. + // -------------------------------------------------------------------------- + if (!is_critical_native) { + __ ld(r_temp_1, thread_(active_handles)); + // TODO: PPC port assert(4 == JNIHandleBlock::top_size_in_bytes(), "unexpected field size"); + __ li(r_temp_2, 0); + __ stw(r_temp_2, JNIHandleBlock::top_offset_in_bytes(), r_temp_1); + + + // Check for pending exceptions. + // -------------------------------------------------------------------------- + __ ld(r_temp_2, thread_(pending_exception)); + __ cmpdi(CCR0, r_temp_2, 0); + __ bne(CCR0, handle_pending_exception); + } + + // Return + // -------------------------------------------------------------------------- + + __ pop_frame(); + __ restore_LR_CR(R11); + __ blr(); + + + // Handler for pending exceptions (out-of-line). + // -------------------------------------------------------------------------- + + // Since this is a native call, we know the proper exception handler + // is the empty function. We just pop this frame and then jump to + // forward_exception_entry. + if (!is_critical_native) { + __ align(InteriorEntryAlignment); + __ bind(handle_pending_exception); + + __ pop_frame(); + __ restore_LR_CR(R11); + __ b64_patchable((address)StubRoutines::forward_exception_entry(), + relocInfo::runtime_call_type); + } + + // Handler for a cache miss (out-of-line). + // -------------------------------------------------------------------------- + + if (!method_is_static) { + __ align(InteriorEntryAlignment); + __ bind(ic_miss); + + __ b64_patchable((address)SharedRuntime::get_ic_miss_stub(), + relocInfo::runtime_call_type); + } + + // Done. 
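// Shape of the "preserve the pending exception around the unlock call" pattern
// used above, as plain C++.  The Thread struct and the leaf call are
// illustrative placeholders; only the save/clear/call/restore order matters.

#include <cstdio>

struct Thread { const void* pending_exception; };

void complete_monitor_unlocking(Thread*) { /* leaf call; must see a clean slot */ }

void unlock_preserving_exception(Thread* t) {
  const void* saved = t->pending_exception;  // keep it "in a register"
  t->pending_exception = nullptr;            // the leaf call asserts this is zero
  complete_monitor_unlocking(t);
  t->pending_exception = saved;              // restore for the later exception check
}

int main() {
  Thread t = { (const void*)0x1 };
  unlock_preserving_exception(&t);
  printf("still pending: %d\n", t.pending_exception != nullptr);   // 1
  return 0;
}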
+ // -------------------------------------------------------------------------- + + __ flush(); + + nmethod *nm = nmethod::new_native_nmethod(method, + compile_id, + masm->code(), + vep_start_pc-start_pc, + frame_done_pc-start_pc, + stack_slots / VMRegImpl::slots_per_word, + (method_is_static ? in_ByteSize(klass_offset) : in_ByteSize(receiver_offset)), + in_ByteSize(lock_offset), + oop_maps); + + if (is_critical_native) { + nm->set_lazy_critical_native(true); + } + + return nm; +#else + ShouldNotReachHere(); + return NULL; +#endif // COMPILER2 +} + +// This function returns the adjust size (in number of words) to a c2i adapter +// activation for use during deoptimization. +int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals) { + return round_to((callee_locals - callee_parameters) * Interpreter::stackElementWords, frame::alignment_in_bytes); +} + +uint SharedRuntime::out_preserve_stack_slots() { +#ifdef COMPILER2 + return frame::jit_out_preserve_size / VMRegImpl::stack_slot_size; +#else + return 0; +#endif +} + +#ifdef COMPILER2 +// Frame generation for deopt and uncommon trap blobs. +static void push_skeleton_frame(MacroAssembler* masm, bool deopt, + /* Read */ + Register unroll_block_reg, + /* Update */ + Register frame_sizes_reg, + Register number_of_frames_reg, + Register pcs_reg, + /* Invalidate */ + Register frame_size_reg, + Register pc_reg) { + + __ ld(pc_reg, 0, pcs_reg); + __ ld(frame_size_reg, 0, frame_sizes_reg); + __ std(pc_reg, _abi(lr), R1_SP); + __ push_frame(frame_size_reg, R0/*tmp*/); + __ std(R1_SP, _parent_ijava_frame_abi(initial_caller_sp), R1_SP); + __ addi(number_of_frames_reg, number_of_frames_reg, -1); + __ addi(frame_sizes_reg, frame_sizes_reg, wordSize); + __ addi(pcs_reg, pcs_reg, wordSize); +} + +// Loop through the UnrollBlock info and create new frames. +static void push_skeleton_frames(MacroAssembler* masm, bool deopt, + /* read */ + Register unroll_block_reg, + /* invalidate */ + Register frame_sizes_reg, + Register number_of_frames_reg, + Register pcs_reg, + Register frame_size_reg, + Register pc_reg) { + Label loop; + + // _number_of_frames is of type int (deoptimization.hpp) + __ lwa(number_of_frames_reg, + Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes(), + unroll_block_reg); + __ ld(pcs_reg, + Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes(), + unroll_block_reg); + __ ld(frame_sizes_reg, + Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes(), + unroll_block_reg); + + // stack: (caller_of_deoptee, ...). + + // At this point we either have an interpreter frame or a compiled + // frame on top of stack. If it is a compiled frame we push a new c2i + // adapter here + + // Memorize top-frame stack-pointer. + __ mr(frame_size_reg/*old_sp*/, R1_SP); + + // Resize interpreter top frame OR C2I adapter. + + // At this moment, the top frame (which is the caller of the deoptee) is + // an interpreter frame or a newly pushed C2I adapter or an entry frame. + // The top frame has a TOP_IJAVA_FRAME_ABI and the frame contains the + // outgoing arguments. + // + // In order to push the interpreter frame for the deoptee, we need to + // resize the top frame such that we are able to place the deoptee's + // locals in the frame. + // Additionally, we have to turn the top frame's TOP_IJAVA_FRAME_ABI + // into a valid PARENT_IJAVA_FRAME_ABI. 
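// round_to(), used by last_frame_adjust above and by the frame-size
// computations elsewhere in these files, rounds a size up to a power-of-two
// alignment.  Minimal sketch for reference:

#include <cstdio>

int round_to(int x, int align) {          // align must be a power of two
  return (x + align - 1) & ~(align - 1);
}

int main() {
  printf("%d %d %d\n", round_to(0, 16), round_to(4, 16), round_to(17, 16));  // 0 16 32
  return 0;
}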
+ + __ lwa(R11_scratch1, + Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes(), + unroll_block_reg); + __ neg(R11_scratch1, R11_scratch1); + + // R11_scratch1 contains size of locals for frame resizing. + // R12_scratch2 contains top frame's lr. + + // Resize frame by complete frame size prevents TOC from being + // overwritten by locals. A more stack space saving way would be + // to copy the TOC to its location in the new abi. + __ addi(R11_scratch1, R11_scratch1, - frame::parent_ijava_frame_abi_size); + + // now, resize the frame + __ resize_frame(R11_scratch1, pc_reg/*tmp*/); + + // In the case where we have resized a c2i frame above, the optional + // alignment below the locals has size 32 (why?). + __ std(R12_scratch2, _abi(lr), R1_SP); + + // Initialize initial_caller_sp. + __ std(frame_size_reg/*old_sp*/, _parent_ijava_frame_abi(initial_caller_sp), R1_SP); + +#ifdef ASSERT + // Make sure that there is at least one entry in the array. + __ cmpdi(CCR0, number_of_frames_reg, 0); + __ asm_assert_ne("array_size must be > 0", 0x205); +#endif + + // Now push the new interpreter frames. + // + __ bind(loop); + // Allocate a new frame, fill in the pc. + push_skeleton_frame(masm, deopt, + unroll_block_reg, + frame_sizes_reg, + number_of_frames_reg, + pcs_reg, + frame_size_reg, + pc_reg); + __ cmpdi(CCR0, number_of_frames_reg, 0); + __ bne(CCR0, loop); + + // Get the return address pointing into the frame manager. + __ ld(R0, 0, pcs_reg); + // Store it in the top interpreter frame. + __ std(R0, _abi(lr), R1_SP); + // Initialize frame_manager_lr of interpreter top frame. + __ std(R0, _top_ijava_frame_abi(frame_manager_lr), R1_SP); +} +#endif + +void SharedRuntime::generate_deopt_blob() { + // Allocate space for the code + ResourceMark rm; + // Setup code generation tools + CodeBuffer buffer("deopt_blob", 2048, 1024); + InterpreterMacroAssembler* masm = new InterpreterMacroAssembler(&buffer); + Label exec_mode_initialized; + int frame_size_in_words; + OopMap* map = NULL; + OopMapSet *oop_maps = new OopMapSet(); + + // size of ABI112 plus spill slots for R3_RET and F1_RET. + const int frame_size_in_bytes = frame::abi_112_spill_size; + const int frame_size_in_slots = frame_size_in_bytes / sizeof(jint); + int first_frame_size_in_bytes = 0; // frame size of "unpack frame" for call to fetch_unroll_info. + + const Register exec_mode_reg = R21_tmp1; + + const address start = __ pc(); + +#ifdef COMPILER2 + // -------------------------------------------------------------------------- + // Prolog for non exception case! + + // We have been called from the deopt handler of the deoptee. + // + // deoptee: + // ... + // call X + // ... + // deopt_handler: call_deopt_stub + // cur. return pc --> ... + // + // So currently SR_LR points behind the call in the deopt handler. + // We adjust it such that it points to the start of the deopt handler. + // The return_pc has been stored in the frame of the deoptee and + // will replace the address of the deopt_handler in the call + // to Deoptimization::fetch_unroll_info below. + // We can't grab a free register here, because all registers may + // contain live values, so let the RegisterSaver do the adjustment + // of the return pc. + const int return_pc_adjustment_no_exception = -size_deopt_handler(); + + // Push the "unpack frame" + // Save everything in sight. 
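// What the skeleton-frame loop above iterates over, reduced to plain C++.  The
// struct is a simplified stand-in for Deoptimization::UnrollBlock; the arrays
// are assumed to hold one entry per skeletal frame plus one extra pc, matching
// the extra load after the loop.

#include <cstdio>

struct UnrollBlockSketch {
  int                number_of_frames;
  const long*        frame_sizes;   // frame size in bytes, per skeletal frame
  const void* const* frame_pcs;     // return pc per frame, plus one final pc
};

void walk_skeleton_frames(const UnrollBlockSketch* ub) {
  for (int i = 0; i < ub->number_of_frames; i++) {
    // Real code: store the pc into the ABI, push a frame of frame_sizes[i],
    // and record the parent sp; here we only trace the inputs.
    printf("frame %d: size=%ld pc=%p\n", i, ub->frame_sizes[i], (void*)ub->frame_pcs[i]);
  }
  printf("final pc: %p\n", (void*)ub->frame_pcs[ub->number_of_frames]);
}

int main() {
  long        sizes[] = { 224, 160 };
  const void* pcs[]   = { (const void*)0x100, (const void*)0x200, (const void*)0x300 };
  UnrollBlockSketch ub = { 2, sizes, pcs };
  walk_skeleton_frames(&ub);
  return 0;
}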
+ map = RegisterSaver::push_frame_abi112_and_save_live_registers(masm, + &first_frame_size_in_bytes, + /*generate_oop_map=*/ true, + return_pc_adjustment_no_exception, + RegisterSaver::return_pc_is_lr); + assert(map != NULL, "OopMap must have been created"); + + __ li(exec_mode_reg, Deoptimization::Unpack_deopt); + // Save exec mode for unpack_frames. + __ b(exec_mode_initialized); + + // -------------------------------------------------------------------------- + // Prolog for exception case + + // An exception is pending. + // We have been called with a return (interpreter) or a jump (exception blob). + // + // - R3_ARG1: exception oop + // - R4_ARG2: exception pc + + int exception_offset = __ pc() - start; + + BLOCK_COMMENT("Prolog for exception case"); + + // The RegisterSaves doesn't need to adjust the return pc for this situation. + const int return_pc_adjustment_exception = 0; + + // Push the "unpack frame". + // Save everything in sight. + assert(R4 == R4_ARG2, "exception pc must be in r4"); + RegisterSaver::push_frame_abi112_and_save_live_registers(masm, + &first_frame_size_in_bytes, + /*generate_oop_map=*/ false, + return_pc_adjustment_exception, + RegisterSaver::return_pc_is_r4); + + // Deopt during an exception. Save exec mode for unpack_frames. + __ li(exec_mode_reg, Deoptimization::Unpack_exception); + + // Store exception oop and pc in thread (location known to GC). + // This is needed since the call to "fetch_unroll_info()" may safepoint. + __ std(R3_ARG1, in_bytes(JavaThread::exception_oop_offset()), R16_thread); + __ std(R4_ARG2, in_bytes(JavaThread::exception_pc_offset()), R16_thread); + + // fall through + + // -------------------------------------------------------------------------- + __ BIND(exec_mode_initialized); + + { + const Register unroll_block_reg = R22_tmp2; + + // We need to set `last_Java_frame' because `fetch_unroll_info' will + // call `last_Java_frame()'. The value of the pc in the frame is not + // particularly important. It just needs to identify this blob. + __ set_last_Java_frame(R1_SP, noreg); + + // With EscapeAnalysis turned on, this call may safepoint! + __ call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info), R16_thread); + address calls_return_pc = __ last_calls_return_pc(); + // Set an oopmap for the call site that describes all our saved registers. + oop_maps->add_gc_map(calls_return_pc - start, map); + + __ reset_last_Java_frame(); + // Save the return value. + __ mr(unroll_block_reg, R3_RET); + + // Restore only the result registers that have been saved + // by save_volatile_registers(...). + RegisterSaver::restore_result_registers(masm, first_frame_size_in_bytes); + + // In excp_deopt_mode, restore and clear exception oop which we + // stored in the thread during exception entry above. The exception + // oop will be the return value of this stub. 
+ Label skip_restore_excp; + __ cmpdi(CCR0, exec_mode_reg, Deoptimization::Unpack_exception); + __ bne(CCR0, skip_restore_excp); + __ ld(R3_RET, in_bytes(JavaThread::exception_oop_offset()), R16_thread); + __ ld(R4_ARG2, in_bytes(JavaThread::exception_pc_offset()), R16_thread); + __ li(R0, 0); + __ std(R0, in_bytes(JavaThread::exception_pc_offset()), R16_thread); + __ std(R0, in_bytes(JavaThread::exception_oop_offset()), R16_thread); + __ BIND(skip_restore_excp); + + // reload narrro_oop_base + if (UseCompressedOops && Universe::narrow_oop_base() != 0) { + __ load_const_optimized(R30, Universe::narrow_oop_base()); + } + + __ pop_frame(); + + // stack: (deoptee, optional i2c, caller of deoptee, ...). + + // pop the deoptee's frame + __ pop_frame(); + + // stack: (caller_of_deoptee, ...). + + // Loop through the `UnrollBlock' info and create interpreter frames. + push_skeleton_frames(masm, true/*deopt*/, + unroll_block_reg, + R23_tmp3, + R24_tmp4, + R25_tmp5, + R26_tmp6, + R27_tmp7); + + // stack: (skeletal interpreter frame, ..., optional skeletal + // interpreter frame, optional c2i, caller of deoptee, ...). + } + + // push an `unpack_frame' taking care of float / int return values. + __ push_frame(frame_size_in_bytes, R0/*tmp*/); + + // stack: (unpack frame, skeletal interpreter frame, ..., optional + // skeletal interpreter frame, optional c2i, caller of deoptee, + // ...). + + // Spill live volatile registers since we'll do a call. + __ std( R3_RET, _abi_112_spill(spill_ret), R1_SP); + __ stfd(F1_RET, _abi_112_spill(spill_fret), R1_SP); + + // Let the unpacker layout information in the skeletal frames just + // allocated. + __ get_PC_trash_LR(R3_RET); + __ set_last_Java_frame(/*sp*/R1_SP, /*pc*/R3_RET); + // This is a call to a LEAF method, so no oop map is required. + __ call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames), + R16_thread/*thread*/, exec_mode_reg/*exec_mode*/); + __ reset_last_Java_frame(); + + // Restore the volatiles saved above. + __ ld( R3_RET, _abi_112_spill(spill_ret), R1_SP); + __ lfd(F1_RET, _abi_112_spill(spill_fret), R1_SP); + + // Pop the unpack frame. + __ pop_frame(); + __ restore_LR_CR(R0); + + // stack: (top interpreter frame, ..., optional interpreter frame, + // optional c2i, caller of deoptee, ...). + + // Initialize R14_state. + __ ld(R14_state, 0, R1_SP); + __ addi(R14_state, R14_state, + -frame::interpreter_frame_cinterpreterstate_size_in_bytes()); + // Also inititialize R15_prev_state. + __ restore_prev_state(); + + // Return to the interpreter entry point. + __ blr(); + __ flush(); +#else // COMPILER2 + __ unimplemented("deopt blob needed only with compiler"); + int exception_offset = __ pc() - start; +#endif // COMPILER2 + + _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset, 0, first_frame_size_in_bytes / wordSize); +} + +#ifdef COMPILER2 +void SharedRuntime::generate_uncommon_trap_blob() { + // Allocate space for the code. + ResourceMark rm; + // Setup code generation tools. + CodeBuffer buffer("uncommon_trap_blob", 2048, 1024); + InterpreterMacroAssembler* masm = new InterpreterMacroAssembler(&buffer); + address start = __ pc(); + + Register unroll_block_reg = R21_tmp1; + Register klass_index_reg = R22_tmp2; + Register unc_trap_reg = R23_tmp3; + + OopMapSet* oop_maps = new OopMapSet(); + int frame_size_in_bytes = frame::abi_112_size; + OopMap* map = new OopMap(frame_size_in_bytes / sizeof(jint), 0); + + // stack: (deoptee, optional i2c, caller_of_deoptee, ...). 
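// The mode-dependent piece of the deopt blob above, as plain C++: only in the
// Unpack_exception case are the exception oop and pc pulled back out of the
// thread (and the thread fields cleared).  Types, fields and enum values here
// are illustrative stand-ins.

#include <cstdio>

enum UnpackMode { Unpack_deopt, Unpack_exception, Unpack_uncommon_trap };

struct ThreadSketch { unsigned long exception_oop, exception_pc; };

unsigned long restore_exception_state(UnpackMode mode, ThreadSketch* t) {
  unsigned long ret = 0;
  if (mode == Unpack_exception) {
    ret = t->exception_oop;   // becomes the blob's result (R3_RET); pc goes to R4_ARG2
    t->exception_oop = 0;     // clear the thread-local copies
    t->exception_pc  = 0;
  }
  return ret;
}

int main() {
  ThreadSketch t = { 0xCAFE, 0x1234 };
  printf("%#lx %#lx\n",
         restore_exception_state(Unpack_deopt, &t),       // 0: nothing restored
         restore_exception_state(Unpack_exception, &t));  // 0xcafe
  return 0;
}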
+ + // Push a dummy `unpack_frame' and call + // `Deoptimization::uncommon_trap' to pack the compiled frame into a + // vframe array and return the `UnrollBlock' information. + + // Save LR to compiled frame. + __ save_LR_CR(R11_scratch1); + + // Push an "uncommon_trap" frame. + __ push_frame_abi112(0, R11_scratch1); + + // stack: (unpack frame, deoptee, optional i2c, caller_of_deoptee, ...). + + // Set the `unpack_frame' as last_Java_frame. + // `Deoptimization::uncommon_trap' expects it and considers its + // sender frame as the deoptee frame. + // Remember the offset of the instruction whose address will be + // moved to R11_scratch1. + address gc_map_pc = __ get_PC_trash_LR(R11_scratch1); + + __ set_last_Java_frame(/*sp*/R1_SP, /*pc*/R11_scratch1); + + __ mr(klass_index_reg, R3); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::uncommon_trap), + R16_thread, klass_index_reg); + + // Set an oopmap for the call site. + oop_maps->add_gc_map(gc_map_pc - start, map); + + __ reset_last_Java_frame(); + + // Pop the `unpack frame'. + __ pop_frame(); + + // stack: (deoptee, optional i2c, caller_of_deoptee, ...). + + // Save the return value. + __ mr(unroll_block_reg, R3_RET); + + // Pop the uncommon_trap frame. + __ pop_frame(); + + // stack: (caller_of_deoptee, ...). + + // Allocate new interpreter frame(s) and possibly a c2i adapter + // frame. + push_skeleton_frames(masm, false/*deopt*/, + unroll_block_reg, + R22_tmp2, + R23_tmp3, + R24_tmp4, + R25_tmp5, + R26_tmp6); + + // stack: (skeletal interpreter frame, ..., optional skeletal + // interpreter frame, optional c2i, caller of deoptee, ...). + + // Push a dummy `unpack_frame' taking care of float return values. + // Call `Deoptimization::unpack_frames' to layout information in the + // interpreter frames just created. + + // Push a simple "unpack frame" here. + __ push_frame_abi112(0, R11_scratch1); + + // stack: (unpack frame, skeletal interpreter frame, ..., optional + // skeletal interpreter frame, optional c2i, caller of deoptee, + // ...). + + // Set the "unpack_frame" as last_Java_frame. + __ get_PC_trash_LR(R11_scratch1); + __ set_last_Java_frame(/*sp*/R1_SP, /*pc*/R11_scratch1); + + // Indicate it is the uncommon trap case. + __ li(unc_trap_reg, Deoptimization::Unpack_uncommon_trap); + // Let the unpacker layout information in the skeletal frames just + // allocated. + __ call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames), + R16_thread, unc_trap_reg); + + __ reset_last_Java_frame(); + // Pop the `unpack frame'. + __ pop_frame(); + // Restore LR from top interpreter frame. + __ restore_LR_CR(R11_scratch1); + + // stack: (top interpreter frame, ..., optional interpreter frame, + // optional c2i, caller of deoptee, ...). + + // Initialize R14_state, ... + __ ld(R11_scratch1, 0, R1_SP); + __ addi(R14_state, R11_scratch1, + -frame::interpreter_frame_cinterpreterstate_size_in_bytes()); + // also initialize R15_prev_state. + __ restore_prev_state(); + // Return to the interpreter entry point. + __ blr(); + + masm->flush(); + + _uncommon_trap_blob = UncommonTrapBlob::create(&buffer, oop_maps, frame_size_in_bytes/wordSize); +} +#endif // COMPILER2 + +// Generate a special Compile2Runtime blob that saves all registers, and setup oopmap. 
+SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_type) { + assert(StubRoutines::forward_exception_entry() != NULL, + "must be generated before"); + + ResourceMark rm; + OopMapSet *oop_maps = new OopMapSet(); + OopMap* map; + + // Allocate space for the code. Setup code generation tools. + CodeBuffer buffer("handler_blob", 2048, 1024); + MacroAssembler* masm = new MacroAssembler(&buffer); + + address start = __ pc(); + int frame_size_in_bytes = 0; + + RegisterSaver::ReturnPCLocation return_pc_location; + bool cause_return = (poll_type == POLL_AT_RETURN); + if (cause_return) { + // Nothing to do here. The frame has already been popped in MachEpilogNode. + // Register LR already contains the return pc. + return_pc_location = RegisterSaver::return_pc_is_lr; + } else { + // Use thread()->saved_exception_pc() as return pc. + return_pc_location = RegisterSaver::return_pc_is_thread_saved_exception_pc; + } + + // Save registers, fpu state, and flags. + map = RegisterSaver::push_frame_abi112_and_save_live_registers(masm, + &frame_size_in_bytes, + /*generate_oop_map=*/ true, + /*return_pc_adjustment=*/0, + return_pc_location); + + // The following is basically a call_VM. However, we need the precise + // address of the call in order to generate an oopmap. Hence, we do all the + // work outselves. + __ set_last_Java_frame(/*sp=*/R1_SP, /*pc=*/noreg); + + // The return address must always be correct so that the frame constructor + // never sees an invalid pc. + + // Do the call + __ call_VM_leaf(call_ptr, R16_thread); + address calls_return_pc = __ last_calls_return_pc(); + + // Set an oopmap for the call site. This oopmap will map all + // oop-registers and debug-info registers as callee-saved. This + // will allow deoptimization at this safepoint to find all possible + // debug-info recordings, as well as let GC find all oops. + oop_maps->add_gc_map(calls_return_pc - start, map); + + Label noException; + + // Clear the last Java frame. + __ reset_last_Java_frame(); + + BLOCK_COMMENT(" Check pending exception."); + const Register pending_exception = R0; + __ ld(pending_exception, thread_(pending_exception)); + __ cmpdi(CCR0, pending_exception, 0); + __ beq(CCR0, noException); + + // Exception pending + RegisterSaver::restore_live_registers_and_pop_frame(masm, + frame_size_in_bytes, + /*restore_ctr=*/true); + + + BLOCK_COMMENT(" Jump to forward_exception_entry."); + // Jump to forward_exception_entry, with the issuing PC in LR + // so it looks like the original nmethod called forward_exception_entry. + __ b64_patchable(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); + + // No exception case. + __ BIND(noException); + + + // Normal exit, restore registers and exit. + RegisterSaver::restore_live_registers_and_pop_frame(masm, + frame_size_in_bytes, + /*restore_ctr=*/true); + + __ blr(); + + // Make sure all code is generated + masm->flush(); + + // Fill-out other meta info + // CodeBlob frame size is in words. + return SafepointBlob::create(&buffer, oop_maps, frame_size_in_bytes / wordSize); +} + +// generate_resolve_blob - call resolution (static/virtual/opt-virtual/ic-miss) +// +// Generate a stub that calls into the vm to find out the proper destination +// of a java call. All the argument registers are live at this point +// but since this is generic code we don't know what they are and the caller +// must do any gc of the args. 
+// +RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const char* name) { + + // allocate space for the code + ResourceMark rm; + + CodeBuffer buffer(name, 1000, 512); + MacroAssembler* masm = new MacroAssembler(&buffer); + + int frame_size_in_bytes; + + OopMapSet *oop_maps = new OopMapSet(); + OopMap* map = NULL; + + address start = __ pc(); + + map = RegisterSaver::push_frame_abi112_and_save_live_registers(masm, + &frame_size_in_bytes, + /*generate_oop_map*/ true, + /*return_pc_adjustment*/ 0, + RegisterSaver::return_pc_is_lr); + + // Use noreg as last_Java_pc, the return pc will be reconstructed + // from the physical frame. + __ set_last_Java_frame(/*sp*/R1_SP, noreg); + + int frame_complete = __ offset(); + + // Pass R19_method as 2nd (optional) argument, used by + // counter_overflow_stub. + __ call_VM_leaf(destination, R16_thread, R19_method); + address calls_return_pc = __ last_calls_return_pc(); + // Set an oopmap for the call site. + // We need this not only for callee-saved registers, but also for volatile + // registers that the compiler might be keeping live across a safepoint. + // Create the oopmap for the call's return pc. + oop_maps->add_gc_map(calls_return_pc - start, map); + + // R3_RET contains the address we are going to jump to assuming no exception got installed. + + // clear last_Java_sp + __ reset_last_Java_frame(); + + // Check for pending exceptions. + BLOCK_COMMENT("Check for pending exceptions."); + Label pending; + __ ld(R11_scratch1, thread_(pending_exception)); + __ cmpdi(CCR0, R11_scratch1, 0); + __ bne(CCR0, pending); + + __ mtctr(R3_RET); // Ctr will not be touched by restore_live_registers_and_pop_frame. + + RegisterSaver::restore_live_registers_and_pop_frame(masm, frame_size_in_bytes, /*restore_ctr*/ false); + + // Get the returned methodOop. + __ get_vm_result_2(R19_method); + + __ bctr(); + + + // Pending exception after the safepoint. + __ BIND(pending); + + RegisterSaver::restore_live_registers_and_pop_frame(masm, frame_size_in_bytes, /*restore_ctr*/ true); + + // exception pending => remove activation and forward to exception handler + + __ li(R11_scratch1, 0); + __ ld(R3_ARG1, thread_(pending_exception)); + __ std(R11_scratch1, in_bytes(JavaThread::vm_result_offset()), R16_thread); + __ b64_patchable(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); + + // ------------- + // Make sure all code is generated. + masm->flush(); + + // return the blob + // frame_size_words or bytes?? + return RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_in_bytes/wordSize, + oop_maps, true); +} diff --git a/src/cpu/ppc/vm/stubGenerator_ppc.cpp b/src/cpu/ppc/vm/stubGenerator_ppc.cpp new file mode 100644 --- /dev/null +++ b/src/cpu/ppc/vm/stubGenerator_ppc.cpp @@ -0,0 +1,2057 @@ +/* + * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright 2012, 2013 SAP AG. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/assembler.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "interpreter/interpreter.hpp" +#include "nativeInst_ppc.hpp" +#include "oops/instanceOop.hpp" +#include "oops/method.hpp" +#include "oops/objArrayKlass.hpp" +#include "oops/oop.inline.hpp" +#include "prims/methodHandles.hpp" +#include "runtime/frame.inline.hpp" +#include "runtime/handles.inline.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubCodeGenerator.hpp" +#include "runtime/stubRoutines.hpp" +#include "utilities/top.hpp" +#ifdef TARGET_OS_FAMILY_aix +# include "thread_aix.inline.hpp" +#endif +#ifdef TARGET_OS_FAMILY_linux +# include "thread_linux.inline.hpp" +#endif +#ifdef COMPILER2 +#include "opto/runtime.hpp" +#endif + +#define __ _masm-> + +#ifdef PRODUCT +#define BLOCK_COMMENT(str) // nothing +#else +#define BLOCK_COMMENT(str) __ block_comment(str) +#endif + +class StubGenerator: public StubCodeGenerator { + private: + + // Call stubs are used to call Java from C + // + // Arguments: + // + // R3 - call wrapper address : address + // R4 - result : intptr_t* + // R5 - result type : BasicType + // R6 - method : Method + // R7 - frame mgr entry point : address + // R8 - parameter block : intptr_t* + // R9 - parameter count in words : int + // R10 - thread : Thread* + // + address generate_call_stub(address& return_address) { + // Setup a new c frame, copy java arguments, call frame manager or + // native_entry, and process result. + + StubCodeMark mark(this, "StubRoutines", "call_stub"); + + address start = __ emit_fd(); + + // some sanity checks + assert((sizeof(frame::abi_48) % 16) == 0, "unaligned"); + assert((sizeof(frame::abi_112) % 16) == 0, "unaligned"); + assert((sizeof(frame::spill_nonvolatiles) % 16) == 0, "unaligned"); + assert((sizeof(frame::parent_ijava_frame_abi) % 16) == 0, "unaligned"); + assert((sizeof(frame::entry_frame_locals) % 16) == 0, "unaligned"); + + Register r_arg_call_wrapper_addr = R3; + Register r_arg_result_addr = R4; + Register r_arg_result_type = R5; + Register r_arg_method = R6; + Register r_arg_entry = R7; + Register r_arg_thread = R10; + + Register r_temp = R24; + Register r_top_of_arguments_addr = R25; + Register r_entryframe_fp = R26; + + { + // Stack on entry to call_stub: + // + // F1 [C_FRAME] + // ... + + Register r_arg_argument_addr = R8; + Register r_arg_argument_count = R9; + Register r_frame_alignment_in_bytes = R27; + Register r_argument_addr = R28; + Register r_argumentcopy_addr = R29; + Register r_argument_size_in_bytes = R30; + Register r_frame_size = R23; + + Label arguments_copied; + + // Save LR/CR to caller's C_FRAME. + __ save_LR_CR(R0); + + // Zero extend arg_argument_count. + __ clrldi(r_arg_argument_count, r_arg_argument_count, 32); + + // Save non-volatiles GPRs to ENTRY_FRAME (not yet pushed, but it's safe). + __ save_nonvolatile_gprs(R1_SP, _spill_nonvolatiles_neg(r14)); + + // Keep copy of our frame pointer (caller's SP). 
+ __ mr(r_entryframe_fp, R1_SP); + + BLOCK_COMMENT("Push ENTRY_FRAME including arguments"); + // Push ENTRY_FRAME including arguments: + // + // F0 [TOP_IJAVA_FRAME_ABI] + // alignment (optional) + // [outgoing Java arguments] + // [ENTRY_FRAME_LOCALS] + // F1 [C_FRAME] + // ... + + // calculate frame size + + // unaligned size of arguments + __ sldi(r_argument_size_in_bytes, + r_arg_argument_count, Interpreter::logStackElementSize); + // arguments alignment (max 1 slot) + // FIXME: use round_to() here + __ andi_(r_frame_alignment_in_bytes, r_arg_argument_count, 1); + __ sldi(r_frame_alignment_in_bytes, + r_frame_alignment_in_bytes, Interpreter::logStackElementSize); + + // size = unaligned size of arguments + top abi's size + __ addi(r_frame_size, r_argument_size_in_bytes, + frame::top_ijava_frame_abi_size); + // size += arguments alignment + __ add(r_frame_size, + r_frame_size, r_frame_alignment_in_bytes); + // size += size of call_stub locals + __ addi(r_frame_size, + r_frame_size, frame::entry_frame_locals_size); + + // push ENTRY_FRAME + __ push_frame(r_frame_size, r_temp); + + // initialize call_stub locals (step 1) + __ std(r_arg_call_wrapper_addr, + _entry_frame_locals_neg(call_wrapper_address), r_entryframe_fp); + __ std(r_arg_result_addr, + _entry_frame_locals_neg(result_address), r_entryframe_fp); + __ std(r_arg_result_type, + _entry_frame_locals_neg(result_type), r_entryframe_fp); + // we will save arguments_tos_address later + + + BLOCK_COMMENT("Copy Java arguments"); + // copy Java arguments + + // Calculate top_of_arguments_addr which will be R17_tos (not prepushed) later. + // FIXME: why not simply use SP+frame::top_ijava_frame_size? + __ addi(r_top_of_arguments_addr, + R1_SP, frame::top_ijava_frame_abi_size); + __ add(r_top_of_arguments_addr, + r_top_of_arguments_addr, r_frame_alignment_in_bytes); + + // any arguments to copy? + __ cmpdi(CCR0, r_arg_argument_count, 0); + __ beq(CCR0, arguments_copied); + + // prepare loop and copy arguments in reverse order + { + // init CTR with arg_argument_count + __ mtctr(r_arg_argument_count); + + // let r_argumentcopy_addr point to last outgoing Java arguments P + __ mr(r_argumentcopy_addr, r_top_of_arguments_addr); + + // let r_argument_addr point to last incoming java argument + __ add(r_argument_addr, + r_arg_argument_addr, r_argument_size_in_bytes); + __ addi(r_argument_addr, r_argument_addr, -BytesPerWord); + + // now loop while CTR > 0 and copy arguments + { + Label next_argument; + __ bind(next_argument); + + __ ld(r_temp, 0, r_argument_addr); + // argument_addr--; + __ addi(r_argument_addr, r_argument_addr, -BytesPerWord); + __ std(r_temp, 0, r_argumentcopy_addr); + // argumentcopy_addr++; + __ addi(r_argumentcopy_addr, r_argumentcopy_addr, BytesPerWord); + + __ bdnz(next_argument); + } + } + + // Arguments copied, continue. + __ bind(arguments_copied); + } + + { + BLOCK_COMMENT("Call frame manager or native entry."); + // Call frame manager or native entry. + Register r_new_arg_entry = R14_state; + assert_different_registers(r_new_arg_entry, r_top_of_arguments_addr, + r_arg_method, r_arg_thread); + + __ mr(r_new_arg_entry, r_arg_entry); + + // Register state on entry to frame manager / native entry: + // + // R17_tos - intptr_t* sender tos (prepushed) Lesp = (SP) + copied_arguments_offset - 8 + // R19_method - Method + // R16_thread - JavaThread* + + // R17_tos must point to last argument - element_size. 
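// The ENTRY_FRAME size computed above with sldi/andi_/addi, written out as
// plain C++.  The two ABI/locals sizes passed below are placeholders (the real
// values come from frame::top_ijava_frame_abi_size and
// frame::entry_frame_locals_size, both multiples of 16); the element size of 8
// matches PPC64.

#include <cstdio>

long entry_frame_size(long arg_count,
                      long top_ijava_frame_abi_size,
                      long entry_frame_locals_size) {
  const long stack_element_size = 8;
  long args      = arg_count * stack_element_size;         // sldi
  long alignment = (arg_count & 1) * stack_element_size;   // andi_ + sldi: keep SP 16-byte aligned
  return args + alignment + top_ijava_frame_abi_size + entry_frame_locals_size;
}

int main() {
  printf("%ld %ld\n",
         entry_frame_size(3, 112, 80),    // odd argument count needs one pad slot
         entry_frame_size(4, 112, 80));
  return 0;
}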
+ __ addi(R17_tos, r_top_of_arguments_addr, -Interpreter::stackElementSize); + + // initialize call_stub locals (step 2) + // now save R17_tos as arguments_tos_address + __ std(R17_tos, _entry_frame_locals_neg(arguments_tos_address), r_entryframe_fp); + + // load argument registers for call + __ mr(R19_method, r_arg_method); + __ mr(R16_thread, r_arg_thread); + assert(R17_tos != r_arg_method, "trashed r_arg_method"); + assert(R17_tos != r_arg_thread && R19_method != r_arg_thread, "trashed r_arg_thread"); + + // Set R15_prev_state to 0 for simplifying checks in callee. + __ li(R15_prev_state, 0); + + // Stack on entry to frame manager / native entry: + // + // F0 [TOP_IJAVA_FRAME_ABI] + // alignment (optional) + // [outgoing Java arguments] + // [ENTRY_FRAME_LOCALS] + // F1 [C_FRAME] + // ... + // + + // global toc register + __ load_const(R29, MacroAssembler::global_toc(), R11_scratch1); + + // Load narrow oop base. + __ reinit_heapbase(R30, R11_scratch1); + + // Remember the senderSP so we interpreter can pop c2i arguments off of the stack + // when called via a c2i. + + // Pass initial_caller_sp to framemanager. + __ mr(R21_tmp1, R1_SP); + + // Do a light-weight C-call here, r_new_arg_entry holds the address + // of the interpreter entry point (frame manager or native entry) + // and save runtime-value of LR in return_address. + assert(r_new_arg_entry != R17_tos && r_new_arg_entry != R19_method && r_new_arg_entry != R16_thread, + "trashed r_new_arg_entry"); + return_address = __ call_stub(r_new_arg_entry); + } + + { + BLOCK_COMMENT("Returned from frame manager or native entry."); + // Returned from frame manager or native entry. + // Now pop frame, process result, and return to caller. + + // Stack on exit from frame manager / native entry: + // + // F0 [ABI] + // ... + // [ENTRY_FRAME_LOCALS] + // F1 [C_FRAME] + // ... + // + // Just pop the topmost frame ... + // + + Label ret_is_object; + Label ret_is_long; + Label ret_is_float; + Label ret_is_double; + + Register r_entryframe_fp = R30; + Register r_lr = R7_ARG5; + Register r_cr = R8_ARG6; + + // Reload some volatile registers which we've spilled before the call + // to frame manager / native entry. + // Access all locals via frame pointer, because we know nothing about + // the topmost frame's size. + __ ld(r_entryframe_fp, _abi(callers_sp), R1_SP); + assert_different_registers(r_entryframe_fp, R3_RET, r_arg_result_addr, r_arg_result_type, r_cr, r_lr); + __ ld(r_arg_result_addr, + _entry_frame_locals_neg(result_address), r_entryframe_fp); + __ ld(r_arg_result_type, + _entry_frame_locals_neg(result_type), r_entryframe_fp); + __ ld(r_cr, _abi(cr), r_entryframe_fp); + __ ld(r_lr, _abi(lr), r_entryframe_fp); + + // pop frame and restore non-volatiles, LR and CR + __ mr(R1_SP, r_entryframe_fp); + __ mtcr(r_cr); + __ mtlr(r_lr); + + // Store result depending on type. Everything that is not + // T_OBJECT, T_LONG, T_FLOAT, or T_DOUBLE is treated as T_INT. + __ cmpwi(CCR0, r_arg_result_type, T_OBJECT); + __ cmpwi(CCR1, r_arg_result_type, T_LONG); + __ cmpwi(CCR5, r_arg_result_type, T_FLOAT); + __ cmpwi(CCR6, r_arg_result_type, T_DOUBLE); + + // restore non-volatile registers + __ restore_nonvolatile_gprs(R1_SP, _spill_nonvolatiles_neg(r14)); + + + // Stack on exit from call_stub: + // + // 0 [C_FRAME] + // ... + // + // no call_stub frames left. + + // All non-volatiles have been restored at this point!! 
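// The result-type dispatch around this point, as plain C++: anything that is
// not OBJECT/LONG/FLOAT/DOUBLE is stored through the result address as a
// 32-bit int.  The enum and buffer handling are illustrative, not VM types.

#include <cstdio>
#include <cstring>

enum RetKind { RET_INT, RET_LONG, RET_OBJECT, RET_FLOAT, RET_DOUBLE };

void store_result(RetKind kind, long long gpr_ret, double fpr_ret, void* result_addr) {
  switch (kind) {
    case RET_OBJECT:
    case RET_LONG:   std::memcpy(result_addr, &gpr_ret, 8); break;                         // std
    case RET_FLOAT:  { float f = (float)fpr_ret; std::memcpy(result_addr, &f, 4); break; } // stfs
    case RET_DOUBLE: std::memcpy(result_addr, &fpr_ret, 8); break;                         // stfd
    default:         { int i = (int)gpr_ret; std::memcpy(result_addr, &i, 4); break; }     // stw
  }
}

int main() {
  unsigned char buf[8] = { 0 };
  store_result(RET_FLOAT, 0, 2.5, buf);   // writes the 4-byte float 2.5f
  float f;
  std::memcpy(&f, buf, 4);
  printf("%.1f\n", f);                    // 2.5
  return 0;
}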
+ assert(R3_RET == R3, "R3_RET should be R3"); + + __ beq(CCR0, ret_is_object); + __ beq(CCR1, ret_is_long); + __ beq(CCR5, ret_is_float); + __ beq(CCR6, ret_is_double); + + // default: + __ stw(R3_RET, 0, r_arg_result_addr); + __ blr(); // return to caller + + // case T_OBJECT: + __ bind(ret_is_object); + __ std(R3_RET, 0, r_arg_result_addr); + __ blr(); // return to caller + + // case T_LONG: + __ bind(ret_is_long); + __ std(R3_RET, 0, r_arg_result_addr); + __ blr(); // return to caller + + // case T_FLOAT: + __ bind(ret_is_float); + __ stfs(F1_RET, 0, r_arg_result_addr); + __ blr(); // return to caller + + // case T_DOUBLE: + __ bind(ret_is_double); + __ stfd(F1_RET, 0, r_arg_result_addr); + __ blr(); // return to caller + } + + return start; + } + + // Return point for a Java call if there's an exception thrown in + // Java code. The exception is caught and transformed into a + // pending exception stored in JavaThread that can be tested from + // within the VM. + // + address generate_catch_exception() { + StubCodeMark mark(this, "StubRoutines", "catch_exception"); + + address start = __ pc(); + + // Registers alive + // + // R16_thread + // R3_ARG1 - address of pending exception + // R4_ARG2 - return address in call stub + + const Register exception_file = R21_tmp1; + const Register exception_line = R22_tmp2; + + __ load_const(exception_file, (void*)__FILE__); + __ load_const(exception_line, (void*)__LINE__); + + __ std(R3_ARG1, thread_(pending_exception)); + // store into `char *' + __ std(exception_file, thread_(exception_file)); + // store into `int' + __ stw(exception_line, thread_(exception_line)); + + // complete return to VM + assert(StubRoutines::_call_stub_return_address != NULL, "must have been generated before"); + + __ mtlr(R4_ARG2); + // continue in call stub + __ blr(); + + return start; + } + + // Continuation point for runtime calls returning with a pending + // exception. The pending exception check happened in the runtime + // or native call stub. The pending exception in Thread is + // converted into a Java-level exception. + // + address generate_forward_exception() { + StubCodeMark mark(this, "StubRoutines", "forward_exception"); + address start = __ pc(); + +#if !defined(PRODUCT) + if (VerifyOops) { + // Get pending exception oop. + __ ld(R3_ARG1, + in_bytes(Thread::pending_exception_offset()), + R16_thread); + // Make sure that this code is only executed if there is a pending exception. + { + Label L; + __ cmpdi(CCR0, R3_ARG1, 0); + __ bne(CCR0, L); + __ stop("StubRoutines::forward exception: no pending exception (1)"); + __ bind(L); + } + __ verify_oop(R3_ARG1, "StubRoutines::forward exception: not an oop"); + } +#endif + + // Save LR/CR and copy exception pc (LR) into R4_ARG2. + __ save_LR_CR(R4_ARG2); + __ push_frame_abi112(0, R0); + // Find exception handler. + __ call_VM_leaf(CAST_FROM_FN_PTR(address, + SharedRuntime::exception_handler_for_return_address), + R16_thread, + R4_ARG2); + // Copy handler's address. + __ mtctr(R3_RET); + __ pop_frame(); + __ restore_LR_CR(R0); + + // Set up the arguments for the exception handler: + // - R3_ARG1: exception oop + // - R4_ARG2: exception pc. + + // Load pending exception oop. + __ ld(R3_ARG1, + in_bytes(Thread::pending_exception_offset()), + R16_thread); + + // The exception pc is the return address in the caller. + // Must load it into R4_ARG2. + __ mflr(R4_ARG2); + +#ifdef ASSERT + // Make sure exception is set. 
+ { + Label L; + __ cmpdi(CCR0, R3_ARG1, 0); + __ bne(CCR0, L); + __ stop("StubRoutines::forward exception: no pending exception (2)"); + __ bind(L); + } +#endif + + // Clear the pending exception. + __ li(R0, 0); + __ std(R0, + in_bytes(Thread::pending_exception_offset()), + R16_thread); + // Jump to exception handler. + __ bctr(); + + return start; + } + +#undef __ +#define __ masm-> + // Continuation point for throwing of implicit exceptions that are + // not handled in the current activation. Fabricates an exception + // oop and initiates normal exception dispatching in this + // frame. Only callee-saved registers are preserved (through the + // normal register window / RegisterMap handling). If the compiler + // needs all registers to be preserved between the fault point and + // the exception handler then it must assume responsibility for that + // in AbstractCompiler::continuation_for_implicit_null_exception or + // continuation_for_implicit_division_by_zero_exception. All other + // implicit exceptions (e.g., NullPointerException or + // AbstractMethodError on entry) are either at call sites or + // otherwise assume that stack unwinding will be initiated, so + // caller saved registers were assumed volatile in the compiler. + // + // Note that we generate only this stub into a RuntimeStub, because + // it needs to be properly traversed and ignored during GC, so we + // change the meaning of the "__" macro within this method. + // + // Note: the routine set_pc_not_at_call_for_caller in + // SharedRuntime.cpp requires that this code be generated into a + // RuntimeStub. + address generate_throw_exception(const char* name, address runtime_entry, bool restore_saved_exception_pc, + Register arg1 = noreg, Register arg2 = noreg) { + CodeBuffer code(name, 1024 DEBUG_ONLY(+ 512), 0); + MacroAssembler* masm = new MacroAssembler(&code); + + OopMapSet* oop_maps = new OopMapSet(); + int frame_size_in_bytes = frame::abi_112_size; + OopMap* map = new OopMap(frame_size_in_bytes / sizeof(jint), 0); + + StubCodeMark mark(this, "StubRoutines", "throw_exception"); + + address start = __ pc(); + + __ save_LR_CR(R11_scratch1); + + // Push a frame. + __ push_frame_abi112(0, R11_scratch1); + + address frame_complete_pc = __ pc(); + + if (restore_saved_exception_pc) { + __ unimplemented("StubGenerator::throw_exception with restore_saved_exception_pc", 74); + } + + // Note that we always have a runtime stub frame on the top of + // stack by this point. Remember the offset of the instruction + // whose address will be moved to R11_scratch1. + address gc_map_pc = __ get_PC_trash_LR(R11_scratch1); + + __ set_last_Java_frame(/*sp*/R1_SP, /*pc*/R11_scratch1); + + __ mr(R3_ARG1, R16_thread); + if (arg1 != noreg) { + __ mr(R4_ARG2, arg1); + } + if (arg2 != noreg) { + __ mr(R5_ARG3, arg2); + } + __ call_c(CAST_FROM_FN_PTR(FunctionDescriptor*, runtime_entry), + relocInfo::none); + + // Set an oopmap for the call site. + oop_maps->add_gc_map((int)(gc_map_pc - start), map); + + __ reset_last_Java_frame(); + +#ifdef ASSERT + // Make sure that this code is only executed if there is a pending + // exception. + { + Label L; + __ ld(R0, + in_bytes(Thread::pending_exception_offset()), + R16_thread); + __ cmpdi(CCR0, R0, 0); + __ bne(CCR0, L); + __ stop("StubRoutines::throw_exception: no pending exception"); + __ bind(L); + } +#endif + + // Pop frame. 
+ __ pop_frame(); + + __ restore_LR_CR(R11_scratch1); + + __ load_const(R11_scratch1, StubRoutines::forward_exception_entry()); + __ mtctr(R11_scratch1); + __ bctr(); + + // Create runtime stub with OopMap. + RuntimeStub* stub = + RuntimeStub::new_runtime_stub(name, &code, + /*frame_complete=*/ (int)(frame_complete_pc - start), + frame_size_in_bytes/wordSize, + oop_maps, + false); + return stub->entry_point(); + } +#undef __ +#define __ _masm-> + + // Generate G1 pre-write barrier for array. + // + // Input: + // from - register containing src address (only needed for spilling) + // to - register containing starting address + // count - register containing element count + // tmp - scratch register + // + // Kills: + // nothing + // + void gen_write_ref_array_pre_barrier(Register from, Register to, Register count, bool dest_uninitialized, Register Rtmp1) { + BarrierSet* const bs = Universe::heap()->barrier_set(); + switch (bs->kind()) { + case BarrierSet::G1SATBCT: + case BarrierSet::G1SATBCTLogging: + // With G1, don't generate the call if we statically know that the target in uninitialized + if (!dest_uninitialized) { + const int spill_slots = 4 * wordSize; + const int frame_size = frame::abi_112_size + spill_slots; + + __ save_LR_CR(R0); + __ push_frame_abi112(spill_slots, R0); + __ std(from, frame_size - 1 * wordSize, R1_SP); + __ std(to, frame_size - 2 * wordSize, R1_SP); + __ std(count, frame_size - 3 * wordSize, R1_SP); + + __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_pre), to, count); + + __ ld(from, frame_size - 1 * wordSize, R1_SP); + __ ld(to, frame_size - 2 * wordSize, R1_SP); + __ ld(count, frame_size - 3 * wordSize, R1_SP); + __ pop_frame(); + __ restore_LR_CR(R0); + } + break; + case BarrierSet::CardTableModRef: + case BarrierSet::CardTableExtension: + case BarrierSet::ModRef: + break; + default: + ShouldNotReachHere(); + } + } + + // Generate CMS/G1 post-write barrier for array. + // + // Input: + // addr - register containing starting address + // count - register containing element count + // tmp - scratch register + // + // The input registers and R0 are overwritten. + // + void gen_write_ref_array_post_barrier(Register addr, Register count, Register tmp) { + BarrierSet* const bs = Universe::heap()->barrier_set(); + + switch (bs->kind()) { + case BarrierSet::G1SATBCT: + case BarrierSet::G1SATBCTLogging: + { + __ save_LR_CR(R0); + // We need this frame only that the callee can spill LR/CR. + __ push_frame_abi112(0, R0); + + __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_post), addr, count); + + __ pop_frame(); + __ restore_LR_CR(R0); + } + break; + case BarrierSet::CardTableModRef: + case BarrierSet::CardTableExtension: + { + Label Lskip_loop, Lstore_loop; + if (UseConcMarkSweepGC) { + // TODO PPC port: contribute optimization / requires shared changes + __ release(); + } + + CardTableModRefBS* const ct = (CardTableModRefBS*)bs; + assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code"); + assert_different_registers(addr, count, tmp); + + __ sldi(count, count, LogBytesPerHeapOop); + __ addi(count, count, -BytesPerHeapOop); + __ add(count, addr, count); + // Use two shifts to clear out those low order two bits! (Cannot opt. into 1.) 
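// Plain C++ model of the card-marking loop emitted here: compute the first and
// last card covered by the written range and dirty each one.  The card size,
// the flat "heap offsets" used as addresses and the toy byte map are
// simplifying assumptions (the stub adds ct->byte_map_base instead).

#include <cstdio>
#include <cstring>

const int     card_shift = 9;            // 512-byte cards
unsigned char byte_map[1 << 10];         // toy card table

void post_barrier(unsigned long addr, unsigned long count, int oop_size) {
  unsigned long last       = addr + count * oop_size - oop_size;  // last oop written
  unsigned long first_card = addr >> card_shift;
  unsigned long last_card  = last >> card_shift;
  for (unsigned long c = first_card; c <= last_card; c++) {
    byte_map[c] = 0;                     // 0 == dirty, matching li(R0,0)/stbx above
  }
}

int main() {
  std::memset(byte_map, 0xFF, sizeof(byte_map));  // mark everything clean
  post_barrier(0, 200, 8);                        // 1600 bytes span cards 0..3
  int dirty = 0;
  for (unsigned c = 0; c < sizeof(byte_map); c++) dirty += (byte_map[c] == 0);
  printf("dirty cards: %d\n", dirty);             // 4
  return 0;
}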
+ __ srdi(addr, addr, CardTableModRefBS::card_shift); + __ srdi(count, count, CardTableModRefBS::card_shift); + __ subf(count, addr, count); + assert_different_registers(R0, addr, count, tmp); + __ load_const(tmp, (address)ct->byte_map_base); + __ addic_(count, count, 1); + __ beq(CCR0, Lskip_loop); + __ li(R0, 0); + __ mtctr(count); + // Byte store loop + __ bind(Lstore_loop); + __ stbx(R0, tmp, addr); + __ addi(addr, addr, 1); + __ bdnz(Lstore_loop); + __ bind(Lskip_loop); + } + break; + case BarrierSet::ModRef: + break; + default: + ShouldNotReachHere(); + } + } + + // Support for void zero_words_aligned8(HeapWord* to, size_t count) + // + // Arguments: + // to: + // count: + // + // Destroys: + // + address generate_zero_words_aligned8() { + StubCodeMark mark(this, "StubRoutines", "zero_words_aligned8"); + + // Implemented as in ClearArray. + address start = __ emit_fd(); + + Register base_ptr_reg = R3_ARG1; // tohw (needs to be 8b aligned) + Register cnt_dwords_reg = R4_ARG2; // count (in dwords) + Register tmp1_reg = R5_ARG3; + Register tmp2_reg = R6_ARG4; + Register zero_reg = R7_ARG5; + + // Procedure for large arrays (uses data cache block zero instruction). + Label dwloop, fast, fastloop, restloop, lastdword, done; + int cl_size=VM_Version::get_cache_line_size(), cl_dwords=cl_size>>3, cl_dwordaddr_bits=exact_log2(cl_dwords); + int min_dcbz=2; // Needs to be positive, apply dcbz only to at least min_dcbz cache lines. + + // Clear up to 128byte boundary if long enough, dword_cnt=(16-(base>>3))%16. + __ dcbtst(base_ptr_reg); // Indicate write access to first cache line ... + __ andi(tmp2_reg, cnt_dwords_reg, 1); // to check if number of dwords is even. + __ srdi_(tmp1_reg, cnt_dwords_reg, 1); // number of double dwords + __ load_const_optimized(zero_reg, 0L); // Use as zero register. + + __ cmpdi(CCR1, tmp2_reg, 0); // cnt_dwords even? + __ beq(CCR0, lastdword); // size <= 1 + __ mtctr(tmp1_reg); // Speculatively preload counter for rest loop (>0). + __ cmpdi(CCR0, cnt_dwords_reg, (min_dcbz+1)*cl_dwords-1); // Big enough to ensure >=min_dcbz cache lines are included? + __ neg(tmp1_reg, base_ptr_reg); // bit 0..58: bogus, bit 57..60: (16-(base>>3))%16, bit 61..63: 000 + + __ blt(CCR0, restloop); // Too small. (<31=(2*cl_dwords)-1 is sufficient, but bigger performs better.) + __ rldicl_(tmp1_reg, tmp1_reg, 64-3, 64-cl_dwordaddr_bits); // Extract number of dwords to 128byte boundary=(16-(base>>3))%16. + + __ beq(CCR0, fast); // already 128byte aligned + __ mtctr(tmp1_reg); // Set ctr to hit 128byte boundary (00 since size>=256-8) + + // Clear in first cache line dword-by-dword if not already 128byte aligned. + __ bind(dwloop); + __ std(zero_reg, 0, base_ptr_reg); // Clear 8byte aligned block. + __ addi(base_ptr_reg, base_ptr_reg, 8); + __ bdnz(dwloop); + + // clear 128byte blocks + __ bind(fast); + __ srdi(tmp1_reg, cnt_dwords_reg, cl_dwordaddr_bits); // loop count for 128byte loop (>0 since size>=256-8) + __ andi(tmp2_reg, cnt_dwords_reg, 1); // to check if rest even + + __ mtctr(tmp1_reg); // load counter + __ cmpdi(CCR1, tmp2_reg, 0); // rest even? + __ rldicl_(tmp1_reg, cnt_dwords_reg, 63, 65-cl_dwordaddr_bits); // rest in double dwords + + __ bind(fastloop); + __ dcbz(base_ptr_reg); // Clear 128byte aligned block. + __ addi(base_ptr_reg, base_ptr_reg, cl_size); + __ bdnz(fastloop); + + //__ dcbtst(base_ptr_reg); // Indicate write access to last cache line. + __ beq(CCR0, lastdword); // rest<=1 + __ mtctr(tmp1_reg); // load counter + + // Clear rest. 
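// The alignment arithmetic used above: the number of 8-byte words needed to
// reach the next 128-byte boundary is (16 - (base >> 3)) % 16, which the stub
// extracts from the negated base pointer with rldicl.  Quick C++ check:

#include <cstdio>

int dwords_to_cache_line(unsigned long base) {   // base is 8-byte aligned
  return (int)((16 - (base >> 3)) & 15);         // same as ((-base) >> 3) & 15
}

int main() {
  printf("%d %d %d\n",
         dwords_to_cache_line(0x1000),   // already aligned -> 0
         dwords_to_cache_line(0x1008),   // one dword past  -> 15
         dwords_to_cache_line(0x1078));  // 8 bytes short   -> 1
  return 0;
}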
+ __ bind(restloop); + __ std(zero_reg, 0, base_ptr_reg); // Clear 8byte aligned block. + __ std(zero_reg, 8, base_ptr_reg); // Clear 8byte aligned block. + __ addi(base_ptr_reg, base_ptr_reg, 16); + __ bdnz(restloop); + + __ bind(lastdword); + __ beq(CCR1, done); + __ std(zero_reg, 0, base_ptr_reg); + __ bind(done); + __ blr(); // return + + return start; + } + + // The following routine generates a subroutine to throw an asynchronous + // UnknownError when an unsafe access gets a fault that could not be + // reasonably prevented by the programmer. (Example: SIGBUS/OBJERR.) + // + address generate_handler_for_unsafe_access() { + StubCodeMark mark(this, "StubRoutines", "handler_for_unsafe_access"); + address start = __ emit_fd(); + __ unimplemented("StubRoutines::handler_for_unsafe_access", 93); + return start; + } + +#if !defined(PRODUCT) + // Wrapper which calls oopDesc::is_oop_or_null() + // Only called by MacroAssembler::verify_oop + static void verify_oop_helper(const char* message, oop o) { + if (!o->is_oop_or_null()) { + fatal(message); + } + ++ StubRoutines::_verify_oop_count; + } +#endif + + // Return address of code to be called from code generated by + // MacroAssembler::verify_oop. + // + // Don't generate, rather use C++ code. + address generate_verify_oop() { + StubCodeMark mark(this, "StubRoutines", "verify_oop"); + + // this is actually a `FunctionDescriptor*'. + address start = 0; + +#if !defined(PRODUCT) + start = CAST_FROM_FN_PTR(address, verify_oop_helper); +#endif + + return start; + } + + // Fairer handling of safepoints for native methods. + // + // Generate code which reads from the polling page. This special handling is needed as the + // linux-ppc64 kernel before 2.6.6 doesn't set si_addr on some segfaults in 64bit mode + // (cf. http://www.kernel.org/pub/linux/kernel/v2.6/ChangeLog-2.6.6), especially when we try + // to read from the safepoint polling page. + address generate_load_from_poll() { + StubCodeMark mark(this, "StubRoutines", "generate_load_from_poll"); + address start = __ emit_fd(); + __ unimplemented("StubRoutines::verify_oop", 95); // TODO PPC port + return start; + } + + // -XX:+OptimizeFill : convert fill/copy loops into intrinsic + // + // The code is implemented(ported from sparc) as we believe it benefits JVM98, however + // tracing(-XX:+TraceOptimizeFill) shows the intrinsic replacement doesn't happen at all! + // + // Source code in function is_range_check_if() shows OptimizeFill relaxed the condition + // for turning on loop predication optimization, and hence the behavior of "array range check" + // and "loop invariant check" could be influenced, which potentially boosted JVM98. + // + // We leave the code here and see if Oracle has updates in later releases(later than HS20). + // + // Generate stub for disjoint short fill. If "aligned" is true, the + // "to" address is assumed to be heapword aligned. + // + // Arguments for generated stub: + // to: R3_ARG1 + // value: R4_ARG2 + // count: R5_ARG3 treated as signed + // + address generate_fill(BasicType t, bool aligned, const char* name) { + StubCodeMark mark(this, "StubRoutines", name); + address start = __ emit_fd(); + + const Register to = R3_ARG1; // source array address + const Register value = R4_ARG2; // fill value + const Register count = R5_ARG3; // elements count + const Register temp = R6_ARG4; // temp register + + //assert_clean_int(count, O3); // Make sure 'count' is clean int. 
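Editorial aside (not part of the patch): the rldimi doublings in the switch that follows (8 -> 16 -> 32 -> 64 bit) replicate the fill value across a 64-bit register so the bulk loops can issue 8-byte stores. A hedged C++ sketch of the same idea (illustrative helper, not a HotSpot function):

    #include <cstdint>

    // Widen a fill value so the main loop can store 8 bytes at a time.
    // 'log_elem_size' is 0 for jbyte, 1 for jshort, 2 for jint.
    static inline uint64_t replicate_fill_value(uint64_t value, int log_elem_size) {
      switch (log_elem_size) {
        case 0:  return (value & 0xFF)   * 0x0101010101010101ULL;                    // jbyte  -> 8 copies
        case 1:  return (value & 0xFFFF) * 0x0001000100010001ULL;                    // jshort -> 4 copies
        default: return (value & 0xFFFFFFFFULL) | ((value & 0xFFFFFFFFULL) << 32);   // jint   -> 2 copies
      }
    }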
+ + Label L_exit, L_skip_align1, L_skip_align2, L_fill_byte; + Label L_fill_2_bytes, L_fill_4_bytes, L_fill_elements, L_fill_32_bytes; + + int shift = -1; + switch (t) { + case T_BYTE: + shift = 2; + // clone bytes (zero extend not needed because store instructions below ignore high order bytes) + __ rldimi(value, value, 8, 48); // 8 bit -> 16 bit + __ cmpdi(CCR0, count, 2< 32 bit + break; + case T_SHORT: + shift = 1; + // clone bytes (zero extend not needed because store instructions below ignore high order bytes) + __ rldimi(value, value, 16, 32); // 16 bit -> 32 bit + __ cmpdi(CCR0, count, 2<long as above + __ rldimi(value, value, 32, 0); // 32 bit -> 64 bit + + Label L_check_fill_8_bytes; + // Fill 32-byte chunks + __ subf_(count, temp, count); + __ blt(CCR0, L_check_fill_8_bytes); + + Label L_fill_32_bytes_loop; + __ align(32); + __ bind(L_fill_32_bytes_loop); + + __ std(value, 0, to); + __ std(value, 8, to); + __ subf_(count, temp, count); // update count + __ std(value, 16, to); + __ std(value, 24, to); + + __ addi(to, to, 32); + __ bge(CCR0, L_fill_32_bytes_loop); + + __ bind(L_check_fill_8_bytes); + __ add_(count, temp, count); + __ beq(CCR0, L_exit); + __ addic_(count, count, -(2 << shift)); + __ blt(CCR0, L_fill_4_bytes); + + // + // Length is too short, just fill 8 bytes at a time. + // + Label L_fill_8_bytes_loop; + __ bind(L_fill_8_bytes_loop); + __ std(value, 0, to); + __ addic_(count, count, -(2 << shift)); + __ addi(to, to, 8); + __ bge(CCR0, L_fill_8_bytes_loop); + + // fill trailing 4 bytes + __ bind(L_fill_4_bytes); + __ andi_(temp, count, 1< to or from is aligned -> copy 8 + + // copy a 2-element word if necessary to align to 8 bytes + __ andi_(R0, R3_ARG1, 7); + __ beq(CCR0, l_7); + + __ lwzx(tmp2, R3_ARG1, tmp3); + __ addi(R5_ARG3, R5_ARG3, -4); + __ stwx(tmp2, R4_ARG2, tmp3); + { // FasterArrayCopy + __ addi(R3_ARG1, R3_ARG1, 4); + __ addi(R4_ARG2, R4_ARG2, 4); + } + __ bind(l_7); + + { // FasterArrayCopy + __ cmpwi(CCR0, R5_ARG3, 31); + __ ble(CCR0, l_6); // copy 2 at a time if less than 32 elements remain + + __ srdi(tmp1, R5_ARG3, 5); + __ andi_(R5_ARG3, R5_ARG3, 31); + __ mtctr(tmp1); + + __ bind(l_8); + // Use unrolled version for mass copying (copy 32 elements a time) + // Load feeding store gets zero latency on Power6, however not on Power5. + // Therefore, the following sequence is made for the good of both. + __ ld(tmp1, 0, R3_ARG1); + __ ld(tmp2, 8, R3_ARG1); + __ ld(tmp3, 16, R3_ARG1); + __ ld(tmp4, 24, R3_ARG1); + __ std(tmp1, 0, R4_ARG2); + __ std(tmp2, 8, R4_ARG2); + __ std(tmp3, 16, R4_ARG2); + __ std(tmp4, 24, R4_ARG2); + __ addi(R3_ARG1, R3_ARG1, 32); + __ addi(R4_ARG2, R4_ARG2, 32); + __ bdnz(l_8); + } + + __ bind(l_6); + + // copy 4 elements at a time + __ cmpwi(CCR0, R5_ARG3, 4); + __ blt(CCR0, l_1); + __ srdi(tmp1, R5_ARG3, 2); + __ mtctr(tmp1); // is > 0 + __ andi_(R5_ARG3, R5_ARG3, 3); + + { // FasterArrayCopy + __ addi(R3_ARG1, R3_ARG1, -4); + __ addi(R4_ARG2, R4_ARG2, -4); + __ bind(l_3); + __ lwzu(tmp2, 4, R3_ARG1); + __ stwu(tmp2, 4, R4_ARG2); + __ bdnz(l_3); + __ addi(R3_ARG1, R3_ARG1, 4); + __ addi(R4_ARG2, R4_ARG2, 4); + } + + // do single element copy + __ bind(l_1); + __ cmpwi(CCR0, R5_ARG3, 0); + __ beq(CCR0, l_4); + + { // FasterArrayCopy + __ mtctr(R5_ARG3); + __ addi(R3_ARG1, R3_ARG1, -1); + __ addi(R4_ARG2, R4_ARG2, -1); + + __ bind(l_5); + __ lbzu(tmp2, 1, R3_ARG1); + __ stbu(tmp2, 1, R4_ARG2); + __ bdnz(l_5); + } + + __ bind(l_4); + __ blr(); + + return start; + } + + // Generate stub for conjoint byte copy. 
If "aligned" is true, the + // "from" and "to" addresses are assumed to be heapword aligned. + // + // Arguments for generated stub: + // from: R3_ARG1 + // to: R4_ARG2 + // count: R5_ARG3 treated as signed + // + address generate_conjoint_byte_copy(bool aligned, const char * name) { + StubCodeMark mark(this, "StubRoutines", name); + address start = __ emit_fd(); + + Register tmp1 = R6_ARG4; + Register tmp2 = R7_ARG5; + Register tmp3 = R8_ARG6; + + address nooverlap_target = aligned ? + ((FunctionDescriptor*)StubRoutines::arrayof_jbyte_disjoint_arraycopy())->entry() : + ((FunctionDescriptor*)StubRoutines::jbyte_disjoint_arraycopy())->entry(); + + array_overlap_test(nooverlap_target, 0); + // Do reverse copy. We assume the case of actual overlap is rare enough + // that we don't have to optimize it. + Label l_1, l_2; + + __ b(l_2); + __ bind(l_1); + __ stbx(tmp1, R4_ARG2, R5_ARG3); + __ bind(l_2); + __ addic_(R5_ARG3, R5_ARG3, -1); + __ lbzx(tmp1, R3_ARG1, R5_ARG3); + __ bge(CCR0, l_1); + + __ blr(); + + return start; + } + + // Generate stub for disjoint short copy. If "aligned" is true, the + // "from" and "to" addresses are assumed to be heapword aligned. + // + // Arguments for generated stub: + // from: R3_ARG1 + // to: R4_ARG2 + // elm.count: R5_ARG3 treated as signed + // + // Strategy for aligned==true: + // + // If length <= 9: + // 1. copy 2 elements at a time (l_6) + // 2. copy last element if original element count was odd (l_1) + // + // If length > 9: + // 1. copy 4 elements at a time until less than 4 elements are left (l_7) + // 2. copy 2 elements at a time until less than 2 elements are left (l_6) + // 3. copy last element if one was left in step 2. (l_1) + // + // + // Strategy for aligned==false: + // + // If length <= 9: same as aligned==true case, but NOTE: load/stores + // can be unaligned (see comment below) + // + // If length > 9: + // 1. continue with step 6. if the alignment of from and to mod 4 + // is different. + // 2. align from and to to 4 bytes by copying 1 element if necessary + // 3. at l_2 from and to are 4 byte aligned; continue with + // 5. if they cannot be aligned to 8 bytes because they have + // got different alignment mod 8. + // 4. at this point we know that both, from and to, have the same + // alignment mod 8, now copy one element if necessary to get + // 8 byte alignment of from and to. + // 5. copy 4 elements at a time until less than 4 elements are + // left; depending on step 3. all load/stores are aligned or + // either all loads or all stores are unaligned. + // 6. copy 2 elements at a time until less than 2 elements are + // left (l_6); arriving here from step 1., there is a chance + // that all accesses are unaligned. + // 7. copy last element if one was left in step 6. (l_1) + // + // There are unaligned data accesses using integer load/store + // instructions in this stub. POWER allows such accesses. + // + // According to the manuals (PowerISA_V2.06_PUBLIC, Book II, + // Chapter 2: Effect of Operand Placement on Performance) unaligned + // integer load/stores have good performance. Only unaligned + // floating point load/stores can have poor performance. + // + // TODO: + // + // 1. 
check if aligning the backbranch target of loops is beneficial + // + address generate_disjoint_short_copy(bool aligned, const char * name) { + StubCodeMark mark(this, "StubRoutines", name); + + Register tmp1 = R6_ARG4; + Register tmp2 = R7_ARG5; + Register tmp3 = R8_ARG6; + Register tmp4 = R9_ARG7; + + address start = __ emit_fd(); + + Label l_1, l_2, l_3, l_4, l_5, l_6, l_7, l_8; + // don't try anything fancy if arrays don't have many elements + __ li(tmp3, 0); + __ cmpwi(CCR0, R5_ARG3, 9); + __ ble(CCR0, l_6); // copy 2 at a time + + if (!aligned) { + __ xorr(tmp1, R3_ARG1, R4_ARG2); + __ andi_(tmp1, tmp1, 3); + __ bne(CCR0, l_6); // if arrays don't have the same alignment mod 4, do 2 element copy + + // At this point it is guaranteed that both, from and to have the same alignment mod 4. + + // Copy 1 element if necessary to align to 4 bytes. + __ andi_(tmp1, R3_ARG1, 3); + __ beq(CCR0, l_2); + + __ lhz(tmp2, 0, R3_ARG1); + __ addi(R3_ARG1, R3_ARG1, 2); + __ sth(tmp2, 0, R4_ARG2); + __ addi(R4_ARG2, R4_ARG2, 2); + __ addi(R5_ARG3, R5_ARG3, -1); + __ bind(l_2); + + // At this point the positions of both, from and to, are at least 4 byte aligned. + + // Copy 4 elements at a time. + // Align to 8 bytes, but only if both, from and to, have same alignment mod 8. + __ xorr(tmp2, R3_ARG1, R4_ARG2); + __ andi_(tmp1, tmp2, 7); + __ bne(CCR0, l_7); // not same alignment mod 8 -> copy 4, either from or to will be unaligned + + // Copy a 2-element word if necessary to align to 8 bytes. + __ andi_(R0, R3_ARG1, 7); + __ beq(CCR0, l_7); + + __ lwzx(tmp2, R3_ARG1, tmp3); + __ addi(R5_ARG3, R5_ARG3, -2); + __ stwx(tmp2, R4_ARG2, tmp3); + { // FasterArrayCopy + __ addi(R3_ARG1, R3_ARG1, 4); + __ addi(R4_ARG2, R4_ARG2, 4); + } + } + + __ bind(l_7); + + // Copy 4 elements at a time; either the loads or the stores can + // be unaligned if aligned == false. + + { // FasterArrayCopy + __ cmpwi(CCR0, R5_ARG3, 15); + __ ble(CCR0, l_6); // copy 2 at a time if less than 16 elements remain + + __ srdi(tmp1, R5_ARG3, 4); + __ andi_(R5_ARG3, R5_ARG3, 15); + __ mtctr(tmp1); + + __ bind(l_8); + // Use unrolled version for mass copying (copy 16 elements a time). + // Load feeding store gets zero latency on Power6, however not on Power5. + // Therefore, the following sequence is made for the good of both. + __ ld(tmp1, 0, R3_ARG1); + __ ld(tmp2, 8, R3_ARG1); + __ ld(tmp3, 16, R3_ARG1); + __ ld(tmp4, 24, R3_ARG1); + __ std(tmp1, 0, R4_ARG2); + __ std(tmp2, 8, R4_ARG2); + __ std(tmp3, 16, R4_ARG2); + __ std(tmp4, 24, R4_ARG2); + __ addi(R3_ARG1, R3_ARG1, 32); + __ addi(R4_ARG2, R4_ARG2, 32); + __ bdnz(l_8); + } + __ bind(l_6); + + // copy 2 elements at a time + { // FasterArrayCopy + __ cmpwi(CCR0, R5_ARG3, 2); + __ blt(CCR0, l_1); + __ srdi(tmp1, R5_ARG3, 1); + __ andi_(R5_ARG3, R5_ARG3, 1); + + __ addi(R3_ARG1, R3_ARG1, -4); + __ addi(R4_ARG2, R4_ARG2, -4); + __ mtctr(tmp1); + + __ bind(l_3); + __ lwzu(tmp2, 4, R3_ARG1); + __ stwu(tmp2, 4, R4_ARG2); + __ bdnz(l_3); + + __ addi(R3_ARG1, R3_ARG1, 4); + __ addi(R4_ARG2, R4_ARG2, 4); + } + + // do single element copy + __ bind(l_1); + __ cmpwi(CCR0, R5_ARG3, 0); + __ beq(CCR0, l_4); + + { // FasterArrayCopy + __ mtctr(R5_ARG3); + __ addi(R3_ARG1, R3_ARG1, -2); + __ addi(R4_ARG2, R4_ARG2, -2); + + __ bind(l_5); + __ lhzu(tmp2, 2, R3_ARG1); + __ sthu(tmp2, 2, R4_ARG2); + __ bdnz(l_5); + } + __ bind(l_4); + __ blr(); + + return start; + } + + // Generate stub for conjoint short copy. If "aligned" is true, the + // "from" and "to" addresses are assumed to be heapword aligned. 
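Editorial aside (not part of the patch): the alignment strategy documented above can be read as the following plain C++ over 16-bit elements. It is deliberately simplified (the same-alignment-mod-4/8 checks are dropped) and is an illustration only, not a drop-in replacement for the stub:

    #include <cstddef>
    #include <cstdint>
    #include <cstring>

    static void disjoint_short_copy_sketch(const uint16_t* from, uint16_t* to, size_t count) {
      // Copy single elements until 'from' reaches 8-byte alignment.
      while (count > 0 && (reinterpret_cast<uintptr_t>(from) & 7) != 0) {
        *to++ = *from++;
        count--;
      }
      // Bulk loop: 16 elements == 32 bytes per iteration (the four ld/std pairs at l_8).
      while (count >= 16) {
        std::memcpy(to, from, 32);
        from += 16; to += 16; count -= 16;
      }
      // Tail: pairs, then a possible last element.
      while (count >= 2) { std::memcpy(to, from, 4); from += 2; to += 2; count -= 2; }
      if (count != 0) *to = *from;
    }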
+ // + // Arguments for generated stub: + // from: R3_ARG1 + // to: R4_ARG2 + // count: R5_ARG3 treated as signed + // + address generate_conjoint_short_copy(bool aligned, const char * name) { + StubCodeMark mark(this, "StubRoutines", name); + address start = __ emit_fd(); + + Register tmp1 = R6_ARG4; + Register tmp2 = R7_ARG5; + Register tmp3 = R8_ARG6; + + address nooverlap_target = aligned ? + ((FunctionDescriptor*)StubRoutines::arrayof_jshort_disjoint_arraycopy())->entry() : + ((FunctionDescriptor*)StubRoutines::jshort_disjoint_arraycopy())->entry(); + + array_overlap_test(nooverlap_target, 1); + + Label l_1, l_2; + __ sldi(tmp1, R5_ARG3, 1); + __ b(l_2); + __ bind(l_1); + __ sthx(tmp2, R4_ARG2, tmp1); + __ bind(l_2); + __ addic_(tmp1, tmp1, -2); + __ lhzx(tmp2, R3_ARG1, tmp1); + __ bge(CCR0, l_1); + + __ blr(); + + return start; + } + + // Generate core code for disjoint int copy (and oop copy on 32-bit). If "aligned" + // is true, the "from" and "to" addresses are assumed to be heapword aligned. + // + // Arguments: + // from: R3_ARG1 + // to: R4_ARG2 + // count: R5_ARG3 treated as signed + // + void generate_disjoint_int_copy_core(bool aligned) { + Register tmp1 = R6_ARG4; + Register tmp2 = R7_ARG5; + Register tmp3 = R8_ARG6; + Register tmp4 = R0; + + Label l_1, l_2, l_3, l_4, l_5, l_6; + // for short arrays, just do single element copy + __ li(tmp3, 0); + __ cmpwi(CCR0, R5_ARG3, 5); + __ ble(CCR0, l_2); + + if (!aligned) { + // check if arrays have same alignment mod 8. + __ xorr(tmp1, R3_ARG1, R4_ARG2); + __ andi_(R0, tmp1, 7); + // Not the same alignment, but ld and std just need to be 4 byte aligned. + __ bne(CCR0, l_4); // to OR from is 8 byte aligned -> copy 2 at a time + + // copy 1 element to align to and from on an 8 byte boundary + __ andi_(R0, R3_ARG1, 7); + __ beq(CCR0, l_4); + + __ lwzx(tmp2, R3_ARG1, tmp3); + __ addi(R5_ARG3, R5_ARG3, -1); + __ stwx(tmp2, R4_ARG2, tmp3); + { // FasterArrayCopy + __ addi(R3_ARG1, R3_ARG1, 4); + __ addi(R4_ARG2, R4_ARG2, 4); + } + __ bind(l_4); + } + + { // FasterArrayCopy + __ cmpwi(CCR0, R5_ARG3, 7); + __ ble(CCR0, l_2); // copy 1 at a time if less than 8 elements remain + + __ srdi(tmp1, R5_ARG3, 3); + __ andi_(R5_ARG3, R5_ARG3, 7); + __ mtctr(tmp1); + + __ bind(l_6); + // Use unrolled version for mass copying (copy 8 elements a time). + // Load feeding store gets zero latency on power6, however not on power 5. + // Therefore, the following sequence is made for the good of both. + __ ld(tmp1, 0, R3_ARG1); + __ ld(tmp2, 8, R3_ARG1); + __ ld(tmp3, 16, R3_ARG1); + __ ld(tmp4, 24, R3_ARG1); + __ std(tmp1, 0, R4_ARG2); + __ std(tmp2, 8, R4_ARG2); + __ std(tmp3, 16, R4_ARG2); + __ std(tmp4, 24, R4_ARG2); + __ addi(R3_ARG1, R3_ARG1, 32); + __ addi(R4_ARG2, R4_ARG2, 32); + __ bdnz(l_6); + } + + // copy 1 element at a time + __ bind(l_2); + __ cmpwi(CCR0, R5_ARG3, 0); + __ beq(CCR0, l_1); + + { // FasterArrayCopy + __ mtctr(R5_ARG3); + __ addi(R3_ARG1, R3_ARG1, -4); + __ addi(R4_ARG2, R4_ARG2, -4); + + __ bind(l_3); + __ lwzu(tmp2, 4, R3_ARG1); + __ stwu(tmp2, 4, R4_ARG2); + __ bdnz(l_3); + } + + __ bind(l_1); + return; + } + + // Generate stub for disjoint int copy. If "aligned" is true, the + // "from" and "to" addresses are assumed to be heapword aligned. 
+ // + // Arguments for generated stub: + // from: R3_ARG1 + // to: R4_ARG2 + // count: R5_ARG3 treated as signed + // + address generate_disjoint_int_copy(bool aligned, const char * name) { + StubCodeMark mark(this, "StubRoutines", name); + address start = __ emit_fd(); + generate_disjoint_int_copy_core(aligned); + __ blr(); + return start; + } + + // Generate core code for conjoint int copy (and oop copy on + // 32-bit). If "aligned" is true, the "from" and "to" addresses + // are assumed to be heapword aligned. + // + // Arguments: + // from: R3_ARG1 + // to: R4_ARG2 + // count: R5_ARG3 treated as signed + // + void generate_conjoint_int_copy_core(bool aligned) { + // Do reverse copy. We assume the case of actual overlap is rare enough + // that we don't have to optimize it. + + Label l_1, l_2, l_3, l_4, l_5, l_6; + + Register tmp1 = R6_ARG4; + Register tmp2 = R7_ARG5; + Register tmp3 = R8_ARG6; + Register tmp4 = R0; + + { // FasterArrayCopy + __ cmpwi(CCR0, R5_ARG3, 0); + __ beq(CCR0, l_6); + + __ sldi(R5_ARG3, R5_ARG3, 2); + __ add(R3_ARG1, R3_ARG1, R5_ARG3); + __ add(R4_ARG2, R4_ARG2, R5_ARG3); + __ srdi(R5_ARG3, R5_ARG3, 2); + + __ cmpwi(CCR0, R5_ARG3, 7); + __ ble(CCR0, l_5); // copy 1 at a time if less than 8 elements remain + + __ srdi(tmp1, R5_ARG3, 3); + __ andi(R5_ARG3, R5_ARG3, 7); + __ mtctr(tmp1); + + __ bind(l_4); + // Use unrolled version for mass copying (copy 4 elements a time). + // Load feeding store gets zero latency on Power6, however not on Power5. + // Therefore, the following sequence is made for the good of both. + __ addi(R3_ARG1, R3_ARG1, -32); + __ addi(R4_ARG2, R4_ARG2, -32); + __ ld(tmp4, 24, R3_ARG1); + __ ld(tmp3, 16, R3_ARG1); + __ ld(tmp2, 8, R3_ARG1); + __ ld(tmp1, 0, R3_ARG1); + __ std(tmp4, 24, R4_ARG2); + __ std(tmp3, 16, R4_ARG2); + __ std(tmp2, 8, R4_ARG2); + __ std(tmp1, 0, R4_ARG2); + __ bdnz(l_4); + + __ cmpwi(CCR0, R5_ARG3, 0); + __ beq(CCR0, l_6); + + __ bind(l_5); + __ mtctr(R5_ARG3); + __ bind(l_3); + __ lwz(R0, -4, R3_ARG1); + __ stw(R0, -4, R4_ARG2); + __ addi(R3_ARG1, R3_ARG1, -4); + __ addi(R4_ARG2, R4_ARG2, -4); + __ bdnz(l_3); + + __ bind(l_6); + } + } + + // Generate stub for conjoint int copy. If "aligned" is true, the + // "from" and "to" addresses are assumed to be heapword aligned. + // + // Arguments for generated stub: + // from: R3_ARG1 + // to: R4_ARG2 + // count: R5_ARG3 treated as signed + // + address generate_conjoint_int_copy(bool aligned, const char * name) { + StubCodeMark mark(this, "StubRoutines", name); + address start = __ emit_fd(); + + address nooverlap_target = aligned ? + ((FunctionDescriptor*)StubRoutines::arrayof_jint_disjoint_arraycopy())->entry() : + ((FunctionDescriptor*)StubRoutines::jint_disjoint_arraycopy())->entry(); + + array_overlap_test(nooverlap_target, 2); + + generate_conjoint_int_copy_core(aligned); + + __ blr(); + + return start; + } + + // Generate core code for disjoint long copy (and oop copy on + // 64-bit). If "aligned" is true, the "from" and "to" addresses + // are assumed to be heapword aligned. 
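Editorial aside (not part of the patch): the conjoint cores above run from the end of the arrays toward the start because, when the destination overlaps the source at a higher address, a forward copy would overwrite elements before they are read. A memmove-style C++ sketch of that backward copy (illustration only):

    #include <cstddef>
    #include <cstdint>

    static void conjoint_int_copy_sketch(const uint32_t* from, uint32_t* to, size_t count) {
      from += count;              // start one past the last element,
      to   += count;              // mirroring the sldi/add prologue in the stub
      while (count-- > 0) {
        *--to = *--from;          // copy the highest element first
      }
    }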
+ // + // Arguments: + // from: R3_ARG1 + // to: R4_ARG2 + // count: R5_ARG3 treated as signed + // + void generate_disjoint_long_copy_core(bool aligned) { + Register tmp1 = R6_ARG4; + Register tmp2 = R7_ARG5; + Register tmp3 = R8_ARG6; + Register tmp4 = R0; + + Label l_1, l_2, l_3, l_4; + + { // FasterArrayCopy + __ cmpwi(CCR0, R5_ARG3, 3); + __ ble(CCR0, l_3); // copy 1 at a time if less than 4 elements remain + + __ srdi(tmp1, R5_ARG3, 2); + __ andi_(R5_ARG3, R5_ARG3, 3); + __ mtctr(tmp1); + + __ bind(l_4); + // Use unrolled version for mass copying (copy 4 elements a time). + // Load feeding store gets zero latency on Power6, however not on Power5. + // Therefore, the following sequence is made for the good of both. + __ ld(tmp1, 0, R3_ARG1); + __ ld(tmp2, 8, R3_ARG1); + __ ld(tmp3, 16, R3_ARG1); + __ ld(tmp4, 24, R3_ARG1); + __ std(tmp1, 0, R4_ARG2); + __ std(tmp2, 8, R4_ARG2); + __ std(tmp3, 16, R4_ARG2); + __ std(tmp4, 24, R4_ARG2); + __ addi(R3_ARG1, R3_ARG1, 32); + __ addi(R4_ARG2, R4_ARG2, 32); + __ bdnz(l_4); + } + + // copy 1 element at a time + __ bind(l_3); + __ cmpwi(CCR0, R5_ARG3, 0); + __ beq(CCR0, l_1); + + { // FasterArrayCopy + __ mtctr(R5_ARG3); + __ addi(R3_ARG1, R3_ARG1, -8); + __ addi(R4_ARG2, R4_ARG2, -8); + + __ bind(l_2); + __ ldu(R0, 8, R3_ARG1); + __ stdu(R0, 8, R4_ARG2); + __ bdnz(l_2); + + } + __ bind(l_1); + } + + // Generate stub for disjoint long copy. If "aligned" is true, the + // "from" and "to" addresses are assumed to be heapword aligned. + // + // Arguments for generated stub: + // from: R3_ARG1 + // to: R4_ARG2 + // count: R5_ARG3 treated as signed + // + address generate_disjoint_long_copy(bool aligned, const char * name) { + StubCodeMark mark(this, "StubRoutines", name); + address start = __ emit_fd(); + generate_disjoint_long_copy_core(aligned); + __ blr(); + + return start; + } + + // Generate core code for conjoint long copy (and oop copy on + // 64-bit). If "aligned" is true, the "from" and "to" addresses + // are assumed to be heapword aligned. + // + // Arguments: + // from: R3_ARG1 + // to: R4_ARG2 + // count: R5_ARG3 treated as signed + // + void generate_conjoint_long_copy_core(bool aligned) { + Register tmp1 = R6_ARG4; + Register tmp2 = R7_ARG5; + Register tmp3 = R8_ARG6; + Register tmp4 = R0; + + Label l_1, l_2, l_3, l_4, l_5; + + __ cmpwi(CCR0, R5_ARG3, 0); + __ beq(CCR0, l_1); + + { // FasterArrayCopy + __ sldi(R5_ARG3, R5_ARG3, 3); + __ add(R3_ARG1, R3_ARG1, R5_ARG3); + __ add(R4_ARG2, R4_ARG2, R5_ARG3); + __ srdi(R5_ARG3, R5_ARG3, 3); + + __ cmpwi(CCR0, R5_ARG3, 3); + __ ble(CCR0, l_5); // copy 1 at a time if less than 4 elements remain + + __ srdi(tmp1, R5_ARG3, 2); + __ andi(R5_ARG3, R5_ARG3, 3); + __ mtctr(tmp1); + + __ bind(l_4); + // Use unrolled version for mass copying (copy 4 elements a time). + // Load feeding store gets zero latency on Power6, however not on Power5. + // Therefore, the following sequence is made for the good of both. 
+ __ addi(R3_ARG1, R3_ARG1, -32); + __ addi(R4_ARG2, R4_ARG2, -32); + __ ld(tmp4, 24, R3_ARG1); + __ ld(tmp3, 16, R3_ARG1); + __ ld(tmp2, 8, R3_ARG1); + __ ld(tmp1, 0, R3_ARG1); + __ std(tmp4, 24, R4_ARG2); + __ std(tmp3, 16, R4_ARG2); + __ std(tmp2, 8, R4_ARG2); + __ std(tmp1, 0, R4_ARG2); + __ bdnz(l_4); + + __ cmpwi(CCR0, R5_ARG3, 0); + __ beq(CCR0, l_1); + + __ bind(l_5); + __ mtctr(R5_ARG3); + __ bind(l_3); + __ ld(R0, -8, R3_ARG1); + __ std(R0, -8, R4_ARG2); + __ addi(R3_ARG1, R3_ARG1, -8); + __ addi(R4_ARG2, R4_ARG2, -8); + __ bdnz(l_3); + + } + __ bind(l_1); + } + + // Generate stub for conjoint long copy. If "aligned" is true, the + // "from" and "to" addresses are assumed to be heapword aligned. + // + // Arguments for generated stub: + // from: R3_ARG1 + // to: R4_ARG2 + // count: R5_ARG3 treated as signed + // + address generate_conjoint_long_copy(bool aligned, const char * name) { + StubCodeMark mark(this, "StubRoutines", name); + address start = __ emit_fd(); + + address nooverlap_target = aligned ? + ((FunctionDescriptor*)StubRoutines::arrayof_jlong_disjoint_arraycopy())->entry() : + ((FunctionDescriptor*)StubRoutines::jlong_disjoint_arraycopy())->entry(); + + array_overlap_test(nooverlap_target, 3); + generate_conjoint_long_copy_core(aligned); + + __ blr(); + + return start; + } + + // Generate stub for conjoint oop copy. If "aligned" is true, the + // "from" and "to" addresses are assumed to be heapword aligned. + // + // Arguments for generated stub: + // from: R3_ARG1 + // to: R4_ARG2 + // count: R5_ARG3 treated as signed + // dest_uninitialized: G1 support + // + address generate_conjoint_oop_copy(bool aligned, const char * name, bool dest_uninitialized) { + StubCodeMark mark(this, "StubRoutines", name); + + address start = __ emit_fd(); + + address nooverlap_target = aligned ? + ((FunctionDescriptor*)StubRoutines::arrayof_oop_disjoint_arraycopy())->entry() : + ((FunctionDescriptor*)StubRoutines::oop_disjoint_arraycopy())->entry(); + + gen_write_ref_array_pre_barrier(R3_ARG1, R4_ARG2, R5_ARG3, dest_uninitialized, R9_ARG7); + + // Save arguments. + __ mr(R9_ARG7, R4_ARG2); + __ mr(R10_ARG8, R5_ARG3); + + if (UseCompressedOops) { + array_overlap_test(nooverlap_target, 2); + generate_conjoint_int_copy_core(aligned); + } else { + array_overlap_test(nooverlap_target, 3); + generate_conjoint_long_copy_core(aligned); + } + + gen_write_ref_array_post_barrier(R9_ARG7, R10_ARG8, R11_scratch1); + + __ blr(); + + return start; + } + + // Generate stub for disjoint oop copy. If "aligned" is true, the + // "from" and "to" addresses are assumed to be heapword aligned. + // + // Arguments for generated stub: + // from: R3_ARG1 + // to: R4_ARG2 + // count: R5_ARG3 treated as signed + // dest_uninitialized: G1 support + // + address generate_disjoint_oop_copy(bool aligned, const char * name, bool dest_uninitialized) { + StubCodeMark mark(this, "StubRoutines", name); + address start = __ emit_fd(); + + gen_write_ref_array_pre_barrier(R3_ARG1, R4_ARG2, R5_ARG3, dest_uninitialized, R9_ARG7); + + // save some arguments, disjoint_long_copy_core destroys them. 
+ // needed for post barrier + __ mr(R9_ARG7, R4_ARG2); + __ mr(R10_ARG8, R5_ARG3); + + if (UseCompressedOops) { + generate_disjoint_int_copy_core(aligned); + } else { + generate_disjoint_long_copy_core(aligned); + } + + gen_write_ref_array_post_barrier(R9_ARG7, R10_ARG8, R11_scratch1); + + __ blr(); + + return start; + } + + void generate_arraycopy_stubs() { + // Note: the disjoint stubs must be generated first, some of + // the conjoint stubs use them. + + // non-aligned disjoint versions + StubRoutines::_jbyte_disjoint_arraycopy = generate_disjoint_byte_copy(false, "jbyte_disjoint_arraycopy"); + StubRoutines::_jshort_disjoint_arraycopy = generate_disjoint_short_copy(false, "jshort_disjoint_arraycopy"); + StubRoutines::_jint_disjoint_arraycopy = generate_disjoint_int_copy(false, "jint_disjoint_arraycopy"); + StubRoutines::_jlong_disjoint_arraycopy = generate_disjoint_long_copy(false, "jlong_disjoint_arraycopy"); + StubRoutines::_oop_disjoint_arraycopy = generate_disjoint_oop_copy(false, "oop_disjoint_arraycopy", false); + StubRoutines::_oop_disjoint_arraycopy_uninit = generate_disjoint_oop_copy(false, "oop_disjoint_arraycopy_uninit", true); + + // aligned disjoint versions + StubRoutines::_arrayof_jbyte_disjoint_arraycopy = generate_disjoint_byte_copy(true, "arrayof_jbyte_disjoint_arraycopy"); + StubRoutines::_arrayof_jshort_disjoint_arraycopy = generate_disjoint_short_copy(true, "arrayof_jshort_disjoint_arraycopy"); + StubRoutines::_arrayof_jint_disjoint_arraycopy = generate_disjoint_int_copy(true, "arrayof_jint_disjoint_arraycopy"); + StubRoutines::_arrayof_jlong_disjoint_arraycopy = generate_disjoint_long_copy(true, "arrayof_jlong_disjoint_arraycopy"); + StubRoutines::_arrayof_oop_disjoint_arraycopy = generate_disjoint_oop_copy(true, "arrayof_oop_disjoint_arraycopy", false); + StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit = generate_disjoint_oop_copy(true, "oop_disjoint_arraycopy_uninit", true); + + // non-aligned conjoint versions + StubRoutines::_jbyte_arraycopy = generate_conjoint_byte_copy(false, "jbyte_arraycopy"); + StubRoutines::_jshort_arraycopy = generate_conjoint_short_copy(false, "jshort_arraycopy"); + StubRoutines::_jint_arraycopy = generate_conjoint_int_copy(false, "jint_arraycopy"); + StubRoutines::_jlong_arraycopy = generate_conjoint_long_copy(false, "jlong_arraycopy"); + StubRoutines::_oop_arraycopy = generate_conjoint_oop_copy(false, "oop_arraycopy", false); + StubRoutines::_oop_arraycopy_uninit = generate_conjoint_oop_copy(false, "oop_arraycopy_uninit", true); + + // aligned conjoint versions + StubRoutines::_arrayof_jbyte_arraycopy = generate_conjoint_byte_copy(true, "arrayof_jbyte_arraycopy"); + StubRoutines::_arrayof_jshort_arraycopy = generate_conjoint_short_copy(true, "arrayof_jshort_arraycopy"); + StubRoutines::_arrayof_jint_arraycopy = generate_conjoint_int_copy(true, "arrayof_jint_arraycopy"); + StubRoutines::_arrayof_jlong_arraycopy = generate_conjoint_long_copy(true, "arrayof_jlong_arraycopy"); + StubRoutines::_arrayof_oop_arraycopy = generate_conjoint_oop_copy(true, "arrayof_oop_arraycopy", false); + StubRoutines::_arrayof_oop_arraycopy_uninit = generate_conjoint_oop_copy(true, "arrayof_oop_arraycopy", true); + + // fill routines + StubRoutines::_jbyte_fill = generate_fill(T_BYTE, false, "jbyte_fill"); + StubRoutines::_jshort_fill = generate_fill(T_SHORT, false, "jshort_fill"); + StubRoutines::_jint_fill = generate_fill(T_INT, false, "jint_fill"); + StubRoutines::_arrayof_jbyte_fill = generate_fill(T_BYTE, true, "arrayof_jbyte_fill"); + 
StubRoutines::_arrayof_jshort_fill = generate_fill(T_SHORT, true, "arrayof_jshort_fill"); + StubRoutines::_arrayof_jint_fill = generate_fill(T_INT, true, "arrayof_jint_fill"); + } + + // Safefetch stubs. + void generate_safefetch(const char* name, int size, address* entry, address* fault_pc, address* continuation_pc) { + // safefetch signatures: + // int SafeFetch32(int* adr, int errValue); + // intptr_t SafeFetchN (intptr_t* adr, intptr_t errValue); + // + // arguments: + // R3_ARG1 = adr + // R4_ARG2 = errValue + // + // result: + // R3_RET = *adr or errValue + + StubCodeMark mark(this, "StubRoutines", name); + + // Entry point, pc or function descriptor. + *entry = __ emit_fd(); + + // Load *adr into R4_ARG2, may fault. + *fault_pc = __ pc(); + switch (size) { + case 4: + // int32_t, signed extended + __ lwa(R4_ARG2, 0, R3_ARG1); + break; + case 8: + // int64_t + __ ld(R4_ARG2, 0, R3_ARG1); + break; + default: + ShouldNotReachHere(); + } + + // return errValue or *adr + *continuation_pc = __ pc(); + __ mr(R3_RET, R4_ARG2); + __ blr(); + } + + // Initialization + void generate_initial() { + // Generates all stubs and initializes the entry points + + // Entry points that exist in all platforms. + // Note: This is code that could be shared among different platforms - however the + // benefit seems to be smaller than the disadvantage of having a + // much more complicated generator structure. See also comment in + // stubRoutines.hpp. + + StubRoutines::_forward_exception_entry = generate_forward_exception(); + StubRoutines::_call_stub_entry = generate_call_stub(StubRoutines::_call_stub_return_address); + StubRoutines::_catch_exception_entry = generate_catch_exception(); + } + + void generate_all() { + // Generates all stubs and initializes the entry points + + // These entry points require SharedInfo::stack0 to be set up in + // non-core builds + StubRoutines::_throw_AbstractMethodError_entry = generate_throw_exception("AbstractMethodError throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_AbstractMethodError), false); + // Handle IncompatibleClassChangeError in itable stubs. + StubRoutines::_throw_IncompatibleClassChangeError_entry= generate_throw_exception("IncompatibleClassChangeError throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_IncompatibleClassChangeError), false); + StubRoutines::_throw_NullPointerException_at_call_entry= generate_throw_exception("NullPointerException at call throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_NullPointerException_at_call), false); + StubRoutines::_throw_StackOverflowError_entry = generate_throw_exception("StackOverflowError throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_StackOverflowError), false); + + StubRoutines::_handler_for_unsafe_access_entry = generate_handler_for_unsafe_access(); + + // support for verify_oop (must happen after universe_init) + StubRoutines::_verify_oop_subroutine_entry = generate_verify_oop(); + + // arraycopy stubs used by compilers + generate_arraycopy_stubs(); + + // PPC uses stubs for safefetch. 
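Editorial aside (not part of the patch): the SafeFetch stubs registered just below are called like ordinary functions; if the load faults, the signal handler resumes at the recorded continuation pc and the stub returns the error value instead. The signatures are taken from the comment in generate_safefetch(); the helper and its default value are illustrative:

    #include <cstdint>

    extern "C" int      SafeFetch32(int*      adr, int      errValue);
    extern "C" intptr_t SafeFetchN (intptr_t* adr, intptr_t errValue);

    static int read_int_or_default(int* p, int dflt) {
      // Returns *p if the address is readable; otherwise the signal handler
      // makes the stub return 'dflt'.
      return SafeFetch32(p, dflt);
    }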
+ generate_safefetch("SafeFetch32", sizeof(int), &StubRoutines::_safefetch32_entry, + &StubRoutines::_safefetch32_fault_pc, + &StubRoutines::_safefetch32_continuation_pc); + generate_safefetch("SafeFetchN", sizeof(intptr_t), &StubRoutines::_safefetchN_entry, + &StubRoutines::_safefetchN_fault_pc, + &StubRoutines::_safefetchN_continuation_pc); + } + + public: + StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) { + // replace the standard masm with a special one: + _masm = new MacroAssembler(code); + if (all) { + generate_all(); + } else { + generate_initial(); + } + } +}; + +void StubGenerator_generate(CodeBuffer* code, bool all) { + StubGenerator g(code, all); +} diff --git a/src/cpu/ppc/vm/stubRoutines_ppc_64.cpp b/src/cpu/ppc/vm/stubRoutines_ppc_64.cpp new file mode 100644 --- /dev/null +++ b/src/cpu/ppc/vm/stubRoutines_ppc_64.cpp @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright 2012, 2013 SAP AG. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "runtime/deoptimization.hpp" +#include "runtime/frame.inline.hpp" +#include "runtime/stubRoutines.hpp" +#ifdef TARGET_OS_FAMILY_aix +# include "thread_aix.inline.hpp" +#endif +#ifdef TARGET_OS_FAMILY_linux +# include "thread_linux.inline.hpp" +#endif + +// Implementation of the platform-specific part of StubRoutines - for +// a description of how to extend it, see the stubRoutines.hpp file. + + diff --git a/src/cpu/ppc/vm/stubRoutines_ppc_64.hpp b/src/cpu/ppc/vm/stubRoutines_ppc_64.hpp new file mode 100644 --- /dev/null +++ b/src/cpu/ppc/vm/stubRoutines_ppc_64.hpp @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright 2012, 2013 SAP AG. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_PPC_VM_STUBROUTINES_PPC_64_HPP +#define CPU_PPC_VM_STUBROUTINES_PPC_64_HPP + +// This file holds the platform specific parts of the StubRoutines +// definition. See stubRoutines.hpp for a description on how to +// extend it. + +static bool returns_to_call_stub(address return_pc) { return return_pc == _call_stub_return_address; } + +enum platform_dependent_constants { + code_size1 = 20000, // simply increase if too small (assembler will crash if too small) + code_size2 = 20000 // simply increase if too small (assembler will crash if too small) +}; + +#endif // CPU_PPC_VM_STUBROUTINES_PPC_64_HPP diff --git a/src/cpu/ppc/vm/vmStructs_ppc.hpp b/src/cpu/ppc/vm/vmStructs_ppc.hpp new file mode 100644 --- /dev/null +++ b/src/cpu/ppc/vm/vmStructs_ppc.hpp @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2001, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright 2012, 2013 SAP AG. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_PPC_VM_VMSTRUCTS_PPC_HPP +#define CPU_PPC_VM_VMSTRUCTS_PPC_HPP + +// These are the CPU-specific fields, types and integer +// constants required by the Serviceability Agent. This file is +// referenced by vmStructs.cpp. 
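Editorial note (not part of the patch): the four VM_*_CPU macros that follow are intentionally empty because this port exposes no CPU-specific fields, types, or constants to the Serviceability Agent yet. If an entry were ever needed, it would look roughly like the following (illustrative only; the exact declaration arguments follow vmStructs.cpp):

    // #define VM_STRUCTS_CPU(nonstatic_field, static_field, ...) \
    //   static_field(VM_Version, _features, int)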
+ +#define VM_STRUCTS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) + +#define VM_TYPES_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type) + +#define VM_INT_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) + +#define VM_LONG_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) + +#endif // CPU_PPC_VM_VMSTRUCTS_PPC_HPP diff --git a/src/cpu/ppc/vm/vm_version_ppc.cpp b/src/cpu/ppc/vm/vm_version_ppc.cpp new file mode 100644 --- /dev/null +++ b/src/cpu/ppc/vm/vm_version_ppc.cpp @@ -0,0 +1,472 @@ +/* + * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright 2012, 2013 SAP AG. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "assembler_ppc.inline.hpp" +#include "compiler/disassembler.hpp" +#include "memory/resourceArea.hpp" +#include "runtime/java.hpp" +#include "runtime/stubCodeGenerator.hpp" +#include "utilities/defaultStream.hpp" +#include "vm_version_ppc.hpp" +#ifdef TARGET_OS_FAMILY_aix +# include "os_aix.inline.hpp" +#endif +#ifdef TARGET_OS_FAMILY_linux +# include "os_linux.inline.hpp" +#endif + +# include + +int VM_Version::_features = VM_Version::unknown_m; +int VM_Version::_measured_cache_line_size = 128; // default value +const char* VM_Version::_features_str = ""; +bool VM_Version::_is_determine_features_test_running = false; + + +#define MSG(flag) \ + if (flag && !FLAG_IS_DEFAULT(flag)) \ + jio_fprintf(defaultStream::error_stream(), \ + "warning: -XX:+" #flag " requires -XX:+UseSIGTRAP\n" \ + " -XX:+" #flag " will be disabled!\n"); + +void VM_Version::initialize() { + + // Test which instructions are supported and measure cache line size. + determine_features(); + + // If PowerArchitecturePPC64 hasn't been specified explicitly determine from features. 
+ if (FLAG_IS_DEFAULT(PowerArchitecturePPC64)) { + if (VM_Version::has_popcntw()) { + FLAG_SET_ERGO(uintx, PowerArchitecturePPC64, 7); + } else if (VM_Version::has_cmpb()) { + FLAG_SET_ERGO(uintx, PowerArchitecturePPC64, 6); + } else if (VM_Version::has_popcntb()) { + FLAG_SET_ERGO(uintx, PowerArchitecturePPC64, 5); + } else { + FLAG_SET_ERGO(uintx, PowerArchitecturePPC64, 0); + } + } + guarantee(PowerArchitecturePPC64 == 0 || PowerArchitecturePPC64 == 5 || + PowerArchitecturePPC64 == 6 || PowerArchitecturePPC64 == 7, + "PowerArchitecturePPC64 should be 0, 5, 6 or 7"); + + if (!UseSIGTRAP) { + MSG(TrapBasedICMissChecks); + MSG(TrapBasedNotEntrantChecks); + MSG(TrapBasedNullChecks); + MSG(TrapBasedRangeChecks); + FLAG_SET_ERGO(bool, TrapBasedNotEntrantChecks, false); + FLAG_SET_ERGO(bool, TrapBasedNullChecks, false); + FLAG_SET_ERGO(bool, TrapBasedICMissChecks, false); + FLAG_SET_ERGO(bool, TrapBasedRangeChecks, false); + } + +#ifdef COMPILER2 + // On Power6 test for section size. + if (PowerArchitecturePPC64 == 6) + determine_section_size(); + // TODO: PPC port else + // TODO: PPC port PdScheduling::power6SectorSize = 0x20; + + MaxVectorSize = 8; +#endif + + // Create and print feature-string. + char buf[(num_features+1) * 16]; // max 16 chars per feature + jio_snprintf(buf, sizeof(buf), + "ppc64%s%s%s%s%s%s%s%s", + (has_fsqrt() ? " fsqrt" : ""), + (has_isel() ? " isel" : ""), + (has_lxarxeh() ? " lxarxeh" : ""), + (has_cmpb() ? " cmpb" : ""), + //(has_mftgpr()? " mftgpr" : ""), + (has_popcntb() ? " popcntb" : ""), + (has_popcntw() ? " popcntw" : ""), + (has_fcfids() ? " fcfids" : ""), + (has_vand() ? " vand" : "") + // Make sure number of %s matches num_features! + ); + _features_str = strdup(buf); + NOT_PRODUCT(if (Verbose) print_features();); + + // PPC64 supports 8-byte compare-exchange operations (see + // Atomic::cmpxchg and StubGenerator::generate_atomic_cmpxchg_ptr) + // and 'atomic long memory ops' (see Unsafe_GetLongVolatile). + _supports_cx8 = true; + + UseSSE = 0; // Only on x86 and x64 + + intx cache_line_size = _measured_cache_line_size; + + if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) AllocatePrefetchStyle = 1; + + if (AllocatePrefetchStyle == 4) { + AllocatePrefetchStepSize = cache_line_size; // need exact value + if (FLAG_IS_DEFAULT(AllocatePrefetchLines)) AllocatePrefetchLines = 12; // use larger blocks by default + if (AllocatePrefetchDistance < 0) AllocatePrefetchDistance = 2*cache_line_size; // default is not defined ? + } else { + if (cache_line_size > AllocatePrefetchStepSize) AllocatePrefetchStepSize = cache_line_size; + if (FLAG_IS_DEFAULT(AllocatePrefetchLines)) AllocatePrefetchLines = 3; // Optimistic value + if (AllocatePrefetchDistance < 0) AllocatePrefetchDistance = 3*cache_line_size; // default is not defined ? + } + + assert(AllocatePrefetchLines > 0, "invalid value"); + if (AllocatePrefetchLines < 1) // Set valid value in product VM. + AllocatePrefetchLines = 1; // Conservative value + + if (AllocatePrefetchStyle == 3 && AllocatePrefetchDistance < cache_line_size) + AllocatePrefetchStyle = 1; // fall back if inappropriate + + assert(AllocatePrefetchStyle >= 0, "AllocatePrefetchStyle should be positive"); +} + +void VM_Version::print_features() { + tty->print_cr("Version: %s cache_line_size = %d", cpu_features(), get_cache_line_size()); +} + +#ifdef COMPILER2 +// Determine section size on power6: If section size is 8 instructions, +// there should be a difference between the two testloops of ~15 %. 
If +// no difference is detected the section is assumed to be 32 instructions. +void VM_Version::determine_section_size() { + + int unroll = 80; + + const int code_size = (2* unroll * 32 + 100)*BytesPerInstWord; + + // Allocate space for the code + ResourceMark rm; + CodeBuffer cb("detect_section_size", code_size, 0); + MacroAssembler* a = new MacroAssembler(&cb); + + uint32_t *code = (uint32_t *)a->pc(); + // emit code. + void (*test1)() = (void(*)())(void *)a->emit_fd(); + + Label l1; + + a->li(R4, 1); + a->sldi(R4, R4, 28); + a->b(l1); + a->align(CodeEntryAlignment); + + a->bind(l1); + + for (int i = 0; i < unroll; i++) { + // Schleife 1 + // ------- sector 0 ------------ + // ;; 0 + a->nop(); // 1 + a->fpnop0(); // 2 + a->fpnop1(); // 3 + a->addi(R4,R4, -1); // 4 + + // ;; 1 + a->nop(); // 5 + a->fmr(F6, F6); // 6 + a->fmr(F7, F7); // 7 + a->endgroup(); // 8 + // ------- sector 8 ------------ + + // ;; 2 + a->nop(); // 9 + a->nop(); // 10 + a->fmr(F8, F8); // 11 + a->fmr(F9, F9); // 12 + + // ;; 3 + a->nop(); // 13 + a->fmr(F10, F10); // 14 + a->fmr(F11, F11); // 15 + a->endgroup(); // 16 + // -------- sector 16 ------------- + + // ;; 4 + a->nop(); // 17 + a->nop(); // 18 + a->fmr(F15, F15); // 19 + a->fmr(F16, F16); // 20 + + // ;; 5 + a->nop(); // 21 + a->fmr(F17, F17); // 22 + a->fmr(F18, F18); // 23 + a->endgroup(); // 24 + // ------- sector 24 ------------ + + // ;; 6 + a->nop(); // 25 + a->nop(); // 26 + a->fmr(F19, F19); // 27 + a->fmr(F20, F20); // 28 + + // ;; 7 + a->nop(); // 29 + a->fmr(F21, F21); // 30 + a->fmr(F22, F22); // 31 + a->brnop0(); // 32 + + // ------- sector 32 ------------ + } + + // ;; 8 + a->cmpdi(CCR0, R4, unroll);// 33 + a->bge(CCR0, l1); // 34 + a->blr(); + + // emit code. + void (*test2)() = (void(*)())(void *)a->emit_fd(); + // uint32_t *code = (uint32_t *)a->pc(); + + Label l2; + + a->li(R4, 1); + a->sldi(R4, R4, 28); + a->b(l2); + a->align(CodeEntryAlignment); + + a->bind(l2); + + for (int i = 0; i < unroll; i++) { + // Schleife 2 + // ------- sector 0 ------------ + // ;; 0 + a->brnop0(); // 1 + a->nop(); // 2 + //a->cmpdi(CCR0, R4, unroll); + a->fpnop0(); // 3 + a->fpnop1(); // 4 + a->addi(R4,R4, -1); // 5 + + // ;; 1 + + a->nop(); // 6 + a->fmr(F6, F6); // 7 + a->fmr(F7, F7); // 8 + // ------- sector 8 --------------- + + // ;; 2 + a->endgroup(); // 9 + + // ;; 3 + a->nop(); // 10 + a->nop(); // 11 + a->fmr(F8, F8); // 12 + + // ;; 4 + a->fmr(F9, F9); // 13 + a->nop(); // 14 + a->fmr(F10, F10); // 15 + + // ;; 5 + a->fmr(F11, F11); // 16 + // -------- sector 16 ------------- + + // ;; 6 + a->endgroup(); // 17 + + // ;; 7 + a->nop(); // 18 + a->nop(); // 19 + a->fmr(F15, F15); // 20 + + // ;; 8 + a->fmr(F16, F16); // 21 + a->nop(); // 22 + a->fmr(F17, F17); // 23 + + // ;; 9 + a->fmr(F18, F18); // 24 + // -------- sector 24 ------------- + + // ;; 10 + a->endgroup(); // 25 + + // ;; 11 + a->nop(); // 26 + a->nop(); // 27 + a->fmr(F19, F19); // 28 + + // ;; 12 + a->fmr(F20, F20); // 29 + a->nop(); // 30 + a->fmr(F21, F21); // 31 + + // ;; 13 + a->fmr(F22, F22); // 32 + } + + // -------- sector 32 ------------- + // ;; 14 + a->cmpdi(CCR0, R4, unroll); // 33 + a->bge(CCR0, l2); // 34 + + a->blr(); + uint32_t *code_end = (uint32_t *)a->pc(); + a->flush(); + + double loop1_seconds,loop2_seconds, rel_diff; + uint64_t start1, stop1; + + start1 = os::current_thread_cpu_time(false); + (*test1)(); + stop1 = os::current_thread_cpu_time(false); + loop1_seconds = (stop1- start1) / (1000 *1000 *1000.0); + + + start1 = os::current_thread_cpu_time(false); + 
(*test2)(); + stop1 = os::current_thread_cpu_time(false); + + loop2_seconds = (stop1 - start1) / (1000 *1000 *1000.0); + + rel_diff = (loop2_seconds - loop1_seconds) / loop1_seconds *100; + + if (PrintAssembly) { + ttyLocker ttyl; + tty->print_cr("Decoding section size detection stub at " INTPTR_FORMAT " before execution:", code); + Disassembler::decode((u_char*)code, (u_char*)code_end, tty); + tty->print_cr("Time loop1 :%f", loop1_seconds); + tty->print_cr("Time loop2 :%f", loop2_seconds); + tty->print_cr("(time2 - time1) / time1 = %f %%", rel_diff); + + if (rel_diff > 12.0) { + tty->print_cr("Section Size 8 Instructions"); + } else{ + tty->print_cr("Section Size 32 Instructions or Power5"); + } + } + +#if 0 // TODO: PPC port + // Set sector size (if not set explicitly). + if (FLAG_IS_DEFAULT(Power6SectorSize128PPC64)) { + if (rel_diff > 12.0) { + PdScheduling::power6SectorSize = 0x20; + } else { + PdScheduling::power6SectorSize = 0x80; + } + } else if (Power6SectorSize128PPC64) { + PdScheduling::power6SectorSize = 0x80; + } else { + PdScheduling::power6SectorSize = 0x20; + } +#endif + if (UsePower6SchedulerPPC64) Unimplemented(); +} +#endif // COMPILER2 + +void VM_Version::determine_features() { + const int code_size = (num_features+1+2*7)*BytesPerInstWord; // 7 InstWords for each call (function descriptor + blr instruction) + int features = 0; + + // create test area + enum { BUFFER_SIZE = 2*4*K }; // needs to be >=2* max cache line size (cache line size can't exceed min page size) + char test_area[BUFFER_SIZE]; + char *mid_of_test_area = &test_area[BUFFER_SIZE>>1]; + + // Allocate space for the code + ResourceMark rm; + CodeBuffer cb("detect_cpu_features", code_size, 0); + MacroAssembler* a = new MacroAssembler(&cb); + + // emit code. + void (*test)(address addr, uint64_t offset)=(void(*)(address addr, uint64_t offset))(void *)a->emit_fd(); + uint32_t *code = (uint32_t *)a->pc(); + // Don't use R0 in ldarx. + // keep R3_ARG1 = R3 unmodified, it contains &field (see below) + // keep R4_ARG2 = R4 unmodified, it contains offset = 0 (see below) + a->fsqrt(F3, F4); // code[0] -> fsqrt_m + a->isel(R7, R5, R6, 0); // code[1] -> isel_m + a->ldarx_unchecked(R7, R3_ARG1, R4_ARG2, 1);// code[2] -> lxarx_m + a->cmpb(R7, R5, R6); // code[3] -> bcmp + //a->mftgpr(R7, F3); // code[4] -> mftgpr + a->popcntb(R7, R5); // code[5] -> popcntb + a->popcntw(R7, R5); // code[6] -> popcntw + a->fcfids(F3, F4); // code[7] -> fcfids + a->vand(VR0, VR0, VR0); // code[8] -> vand + a->blr(); + + // Emit function to set one cache line to zero + void (*zero_cacheline_func_ptr)(char*) = (void(*)(char*))(void *)a->emit_fd(); // emit function descriptor and get pointer to it + a->dcbz(R3_ARG1); // R3_ARG1 = R3 = addr + a->blr(); + + uint32_t *code_end = (uint32_t *)a->pc(); + a->flush(); + + // Print the detection code. + if (PrintAssembly) { + ttyLocker ttyl; + tty->print_cr("Decoding cpu-feature detection stub at " INTPTR_FORMAT " before execution:", code); + Disassembler::decode((u_char*)code, (u_char*)code_end, tty); + } + + // Measure cache line size. + memset(test_area, 0xFF, BUFFER_SIZE); // fill test area with 0xFF + (*zero_cacheline_func_ptr)(mid_of_test_area); // call function which executes dcbz to the middle + int count = 0; // count zeroed bytes + for (int i = 0; i < BUFFER_SIZE; i++) if (test_area[i] == 0) count++; + guarantee(is_power_of_2(count), "cache line size needs to be a power of 2"); + _measured_cache_line_size = count; + + // Execute code. 
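Editorial aside (not part of the patch): the cache-line measurement just above can be read as the following plain C++, where 'zero_one_cache_line' stands in for the generated dcbz helper (illustration only):

    #include <cstddef>
    #include <cstring>

    static int measure_cache_line_size(void (*zero_one_cache_line)(char*)) {
      enum { kBufferSize = 2 * 4 * 1024 };             // >= 2x the largest expected line size
      static char buffer[kBufferSize];
      std::memset(buffer, 0xFF, sizeof(buffer));       // non-zero fill pattern
      zero_one_cache_line(&buffer[kBufferSize / 2]);   // dcbz zeroes exactly one cache line
      int zeroed = 0;
      for (size_t i = 0; i < sizeof(buffer); i++) {
        if (buffer[i] == 0) zeroed++;                  // count bytes the dcbz cleared
      }
      return zeroed;                                   // == cache line size (a power of two)
    }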
Illegal instructions will be replaced by 0 in the signal handler. + VM_Version::_is_determine_features_test_running = true; + (*test)((address)mid_of_test_area, (uint64_t)0); + VM_Version::_is_determine_features_test_running = false; + + // determine which instructions are legal. + int feature_cntr = 0; + if (code[feature_cntr++]) features |= fsqrt_m; + if (code[feature_cntr++]) features |= isel_m; + if (code[feature_cntr++]) features |= lxarxeh_m; + if (code[feature_cntr++]) features |= cmpb_m; + //if(code[feature_cntr++])features |= mftgpr_m; + if (code[feature_cntr++]) features |= popcntb_m; + if (code[feature_cntr++]) features |= popcntw_m; + if (code[feature_cntr++]) features |= fcfids_m; + if (code[feature_cntr++]) features |= vand_m; + + // Print the detection code. + if (PrintAssembly) { + ttyLocker ttyl; + tty->print_cr("Decoding cpu-feature detection stub at " INTPTR_FORMAT " after execution:", code); + Disassembler::decode((u_char*)code, (u_char*)code_end, tty); + } + + _features = features; +} + + +static int saved_features = 0; + +void VM_Version::allow_all() { + saved_features = _features; + _features = all_features_m; +} + +void VM_Version::revert() { + _features = saved_features; +} diff --git a/src/cpu/ppc/vm/vm_version_ppc.hpp b/src/cpu/ppc/vm/vm_version_ppc.hpp new file mode 100644 --- /dev/null +++ b/src/cpu/ppc/vm/vm_version_ppc.hpp @@ -0,0 +1,93 @@ +/* + * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright 2012, 2013 SAP AG. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef CPU_PPC_VM_VM_VERSION_PPC_HPP +#define CPU_PPC_VM_VM_VERSION_PPC_HPP + +#include "runtime/globals_extension.hpp" +#include "runtime/vm_version.hpp" + +class VM_Version: public Abstract_VM_Version { +protected: + enum Feature_Flag { + fsqrt, + isel, + lxarxeh, + cmpb, + popcntb, + popcntw, + fcfids, + vand, + dcba, + num_features // last entry to count features + }; + enum Feature_Flag_Set { + unknown_m = 0, + fsqrt_m = (1 << fsqrt ), + isel_m = (1 << isel ), + lxarxeh_m = (1 << lxarxeh), + cmpb_m = (1 << cmpb ), + popcntb_m = (1 << popcntb), + popcntw_m = (1 << popcntw), + fcfids_m = (1 << fcfids ), + vand_m = (1 << vand ), + dcba_m = (1 << dcba ), + all_features_m = -1 + }; + static int _features; + static int _measured_cache_line_size; + static const char* _features_str; + static bool _is_determine_features_test_running; + + static void print_features(); + static void determine_features(); // also measures cache line size + static void determine_section_size(); + static void power6_micro_bench(); +public: + // Initialization + static void initialize(); + + static bool is_determine_features_test_running() { return _is_determine_features_test_running; } + // CPU instruction support + static bool has_fsqrt() { return (_features & fsqrt_m) != 0; } + static bool has_isel() { return (_features & isel_m) != 0; } + static bool has_lxarxeh() { return (_features & lxarxeh_m) !=0; } + static bool has_cmpb() { return (_features & cmpb_m) != 0; } + static bool has_popcntb() { return (_features & popcntb_m) != 0; } + static bool has_popcntw() { return (_features & popcntw_m) != 0; } + static bool has_fcfids() { return (_features & fcfids_m) != 0; } + static bool has_vand() { return (_features & vand_m) != 0; } + static bool has_dcba() { return (_features & dcba_m) != 0; } + + static const char* cpu_features() { return _features_str; } + + static int get_cache_line_size() { return _measured_cache_line_size; } + + // Assembler testing + static void allow_all(); + static void revert(); +}; + +#endif // CPU_PPC_VM_VM_VERSION_PPC_HPP diff --git a/src/cpu/ppc/vm/vmreg_ppc.cpp b/src/cpu/ppc/vm/vmreg_ppc.cpp new file mode 100644 --- /dev/null +++ b/src/cpu/ppc/vm/vmreg_ppc.cpp @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright 2012, 2013 SAP AG. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/assembler.hpp"
+#include "code/vmreg.hpp"
+
+void VMRegImpl::set_regName() {
+  Register reg = ::as_Register(0);
+  int i;
+  for (i = 0; i < ConcreteRegisterImpl::max_gpr; ) {
+    regName[i++] = reg->name();
+    regName[i++] = reg->name();
+    if (reg->encoding() < RegisterImpl::number_of_registers-1)
+      reg = reg->successor();
+  }
+
+  FloatRegister freg = ::as_FloatRegister(0);
+  for ( ; i < ConcreteRegisterImpl::max_fpr; ) {
+    regName[i++] = freg->name();
+    regName[i++] = freg->name();
+    if (freg->encoding() < FloatRegisterImpl::number_of_registers-1)
+      freg = freg->successor();
+  }
+  for ( ; i < ConcreteRegisterImpl::number_of_registers; i++) {
+    regName[i] = "NON-GPR-FPR";
+  }
+}
+
diff --git a/src/cpu/ppc/vm/vmreg_ppc.hpp b/src/cpu/ppc/vm/vmreg_ppc.hpp
new file mode 100644
--- /dev/null
+++ b/src/cpu/ppc/vm/vmreg_ppc.hpp
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2001, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012, 2013 SAP AG. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_PPC_VM_VMREG_PPC_HPP
+#define CPU_PPC_VM_VMREG_PPC_HPP
+
+  bool is_Register();
+  Register as_Register();
+
+  bool is_FloatRegister();
+  FloatRegister as_FloatRegister();
+
+#endif // CPU_PPC_VM_VMREG_PPC_HPP
diff --git a/src/cpu/ppc/vm/vmreg_ppc.inline.hpp b/src/cpu/ppc/vm/vmreg_ppc.inline.hpp
new file mode 100644
--- /dev/null
+++ b/src/cpu/ppc/vm/vmreg_ppc.inline.hpp
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012, 2013 SAP AG. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_PPC_VM_VMREG_PPC_INLINE_HPP +#define CPU_PPC_VM_VMREG_PPC_INLINE_HPP + +inline VMReg RegisterImpl::as_VMReg() { + if (this == noreg) return VMRegImpl::Bad(); + return VMRegImpl::as_VMReg(encoding() << 1); +} + +// Since we don't have two halfs here, don't multiply by 2. +inline VMReg ConditionRegisterImpl::as_VMReg() { + return VMRegImpl::as_VMReg((encoding()) + ConcreteRegisterImpl::max_fpr); +} + +inline VMReg FloatRegisterImpl::as_VMReg() { + return VMRegImpl::as_VMReg((encoding() << 1) + ConcreteRegisterImpl::max_gpr); +} + +inline VMReg SpecialRegisterImpl::as_VMReg() { + return VMRegImpl::as_VMReg((encoding()) + ConcreteRegisterImpl::max_cnd); +} + +inline bool VMRegImpl::is_Register() { + return (unsigned int)value() < (unsigned int)ConcreteRegisterImpl::max_gpr; +} + +inline bool VMRegImpl::is_FloatRegister() { + return value() >= ConcreteRegisterImpl::max_gpr && + value() < ConcreteRegisterImpl::max_fpr; +} + +inline Register VMRegImpl::as_Register() { + assert(is_Register() && is_even(value()), "even-aligned GPR name"); + return ::as_Register(value()>>1); +} + +inline FloatRegister VMRegImpl::as_FloatRegister() { + assert(is_FloatRegister() && is_even(value()), "must be"); + return ::as_FloatRegister((value() - ConcreteRegisterImpl::max_gpr) >> 1); +} + +inline bool VMRegImpl::is_concrete() { + assert(is_reg(), "must be"); + return is_even(value()); +} + +#endif // CPU_PPC_VM_VMREG_PPC_INLINE_HPP diff --git a/src/cpu/ppc/vm/vtableStubs_ppc_64.cpp b/src/cpu/ppc/vm/vtableStubs_ppc_64.cpp new file mode 100644 --- /dev/null +++ b/src/cpu/ppc/vm/vtableStubs_ppc_64.cpp @@ -0,0 +1,269 @@ +/* + * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright 2012, 2013 SAP AG. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "asm/assembler.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "code/vtableStubs.hpp" +#include "interp_masm_ppc_64.hpp" +#include "memory/resourceArea.hpp" +#include "oops/instanceKlass.hpp" +#include "oops/klassVtable.hpp" +#include "runtime/sharedRuntime.hpp" +#include "vmreg_ppc.inline.hpp" +#ifdef COMPILER2 +#include "opto/runtime.hpp" +#endif + +#define __ masm-> + +#ifdef PRODUCT +#define BLOCK_COMMENT(str) // nothing +#else +#define BLOCK_COMMENT(str) __ block_comment(str) +#endif +#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") + +#ifndef PRODUCT +extern "C" void bad_compiled_vtable_index(JavaThread* thread, oopDesc* receiver, int index); +#endif + +// Used by compiler only; may use only caller saved, non-argument +// registers. +VtableStub* VtableStubs::create_vtable_stub(int vtable_index) { + // PPC port: use fixed size. + const int code_length = VtableStub::pd_code_size_limit(true); + VtableStub* s = new (code_length) VtableStub(true, vtable_index); + ResourceMark rm; + CodeBuffer cb(s->entry_point(), code_length); + MacroAssembler* masm = new MacroAssembler(&cb); + address start_pc; + +#ifndef PRODUCT + if (CountCompiledCalls) { + __ load_const(R11_scratch1, SharedRuntime::nof_megamorphic_calls_addr()); + __ lwz(R12_scratch2, 0, R11_scratch1); + __ addi(R12_scratch2, R12_scratch2, 1); + __ stw(R12_scratch2, 0, R11_scratch1); + } +#endif + + assert(VtableStub::receiver_location() == R3_ARG1->as_VMReg(), "receiver expected in R3_ARG1"); + + // Get receiver klass. + const Register rcvr_klass = R11_scratch1; + + // We might implicit NULL fault here. + address npe_addr = __ pc(); // npe = null pointer exception + __ load_klass_with_trap_null_check(rcvr_klass, R3); + + // Set methodOop (in case of interpreted method), and destination address. + int entry_offset = InstanceKlass::vtable_start_offset() + vtable_index*vtableEntry::size(); + +#ifndef PRODUCT + if (DebugVtables) { + Label L; + // Check offset vs vtable length. + const Register vtable_len = R12_scratch2; + __ lwz(vtable_len, InstanceKlass::vtable_length_offset()*wordSize, rcvr_klass); + __ cmpwi(CCR0, vtable_len, vtable_index*vtableEntry::size()); + __ bge(CCR0, L); + __ li(R12_scratch2, vtable_index); + __ call_VM(noreg, CAST_FROM_FN_PTR(address, bad_compiled_vtable_index), R3_ARG1, R12_scratch2, false); + __ bind(L); + } +#endif + + int v_off = entry_offset*wordSize + vtableEntry::method_offset_in_bytes(); + + __ ld(R19_method, v_off, rcvr_klass); + +#ifndef PRODUCT + if (DebugVtables) { + Label L; + __ cmpdi(CCR0, R19_method, 0); + __ bne(CCR0, L); + __ stop("Vtable entry is ZERO", 102); + __ bind(L); + } +#endif + + // If the vtable entry is null, the method is abstract. + address ame_addr = __ pc(); // ame = abstract method error + + __ ld_with_trap_null_check(R12_scratch2, in_bytes(Method::from_compiled_offset()), R19_method); + __ mtctr(R12_scratch2); + __ bctr(); + masm->flush(); + + guarantee(__ pc() <= s->code_end(), "overflowed buffer"); + + s->set_exception_points(npe_addr, ame_addr); + + return s; +} + +VtableStub* VtableStubs::create_itable_stub(int vtable_index) { + // PPC port: use fixed size. 
+ const int code_length = VtableStub::pd_code_size_limit(false); + VtableStub* s = new (code_length) VtableStub(false, vtable_index); + ResourceMark rm; + CodeBuffer cb(s->entry_point(), code_length); + MacroAssembler* masm = new MacroAssembler(&cb); + address start_pc; + +#ifndef PRODUCT + if (CountCompiledCalls) { + __ load_const(R11_scratch1, SharedRuntime::nof_megamorphic_calls_addr()); + __ lwz(R12_scratch2, 0, R11_scratch1); + __ addi(R12_scratch2, R12_scratch2, 1); + __ stw(R12_scratch2, 0, R11_scratch1); + } +#endif + + assert(VtableStub::receiver_location() == R3_ARG1->as_VMReg(), "receiver expected in R3_ARG1"); + + // Entry arguments: + // R19_method: Interface + // R3_ARG1: Receiver + // + + const Register rcvr_klass = R11_scratch1; + const Register vtable_len = R12_scratch2; + const Register itable_entry_addr = R21_tmp1; + const Register itable_interface = R22_tmp2; + + // Get receiver klass. + + // We might implicit NULL fault here. + address npe_addr = __ pc(); // npe = null pointer exception + __ load_klass_with_trap_null_check(rcvr_klass, R3_ARG1); + + //__ ld(rcvr_klass, oopDesc::klass_offset_in_bytes(), R3_ARG1); + + BLOCK_COMMENT("Load start of itable entries into itable_entry."); + __ lwz(vtable_len, InstanceKlass::vtable_length_offset() * wordSize, rcvr_klass); + __ slwi(vtable_len, vtable_len, exact_log2(vtableEntry::size() * wordSize)); + __ add(itable_entry_addr, vtable_len, rcvr_klass); + + // Loop over all itable entries until desired interfaceOop(Rinterface) found. + BLOCK_COMMENT("Increment itable_entry_addr in loop."); + const int vtable_base_offset = InstanceKlass::vtable_start_offset() * wordSize; + __ addi(itable_entry_addr, itable_entry_addr, vtable_base_offset + itableOffsetEntry::interface_offset_in_bytes()); + + const int itable_offset_search_inc = itableOffsetEntry::size() * wordSize; + Label search; + __ bind(search); + __ ld(itable_interface, 0, itable_entry_addr); + + // Handle IncompatibleClassChangeError in itable stubs. + // If the entry is NULL then we've reached the end of the table + // without finding the expected interface, so throw an exception. + BLOCK_COMMENT("Handle IncompatibleClassChangeError in itable stubs."); + Label throw_icce; + __ cmpdi(CCR1, itable_interface, 0); + __ cmpd(CCR0, itable_interface, R19_method); + __ addi(itable_entry_addr, itable_entry_addr, itable_offset_search_inc); + __ beq(CCR1, throw_icce); + __ bne(CCR0, search); + + // Entry found and itable_entry_addr points to it, get offset of vtable for interface. + + const Register vtable_offset = R12_scratch2; + const Register itable_method = R11_scratch1; + + const int vtable_offset_offset = (itableOffsetEntry::offset_offset_in_bytes() - + itableOffsetEntry::interface_offset_in_bytes()) - + itable_offset_search_inc; + __ lwz(vtable_offset, vtable_offset_offset, itable_entry_addr); + + // Compute itableMethodEntry and get methodOop and entry point for compiler. + const int method_offset = (itableMethodEntry::size() * wordSize * vtable_index) + + itableMethodEntry::method_offset_in_bytes(); + + __ add(itable_method, rcvr_klass, vtable_offset); + __ ld(R19_method, method_offset, itable_method); + +#ifndef PRODUCT + if (DebugVtables) { + Label ok; + __ cmpd(CCR0, R19_method, 0); + __ bne(CCR0, ok); + __ stop("methodOop is null", 103); + __ bind(ok); + } +#endif + + // If the vtable entry is null, the method is abstract. + address ame_addr = __ pc(); // ame = abstract method error + + // Must do an explicit check if implicit checks are disabled. 
+ assert(!MacroAssembler::needs_explicit_null_check(in_bytes(Method::from_compiled_offset())), "sanity"); + if (!ImplicitNullChecks NOT_LINUX(|| true) /*!os::zero_page_read_protected()*/) { + if (TrapBasedNullChecks) { + __ trap_null_check(R19_method); + } else { + __ cmpdi(CCR0, R19_method, 0); + __ beq(CCR0, throw_icce); + } + } + __ ld(R12_scratch2, in_bytes(Method::from_compiled_offset()), R19_method); + __ mtctr(R12_scratch2); + __ bctr(); + + // Handle IncompatibleClassChangeError in itable stubs. + // More detailed error message. + // We force resolving of the call site by jumping to the "handle + // wrong method" stub, and so let the interpreter runtime do all the + // dirty work. + __ bind(throw_icce); + __ load_const(R11_scratch1, SharedRuntime::get_handle_wrong_method_stub()); + __ mtctr(R11_scratch1); + __ bctr(); + + masm->flush(); + + guarantee(__ pc() <= s->code_end(), "overflowed buffer"); + + s->set_exception_points(npe_addr, ame_addr); + return s; +} + +int VtableStub::pd_code_size_limit(bool is_vtable_stub) { + if (TraceJumps || DebugVtables || CountCompiledCalls || VerifyOops) { + return 1000; + } else { + if (is_vtable_stub) { + return 20 + 16 + 8; // Plain + (cOops & Traps) + safety + } else { + return 16 + 96; + } + } +} + +int VtableStub::pd_code_alignment() { + const unsigned int icache_line_size = 32; + return icache_line_size; +} diff --git a/src/os_cpu/linux_ppc/vm/atomic_linux_ppc.inline.hpp b/src/os_cpu/linux_ppc/vm/atomic_linux_ppc.inline.hpp new file mode 100644 --- /dev/null +++ b/src/os_cpu/linux_ppc/vm/atomic_linux_ppc.inline.hpp @@ -0,0 +1,401 @@ +/* + * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright 2012, 2013 SAP AG. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef OS_CPU_LINUX_PPC_VM_ATOMIC_LINUX_PPC_INLINE_HPP +#define OS_CPU_LINUX_PPC_VM_ATOMIC_LINUX_PPC_INLINE_HPP + +#include "orderAccess_linux_ppc.inline.hpp" +#include "runtime/atomic.hpp" +#include "runtime/os.hpp" +#include "vm_version_ppc.hpp" + +#ifndef PPC64 +#error "Atomic currently only implemented for PPC64" +#endif + +// Implementation of class atomic + +inline void Atomic::store (jbyte store_value, jbyte* dest) { *dest = store_value; } +inline void Atomic::store (jshort store_value, jshort* dest) { *dest = store_value; } +inline void Atomic::store (jint store_value, jint* dest) { *dest = store_value; } +inline void Atomic::store (jlong store_value, jlong* dest) { *dest = store_value; } +inline void Atomic::store_ptr(intptr_t store_value, intptr_t* dest) { *dest = store_value; } +inline void Atomic::store_ptr(void* store_value, void* dest) { *(void**)dest = store_value; } + +inline void Atomic::store (jbyte store_value, volatile jbyte* dest) { *dest = store_value; } +inline void Atomic::store (jshort store_value, volatile jshort* dest) { *dest = store_value; } +inline void Atomic::store (jint store_value, volatile jint* dest) { *dest = store_value; } +inline void Atomic::store (jlong store_value, volatile jlong* dest) { *dest = store_value; } +inline void Atomic::store_ptr(intptr_t store_value, volatile intptr_t* dest) { *dest = store_value; } +inline void Atomic::store_ptr(void* store_value, volatile void* dest) { *(void* volatile *)dest = store_value; } + +inline jlong Atomic::load(volatile jlong* src) { return *src; } + +/* + machine barrier instructions: + + - sync two-way memory barrier, aka fence + - lwsync orders Store|Store, + Load|Store, + Load|Load, + but not Store|Load + - eieio orders memory accesses for device memory (only) + - isync invalidates speculatively executed instructions + From the POWER ISA 2.06 documentation: + "[...] an isync instruction prevents the execution of + instructions following the isync until instructions + preceding the isync have completed, [...]" + From IBM's AIX assembler reference: + "The isync [...] instructions causes the processor to + refetch any instructions that might have been fetched + prior to the isync instruction. The instruction isync + causes the processor to wait for all previous instructions + to complete. Then any instructions already fetched are + discarded and instruction processing continues in the + environment established by the previous instructions." + + semantic barrier instructions: + (as defined in orderAccess.hpp) + + - release orders Store|Store, (maps to lwsync) + Load|Store + - acquire orders Load|Store, (maps to lwsync) + Load|Load + - fence orders Store|Store, (maps to sync) + Load|Store, + Load|Load, + Store|Load +*/ + +#define strasm_sync "\n sync \n" +#define strasm_lwsync "\n lwsync \n" +#define strasm_isync "\n isync \n" +#define strasm_release strasm_lwsync +#define strasm_acquire strasm_lwsync +#define strasm_fence strasm_sync +#define strasm_nobarrier "" +#define strasm_nobarrier_clobber_memory "" + +inline jint Atomic::add (jint add_value, volatile jint* dest) { + + unsigned int result; + + __asm__ __volatile__ ( + strasm_lwsync + "1: lwarx %0, 0, %2 \n" + " add %0, %0, %1 \n" + " stwcx. 
%0, 0, %2 \n" + " bne- 1b \n" + strasm_isync + : /*%0*/"=&r" (result) + : /*%1*/"r" (add_value), /*%2*/"r" (dest) + : "cc", "memory" ); + + return (jint) result; +} + + +inline intptr_t Atomic::add_ptr(intptr_t add_value, volatile intptr_t* dest) { + + long result; + + __asm__ __volatile__ ( + strasm_lwsync + "1: ldarx %0, 0, %2 \n" + " add %0, %0, %1 \n" + " stdcx. %0, 0, %2 \n" + " bne- 1b \n" + strasm_isync + : /*%0*/"=&r" (result) + : /*%1*/"r" (add_value), /*%2*/"r" (dest) + : "cc", "memory" ); + + return (intptr_t) result; +} + +inline void* Atomic::add_ptr(intptr_t add_value, volatile void* dest) { + return (void*)add_ptr(add_value, (volatile intptr_t*)dest); +} + + +inline void Atomic::inc (volatile jint* dest) { + + unsigned int temp; + + __asm__ __volatile__ ( + strasm_nobarrier + "1: lwarx %0, 0, %2 \n" + " addic %0, %0, 1 \n" + " stwcx. %0, 0, %2 \n" + " bne- 1b \n" + strasm_nobarrier + : /*%0*/"=&r" (temp), "=m" (*dest) + : /*%2*/"r" (dest), "m" (*dest) + : "cc" strasm_nobarrier_clobber_memory); + +} + +inline void Atomic::inc_ptr(volatile intptr_t* dest) { + + long temp; + + __asm__ __volatile__ ( + strasm_nobarrier + "1: ldarx %0, 0, %2 \n" + " addic %0, %0, 1 \n" + " stdcx. %0, 0, %2 \n" + " bne- 1b \n" + strasm_nobarrier + : /*%0*/"=&r" (temp), "=m" (*dest) + : /*%2*/"r" (dest), "m" (*dest) + : "cc" strasm_nobarrier_clobber_memory); + +} + +inline void Atomic::inc_ptr(volatile void* dest) { + inc_ptr((volatile intptr_t*)dest); +} + + +inline void Atomic::dec (volatile jint* dest) { + + unsigned int temp; + + __asm__ __volatile__ ( + strasm_nobarrier + "1: lwarx %0, 0, %2 \n" + " addic %0, %0, -1 \n" + " stwcx. %0, 0, %2 \n" + " bne- 1b \n" + strasm_nobarrier + : /*%0*/"=&r" (temp), "=m" (*dest) + : /*%2*/"r" (dest), "m" (*dest) + : "cc" strasm_nobarrier_clobber_memory); + +} + +inline void Atomic::dec_ptr(volatile intptr_t* dest) { + + long temp; + + __asm__ __volatile__ ( + strasm_nobarrier + "1: ldarx %0, 0, %2 \n" + " addic %0, %0, -1 \n" + " stdcx. %0, 0, %2 \n" + " bne- 1b \n" + strasm_nobarrier + : /*%0*/"=&r" (temp), "=m" (*dest) + : /*%2*/"r" (dest), "m" (*dest) + : "cc" strasm_nobarrier_clobber_memory); + +} + +inline void Atomic::dec_ptr(volatile void* dest) { + dec_ptr((volatile intptr_t*)dest); +} + +inline jint Atomic::xchg(jint exchange_value, volatile jint* dest) { + + // Note that xchg_ptr doesn't necessarily do an acquire + // (see synchronizer.cpp). + + unsigned int old_value; + const uint64_t zero = 0; + + __asm__ __volatile__ ( + /* lwsync */ + strasm_lwsync + /* atomic loop */ + "1: \n" + " lwarx %[old_value], %[dest], %[zero] \n" + " stwcx. %[exchange_value], %[dest], %[zero] \n" + " bne- 1b \n" + /* isync */ + strasm_sync + /* exit */ + "2: \n" + /* out */ + : [old_value] "=&r" (old_value), + "=m" (*dest) + /* in */ + : [dest] "b" (dest), + [zero] "r" (zero), + [exchange_value] "r" (exchange_value), + "m" (*dest) + /* clobber */ + : "cc", + "memory" + ); + + return (jint) old_value; +} + +inline intptr_t Atomic::xchg_ptr(intptr_t exchange_value, volatile intptr_t* dest) { + + // Note that xchg_ptr doesn't necessarily do an acquire + // (see synchronizer.cpp). + + long old_value; + const uint64_t zero = 0; + + __asm__ __volatile__ ( + /* lwsync */ + strasm_lwsync + /* atomic loop */ + "1: \n" + " ldarx %[old_value], %[dest], %[zero] \n" + " stdcx. 
%[exchange_value], %[dest], %[zero] \n" + " bne- 1b \n" + /* isync */ + strasm_sync + /* exit */ + "2: \n" + /* out */ + : [old_value] "=&r" (old_value), + "=m" (*dest) + /* in */ + : [dest] "b" (dest), + [zero] "r" (zero), + [exchange_value] "r" (exchange_value), + "m" (*dest) + /* clobber */ + : "cc", + "memory" + ); + + return (intptr_t) old_value; +} + +inline void* Atomic::xchg_ptr(void* exchange_value, volatile void* dest) { + return (void*)xchg_ptr((intptr_t)exchange_value, (volatile intptr_t*)dest); +} + +inline jint Atomic::cmpxchg(jint exchange_value, volatile jint* dest, jint compare_value) { + + // Note that cmpxchg guarantees a two-way memory barrier across + // the cmpxchg, so it's really a a 'fence_cmpxchg_acquire' + // (see atomic.hpp). + + unsigned int old_value; + const uint64_t zero = 0; + + __asm__ __volatile__ ( + /* fence */ + strasm_sync + /* simple guard */ + " lwz %[old_value], 0(%[dest]) \n" + " cmpw %[compare_value], %[old_value] \n" + " bne- 2f \n" + /* atomic loop */ + "1: \n" + " lwarx %[old_value], %[dest], %[zero] \n" + " cmpw %[compare_value], %[old_value] \n" + " bne- 2f \n" + " stwcx. %[exchange_value], %[dest], %[zero] \n" + " bne- 1b \n" + /* acquire */ + strasm_sync + /* exit */ + "2: \n" + /* out */ + : [old_value] "=&r" (old_value), + "=m" (*dest) + /* in */ + : [dest] "b" (dest), + [zero] "r" (zero), + [compare_value] "r" (compare_value), + [exchange_value] "r" (exchange_value), + "m" (*dest) + /* clobber */ + : "cc", + "memory" + ); + + return (jint) old_value; +} + +inline jlong Atomic::cmpxchg(jlong exchange_value, volatile jlong* dest, jlong compare_value) { + + // Note that cmpxchg guarantees a two-way memory barrier across + // the cmpxchg, so it's really a a 'fence_cmpxchg_acquire' + // (see atomic.hpp). + + long old_value; + const uint64_t zero = 0; + + __asm__ __volatile__ ( + /* fence */ + strasm_sync + /* simple guard */ + " ld %[old_value], 0(%[dest]) \n" + " cmpd %[compare_value], %[old_value] \n" + " bne- 2f \n" + /* atomic loop */ + "1: \n" + " ldarx %[old_value], %[dest], %[zero] \n" + " cmpd %[compare_value], %[old_value] \n" + " bne- 2f \n" + " stdcx. %[exchange_value], %[dest], %[zero] \n" + " bne- 1b \n" + /* acquire */ + strasm_sync + /* exit */ + "2: \n" + /* out */ + : [old_value] "=&r" (old_value), + "=m" (*dest) + /* in */ + : [dest] "b" (dest), + [zero] "r" (zero), + [compare_value] "r" (compare_value), + [exchange_value] "r" (exchange_value), + "m" (*dest) + /* clobber */ + : "cc", + "memory" + ); + + return (jlong) old_value; +} + +inline intptr_t Atomic::cmpxchg_ptr(intptr_t exchange_value, volatile intptr_t* dest, intptr_t compare_value) { + return (intptr_t)cmpxchg((jlong)exchange_value, (volatile jlong*)dest, (jlong)compare_value); +} + +inline void* Atomic::cmpxchg_ptr(void* exchange_value, volatile void* dest, void* compare_value) { + return (void*)cmpxchg((jlong)exchange_value, (volatile jlong*)dest, (jlong)compare_value); +} + +#undef strasm_sync +#undef strasm_lwsync +#undef strasm_isync +#undef strasm_release +#undef strasm_acquire +#undef strasm_fence +#undef strasm_nobarrier +#undef strasm_nobarrier_clobber_memory + +#endif // OS_CPU_LINUX_PPC_VM_ATOMIC_LINUX_PPC_INLINE_HPP diff --git a/src/os_cpu/linux_ppc/vm/globals_linux_ppc.hpp b/src/os_cpu/linux_ppc/vm/globals_linux_ppc.hpp new file mode 100644 --- /dev/null +++ b/src/os_cpu/linux_ppc/vm/globals_linux_ppc.hpp @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright 2012, 2013 SAP AG. 
All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef OS_CPU_LINUX_PPC_VM_GLOBALS_LINUX_PPC_HPP +#define OS_CPU_LINUX_PPC_VM_GLOBALS_LINUX_PPC_HPP + +// Sets the default values for platform dependent flags used by the runtime system. +// (see globals.hpp) + +define_pd_global(bool, DontYieldALot, false); +define_pd_global(intx, ThreadStackSize, 2048); // 0 => use system default +define_pd_global(intx, VMThreadStackSize, 2048); + +// if we set CompilerThreadStackSize to a value different than 0, it will +// be used in os::create_thread(). Otherwise, due the strange logic in os::create_thread(), +// the stack size for compiler threads will default to VMThreadStackSize, although it +// is defined to 4M in os::Linux::default_stack_size()! +define_pd_global(intx, CompilerThreadStackSize, 4096); + +// Allow extra space in DEBUG builds for asserts. +define_pd_global(uintx,JVMInvokeMethodSlack, 8192); + +define_pd_global(intx, StackYellowPages, 6); +define_pd_global(intx, StackRedPages, 1); +define_pd_global(intx, StackShadowPages, 6 DEBUG_ONLY(+2)); + +// Only used on 64 bit platforms +define_pd_global(uintx,HeapBaseMinAddress, 2*G); +// Only used on 64 bit Windows platforms +define_pd_global(bool, UseVectoredExceptions, false); + +#endif // OS_CPU_LINUX_PPC_VM_GLOBALS_LINUX_PPC_HPP diff --git a/src/os_cpu/linux_ppc/vm/orderAccess_linux_ppc.inline.hpp b/src/os_cpu/linux_ppc/vm/orderAccess_linux_ppc.inline.hpp new file mode 100644 --- /dev/null +++ b/src/os_cpu/linux_ppc/vm/orderAccess_linux_ppc.inline.hpp @@ -0,0 +1,149 @@ +/* + * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright 2012, 2013 SAP AG. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef OS_CPU_LINUX_PPC_VM_ORDERACCESS_LINUX_PPC_INLINE_HPP +#define OS_CPU_LINUX_PPC_VM_ORDERACCESS_LINUX_PPC_INLINE_HPP + +#include "runtime/orderAccess.hpp" +#include "vm_version_ppc.hpp" + +#ifndef PPC64 +#error "OrderAccess currently only implemented for PPC64" +#endif + +// Implementation of class OrderAccess. + +// +// Machine barrier instructions: +// +// - sync Two-way memory barrier, aka fence. +// - lwsync orders Store|Store, +// Load|Store, +// Load|Load, +// but not Store|Load +// - eieio orders Store|Store +// - isync Invalidates speculatively executed instructions, +// but isync may complete before storage accesses +// associated with instructions preceding isync have +// been performed. +// +// Semantic barrier instructions: +// (as defined in orderAccess.hpp) +// +// - release orders Store|Store, (maps to lwsync) +// Load|Store +// - acquire orders Load|Store, (maps to lwsync) +// Load|Load +// - fence orders Store|Store, (maps to sync) +// Load|Store, +// Load|Load, +// Store|Load +// + +#define inlasm_sync() __asm__ __volatile__ ("sync" : : : "memory"); +#define inlasm_lwsync() __asm__ __volatile__ ("lwsync" : : : "memory"); +#define inlasm_eieio() __asm__ __volatile__ ("eieio" : : : "memory"); +#define inlasm_isync() __asm__ __volatile__ ("isync" : : : "memory"); +#define inlasm_release() inlasm_lwsync(); +#define inlasm_acquire() inlasm_lwsync(); +// Use twi-isync for load_acquire (faster than lwsync). +#define inlasm_acquire_reg(X) __asm__ __volatile__ ("twi 0,%0,0\n isync\n" : : "r" (X) : "memory"); +#define inlasm_fence() inlasm_sync(); + +inline void OrderAccess::loadload() { inlasm_lwsync(); } +inline void OrderAccess::storestore() { inlasm_lwsync(); } +inline void OrderAccess::loadstore() { inlasm_lwsync(); } +inline void OrderAccess::storeload() { inlasm_fence(); } + +inline void OrderAccess::acquire() { inlasm_acquire(); } +inline void OrderAccess::release() { inlasm_release(); } +inline void OrderAccess::fence() { inlasm_fence(); } + +inline jbyte OrderAccess::load_acquire(volatile jbyte* p) { register jbyte t = *p; inlasm_acquire_reg(t); return t; } +inline jshort OrderAccess::load_acquire(volatile jshort* p) { register jshort t = *p; inlasm_acquire_reg(t); return t; } +inline jint OrderAccess::load_acquire(volatile jint* p) { register jint t = *p; inlasm_acquire_reg(t); return t; } +inline jlong OrderAccess::load_acquire(volatile jlong* p) { register jlong t = *p; inlasm_acquire_reg(t); return t; } +inline jubyte OrderAccess::load_acquire(volatile jubyte* p) { register jubyte t = *p; inlasm_acquire_reg(t); return t; } +inline jushort OrderAccess::load_acquire(volatile jushort* p) { register jushort t = *p; inlasm_acquire_reg(t); return t; } +inline juint OrderAccess::load_acquire(volatile juint* p) { register juint t = *p; inlasm_acquire_reg(t); return t; } +inline julong OrderAccess::load_acquire(volatile julong* p) { return (julong)load_acquire((volatile jlong*)p); } +inline jfloat OrderAccess::load_acquire(volatile jfloat* p) { register jfloat t = *p; inlasm_acquire(); return t; } +inline jdouble OrderAccess::load_acquire(volatile jdouble* p) { register jdouble t = *p; inlasm_acquire(); return t; } + +inline intptr_t OrderAccess::load_ptr_acquire(volatile intptr_t* p) { return (intptr_t)load_acquire((volatile jlong*)p); } +inline void* OrderAccess::load_ptr_acquire(volatile 
void* p) { return (void*) load_acquire((volatile jlong*)p); } +inline void* OrderAccess::load_ptr_acquire(const volatile void* p) { return (void*) load_acquire((volatile jlong*)p); } + +inline void OrderAccess::release_store(volatile jbyte* p, jbyte v) { inlasm_release(); *p = v; } +inline void OrderAccess::release_store(volatile jshort* p, jshort v) { inlasm_release(); *p = v; } +inline void OrderAccess::release_store(volatile jint* p, jint v) { inlasm_release(); *p = v; } +inline void OrderAccess::release_store(volatile jlong* p, jlong v) { inlasm_release(); *p = v; } +inline void OrderAccess::release_store(volatile jubyte* p, jubyte v) { inlasm_release(); *p = v; } +inline void OrderAccess::release_store(volatile jushort* p, jushort v) { inlasm_release(); *p = v; } +inline void OrderAccess::release_store(volatile juint* p, juint v) { inlasm_release(); *p = v; } +inline void OrderAccess::release_store(volatile julong* p, julong v) { inlasm_release(); *p = v; } +inline void OrderAccess::release_store(volatile jfloat* p, jfloat v) { inlasm_release(); *p = v; } +inline void OrderAccess::release_store(volatile jdouble* p, jdouble v) { inlasm_release(); *p = v; } + +inline void OrderAccess::release_store_ptr(volatile intptr_t* p, intptr_t v) { inlasm_release(); *p = v; } +inline void OrderAccess::release_store_ptr(volatile void* p, void* v) { inlasm_release(); *(void* volatile *)p = v; } + +inline void OrderAccess::store_fence(jbyte* p, jbyte v) { *p = v; inlasm_fence(); } +inline void OrderAccess::store_fence(jshort* p, jshort v) { *p = v; inlasm_fence(); } +inline void OrderAccess::store_fence(jint* p, jint v) { *p = v; inlasm_fence(); } +inline void OrderAccess::store_fence(jlong* p, jlong v) { *p = v; inlasm_fence(); } +inline void OrderAccess::store_fence(jubyte* p, jubyte v) { *p = v; inlasm_fence(); } +inline void OrderAccess::store_fence(jushort* p, jushort v) { *p = v; inlasm_fence(); } +inline void OrderAccess::store_fence(juint* p, juint v) { *p = v; inlasm_fence(); } +inline void OrderAccess::store_fence(julong* p, julong v) { *p = v; inlasm_fence(); } +inline void OrderAccess::store_fence(jfloat* p, jfloat v) { *p = v; inlasm_fence(); } +inline void OrderAccess::store_fence(jdouble* p, jdouble v) { *p = v; inlasm_fence(); } + +inline void OrderAccess::store_ptr_fence(intptr_t* p, intptr_t v) { *p = v; inlasm_fence(); } +inline void OrderAccess::store_ptr_fence(void** p, void* v) { *p = v; inlasm_fence(); } + +inline void OrderAccess::release_store_fence(volatile jbyte* p, jbyte v) { inlasm_release(); *p = v; inlasm_fence(); } +inline void OrderAccess::release_store_fence(volatile jshort* p, jshort v) { inlasm_release(); *p = v; inlasm_fence(); } +inline void OrderAccess::release_store_fence(volatile jint* p, jint v) { inlasm_release(); *p = v; inlasm_fence(); } +inline void OrderAccess::release_store_fence(volatile jlong* p, jlong v) { inlasm_release(); *p = v; inlasm_fence(); } +inline void OrderAccess::release_store_fence(volatile jubyte* p, jubyte v) { inlasm_release(); *p = v; inlasm_fence(); } +inline void OrderAccess::release_store_fence(volatile jushort* p, jushort v) { inlasm_release(); *p = v; inlasm_fence(); } +inline void OrderAccess::release_store_fence(volatile juint* p, juint v) { inlasm_release(); *p = v; inlasm_fence(); } +inline void OrderAccess::release_store_fence(volatile julong* p, julong v) { inlasm_release(); *p = v; inlasm_fence(); } +inline void OrderAccess::release_store_fence(volatile jfloat* p, jfloat v) { inlasm_release(); *p = v; inlasm_fence(); } 
+inline void OrderAccess::release_store_fence(volatile jdouble* p, jdouble v) { inlasm_release(); *p = v; inlasm_fence(); }
+
+inline void OrderAccess::release_store_ptr_fence(volatile intptr_t* p, intptr_t v) { inlasm_release(); *p = v; inlasm_fence(); }
+inline void OrderAccess::release_store_ptr_fence(volatile void* p, void* v) { inlasm_release(); *(void* volatile *)p = v; inlasm_fence(); }
+
+#undef inlasm_sync
+#undef inlasm_lwsync
+#undef inlasm_eieio
+#undef inlasm_isync
+#undef inlasm_release
+#undef inlasm_acquire
+#undef inlasm_fence
+
+#endif // OS_CPU_LINUX_PPC_VM_ORDERACCESS_LINUX_PPC_INLINE_HPP
diff --git a/src/os_cpu/linux_ppc/vm/os_linux_ppc.cpp b/src/os_cpu/linux_ppc/vm/os_linux_ppc.cpp
new file mode 100644
--- /dev/null
+++ b/src/os_cpu/linux_ppc/vm/os_linux_ppc.cpp
@@ -0,0 +1,607 @@
+/*
+ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012, 2013 SAP AG. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ * + */ + +// no precompiled headers +#include "assembler_ppc.inline.hpp" +#include "classfile/classLoader.hpp" +#include "classfile/systemDictionary.hpp" +#include "classfile/vmSymbols.hpp" +#include "code/icBuffer.hpp" +#include "code/vtableStubs.hpp" +#include "interpreter/interpreter.hpp" +#include "jvm_linux.h" +#include "memory/allocation.inline.hpp" +#include "mutex_linux.inline.hpp" +#include "nativeInst_ppc.hpp" +#include "os_share_linux.hpp" +#include "prims/jniFastGetField.hpp" +#include "prims/jvm.h" +#include "prims/jvm_misc.hpp" +#include "runtime/arguments.hpp" +#include "runtime/extendedPC.hpp" +#include "runtime/frame.inline.hpp" +#include "runtime/interfaceSupport.hpp" +#include "runtime/java.hpp" +#include "runtime/javaCalls.hpp" +#include "runtime/mutexLocker.hpp" +#include "runtime/osThread.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" +#include "runtime/thread.inline.hpp" +#include "runtime/timer.hpp" +#include "utilities/events.hpp" +#include "utilities/vmError.hpp" + +// put OS-includes here +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include + + +address os::current_stack_pointer() { + intptr_t* csp; + + // inline assembly `mr regno(csp), R1_SP': + __asm__ __volatile__ ("mr %0, 1":"=r"(csp):); + + return (address) csp; +} + +char* os::non_memory_address_word() { + // Must never look like an address returned by reserve_memory, + // even in its subfields (as defined by the CPU immediate fields, + // if the CPU splits constants across multiple instructions). + + return (char*) -1; +} + +void os::initialize_thread(Thread *thread) { } + +// Frame information (pc, sp, fp) retrieved via ucontext +// always looks like a C-frame according to the frame +// conventions in frame_ppc64.hpp. +address os::Linux::ucontext_get_pc(ucontext_t * uc) { + // On powerpc64, ucontext_t is not selfcontained but contains + // a pointer to an optional substructure (mcontext_t.regs) containing the volatile + // registers - NIP, among others. + // This substructure may or may not be there depending where uc came from: + // - if uc was handed over as the argument to a sigaction handler, a pointer to the + // substructure was provided by the kernel when calling the signal handler, and + // regs->nip can be accessed. + // - if uc was filled by getcontext(), it is undefined - getcontext() does not fill + // it because the volatile registers are not needed to make setcontext() work. + // Hopefully it was zero'd out beforehand. 
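
The comment above states the one invariant every handler in this file leans on: the saved program counter is only reachable through the optional uc_mcontext.regs pointer. A minimal standalone sketch of reading it defensively in a SA_SIGINFO handler (illustrative names, not code from this port):

    #include <stddef.h>
    #include <signal.h>
    #include <ucontext.h>

    static void* g_last_fault_pc = NULL;

    // Record the faulting PC only if the kernel supplied the register substructure.
    static void record_fault_pc(int sig, siginfo_t* info, void* context) {
      ucontext_t* uc = (ucontext_t*) context;
      if (uc != NULL && uc->uc_mcontext.regs != NULL) {
        g_last_fault_pc = (void*) uc->uc_mcontext.regs->nip;
      }
    }
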
+ guarantee(uc->uc_mcontext.regs != NULL, "only use ucontext_get_pc in sigaction context"); + return (address)uc->uc_mcontext.regs->nip; +} + +intptr_t* os::Linux::ucontext_get_sp(ucontext_t * uc) { + return (intptr_t*)uc->uc_mcontext.regs->gpr[1/*REG_SP*/]; +} + +intptr_t* os::Linux::ucontext_get_fp(ucontext_t * uc) { + return NULL; +} + +ExtendedPC os::fetch_frame_from_context(void* ucVoid, + intptr_t** ret_sp, intptr_t** ret_fp) { + + ExtendedPC epc; + ucontext_t* uc = (ucontext_t*)ucVoid; + + if (uc != NULL) { + epc = ExtendedPC(os::Linux::ucontext_get_pc(uc)); + if (ret_sp) *ret_sp = os::Linux::ucontext_get_sp(uc); + if (ret_fp) *ret_fp = os::Linux::ucontext_get_fp(uc); + } else { + // construct empty ExtendedPC for return value checking + epc = ExtendedPC(NULL); + if (ret_sp) *ret_sp = (intptr_t *)NULL; + if (ret_fp) *ret_fp = (intptr_t *)NULL; + } + + return epc; +} + +frame os::fetch_frame_from_context(void* ucVoid) { + intptr_t* sp; + intptr_t* fp; + ExtendedPC epc = fetch_frame_from_context(ucVoid, &sp, &fp); + return frame(sp, epc.pc()); +} + +frame os::get_sender_for_C_frame(frame* fr) { + if (*fr->sp() == 0) { + // fr is the last C frame + return frame(NULL, NULL); + } + return frame(fr->sender_sp(), fr->sender_pc()); +} + + +frame os::current_frame() { + intptr_t* csp = (intptr_t*) *((intptr_t*) os::current_stack_pointer()); + // hack. + frame topframe(csp, (address)0x8); + // return sender of current topframe which hopefully has pc != NULL. + return os::get_sender_for_C_frame(&topframe); +} + +// Utility functions + +extern "C" JNIEXPORT int +JVM_handle_linux_signal(int sig, + siginfo_t* info, + void* ucVoid, + int abort_if_unrecognized) { + ucontext_t* uc = (ucontext_t*) ucVoid; + + Thread* t = ThreadLocalStorage::get_thread_slow(); + + SignalHandlerMark shm(t); + + // Note: it's not uncommon that JNI code uses signal/sigset to install + // then restore certain signal handler (e.g. to temporarily block SIGPIPE, + // or have a SIGILL handler when detecting CPU type). When that happens, + // JVM_handle_linux_signal() might be invoked with junk info/ucVoid. To + // avoid unnecessary crash when libjsig is not preloaded, try handle signals + // that do not require siginfo/ucontext first. + + if (sig == SIGPIPE) { + if (os::Linux::chained_handler(sig, info, ucVoid)) { + return true; + } else { + if (PrintMiscellaneous && (WizardMode || Verbose)) { + warning("Ignoring SIGPIPE - see bug 4229104"); + } + return true; + } + } + + JavaThread* thread = NULL; + VMThread* vmthread = NULL; + if (os::Linux::signal_handlers_are_installed) { + if (t != NULL) { + if(t->is_Java_thread()) { + thread = (JavaThread*)t; + } else if(t->is_VM_thread()) { + vmthread = (VMThread *)t; + } + } + } + + // Moved SafeFetch32 handling outside thread!=NULL conditional block to make + // it work if no associated JavaThread object exists. + if (uc) { + address const pc = os::Linux::ucontext_get_pc(uc); + if (pc && StubRoutines::is_safefetch_fault(pc)) { + uc->uc_mcontext.regs->nip = (unsigned long)StubRoutines::continuation_for_safefetch_fault(pc); + return true; + } + } + + // decide if this trap can be handled by a stub + address stub = NULL; + address pc = NULL; + + //%note os_trap_1 + if (info != NULL && uc != NULL && thread != NULL) { + pc = (address) os::Linux::ucontext_get_pc(uc); + + // Handle ALL stack overflow variations here + if (sig == SIGSEGV) { + // Si_addr may not be valid due to a bug in the linux-ppc64 kernel (see + // comment below). Use get_stack_bang_address instead of si_addr. 
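
NativeInstruction::get_stack_bang_address() itself is not part of this hunk. As an illustration of the idea only, assuming the bang is a plain D-form store relative to R1 (the real implementation need not be limited to that), the target address can be recomputed from the instruction image and the SP saved in the context:

    // Illustrative helper, not the port's implementation.
    static address bang_target(uint32_t inst, const ucontext_t* uc) {
      const int ra = (inst >> 16) & 0x1f;              // RA field: base register
      if (ra != 1) return NULL;                        // only handle stores via R1 (the SP)
      const int16_t disp = (int16_t)(inst & 0xffff);   // sign-extended 16-bit displacement
      const intptr_t sp = (intptr_t) uc->uc_mcontext.regs->gpr[1];
      return (address)(sp + disp);
    }
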
+ address addr = ((NativeInstruction*)pc)->get_stack_bang_address(uc); + + // Check if fault address is within thread stack. + if (addr < thread->stack_base() && + addr >= thread->stack_base() - thread->stack_size()) { + // stack overflow + if (thread->in_stack_yellow_zone(addr)) { + thread->disable_stack_yellow_zone(); + if (thread->thread_state() == _thread_in_Java) { + // Throw a stack overflow exception. + // Guard pages will be reenabled while unwinding the stack. + stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::STACK_OVERFLOW); + } else { + // Thread was in the vm or native code. Return and try to finish. + return 1; + } + } else if (thread->in_stack_red_zone(addr)) { + // Fatal red zone violation. Disable the guard pages and fall through + // to handle_unexpected_exception way down below. + thread->disable_stack_red_zone(); + tty->print_raw_cr("An irrecoverable stack overflow has occurred."); + + // This is a likely cause, but hard to verify. Let's just print + // it as a hint. + tty->print_raw_cr("Please check if any of your loaded .so files has " + "enabled executable stack (see man page execstack(8))"); + } else { + // Accessing stack address below sp may cause SEGV if current + // thread has MAP_GROWSDOWN stack. This should only happen when + // current thread was created by user code with MAP_GROWSDOWN flag + // and then attached to VM. See notes in os_linux.cpp. + if (thread->osthread()->expanding_stack() == 0) { + thread->osthread()->set_expanding_stack(); + if (os::Linux::manually_expand_stack(thread, addr)) { + thread->osthread()->clear_expanding_stack(); + return 1; + } + thread->osthread()->clear_expanding_stack(); + } else { + fatal("recursive segv. expanding stack."); + } + } + } + } + + if (thread->thread_state() == _thread_in_Java) { + // Java thread running in Java code => find exception handler if any + // a fault inside compiled code, the interpreter, or a stub + + // A VM-related SIGILL may only occur if we are not in the zero page. + // On AIX, we get a SIGILL if we jump to 0x0 or to somewhere else + // in the zero page, because it is filled with 0x0. We ignore + // explicit SIGILLs in the zero page. + if (sig == SIGILL && (pc < (address) 0x200)) { + if (TraceTraps) + tty->print_raw_cr("SIGILL happened inside zero page."); + goto report_and_die; + } + + // Handle signal from NativeJump::patch_verified_entry(). + if (( TrapBasedNotEntrantChecks && sig == SIGTRAP && nativeInstruction_at(pc)->is_sigtrap_zombie_not_entrant()) || + (!TrapBasedNotEntrantChecks && sig == SIGILL && nativeInstruction_at(pc)->is_sigill_zombie_not_entrant())) { + if (TraceTraps) + tty->print_cr("trap: zombie_not_entrant (%s)", (sig == SIGTRAP) ? "SIGTRAP" : "SIGILL"); + stub = SharedRuntime::get_handle_wrong_method_stub(); + } + + else if (sig == SIGSEGV && + // A linux-ppc64 kernel before 2.6.6 doesn't set si_addr on some segfaults + // in 64bit mode (cf. http://www.kernel.org/pub/linux/kernel/v2.6/ChangeLog-2.6.6), + // especially when we try to read from the safepoint polling page. So the check + // (address)info->si_addr == os::get_standard_polling_page() + // doesn't work for us. We use: + ((NativeInstruction*)pc)->is_safepoint_poll()) { + if (TraceTraps) + tty->print_cr("trap: safepoint_poll at " INTPTR_FORMAT " (SIGSEGV)", pc); + stub = SharedRuntime::get_poll_stub(pc); + } + + // SIGTRAP-based ic miss check in compiled code. 
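
This and the following branches all reduce to the same recipe: recognize the trapping instruction, pick a continuation stub, and resume there by rewriting the saved NIP (the actual rewrite happens once, near the end of the handler). Condensed into one helper for readability (a sketch in the style of this file, not code it contains):

    // Sketch: how a recognized trap is turned into execution of a stub.
    static bool resume_in_stub(ucontext_t* uc, JavaThread* thread, address pc, address stub) {
      if (stub == NULL) return false;
      if (thread != NULL) thread->set_saved_exception_pc(pc);  // mirrors the code further down
      uc->uc_mcontext.regs->nip = (unsigned long) stub;        // sigreturn continues in the stub
      return true;
    }
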
+ else if (sig == SIGTRAP && TrapBasedICMissChecks && + nativeInstruction_at(pc)->is_sigtrap_ic_miss_check()) { + if (TraceTraps) + tty->print_cr("trap: ic_miss_check at " INTPTR_FORMAT " (SIGTRAP)", pc); + stub = SharedRuntime::get_ic_miss_stub(); + } + + // SIGTRAP-based implicit null check in compiled code. + else if (sig == SIGTRAP && TrapBasedNullChecks && + nativeInstruction_at(pc)->is_sigtrap_null_check()) { + if (TraceTraps) + tty->print_cr("trap: null_check at " INTPTR_FORMAT " (SIGTRAP)", pc); + stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::IMPLICIT_NULL); + } + + // SIGSEGV-based implicit null check in compiled code. + else if (sig == SIGSEGV && ImplicitNullChecks && + CodeCache::contains((void*) pc) && + !MacroAssembler::needs_explicit_null_check((intptr_t) info->si_addr)) { + if (TraceTraps) + tty->print_cr("trap: null_check at " INTPTR_FORMAT " (SIGSEGV)", pc); + stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::IMPLICIT_NULL); + } + +#ifdef COMPILER2 + // SIGTRAP-based implicit range check in compiled code. + else if (sig == SIGTRAP && TrapBasedRangeChecks && + nativeInstruction_at(pc)->is_sigtrap_range_check()) { + if (TraceTraps) + tty->print_cr("trap: range_check at " INTPTR_FORMAT " (SIGTRAP)", pc); + stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::IMPLICIT_NULL); + } +#endif + else if (sig == SIGBUS) { + // BugId 4454115: A read from a MappedByteBuffer can fault here if the + // underlying file has been truncated. Do not crash the VM in such a case. + CodeBlob* cb = CodeCache::find_blob_unsafe(pc); + nmethod* nm = (cb != NULL && cb->is_nmethod()) ? (nmethod*)cb : NULL; + if (nm != NULL && nm->has_unsafe_access()) { + // We don't really need a stub here! Just set the pending exeption and + // continue at the next instruction after the faulting read. Returning + // garbage from this read is ok. + thread->set_pending_unsafe_access_error(); + uc->uc_mcontext.regs->nip = ((unsigned long)pc) + 4; + return true; + } + } + } + + else { // thread->thread_state() != _thread_in_Java + if (sig == SIGILL && VM_Version::is_determine_features_test_running()) { + // SIGILL must be caused by VM_Version::determine_features(). + *(int *)pc = 0; // patch instruction to 0 to indicate that it causes a SIGILL, + // flushing of icache is not necessary. + stub = pc + 4; // continue with next instruction. + } + else if (thread->thread_state() == _thread_in_vm && + sig == SIGBUS && thread->doing_unsafe_access()) { + // We don't really need a stub here! Just set the pending exeption and + // continue at the next instruction after the faulting read. Returning + // garbage from this read is ok. + thread->set_pending_unsafe_access_error(); + uc->uc_mcontext.regs->nip = ((unsigned long)pc) + 4; + return true; + } + } + + // Check to see if we caught the safepoint code in the + // process of write protecting the memory serialization page. + // It write enables the page immediately after protecting it + // so we can just return to retry the write. + if ((sig == SIGSEGV) && + // Si_addr may not be valid due to a bug in the linux-ppc64 kernel (see comment above). + // Use is_memory_serialization instead of si_addr. + ((NativeInstruction*)pc)->is_memory_serialization(thread, ucVoid)) { + // Synchronization problem in the pseudo memory barrier code (bug id 6546278) + // Block current thread until the memory serialize page permission restored. 
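
The final check implements the pseudo-memory-barrier ("memory serialization page") protocol: the VM write-protects a shared page while it inspects thread states, mutators store to that page on state transitions, and a mutator that faults mid-transition only has to wait for the permissions to come back and retry the store. Condensed shape of the lines that follow (sketch only; both calls appear in the surrounding code):

    static bool handle_serialize_page_fault(JavaThread* thread, address pc, void* uc) {
      if (((NativeInstruction*) pc)->is_memory_serialization(thread, uc)) {
        os::block_on_serialize_page_trap();  // wait until the VM restores page permissions
        return true;                         // the faulting store is simply re-executed
      }
      return false;
    }
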
+ os::block_on_serialize_page_trap(); + return true; + } + } + + if (stub != NULL) { + // Save all thread context in case we need to restore it. + if (thread != NULL) thread->set_saved_exception_pc(pc); + uc->uc_mcontext.regs->nip = (unsigned long)stub; + return true; + } + + // signal-chaining + if (os::Linux::chained_handler(sig, info, ucVoid)) { + return true; + } + + if (!abort_if_unrecognized) { + // caller wants another chance, so give it to him + return false; + } + + if (pc == NULL && uc != NULL) { + pc = os::Linux::ucontext_get_pc(uc); + } + +report_and_die: + // unmask current signal + sigset_t newset; + sigemptyset(&newset); + sigaddset(&newset, sig); + sigprocmask(SIG_UNBLOCK, &newset, NULL); + + VMError err(t, sig, pc, info, ucVoid); + err.report_and_die(); + + ShouldNotReachHere(); + return false; +} + +void os::Linux::init_thread_fpu_state(void) { + // Disable FP exceptions. + __asm__ __volatile__ ("mtfsfi 6,0"); +} + +int os::Linux::get_fpu_control_word(void) { + // x86 has problems with FPU precision after pthread_cond_timedwait(). + // nothing to do on ppc64. + return 0; +} + +void os::Linux::set_fpu_control_word(int fpu_control) { + // x86 has problems with FPU precision after pthread_cond_timedwait(). + // nothing to do on ppc64. +} + +//////////////////////////////////////////////////////////////////////////////// +// thread stack + +size_t os::Linux::min_stack_allowed = 768*K; + +bool os::Linux::supports_variable_stack_size() { return true; } + +// return default stack size for thr_type +size_t os::Linux::default_stack_size(os::ThreadType thr_type) { + // default stack size (compiler thread needs larger stack) + // Notice that the setting for compiler threads here have no impact + // because of the strange 'fallback logic' in os::create_thread(). + // Better set CompilerThreadStackSize in globals_.hpp if you want to + // specify a different stack size for compiler threads! + size_t s = (thr_type == os::compiler_thread ? 4 * M : 1024 * K); + return s; +} + +size_t os::Linux::default_guard_size(os::ThreadType thr_type) { + return 2 * page_size(); +} + +// Java thread: +// +// Low memory addresses +// +------------------------+ +// | |\ JavaThread created by VM does not have glibc +// | glibc guard page | - guard, attached Java thread usually has +// | |/ 1 page glibc guard. +// P1 +------------------------+ Thread::stack_base() - Thread::stack_size() +// | |\ +// | HotSpot Guard Pages | - red and yellow pages +// | |/ +// +------------------------+ JavaThread::stack_yellow_zone_base() +// | |\ +// | Normal Stack | - +// | |/ +// P2 +------------------------+ Thread::stack_base() +// +// Non-Java thread: +// +// Low memory addresses +// +------------------------+ +// | |\ +// | glibc guard page | - usually 1 page +// | |/ +// P1 +------------------------+ Thread::stack_base() - Thread::stack_size() +// | |\ +// | Normal Stack | - +// | |/ +// P2 +------------------------+ Thread::stack_base() +// +// ** P1 (aka bottom) and size ( P2 = P1 - size) are the address and stack size returned from +// pthread_attr_getstack() + +static void current_stack_region(address * bottom, size_t * size) { + if (os::Linux::is_initial_thread()) { + // initial thread needs special handling because pthread_getattr_np() + // may return bogus value. 
+ *bottom = os::Linux::initial_thread_stack_bottom(); + *size = os::Linux::initial_thread_stack_size(); + } else { + pthread_attr_t attr; + + int rslt = pthread_getattr_np(pthread_self(), &attr); + + // JVM needs to know exact stack location, abort if it fails + if (rslt != 0) { + if (rslt == ENOMEM) { + vm_exit_out_of_memory(0, OOM_MMAP_ERROR, "pthread_getattr_np"); + } else { + fatal(err_msg("pthread_getattr_np failed with errno = %d", rslt)); + } + } + + if (pthread_attr_getstack(&attr, (void **)bottom, size) != 0) { + fatal("Can not locate current stack attributes!"); + } + + pthread_attr_destroy(&attr); + + } + assert(os::current_stack_pointer() >= *bottom && + os::current_stack_pointer() < *bottom + *size, "just checking"); +} + +address os::current_stack_base() { + address bottom; + size_t size; + current_stack_region(&bottom, &size); + return (bottom + size); +} + +size_t os::current_stack_size() { + // stack size includes normal stack and HotSpot guard pages + address bottom; + size_t size; + current_stack_region(&bottom, &size); + return size; +} + +///////////////////////////////////////////////////////////////////////////// +// helper functions for fatal error handler + +void os::print_context(outputStream *st, void *context) { + if (context == NULL) return; + + ucontext_t* uc = (ucontext_t*)context; + + st->print_cr("Registers:"); + st->print("pc =" INTPTR_FORMAT " ", uc->uc_mcontext.regs->nip); + st->print("lr =" INTPTR_FORMAT " ", uc->uc_mcontext.regs->link); + st->print("ctr=" INTPTR_FORMAT " ", uc->uc_mcontext.regs->ctr); + st->cr(); + for (int i = 0; i < 32; i++) { + st->print("r%-2d=" INTPTR_FORMAT " ", i, uc->uc_mcontext.regs->gpr[i]); + if (i % 3 == 2) st->cr(); + } + st->cr(); + st->cr(); + + intptr_t *sp = (intptr_t *)os::Linux::ucontext_get_sp(uc); + st->print_cr("Top of Stack: (sp=" PTR_FORMAT ")", sp); + print_hex_dump(st, (address)sp, (address)(sp + 128), sizeof(intptr_t)); + st->cr(); + + // Note: it may be unsafe to inspect memory near pc. For example, pc may + // point to garbage if entry point in an nmethod is corrupted. Leave + // this at the end, and hope for the best. + address pc = os::Linux::ucontext_get_pc(uc); + st->print_cr("Instructions: (pc=" PTR_FORMAT ")", pc); + print_hex_dump(st, pc - 64, pc + 64, /*instrsize=*/4); + st->cr(); +} + +void os::print_register_info(outputStream *st, void *context) { + if (context == NULL) return; + + ucontext_t *uc = (ucontext_t*)context; + + st->print_cr("Register to memory mapping:"); + st->cr(); + + // this is only for the "general purpose" registers + for (int i = 0; i < 32; i++) { + st->print("r%-2d=", i); + print_location(st, uc->uc_mcontext.regs->gpr[i]); + } + st->cr(); +} + +extern "C" { + int SpinPause() { + return 0; + } +} + +#ifndef PRODUCT +void os::verify_stack_alignment() { + assert(((intptr_t)os::current_stack_pointer() & (StackAlignmentInBytes-1)) == 0, "incorrect stack alignment"); +} +#endif diff --git a/src/os_cpu/linux_ppc/vm/os_linux_ppc.hpp b/src/os_cpu/linux_ppc/vm/os_linux_ppc.hpp new file mode 100644 --- /dev/null +++ b/src/os_cpu/linux_ppc/vm/os_linux_ppc.hpp @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright 2012, 2013 SAP AG. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef OS_CPU_LINUX_PPC_VM_OS_LINUX_PPC_HPP +#define OS_CPU_LINUX_PPC_VM_OS_LINUX_PPC_HPP + + static void setup_fpu() {} + + // Used to register dynamic code cache area with the OS + // Note: Currently only used in 64 bit Windows implementations + static bool register_code_area(char *low, char *high) { return true; } + +#endif // OS_CPU_LINUX_PPC_VM_OS_LINUX_PPC_HPP diff --git a/src/os_cpu/linux_ppc/vm/prefetch_linux_ppc.inline.hpp b/src/os_cpu/linux_ppc/vm/prefetch_linux_ppc.inline.hpp new file mode 100644 --- /dev/null +++ b/src/os_cpu/linux_ppc/vm/prefetch_linux_ppc.inline.hpp @@ -0,0 +1,50 @@ +/* + * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright 2012, 2013 SAP AG. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef OS_CPU_LINUX_PPC_VM_PREFETCH_LINUX_PPC_INLINE_HPP +#define OS_CPU_LINUX_PPC_VM_PREFETCH_LINUX_PPC_INLINE_HPP + +#include "runtime/prefetch.hpp" + + +inline void Prefetch::read(void *loc, intx interval) { + __asm__ __volatile__ ( + " dcbt 0, %0 \n" + : + : /*%0*/"r" ( ((address)loc) +((long)interval) ) + //: + ); +} + +inline void Prefetch::write(void *loc, intx interval) { + __asm__ __volatile__ ( + " dcbtst 0, %0 \n" + : + : /*%0*/"r" ( ((address)loc) +((long)interval) ) + //: + ); +} + +#endif // OS_CPU_LINUX_PPC_VM_PREFETCH_LINUX_OJDKPPC_HPP diff --git a/src/os_cpu/linux_ppc/vm/threadLS_linux_ppc.cpp b/src/os_cpu/linux_ppc/vm/threadLS_linux_ppc.cpp new file mode 100644 --- /dev/null +++ b/src/os_cpu/linux_ppc/vm/threadLS_linux_ppc.cpp @@ -0,0 +1,39 @@ +/* + * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright 2012, 2013 SAP AG. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
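Aside on the prefetch file above: Prefetch::read() and Prefetch::write() wrap the PPC dcbt ("data cache block touch") and dcbtst ("... for store") instructions in GCC extended inline assembly, passing the effective address loc + interval in a register. A hedged, standalone sketch of the same idiom follows; prefetch_read/prefetch_write are illustrative names, and the __builtin_prefetch branch is only the portable GCC/Clang fallback for non-PPC builds, not something this patch uses.

// Standalone sketch of the dcbt/dcbtst prefetch idiom used above.
// The asm branch is PPC64-only; __builtin_prefetch is the generic fallback.
#include <cstdint>

inline void prefetch_read(const void* loc, intptr_t interval) {
#if defined(__powerpc64__)
  const char* ea = (const char*)loc + interval;
  __asm__ __volatile__ ("dcbt 0, %0" : : "r"(ea));
#else
  __builtin_prefetch((const char*)loc + interval, /*rw=*/0);
#endif
}

inline void prefetch_write(void* loc, intptr_t interval) {
#if defined(__powerpc64__)
  char* ea = (char*)loc + interval;
  __asm__ __volatile__ ("dcbtst 0, %0" : : "r"(ea));
#else
  __builtin_prefetch((char*)loc + interval, /*rw=*/1);
#endif
}

Usage is e.g. prefetch_read(p, 256) to touch the cache line 256 bytes ahead of p before a streaming read reaches it.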
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "runtime/threadLocalStorage.hpp" + +void ThreadLocalStorage::generate_code_for_get_thread() { + // nothing we can do here for user-level thread +} + +void ThreadLocalStorage::pd_init() { + // Nothing to do +} + +void ThreadLocalStorage::pd_set_thread(Thread* thread) { + os::thread_local_storage_at_put(ThreadLocalStorage::thread_index(), thread); +} diff --git a/src/os_cpu/linux_ppc/vm/threadLS_linux_ppc.hpp b/src/os_cpu/linux_ppc/vm/threadLS_linux_ppc.hpp new file mode 100644 --- /dev/null +++ b/src/os_cpu/linux_ppc/vm/threadLS_linux_ppc.hpp @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright 2012, 2013 SAP AG. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef OS_CPU_LINUX_PPC_VM_THREADLS_LINUX_PPC_HPP +#define OS_CPU_LINUX_PPC_VM_THREADLS_LINUX_PPC_HPP + + // Processor dependent parts of ThreadLocalStorage + +public: + static Thread* thread() { + return (Thread *) os::thread_local_storage_at(thread_index()); + } + +#endif // OS_CPU_LINUX_PPC_VM_THREADLS_LINUX_PPC_HPP diff --git a/src/os_cpu/linux_ppc/vm/thread_linux_ppc.cpp b/src/os_cpu/linux_ppc/vm/thread_linux_ppc.cpp new file mode 100644 --- /dev/null +++ b/src/os_cpu/linux_ppc/vm/thread_linux_ppc.cpp @@ -0,0 +1,36 @@ +/* + * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright 2012, 2013 SAP AG. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
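Aside on the thread-local-storage file above: ThreadLocalStorage::pd_set_thread() and thread() delegate to os::thread_local_storage_at_put()/os::thread_local_storage_at() with the per-VM thread_index(); on Linux that shared layer is, to the best of my knowledge, built on POSIX TLS keys. The sketch below shows that underlying mechanism in isolation; current_thread_key, tls_init, set_current_thread and current_thread are made-up names, and struct Thread is a bare stand-in for HotSpot's Thread.

// Illustration of a pthread-key based "current thread" slot, the kind of
// facility the os::thread_local_storage_at* calls above are assumed to wrap.
#include <pthread.h>
#include <cstdlib>

struct Thread;                        // stand-in for HotSpot's Thread

static pthread_key_t current_thread_key;

static void tls_init() {              // call once, e.g. at VM startup
  if (pthread_key_create(&current_thread_key, /*destructor=*/NULL) != 0) {
    abort();
  }
}

static void set_current_thread(Thread* t) {   // analogous to pd_set_thread()
  pthread_setspecific(current_thread_key, t);
}

static Thread* current_thread() {             // analogous to thread()
  return (Thread*)pthread_getspecific(current_thread_key);
}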
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "runtime/frame.inline.hpp" +#include "thread_linux.inline.hpp" + +// Forte Analyzer AsyncGetCallTrace profiling support is not implemented on Linux/PPC. +bool JavaThread::pd_get_top_frame_for_signal_handler(frame* fr_addr, void* ucontext, bool isInJava) { + Unimplemented(); + return false; +} + +void JavaThread::cache_global_variables() { } diff --git a/src/os_cpu/linux_ppc/vm/thread_linux_ppc.hpp b/src/os_cpu/linux_ppc/vm/thread_linux_ppc.hpp new file mode 100644 --- /dev/null +++ b/src/os_cpu/linux_ppc/vm/thread_linux_ppc.hpp @@ -0,0 +1,83 @@ +/* + * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright 2012, 2013 SAP AG. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef OS_CPU_LINUX_PPC_VM_THREAD_LINUX_PPC_HPP +#define OS_CPU_LINUX_PPC_VM_THREAD_LINUX_PPC_HPP + + private: + + void pd_initialize() { + _anchor.clear(); + _last_interpreter_fp = NULL; + } + + // The `last' frame is the youngest Java frame on the thread's stack. + frame pd_last_frame() { + assert(has_last_Java_frame(), "must have last_Java_sp() when suspended"); + + intptr_t* sp = last_Java_sp(); + address pc = _anchor.last_Java_pc(); + + // Last_Java_pc ist not set, if we come here from compiled code. + if (pc == NULL) { + pc = (address) *(sp + 2); + } + + return frame(sp, pc); + } + + public: + + void set_base_of_stack_pointer(intptr_t* base_sp) {} + intptr_t* base_of_stack_pointer() { return NULL; } + void record_base_of_stack_pointer() {} + + // These routines are only used on cpu architectures that + // have separate register stacks (Itanium). 
+ static bool register_stack_overflow() { return false; } + static void enable_register_stack_guard() {} + static void disable_register_stack_guard() {} + + bool pd_get_top_frame_for_signal_handler(frame* fr_addr, void* ucontext, bool isInJava); + + protected: + + // -Xprof support + // + // In order to find the last Java fp from an async profile + // tick, we store the current interpreter fp in the thread. + // This value is only valid while we are in the C++ interpreter + // and profiling. + intptr_t *_last_interpreter_fp; + + public: + + static ByteSize last_interpreter_fp_offset() { + return byte_offset_of(JavaThread, _last_interpreter_fp); + } + + intptr_t* last_interpreter_fp() { return _last_interpreter_fp; } + +#endif // OS_CPU_LINUX_PPC_VM_THREAD_LINUX_PPC_HPP diff --git a/src/os_cpu/linux_ppc/vm/vmStructs_linux_ppc.hpp b/src/os_cpu/linux_ppc/vm/vmStructs_linux_ppc.hpp new file mode 100644 --- /dev/null +++ b/src/os_cpu/linux_ppc/vm/vmStructs_linux_ppc.hpp @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright 2012, 2013 SAP AG. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef OS_CPU_LINUX_PPC_VM_VMSTRUCTS_LINUX_PPC_HPP +#define OS_CPU_LINUX_PPC_VM_VMSTRUCTS_LINUX_PPC_HPP + +// These are the OS and CPU-specific fields, types and integer +// constants required by the Serviceability Agent. This file is +// referenced by vmStructs.cpp. 
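Note on pd_last_frame() above: when the anchor carries no last_Java_pc it falls back to pc = (address) *(sp + 2). With 8-byte stack slots that is the doubleword at SP + 16, which under the 64-bit PowerPC ELF ABI is the link-register save slot following the back chain at SP + 0 and the CR save word at SP + 8; these concrete offsets are this note's reading of the ABI, not something the patch spells out. A small sketch of that slot arithmetic, with illustrative names:

// Frame-header slots assumed by the *(sp + 2) access in pd_last_frame().
// Offsets follow the 64-bit PowerPC ELF ABI convention (an assumption of
// this note); slot indices are in units of 8-byte stack words.
#include <cstdint>

enum {
  back_chain_slot = 0,  // SP +  0: caller's stack pointer
  cr_save_slot    = 1,  // SP +  8: saved condition register
  lr_save_slot    = 2   // SP + 16: LR save doubleword -- the pc at which this
                        //          frame resumes (its callee saves LR here)
};

// Fetch the resume pc stored in a frame's LR save slot -- the same access
// pd_last_frame() performs when last_Java_pc is not set.
inline const void* saved_return_pc(const intptr_t* sp) {
  return (const void*)sp[lr_save_slot];
}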
+ +#define VM_STRUCTS_OS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \ + \ + /******************************/ \ + /* Threads (NOTE: incomplete) */ \ + /******************************/ \ + nonstatic_field(OSThread, _thread_id, pid_t) \ + nonstatic_field(OSThread, _pthread_id, pthread_t) + + +#define VM_TYPES_OS_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type) \ + \ + /**********************/ \ + /* Posix Thread IDs */ \ + /**********************/ \ + \ + declare_integer_type(pid_t) \ + declare_unsigned_integer_type(pthread_t) + +#define VM_INT_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) + +#define VM_LONG_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) + +#endif // OS_CPU_LINUX_PPC_VM_VMSTRUCTS_LINUX_PPC_HPP diff --git a/src/share/vm/runtime/globals.hpp b/src/share/vm/runtime/globals.hpp --- a/src/share/vm/runtime/globals.hpp +++ b/src/share/vm/runtime/globals.hpp @@ -3617,7 +3617,7 @@ NOT_LP64(LINUX_ONLY(2*G) NOT_LINUX(0)), \ "Address to allocate shared memory region for class data") \ \ - diagnostic(bool, EnableInvokeDynamic, true PPC64_ONLY(&& false), \ + diagnostic(bool, EnableInvokeDynamic, true, \ "support JSR 292 (method handles, invokedynamic, " \ "anonymous classes") \ \
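Aside on the vmStructs macros above: VM_STRUCTS_OS_CPU and VM_TYPES_OS_CPU only list fields and types by applying caller-supplied declarer macros; vmStructs.cpp expands those lists several times with different definitions of nonstatic_field and declare_*_type to build the Serviceability Agent's tables. The toy program below illustrates only that expansion technique; MY_STRUCTS, OSThreadToy, PRINT_FIELD and CHECK_FIELD are invented for this note and are not HotSpot macros.

// Toy version of the "pass the field-declarer macro in" technique used by
// VM_STRUCTS_OS_CPU: one field list, expanded twice for different purposes.
#include <cstdio>
#include <cstddef>
#include <pthread.h>
#include <sys/types.h>

struct OSThreadToy { pid_t _thread_id; pthread_t _pthread_id; };

#define MY_STRUCTS(nonstatic_field)                       \
  nonstatic_field(OSThreadToy, _thread_id,  pid_t)        \
  nonstatic_field(OSThreadToy, _pthread_id, pthread_t)

// Expansion 1: print the name and byte offset of every listed field.
#define PRINT_FIELD(klass, field, type) \
  printf(#klass "::" #field " @ offset %zu\n", offsetof(klass, field));

// Expansion 2: check at compile time that each field has the listed type.
#define CHECK_FIELD(klass, field, type) \
  { klass dummy_obj; type* p = &dummy_obj.field; (void)p; }

int main() {
  MY_STRUCTS(PRINT_FIELD)
  MY_STRUCTS(CHECK_FIELD)
  return 0;
}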