src/cpu/ppc/vm/templateTable_ppc_64.cpp

Print this page
rev 6728 : 8050942: PPC64: implement template interpreter for ppc64le
Contributed-by: asmundak@google.com

*** 186,197 **** // calls out to InterpreterRuntime::resolve_get_put to do // additional, required work. assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range"); assert(load_bc_into_bc_reg, "we use bc_reg as temp"); __ get_cache_and_index_at_bcp(Rtemp /* dst = cache */, 1); ! // Big Endian: ((*(cache+indices))>>((1+byte_no)*8))&0xFF __ lbz(Rnew_bc, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::indices_offset()) + 7 - (1 + byte_no), Rtemp); __ cmpwi(CCR0, Rnew_bc, 0); __ li(Rnew_bc, (unsigned int)(unsigned char)new_bc); __ beq(CCR0, L_patch_done); // __ isync(); // acquire not needed break; --- 186,201 ---- // calls out to InterpreterRuntime::resolve_get_put to do // additional, required work. assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range"); assert(load_bc_into_bc_reg, "we use bc_reg as temp"); __ get_cache_and_index_at_bcp(Rtemp /* dst = cache */, 1); ! // ((*(cache+indices))>>((1+byte_no)*8))&0xFF: ! #if defined(VM_LITTLE_ENDIAN) ! __ lbz(Rnew_bc, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::indices_offset()) + 1 + byte_no, Rtemp); ! #else __ lbz(Rnew_bc, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::indices_offset()) + 7 - (1 + byte_no), Rtemp); + #endif __ cmpwi(CCR0, Rnew_bc, 0); __ li(Rnew_bc, (unsigned int)(unsigned char)new_bc); __ beq(CCR0, L_patch_done); // __ isync(); // acquire not needed break;
*** 1836,1847 **** // Align bcp. __ addi(Rdef_offset_addr, R14_bcp, BytesPerInt); __ clrrdi(Rdef_offset_addr, Rdef_offset_addr, log2_long((jlong)BytesPerInt)); // Load lo & hi. ! __ lwz(Rlow_byte, BytesPerInt, Rdef_offset_addr); ! __ lwz(Rhigh_byte, BytesPerInt * 2, Rdef_offset_addr); // Check for default case (=index outside [low,high]). __ cmpw(CCR0, R17_tos, Rlow_byte); __ cmpw(CCR1, R17_tos, Rhigh_byte); __ blt(CCR0, Ldefault_case); --- 1840,1851 ---- // Align bcp. __ addi(Rdef_offset_addr, R14_bcp, BytesPerInt); __ clrrdi(Rdef_offset_addr, Rdef_offset_addr, log2_long((jlong)BytesPerInt)); // Load lo & hi. ! __ get_u4(Rlow_byte, Rdef_offset_addr, BytesPerInt, InterpreterMacroAssembler::Unsigned); ! __ get_u4(Rhigh_byte, Rdef_offset_addr, 2 * BytesPerInt, InterpreterMacroAssembler::Unsigned); // Check for default case (=index outside [low,high]). __ cmpw(CCR0, R17_tos, Rlow_byte); __ cmpw(CCR1, R17_tos, Rhigh_byte); __ blt(CCR0, Ldefault_case);
*** 1851,1866 **** __ sub(Rindex, R17_tos, Rlow_byte); __ extsw(Rindex, Rindex); __ profile_switch_case(Rindex, Rhigh_byte /* scratch */, Rscratch1, Rscratch2); __ sldi(Rindex, Rindex, LogBytesPerInt); __ addi(Rindex, Rindex, 3 * BytesPerInt); __ lwax(Roffset, Rdef_offset_addr, Rindex); __ b(Ldispatch); __ bind(Ldefault_case); __ profile_switch_default(Rhigh_byte, Rscratch1); ! __ lwa(Roffset, 0, Rdef_offset_addr); __ bind(Ldispatch); __ add(R14_bcp, Roffset, R14_bcp); __ dispatch_next(vtos); --- 1855,1875 ---- __ sub(Rindex, R17_tos, Rlow_byte); __ extsw(Rindex, Rindex); __ profile_switch_case(Rindex, Rhigh_byte /* scratch */, Rscratch1, Rscratch2); __ sldi(Rindex, Rindex, LogBytesPerInt); __ addi(Rindex, Rindex, 3 * BytesPerInt); + #if defined(VM_LITTLE_ENDIAN) + __ lwbrx(Roffset, Rdef_offset_addr, Rindex); + __ extsw(Roffset, Roffset); + #else __ lwax(Roffset, Rdef_offset_addr, Rindex); + #endif __ b(Ldispatch); __ bind(Ldefault_case); __ profile_switch_default(Rhigh_byte, Rscratch1); ! __ get_u4(Roffset, Rdef_offset_addr, 0, InterpreterMacroAssembler::Signed); __ bind(Ldispatch); __ add(R14_bcp, Roffset, R14_bcp); __ dispatch_next(vtos);
*** 1872,1882 **** } // Table switch using linear search through cases. // Bytecode stream format: // Bytecode (1) | 4-byte padding | default offset (4) | count (4) | value/offset pair1 (8) | value/offset pair2 (8) | ... ! // Note: Everything is big-endian format here. So on little endian machines, we have to revers offset and count and cmp value. void TemplateTable::fast_linearswitch() { transition(itos, vtos); Label Lloop_entry, Lsearch_loop, Lfound, Lcontinue_execution, Ldefault_case; --- 1881,1891 ---- } // Table switch using linear search through cases. // Bytecode stream format: // Bytecode (1) | 4-byte padding | default offset (4) | count (4) | value/offset pair1 (8) | value/offset pair2 (8) | ... ! // Note: Everything is big-endian format here. void TemplateTable::fast_linearswitch() { transition(itos, vtos); Label Lloop_entry, Lsearch_loop, Lfound, Lcontinue_execution, Ldefault_case;
*** 1891,1901 **** // Align bcp. __ addi(Rdef_offset_addr, R14_bcp, BytesPerInt); __ clrrdi(Rdef_offset_addr, Rdef_offset_addr, log2_long((jlong)BytesPerInt)); // Setup loop counter and limit. ! __ lwz(Rcount, BytesPerInt, Rdef_offset_addr); // Load count. __ addi(Rcurrent_pair, Rdef_offset_addr, 2 * BytesPerInt); // Rcurrent_pair now points to first pair. // Set up search loop. __ cmpwi(CCR0, Rcount, 0); __ beq(CCR0, Ldefault_case); --- 1900,1910 ---- // Align bcp. __ addi(Rdef_offset_addr, R14_bcp, BytesPerInt); __ clrrdi(Rdef_offset_addr, Rdef_offset_addr, log2_long((jlong)BytesPerInt)); // Setup loop counter and limit. ! __ get_u4(Rcount, Rdef_offset_addr, BytesPerInt, InterpreterMacroAssembler::Unsigned); __ addi(Rcurrent_pair, Rdef_offset_addr, 2 * BytesPerInt); // Rcurrent_pair now points to first pair. // Set up search loop. __ cmpwi(CCR0, Rcount, 0); __ beq(CCR0, Ldefault_case);
*** 1903,1925 **** __ mtctr(Rcount); // linear table search __ bind(Lsearch_loop); ! __ lwz(Rvalue, 0, Rcurrent_pair); ! __ lwa(Roffset, 1 * BytesPerInt, Rcurrent_pair); __ cmpw(CCR0, Rvalue, Rcmp_value); __ beq(CCR0, Lfound); __ addi(Rcurrent_pair, Rcurrent_pair, 2 * BytesPerInt); __ bdnz(Lsearch_loop); // default case __ bind(Ldefault_case); ! __ lwa(Roffset, 0, Rdef_offset_addr); if (ProfileInterpreter) { __ profile_switch_default(Rdef_offset_addr, Rcount/* scratch */); __ b(Lcontinue_execution); } --- 1912,1936 ---- __ mtctr(Rcount); // linear table search __ bind(Lsearch_loop); ! // TODO(asmundak): there is no need to fetch bytecode offset immediately, ! // do it only when we have found the matching value. ! __ get_u4(Rvalue, Rcurrent_pair, 0, InterpreterMacroAssembler::Unsigned); ! __ get_u4(Roffset, Rcurrent_pair, BytesPerInt, InterpreterMacroAssembler::Signed); __ cmpw(CCR0, Rvalue, Rcmp_value); __ beq(CCR0, Lfound); __ addi(Rcurrent_pair, Rcurrent_pair, 2 * BytesPerInt); __ bdnz(Lsearch_loop); // default case __ bind(Ldefault_case); ! __ get_u4(Roffset, Rdef_offset_addr, 0, InterpreterMacroAssembler::Signed); if (ProfileInterpreter) { __ profile_switch_default(Rdef_offset_addr, Rcount/* scratch */); __ b(Lcontinue_execution); }
*** 1987,1997 **** __ addi(Rarray, R14_bcp, 3 * BytesPerInt); __ clrrdi(Rarray, Rarray, log2_long((jlong)BytesPerInt)); // initialize i & j __ li(Ri,0); ! __ lwz(Rj, -BytesPerInt, Rarray); // and start. Label entry; __ b(entry); --- 1998,2008 ---- __ addi(Rarray, R14_bcp, 3 * BytesPerInt); __ clrrdi(Rarray, Rarray, log2_long((jlong)BytesPerInt)); // initialize i & j __ li(Ri,0); ! __ get_u4(Rj, Rarray, -BytesPerInt, InterpreterMacroAssembler::Unsigned); // and start. Label entry; __ b(entry);
*** 2004,2014 **** --- 2015,2029 ---- // j = h; // } else { // i = h; // } __ sldi(Rscratch, Rh, log_entry_size); + #if defined(VM_LITTLE_ENDIAN) + __ lwbrx(Rscratch, Rscratch, Rarray); + #else __ lwzx(Rscratch, Rscratch, Rarray); + #endif // if (key < current value) // Rh = Rj // else // Rh = Ri
*** 2036,2059 **** __ mr(Rh, Ri); // Save index in i for profiling. } // Ri = value offset __ sldi(Ri, Ri, log_entry_size); __ add(Ri, Ri, Rarray); ! __ lwz(Rscratch, 0, Ri); Label not_found; // Ri = offset offset __ cmpw(CCR0, Rkey, Rscratch); __ beq(CCR0, not_found); // entry not found -> j = default offset ! __ lwz(Rj, -2 * BytesPerInt, Rarray); __ b(default_case); __ bind(not_found); // entry found -> j = offset __ profile_switch_case(Rh, Rj, Rscratch, Rkey); ! __ lwz(Rj, BytesPerInt, Ri); if (ProfileInterpreter) { __ b(continue_execution); } --- 2051,2074 ---- __ mr(Rh, Ri); // Save index in i for profiling. } // Ri = value offset __ sldi(Ri, Ri, log_entry_size); __ add(Ri, Ri, Rarray); ! __ get_u4(Rscratch, Ri, 0, InterpreterMacroAssembler::Unsigned); Label not_found; // Ri = offset offset __ cmpw(CCR0, Rkey, Rscratch); __ beq(CCR0, not_found); // entry not found -> j = default offset ! __ get_u4(Rj, Rarray, -2 * BytesPerInt, InterpreterMacroAssembler::Unsigned); __ b(default_case); __ bind(not_found); // entry found -> j = offset __ profile_switch_case(Rh, Rj, Rscratch, Rkey); ! __ get_u4(Rj, Ri, BytesPerInt, InterpreterMacroAssembler::Unsigned); if (ProfileInterpreter) { __ b(continue_execution); }
*** 2144,2155 **** __ get_cache_and_index_at_bcp(Rcache, 1, index_size); Label Lresolved, Ldone; assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range"); // We are resolved if the indices offset contains the current bytecode. ! // Big Endian: __ lbz(Rscratch, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::indices_offset()) + 7 - (byte_no + 1), Rcache); // Acquire by cmp-br-isync (see below). __ cmpdi(CCR0, Rscratch, (int)bytecode()); __ beq(CCR0, Lresolved); address entry = NULL; --- 2159,2173 ---- __ get_cache_and_index_at_bcp(Rcache, 1, index_size); Label Lresolved, Ldone; assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range"); // We are resolved if the indices offset contains the current bytecode. ! #if defined(VM_LITTLE_ENDIAN) ! __ lbz(Rscratch, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::indices_offset()) + byte_no + 1, Rcache); ! #else __ lbz(Rscratch, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::indices_offset()) + 7 - (byte_no + 1), Rcache); + #endif // Acquire by cmp-br-isync (see below). __ cmpdi(CCR0, Rscratch, (int)bytecode()); __ beq(CCR0, Lresolved); address entry = NULL;