2342 2343 generate_conjoint_int_copy_core(aligned); 2344 2345 // O3, O4 are used as temp registers 2346 inc_counter_np(SharedRuntime::_jint_array_copy_ctr, O3, O4); 2347 __ retl(); 2348 __ delayed()->mov(G0, O0); // return 0 2349 return start; 2350 } 2351 2352 // 2353 // Helper methods for generate_disjoint_long_copy_core() 2354 // 2355 void copy_64_bytes_loop(Register from, Register to, Register count, int count_dec, 2356 Label& L_loop, bool use_prefetch, bool use_bis) { 2357 __ align(OptoLoopAlignment); 2358 __ BIND(L_loop); 2359 for (int off = 0; off < 64; off += 16) { 2360 if (use_prefetch && (off & 31) == 0) { 2361 if (ArraycopySrcPrefetchDistance > 0) { 2362 __ prefetch(from, ArraycopySrcPrefetchDistance, Assembler::severalReads); 2363 } 2364 if (ArraycopyDstPrefetchDistance > 0) { 2365 __ prefetch(to, ArraycopyDstPrefetchDistance, Assembler::severalWritesAndPossiblyReads); 2366 } 2367 } 2368 __ ldx(from, off+0, O4); 2369 __ ldx(from, off+8, O5); 2370 if (use_bis) { 2371 __ stxa(O4, to, off+0); 2372 __ stxa(O5, to, off+8); 2373 } else { 2374 __ stx(O4, to, off+0); 2375 __ stx(O5, to, off+8); 2376 } 2377 } 2378 __ deccc(count, 8); 2379 __ inc(from, 64); 2380 __ brx(Assembler::greaterEqual, false, Assembler::pt, L_loop); 2381 __ delayed()->inc(to, 64); 2382 } 2383 2384 // 2385 // Generate core code for disjoint long copy (and oop copy on 64-bit). | 2342 2343 generate_conjoint_int_copy_core(aligned); 2344 2345 // O3, O4 are used as temp registers 2346 inc_counter_np(SharedRuntime::_jint_array_copy_ctr, O3, O4); 2347 __ retl(); 2348 __ delayed()->mov(G0, O0); // return 0 2349 return start; 2350 } 2351 2352 // 2353 // Helper methods for generate_disjoint_long_copy_core() 2354 // 2355 void copy_64_bytes_loop(Register from, Register to, Register count, int count_dec, 2356 Label& L_loop, bool use_prefetch, bool use_bis) { 2357 __ align(OptoLoopAlignment); 2358 __ BIND(L_loop); 2359 for (int off = 0; off < 64; off += 16) { 2360 if (use_prefetch && (off & 31) == 0) { 2361 if (ArraycopySrcPrefetchDistance > 0) { 2362 __ prefetch(from, ArraycopySrcPrefetchDistance+off, Assembler::severalReads); 2363 } 2364 if (ArraycopyDstPrefetchDistance > 0) { 2365 __ prefetch(to, ArraycopyDstPrefetchDistance+off, Assembler::severalWritesAndPossiblyReads); 2366 } 2367 } 2368 __ ldx(from, off+0, O4); 2369 __ ldx(from, off+8, O5); 2370 if (use_bis) { 2371 __ stxa(O4, to, off+0); 2372 __ stxa(O5, to, off+8); 2373 } else { 2374 __ stx(O4, to, off+0); 2375 __ stx(O5, to, off+8); 2376 } 2377 } 2378 __ deccc(count, 8); 2379 __ inc(from, 64); 2380 __ brx(Assembler::greaterEqual, false, Assembler::pt, L_loop); 2381 __ delayed()->inc(to, 64); 2382 } 2383 2384 // 2385 // Generate core code for disjoint long copy (and oop copy on 64-bit). |