1239 #ifdef AARCH64 1240 ldr(ZR, Address(reg)); 1241 #else 1242 assert_different_registers(reg, tmp); 1243 if (tmp == noreg) { 1244 tmp = Rtemp; 1245 assert((! Thread::current()->is_Compiler_thread()) || 1246 (! (ciEnv::current()->task() == NULL)) || 1247 (! (ciEnv::current()->comp_level() == CompLevel_full_optimization)), 1248 "Rtemp not available in C2"); // explicit tmp register required 1249 // XXX: could we mark the code buffer as not compatible with C2 ? 1250 } 1251 ldr(tmp, Address(reg)); 1252 #endif 1253 } 1254 } 1255 1256 // Puts address of allocated object into register `obj` and end of allocated object into register `obj_end`. 1257 void MacroAssembler::eden_allocate(Register obj, Register obj_end, Register tmp1, Register tmp2, 1258 RegisterOrConstant size_expression, Label& slow_case) { 1259 if (!Universe::heap()->supports_inline_contig_alloc()) { 1260 b(slow_case); 1261 return; 1262 } 1263 1264 CollectedHeap* ch = Universe::heap(); 1265 1266 const Register top_addr = tmp1; 1267 const Register heap_end = tmp2; 1268 1269 if (size_expression.is_register()) { 1270 assert_different_registers(obj, obj_end, top_addr, heap_end, size_expression.as_register()); 1271 } else { 1272 assert_different_registers(obj, obj_end, top_addr, heap_end); 1273 } 1274 1275 bool load_const = AARCH64_ONLY(false) NOT_AARCH64(VM_Version::supports_movw() ); // TODO-AARCH64 check performance 1276 if (load_const) { 1277 mov_address(top_addr, (address)Universe::heap()->top_addr(), symbolic_Relocation::eden_top_reference); 1278 } else { 1279 ldr(top_addr, Address(Rthread, JavaThread::heap_top_addr_offset())); 1280 } 1281 // Calculate new heap_top by adding the size of the object 1282 Label retry; 1283 bind(retry); 1284 1285 #ifdef AARCH64 1286 ldxr(obj, top_addr); 1287 #else 1288 ldr(obj, Address(top_addr)); 1289 #endif // AARCH64 1290 1291 ldr(heap_end, Address(top_addr, (intptr_t)ch->end_addr() - (intptr_t)ch->top_addr())); 1292 add_rc(obj_end, obj, size_expression); 1293 // Check if obj_end wrapped around, i.e., obj_end < obj. If yes, jump to the slow case. 1294 cmp(obj_end, obj); 1295 b(slow_case, lo); 1296 // Update heap_top if allocation succeeded 1297 cmp(obj_end, heap_end); 1298 b(slow_case, hi); 1299 1300 #ifdef AARCH64 1301 stxr(heap_end/*scratched*/, obj_end, top_addr); 1302 cbnz_w(heap_end, retry); 1303 #else 1304 atomic_cas_bool(obj, obj_end, top_addr, 0, heap_end/*scratched*/); 1305 b(retry, ne); 1306 #endif // AARCH64 1307 } 1308 1309 // Puts address of allocated object into register `obj` and end of allocated object into register `obj_end`. 1310 void MacroAssembler::tlab_allocate(Register obj, Register obj_end, Register tmp1, 1311 RegisterOrConstant size_expression, Label& slow_case) { 1312 const Register tlab_end = tmp1; 1313 assert_different_registers(obj, obj_end, tlab_end); 1314 1315 ldr(obj, Address(Rthread, JavaThread::tlab_top_offset())); 1316 ldr(tlab_end, Address(Rthread, JavaThread::tlab_end_offset())); 1317 add_rc(obj_end, obj, size_expression); 1318 cmp(obj_end, tlab_end); 1319 b(slow_case, hi); 1320 str(obj_end, Address(Rthread, JavaThread::tlab_top_offset())); 1321 } 1322 1323 // Fills memory regions [start..end] with zeroes. Clobbers `start` and `tmp` registers. 1324 void MacroAssembler::zero_memory(Register start, Register end, Register tmp) { 1325 Label loop; 1326 const Register ptr = start; 1327 1328 #ifdef AARCH64 1329 // TODO-AARCH64 - compare performance of 2x word zeroing with simple 1x 1330 const Register size = tmp; 1331 Label remaining, done; 1332 1333 sub(size, end, start); 1334 1335 #ifdef ASSERT 1336 { Label L; 1337 tst(size, wordSize - 1); 1338 b(L, eq); 1339 stop("size is not a multiple of wordSize"); 1340 bind(L); 1343 1344 subs(size, size, wordSize); 1345 b(remaining, le); 1346 1347 // Zero by 2 words per iteration. 1348 bind(loop); 1349 subs(size, size, 2*wordSize); 1350 stp(ZR, ZR, Address(ptr, 2*wordSize, post_indexed)); 1351 b(loop, gt); 1352 1353 bind(remaining); 1354 b(done, ne); 1355 str(ZR, Address(ptr)); 1356 bind(done); 1357 #else 1358 mov(tmp, 0); 1359 bind(loop); 1360 cmp(ptr, end); 1361 str(tmp, Address(ptr, wordSize, post_indexed), lo); 1362 b(loop, lo); 1363 #endif // AARCH64 1364 } 1365 1366 void MacroAssembler::incr_allocated_bytes(RegisterOrConstant size_in_bytes, Register tmp) { 1367 #ifdef AARCH64 1368 ldr(tmp, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset()))); 1369 add_rc(tmp, tmp, size_in_bytes); 1370 str(tmp, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset()))); 1371 #else 1372 // Bump total bytes allocated by this thread 1373 Label done; 1374 1375 // Borrow the Rthread for alloc counter 1376 Register Ralloc = Rthread; 1377 add(Ralloc, Ralloc, in_bytes(JavaThread::allocated_bytes_offset())); 1378 ldr(tmp, Address(Ralloc)); 1379 adds(tmp, tmp, size_in_bytes); 1380 str(tmp, Address(Ralloc), cc); 1381 b(done, cc); 1382 1383 // Increment the high word and store single-copy atomically (that is an unlikely scenario on typical embedded systems as it means >4GB has been allocated) 1384 // To do so ldrd/strd instructions used which require an even-odd pair of registers. Such a request could be difficult to satisfy by 1385 // allocating those registers on a higher level, therefore the routine is ready to allocate a pair itself. 1386 Register low, high; 1387 // Select ether R0/R1 or R2/R3 1388 1389 if (size_in_bytes.is_register() && (size_in_bytes.as_register() == R0 || size_in_bytes.as_register() == R1)) { 1390 low = R2; 1391 high = R3; 1392 } else { 1393 low = R0; 1394 high = R1; 1395 } 1396 push(RegisterSet(low, high)); 1397 1398 ldrd(low, Address(Ralloc)); 1399 adds(low, low, size_in_bytes); 1400 adc(high, high, 0); 1401 strd(low, Address(Ralloc)); 1402 1403 pop(RegisterSet(low, high)); 1404 1405 bind(done); 1406 1407 // Unborrow the Rthread 1408 sub(Rthread, Ralloc, in_bytes(JavaThread::allocated_bytes_offset())); 1409 #endif // AARCH64 1410 } 1411 1412 void MacroAssembler::arm_stack_overflow_check(int frame_size_in_bytes, Register tmp) { 1413 // Version of AbstractAssembler::generate_stack_overflow_check optimized for ARM 1414 if (UseStackBanging) { 1415 const int page_size = os::vm_page_size(); 1416 1417 sub_slow(tmp, SP, JavaThread::stack_shadow_zone_size()); 1418 strb(R0, Address(tmp)); 1419 #ifdef AARCH64 1420 for (; frame_size_in_bytes >= page_size; frame_size_in_bytes -= page_size) { 1421 sub(tmp, tmp, page_size); 1422 strb(R0, Address(tmp)); 1423 } 1424 #else 1425 for (; frame_size_in_bytes >= page_size; frame_size_in_bytes -= 0xff0) { 1426 strb(R0, Address(tmp, -0xff0, pre_indexed)); 1427 } 1428 #endif // AARCH64 | 1239 #ifdef AARCH64 1240 ldr(ZR, Address(reg)); 1241 #else 1242 assert_different_registers(reg, tmp); 1243 if (tmp == noreg) { 1244 tmp = Rtemp; 1245 assert((! Thread::current()->is_Compiler_thread()) || 1246 (! (ciEnv::current()->task() == NULL)) || 1247 (! (ciEnv::current()->comp_level() == CompLevel_full_optimization)), 1248 "Rtemp not available in C2"); // explicit tmp register required 1249 // XXX: could we mark the code buffer as not compatible with C2 ? 1250 } 1251 ldr(tmp, Address(reg)); 1252 #endif 1253 } 1254 } 1255 1256 // Puts address of allocated object into register `obj` and end of allocated object into register `obj_end`. 1257 void MacroAssembler::eden_allocate(Register obj, Register obj_end, Register tmp1, Register tmp2, 1258 RegisterOrConstant size_expression, Label& slow_case) { 1259 BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); 1260 bs->eden_allocate(this, obj, obj_end, tmp1, tmp2, size_expression, slow_case); 1261 } 1262 1263 // Puts address of allocated object into register `obj` and end of allocated object into register `obj_end`. 1264 void MacroAssembler::tlab_allocate(Register obj, Register obj_end, Register tmp1, 1265 RegisterOrConstant size_expression, Label& slow_case) { 1266 BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); 1267 bs->tlab_allocate(this, obj, obj_end, tmp1, size_expression, slow_case); 1268 } 1269 1270 // Fills memory regions [start..end] with zeroes. Clobbers `start` and `tmp` registers. 1271 void MacroAssembler::zero_memory(Register start, Register end, Register tmp) { 1272 Label loop; 1273 const Register ptr = start; 1274 1275 #ifdef AARCH64 1276 // TODO-AARCH64 - compare performance of 2x word zeroing with simple 1x 1277 const Register size = tmp; 1278 Label remaining, done; 1279 1280 sub(size, end, start); 1281 1282 #ifdef ASSERT 1283 { Label L; 1284 tst(size, wordSize - 1); 1285 b(L, eq); 1286 stop("size is not a multiple of wordSize"); 1287 bind(L); 1290 1291 subs(size, size, wordSize); 1292 b(remaining, le); 1293 1294 // Zero by 2 words per iteration. 1295 bind(loop); 1296 subs(size, size, 2*wordSize); 1297 stp(ZR, ZR, Address(ptr, 2*wordSize, post_indexed)); 1298 b(loop, gt); 1299 1300 bind(remaining); 1301 b(done, ne); 1302 str(ZR, Address(ptr)); 1303 bind(done); 1304 #else 1305 mov(tmp, 0); 1306 bind(loop); 1307 cmp(ptr, end); 1308 str(tmp, Address(ptr, wordSize, post_indexed), lo); 1309 b(loop, lo); 1310 #endif // AARCH64 1311 } 1312 1313 void MacroAssembler::arm_stack_overflow_check(int frame_size_in_bytes, Register tmp) { 1314 // Version of AbstractAssembler::generate_stack_overflow_check optimized for ARM 1315 if (UseStackBanging) { 1316 const int page_size = os::vm_page_size(); 1317 1318 sub_slow(tmp, SP, JavaThread::stack_shadow_zone_size()); 1319 strb(R0, Address(tmp)); 1320 #ifdef AARCH64 1321 for (; frame_size_in_bytes >= page_size; frame_size_in_bytes -= page_size) { 1322 sub(tmp, tmp, page_size); 1323 strb(R0, Address(tmp)); 1324 } 1325 #else 1326 for (; frame_size_in_bytes >= page_size; frame_size_in_bytes -= 0xff0) { 1327 strb(R0, Address(tmp, -0xff0, pre_indexed)); 1328 } 1329 #endif // AARCH64 |