< prev index next >

src/hotspot/cpu/arm/macroAssembler_arm.cpp

Print this page




1239 #ifdef AARCH64
1240     ldr(ZR, Address(reg));
1241 #else
1242     assert_different_registers(reg, tmp);
1243     if (tmp == noreg) {
1244       tmp = Rtemp;
1245       assert((! Thread::current()->is_Compiler_thread()) ||
1246              (! (ciEnv::current()->task() == NULL)) ||
1247              (! (ciEnv::current()->comp_level() == CompLevel_full_optimization)),
1248              "Rtemp not available in C2"); // explicit tmp register required
1249       // XXX: could we mark the code buffer as not compatible with C2 ?
1250     }
1251     ldr(tmp, Address(reg));
1252 #endif
1253   }
1254 }
1255 
1256 // Puts address of allocated object into register `obj` and end of allocated object into register `obj_end`.
1257 void MacroAssembler::eden_allocate(Register obj, Register obj_end, Register tmp1, Register tmp2,
1258                                  RegisterOrConstant size_expression, Label& slow_case) {
     // Fast-path eden allocation: bump the shared heap-top pointer with a
     // retry loop. tmp1/tmp2 are scratch registers. On failure (no inline
     // allocation support, pointer wrap-around, or heap exhausted) control
     // transfers to `slow_case`.
1259   if (!Universe::heap()->supports_inline_contig_alloc()) {
       // Heap does not support inline contiguous allocation: always slow path.
1260     b(slow_case);
1261     return;
1262   }
1263 
1264   CollectedHeap* ch = Universe::heap();
1265 
1266   const Register top_addr = tmp1;  // holds the address of the heap-top word
1267   const Register heap_end = tmp2;  // holds the current heap end value
1268 
1269   if (size_expression.is_register()) {
1270     assert_different_registers(obj, obj_end, top_addr, heap_end, size_expression.as_register());
1271   } else {
1272     assert_different_registers(obj, obj_end, top_addr, heap_end);
1273   }
1274 
     // Materialize the address of the heap-top word either as an inline
     // movw/movt constant (with a symbolic relocation) or by loading the
     // cached copy from the current thread.
1275   bool load_const = AARCH64_ONLY(false) NOT_AARCH64(VM_Version::supports_movw() ); // TODO-AARCH64 check performance
1276   if (load_const) {
1277     mov_address(top_addr, (address)Universe::heap()->top_addr(), symbolic_Relocation::eden_top_reference);
1278   } else {
1279     ldr(top_addr, Address(Rthread, JavaThread::heap_top_addr_offset()));
1280   }
1281   // Calculate new heap_top by adding the size of the object
1282   Label retry;
1283   bind(retry);
1284 
1285 #ifdef AARCH64
     // AArch64: load-exclusive of the current top; pairs with stxr below.
1286   ldxr(obj, top_addr);
1287 #else
1288   ldr(obj, Address(top_addr));
1289 #endif // AARCH64
1290 
     // Heap end lives at a fixed offset from heap top; reuse top_addr as base.
1291   ldr(heap_end, Address(top_addr, (intptr_t)ch->end_addr() - (intptr_t)ch->top_addr()));
1292   add_rc(obj_end, obj, size_expression);
1293   // Check if obj_end wrapped around, i.e., obj_end < obj. If yes, jump to the slow case.
1294   cmp(obj_end, obj);
1295   b(slow_case, lo);
1296   // Update heap_top if allocation succeeded
1297   cmp(obj_end, heap_end);
1298   b(slow_case, hi);
1299 
1300 #ifdef AARCH64
     // Store-exclusive publishes the new top; non-zero status means another
     // thread intervened, so retry.
1301   stxr(heap_end/*scratched*/, obj_end, top_addr);
1302   cbnz_w(heap_end, retry);
1303 #else
     // ARM32: CAS the heap top from `obj` to `obj_end`; retry on contention.
1304   atomic_cas_bool(obj, obj_end, top_addr, 0, heap_end/*scratched*/);
1305   b(retry, ne);
1306 #endif // AARCH64
1307 }
1308 
1309 // Puts address of allocated object into register `obj` and end of allocated object into register `obj_end`.
1310 void MacroAssembler::tlab_allocate(Register obj, Register obj_end, Register tmp1,
1311                                  RegisterOrConstant size_expression, Label& slow_case) {
     // TLAB allocation: bump the thread-local top pointer. No atomics are
     // used here since the TLAB belongs to the current thread; branches to
     // `slow_case` when the new top would exceed the TLAB end.
1312   const Register tlab_end = tmp1;
1313   assert_different_registers(obj, obj_end, tlab_end);
1314 
1315   ldr(obj, Address(Rthread, JavaThread::tlab_top_offset()));
1316   ldr(tlab_end, Address(Rthread, JavaThread::tlab_end_offset()));
1317   add_rc(obj_end, obj, size_expression);
1318   cmp(obj_end, tlab_end);
1319   b(slow_case, hi);
     // Commit the allocation by storing the new top back into the thread.
1320   str(obj_end, Address(Rthread, JavaThread::tlab_top_offset()));
1321 }
1322 
1323 // Fills memory regions [start..end] with zeroes. Clobbers `start` and `tmp` registers.
1324 void MacroAssembler::zero_memory(Register start, Register end, Register tmp) {
1325   Label loop;
1326   const Register ptr = start;  // running store pointer (clobbers `start`)
1327 
1328 #ifdef AARCH64
1329   // TODO-AARCH64 - compare performance of 2x word zeroing with simple 1x
1330   const Register size = tmp;
1331   Label remaining, done;
1332 
     // size := number of bytes to clear.
1333   sub(size, end, start);
1334 
1335 #ifdef ASSERT
     // Debug-only check that the region length is a multiple of wordSize.
1336   { Label L;
1337     tst(size, wordSize - 1);
1338     b(L, eq);
1339     stop("size is not a multiple of wordSize");
1340     bind(L);
// NOTE(review): the webrev extraction dropped original lines 1341-1342 here
// (presumably the closing `}` of the assert scope and `#endif // ASSERT`) --
// confirm against the repository source before relying on this listing.


1343 
     // Bias size down by one word so the 2-word loop below never overshoots;
     // a possible odd trailing word is stored after the loop.
1344   subs(size, size, wordSize);
1345   b(remaining, le);
1346 
1347   // Zero by 2 words per iteration.
1348   bind(loop);
1349   subs(size, size, 2*wordSize);
1350   stp(ZR, ZR, Address(ptr, 2*wordSize, post_indexed));
1351   b(loop, gt);
1352 
1353   bind(remaining);
     // Flags from the last subs tell whether exactly one word remains.
1354   b(done, ne);
1355   str(ZR, Address(ptr));
1356   bind(done);
1357 #else
     // ARM32: one word per iteration; the store and back-branch are
     // conditionally executed (lo) only while ptr < end.
1358   mov(tmp, 0);
1359   bind(loop);
1360   cmp(ptr, end);
1361   str(tmp, Address(ptr, wordSize, post_indexed), lo);
1362   b(loop, lo);
1363 #endif // AARCH64
1364 }
1365 
1366 void MacroAssembler::incr_allocated_bytes(RegisterOrConstant size_in_bytes, Register tmp) {
     // Adds `size_in_bytes` to the current thread's allocated_bytes counter.
     // `tmp` is a scratch register.
1367 #ifdef AARCH64
     // AArch64: 64-bit registers, so a plain load/add/store suffices.
1368   ldr(tmp, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset())));
1369   add_rc(tmp, tmp, size_in_bytes);
1370   str(tmp, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset())));
1371 #else
1372   // Bump total bytes allocated by this thread
1373   Label done;
1374 
1375   // Borrow the Rthread for alloc counter
1376   Register Ralloc = Rthread;
1377   add(Ralloc, Ralloc, in_bytes(JavaThread::allocated_bytes_offset()));
     // Common case: add to the low word; if no carry out (cc), the
     // conditional store commits and we are done.
1378   ldr(tmp, Address(Ralloc));
1379   adds(tmp, tmp, size_in_bytes);
1380   str(tmp, Address(Ralloc), cc);
1381   b(done, cc);
1382 
1383   // Increment the high word and store single-copy atomically (that is an unlikely scenario on typical embedded systems as it means >4GB has been allocated)
1384   // To do so ldrd/strd instructions used which require an even-odd pair of registers. Such a request could be difficult to satisfy by
1385   // allocating those registers on a higher level, therefore the routine is ready to allocate a pair itself.
1386   Register low, high;
1387   // Select either R0/R1 or R2/R3
1388 
     // Pick a pair that does not alias size_in_bytes when it is in R0 or R1.
1389   if (size_in_bytes.is_register() && (size_in_bytes.as_register() == R0 || size_in_bytes.as_register() == R1)) {
1390     low = R2;
1391     high  = R3;
1392   } else {
1393     low = R0;
1394     high  = R1;
1395   }
     // Preserve the chosen pair across the 64-bit update.
1396   push(RegisterSet(low, high));
1397 
1398   ldrd(low, Address(Ralloc));
1399   adds(low, low, size_in_bytes);
1400   adc(high, high, 0);
1401   strd(low, Address(Ralloc));
1402 
1403   pop(RegisterSet(low, high));
1404 
1405   bind(done);
1406 
1407   // Unborrow the Rthread
1408   sub(Rthread, Ralloc, in_bytes(JavaThread::allocated_bytes_offset()));
1409 #endif // AARCH64
1410 }
1411 
1412 void MacroAssembler::arm_stack_overflow_check(int frame_size_in_bytes, Register tmp) {
1413   // Version of AbstractAssembler::generate_stack_overflow_check optimized for ARM
1414   if (UseStackBanging) {
1415     const int page_size = os::vm_page_size();
1416 
1417     sub_slow(tmp, SP, JavaThread::stack_shadow_zone_size());
1418     strb(R0, Address(tmp));
1419 #ifdef AARCH64
1420     for (; frame_size_in_bytes >= page_size; frame_size_in_bytes -= page_size) {
1421       sub(tmp, tmp, page_size);
1422       strb(R0, Address(tmp));
1423     }
1424 #else
1425     for (; frame_size_in_bytes >= page_size; frame_size_in_bytes -= 0xff0) {
1426       strb(R0, Address(tmp, -0xff0, pre_indexed));
1427     }
1428 #endif // AARCH64




1239 #ifdef AARCH64
1240     ldr(ZR, Address(reg));
1241 #else
1242     assert_different_registers(reg, tmp);
1243     if (tmp == noreg) {
1244       tmp = Rtemp;
1245       assert((! Thread::current()->is_Compiler_thread()) ||
1246              (! (ciEnv::current()->task() == NULL)) ||
1247              (! (ciEnv::current()->comp_level() == CompLevel_full_optimization)),
1248              "Rtemp not available in C2"); // explicit tmp register required
1249       // XXX: could we mark the code buffer as not compatible with C2 ?
1250     }
1251     ldr(tmp, Address(reg));
1252 #endif
1253   }
1254 }
1255 
1256 // Puts address of allocated object into register `obj` and end of allocated object into register `obj_end`.
1257 void MacroAssembler::eden_allocate(Register obj, Register obj_end, Register tmp1, Register tmp2,
1258                                  RegisterOrConstant size_expression, Label& slow_case) {
1259   BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
1260   bs->eden_allocate(this, obj, obj_end, tmp1, tmp2, size_expression, slow_case);














































1261 }
1262 
1263 // Puts address of allocated object into register `obj` and end of allocated object into register `obj_end`.
1264 void MacroAssembler::tlab_allocate(Register obj, Register obj_end, Register tmp1,
1265                                  RegisterOrConstant size_expression, Label& slow_case) {
1266   BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
1267   bs->tlab_allocate(this, obj, obj_end, tmp1, size_expression, slow_case);







1268 }
1269 
1270 // Fills memory regions [start..end] with zeroes. Clobbers `start` and `tmp` registers.
1271 void MacroAssembler::zero_memory(Register start, Register end, Register tmp) {
1272   Label loop;
1273   const Register ptr = start;  // running store pointer (clobbers `start`)
1274 
1275 #ifdef AARCH64
1276   // TODO-AARCH64 - compare performance of 2x word zeroing with simple 1x
1277   const Register size = tmp;
1278   Label remaining, done;
1279 
     // size := number of bytes to clear.
1280   sub(size, end, start);
1281 
1282 #ifdef ASSERT
     // Debug-only check that the region length is a multiple of wordSize.
1283   { Label L;
1284     tst(size, wordSize - 1);
1285     b(L, eq);
1286     stop("size is not a multiple of wordSize");
1287     bind(L);
// NOTE(review): the webrev extraction dropped original lines 1288-1289 here
// (presumably the closing `}` of the assert scope and `#endif // ASSERT`) --
// confirm against the repository source before relying on this listing.


1290 
     // Bias size down by one word so the 2-word loop below never overshoots;
     // a possible odd trailing word is stored after the loop.
1291   subs(size, size, wordSize);
1292   b(remaining, le);
1293 
1294   // Zero by 2 words per iteration.
1295   bind(loop);
1296   subs(size, size, 2*wordSize);
1297   stp(ZR, ZR, Address(ptr, 2*wordSize, post_indexed));
1298   b(loop, gt);
1299 
1300   bind(remaining);
     // Flags from the last subs tell whether exactly one word remains.
1301   b(done, ne);
1302   str(ZR, Address(ptr));
1303   bind(done);
1304 #else
     // ARM32: one word per iteration; the store and back-branch are
     // conditionally executed (lo) only while ptr < end.
1305   mov(tmp, 0);
1306   bind(loop);
1307   cmp(ptr, end);
1308   str(tmp, Address(ptr, wordSize, post_indexed), lo);
1309   b(loop, lo);
// NOTE(review): webrev context lines elided by the extraction below this
// point; the listing resumes at the #endif.















































1310 #endif // AARCH64
1311 }
1312 
1313 void MacroAssembler::arm_stack_overflow_check(int frame_size_in_bytes, Register tmp) {
1314   // Version of AbstractAssembler::generate_stack_overflow_check optimized for ARM
1315   if (UseStackBanging) {
1316     const int page_size = os::vm_page_size();
1317 
1318     sub_slow(tmp, SP, JavaThread::stack_shadow_zone_size());
1319     strb(R0, Address(tmp));
1320 #ifdef AARCH64
1321     for (; frame_size_in_bytes >= page_size; frame_size_in_bytes -= page_size) {
1322       sub(tmp, tmp, page_size);
1323       strb(R0, Address(tmp));
1324     }
1325 #else
1326     for (; frame_size_in_bytes >= page_size; frame_size_in_bytes -= 0xff0) {
1327       strb(R0, Address(tmp, -0xff0, pre_indexed));
1328     }
1329 #endif // AARCH64


< prev index next >