1 /*
2 * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
3 * Copyright 2012, 2014 SAP AG. All rights reserved.
4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5 *
6 * This code is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 only, as
8 * published by the Free Software Foundation.
9 *
10 * This code is distributed in the hope that it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
13 * version 2 for more details (a copy is included in the LICENSE file that
14 * accompanied this code).
15 *
16 * You should have received a copy of the GNU General Public License version
17 * 2 along with this work; if not, write to the Free Software Foundation,
18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
19 *
20 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
21 * or visit www.oracle.com if you need additional information or have any
22 * questions.
23 *
68 case b_op: m = li(-1); v = li(disp(dest_pos, inst_pos)); break;
69 case bc_op: m = bd(-1); v = bd(disp(dest_pos, inst_pos)); break;
70 default: ShouldNotReachHere();
71 }
72 return inst & ~m | v;
73 }
74
75 // Return the offset, relative to _code_begin, of the destination of
76 // the branch inst at offset pos.
77 int Assembler::branch_destination(int inst, int pos) {
78 int r = 0;
79 switch (inv_op_ppc(inst)) {
80 case b_op: r = bxx_destination_offset(inst, pos); break;
81 case bc_op: r = inv_bd_field(inst, pos); break;
82 default: ShouldNotReachHere();
83 }
84 return r;
85 }
86
87 // Low-level andi-one-instruction-macro.
88 void Assembler::andi(Register a, Register s, const int ui16) {
89 assert(is_uimm(ui16, 16), "must be 16-bit unsigned immediate");
90 if (is_power_of_2_long(((jlong) ui16)+1)) {
91 // pow2minus1
92 clrldi(a, s, 64-log2_long((((jlong) ui16)+1)));
93 } else if (is_power_of_2_long((jlong) ui16)) {
94 // pow2
95 rlwinm(a, s, 0, 31-log2_long((jlong) ui16), 31-log2_long((jlong) ui16));
96 } else if (is_power_of_2_long((jlong)-ui16)) {
97 // negpow2
98 clrrdi(a, s, log2_long((jlong)-ui16));
99 } else {
100 andi_(a, s, ui16);
101 }
102 }
103
104 // RegisterOrConstant version.
105 void Assembler::ld(Register d, RegisterOrConstant roc, Register s1) {
106 if (roc.is_constant()) {
107 if (s1 == noreg) {
108 int simm16_rest = load_const_optimized(d, roc.as_constant(), noreg, true);
109 Assembler::ld(d, simm16_rest, d);
110 } else if (is_simm(roc.as_constant(), 16)) {
111 Assembler::ld(d, roc.as_constant(), s1);
112 } else {
113 load_const_optimized(d, roc.as_constant());
114 Assembler::ldx(d, d, s1);
115 }
116 } else {
117 if (s1 == noreg)
118 Assembler::ld(d, 0, roc.as_register());
119 else
339 Assembler::ori( d, d, (unsigned int)xb);
340 Assembler::sldi(d, d, 32);
341 Assembler::oris(d, d, (unsigned int)xc);
342 Assembler::ori( d, d, (unsigned int)xd);
343 } else {
344 // exploit instruction level parallelism if we have a tmp register
345 assert_different_registers(d, tmp);
346 Assembler::lis(tmp, (int)(short)xa);
347 Assembler::lis(d, (int)(short)xc);
348 Assembler::ori(tmp, tmp, (unsigned int)xb);
349 Assembler::ori(d, d, (unsigned int)xd);
350 Assembler::insrdi(d, tmp, 32, 0);
351 }
352 }
353
// Load a 64-bit constant, optimized, not identifiable (the emitted
// instruction sequence depends on the value).
// Tmp can be used to increase ILP. Set return_simm16_rest = true to get a
// 16-bit immediate offset to be folded into a subsequent instruction.
// Decomposes x into four 16-bit chunks (xa:xb:xc:xd, high to low) and emits
// the shortest li/lis/addi/addis/sldi/ori/insrdi sequence that materializes
// x in d.  If return_simm16_rest is true, the low 16 bits may be omitted
// from the emitted sequence and returned instead, so the caller can fold
// them into the displacement field of a following d-form instruction.
int Assembler::load_const_optimized(Register d, long x, Register tmp, bool return_simm16_rest) {
  // Avoid accidentally trying to use R0 for indexed addressing.
  assert(d != R0, "R0 not allowed");
  assert_different_registers(d, tmp);

  short xa, xb, xc, xd; // Four 16-bit chunks of const.
  long rem = x; // Remaining part of const.

  xd = rem & 0xFFFF; // Lowest 16-bit chunk.
  // li/addi sign-extend the low chunk; if its sign bit is set, add 1 to the
  // remaining upper part to compensate for the implicit -0x10000.
  rem = (rem >> 16) + ((unsigned short)xd >> 15); // Compensation for sign extend.

  if (rem == 0) { // opt 1: simm16
    li(d, xd);
    return 0;
  }

  xc = rem & 0xFFFF; // Next 16-bit chunk.
  rem = (rem >> 16) + ((unsigned short)xc >> 15); // Compensation for sign extend.

  if (rem == 0) { // opt 2: simm32
    lis(d, xc);
  } else { // High 32 bits needed.

    if (tmp != noreg) { // opt 3: We have a temp reg.
      // Build the high word in tmp and the low word in d independently
      // (instruction-level parallelism), then merge them with insrdi.
      // No carry propagation between xc and higher chunks here (use logical instructions).
      xa = (x >> 48) & 0xffff;
      xb = (x >> 32) & 0xffff; // No sign compensation, we use lis+ori or li to allow usage of R0.
      // li(tmp, xb) sign-extends; an explicit lis of xa is only needed when
      // xa is non-zero or that sign extension would be wrong (xb negative).
      bool load_xa = (xa != 0) || (xb < 0);
      bool return_xd = false;

      if (load_xa) { lis(tmp, xa); }
      if (xc) { lis(d, xc); }
      if (load_xa) {
        if (xb) { ori(tmp, tmp, (unsigned short)xb); } // No addi, we support tmp == R0.
      } else {
        li(tmp, xb); // non-negative
      }
      if (xc) {
        if (return_simm16_rest && xd >= 0) { return_xd = true; } // >= 0 to avoid carry propagation after insrdi/rldimi.
        else if (xd) { addi(d, d, xd); }
      } else {
        li(d, xd);
      }
      insrdi(d, tmp, 32, 0); // Insert high word (tmp) into bits 0..31 of d.
      return return_xd ? xd : 0; // non-negative
    }

    // No temp register: build the value serially in d, highest chunks first.
    xb = rem & 0xFFFF; // Next 16-bit chunk.
    rem = (rem >> 16) + ((unsigned short)xb >> 15); // Compensation for sign extend.

    xa = rem & 0xFFFF; // Highest 16-bit chunk.

    // opt 4: avoid adding 0
    if (xa) { // Highest 16-bit needed?
      lis(d, xa);
      if (xb) { addi(d, d, xb); }
    } else {
      li(d, xb);
    }
    sldi(d, d, 32);
    if (xc) { addis(d, d, xc); }
  }

  // opt 5: Return offset to be inserted into following instruction.
  if (return_simm16_rest) return xd;

  if (xd) { addi(d, d, xd); }
  return 0;
}
426
427 #ifndef PRODUCT
428 // Test of ppc assembler.
429 void Assembler::test_asm() {
430 // PPC 1, section 3.3.8, Fixed-Point Arithmetic Instructions
431 addi( R0, R1, 10);
432 addis( R5, R2, 11);
433 addic_( R3, R31, 42);
434 subfic( R21, R12, 2112);
435 add( R3, R2, R1);
436 add_( R11, R22, R30);
437 subf( R7, R6, R5);
438 subf_( R8, R9, R4);
439 addc( R11, R12, R13);
440 addc_( R14, R14, R14);
441 subfc( R15, R16, R17);
442 subfc_( R18, R20, R19);
443 adde( R20, R22, R24);
444 adde_( R29, R27, R26);
|
1 /*
2 * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
3 * Copyright 2012, 2015 SAP AG. All rights reserved.
4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5 *
6 * This code is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 only, as
8 * published by the Free Software Foundation.
9 *
10 * This code is distributed in the hope that it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
13 * version 2 for more details (a copy is included in the LICENSE file that
14 * accompanied this code).
15 *
16 * You should have received a copy of the GNU General Public License version
17 * 2 along with this work; if not, write to the Free Software Foundation,
18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
19 *
20 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
21 * or visit www.oracle.com if you need additional information or have any
22 * questions.
23 *
68 case b_op: m = li(-1); v = li(disp(dest_pos, inst_pos)); break;
69 case bc_op: m = bd(-1); v = bd(disp(dest_pos, inst_pos)); break;
70 default: ShouldNotReachHere();
71 }
72 return inst & ~m | v;
73 }
74
75 // Return the offset, relative to _code_begin, of the destination of
76 // the branch inst at offset pos.
77 int Assembler::branch_destination(int inst, int pos) {
78 int r = 0;
79 switch (inv_op_ppc(inst)) {
80 case b_op: r = bxx_destination_offset(inst, pos); break;
81 case bc_op: r = inv_bd_field(inst, pos); break;
82 default: ShouldNotReachHere();
83 }
84 return r;
85 }
86
87 // Low-level andi-one-instruction-macro.
88 void Assembler::andi(Register a, Register s, const long ui16) {
89 if (is_power_of_2_long(((jlong) ui16)+1)) {
90 // pow2minus1
91 clrldi(a, s, 64-log2_long((((jlong) ui16)+1)));
92 } else if (is_power_of_2_long((jlong) ui16)) {
93 // pow2
94 rlwinm(a, s, 0, 31-log2_long((jlong) ui16), 31-log2_long((jlong) ui16));
95 } else if (is_power_of_2_long((jlong)-ui16)) {
96 // negpow2
97 clrrdi(a, s, log2_long((jlong)-ui16));
98 } else {
99 assert(is_uimm(ui16, 16), "must be 16-bit unsigned immediate");
100 andi_(a, s, ui16);
101 }
102 }
103
104 // RegisterOrConstant version.
105 void Assembler::ld(Register d, RegisterOrConstant roc, Register s1) {
106 if (roc.is_constant()) {
107 if (s1 == noreg) {
108 int simm16_rest = load_const_optimized(d, roc.as_constant(), noreg, true);
109 Assembler::ld(d, simm16_rest, d);
110 } else if (is_simm(roc.as_constant(), 16)) {
111 Assembler::ld(d, roc.as_constant(), s1);
112 } else {
113 load_const_optimized(d, roc.as_constant());
114 Assembler::ldx(d, d, s1);
115 }
116 } else {
117 if (s1 == noreg)
118 Assembler::ld(d, 0, roc.as_register());
119 else
339 Assembler::ori( d, d, (unsigned int)xb);
340 Assembler::sldi(d, d, 32);
341 Assembler::oris(d, d, (unsigned int)xc);
342 Assembler::ori( d, d, (unsigned int)xd);
343 } else {
344 // exploit instruction level parallelism if we have a tmp register
345 assert_different_registers(d, tmp);
346 Assembler::lis(tmp, (int)(short)xa);
347 Assembler::lis(d, (int)(short)xc);
348 Assembler::ori(tmp, tmp, (unsigned int)xb);
349 Assembler::ori(d, d, (unsigned int)xd);
350 Assembler::insrdi(d, tmp, 32, 0);
351 }
352 }
353
// Load a 64-bit constant, optimized, not identifiable (the emitted
// instruction sequence depends on the value).
// Tmp can be used to increase ILP. Set return_simm16_rest = true to get a
// 16-bit immediate offset to be folded into a subsequent instruction.
357 int Assembler::load_const_optimized(Register d, long x, Register tmp, bool return_simm16_rest) {
358 // Avoid accidentally trying to use R0 for indexed addressing.
359 assert_different_registers(d, tmp);
360
361 short xa, xb, xc, xd; // Four 16-bit chunks of const.
362 long rem = x; // Remaining part of const.
363
364 xd = rem & 0xFFFF; // Lowest 16-bit chunk.
365 rem = (rem >> 16) + ((unsigned short)xd >> 15); // Compensation for sign extend.
366
367 if (rem == 0) { // opt 1: simm16
368 li(d, xd);
369 return 0;
370 }
371
372 int retval = 0;
373 if (return_simm16_rest) {
374 retval = xd;
375 x = rem << 16;
376 xd = 0;
377 }
378
379 if (d == R0) { // Can't use addi.
380 if (is_simm(x, 32)) { // opt 2: simm32
381 lis(d, x >> 16);
382 if (xd) ori(d, d, (unsigned short)xd);
383 } else {
384 // 64-bit value: x = xa xb xc xd
385 xa = (x >> 48) & 0xffff;
386 xb = (x >> 32) & 0xffff;
387 xc = (x >> 16) & 0xffff;
388 bool xa_loaded = (xb & 0x8000) ? (xa != -1) : (xa != 0);
389 if (tmp == noreg || (xc == 0 && xd == 0)) {
390 if (xa_loaded) {
391 lis(d, xa);
392 if (xb) { ori(d, d, (unsigned short)xb); }
393 } else {
394 li(d, xb);
395 }
396 sldi(d, d, 32);
397 if (xc) { oris(d, d, (unsigned short)xc); }
398 if (xd) { ori( d, d, (unsigned short)xd); }
399 } else {
400 // Exploit instruction level parallelism if we have a tmp register.
401 bool xc_loaded = (xd & 0x8000) ? (xc != -1) : (xc != 0);
402 if (xa_loaded) {
403 lis(tmp, xa);
404 }
405 if (xc_loaded) {
406 lis(d, xc);
407 }
408 if (xa_loaded) {
409 if (xb) { ori(tmp, tmp, (unsigned short)xb); }
410 } else {
411 li(tmp, xb);
412 }
413 if (xc_loaded) {
414 if (xd) { ori(d, d, (unsigned short)xd); }
415 } else {
416 li(d, xd);
417 }
418 insrdi(d, tmp, 32, 0);
419 }
420 }
421 return retval;
422 }
423
424 xc = rem & 0xFFFF; // Next 16-bit chunk.
425 rem = (rem >> 16) + ((unsigned short)xc >> 15); // Compensation for sign extend.
426
427 if (rem == 0) { // opt 2: simm32
428 lis(d, xc);
429 } else { // High 32 bits needed.
430
431 if (tmp != noreg && (int)x != 0) { // opt 3: We have a temp reg.
432 // No carry propagation between xc and higher chunks here (use logical instructions).
433 xa = (x >> 48) & 0xffff;
434 xb = (x >> 32) & 0xffff; // No sign compensation, we use lis+ori or li to allow usage of R0.
435 bool xa_loaded = (xb & 0x8000) ? (xa != -1) : (xa != 0);
436 bool return_xd = false;
437
438 if (xa_loaded) { lis(tmp, xa); }
439 if (xc) { lis(d, xc); }
440 if (xa_loaded) {
441 if (xb) { ori(tmp, tmp, (unsigned short)xb); } // No addi, we support tmp == R0.
442 } else {
443 li(tmp, xb);
444 }
445 if (xc) {
446 if (xd) { addi(d, d, xd); }
447 } else {
448 li(d, xd);
449 }
450 insrdi(d, tmp, 32, 0);
451 return retval;
452 }
453
454 xb = rem & 0xFFFF; // Next 16-bit chunk.
455 rem = (rem >> 16) + ((unsigned short)xb >> 15); // Compensation for sign extend.
456
457 xa = rem & 0xFFFF; // Highest 16-bit chunk.
458
459 // opt 4: avoid adding 0
460 if (xa) { // Highest 16-bit needed?
461 lis(d, xa);
462 if (xb) { addi(d, d, xb); }
463 } else {
464 li(d, xb);
465 }
466 sldi(d, d, 32);
467 if (xc) { addis(d, d, xc); }
468 }
469
470 if (xd) { addi(d, d, xd); }
471 return retval;
472 }
473
474 // We emit only one addition to s to optimize latency.
475 int Assembler::add_const_optimized(Register d, Register s, long x, Register tmp, bool return_simm16_rest) {
476 assert(s != R0 && s != tmp, "unsupported");
477 long rem = x;
478
479 // Case 1: Can use mr or addi.
480 short xd = rem & 0xFFFF; // Lowest 16-bit chunk.
481 rem = (rem >> 16) + ((unsigned short)xd >> 15);
482 if (rem == 0) {
483 if (xd == 0) {
484 if (d != s) { mr(d, s); }
485 return 0;
486 }
487 if (return_simm16_rest) {
488 return xd;
489 }
490 addi(d, s, xd);
491 return 0;
492 }
493
494 // Case 2: Can use addis.
495 if (xd == 0) {
496 short xc = rem & 0xFFFF; // 2nd 16-bit chunk.
497 rem = (rem >> 16) + ((unsigned short)xd >> 15);
498 if (rem == 0) {
499 addis(d, s, xc);
500 return 0;
501 }
502 }
503
504 // Other cases: load & add.
505 Register tmp1 = tmp,
506 tmp2 = noreg;
507 if ((d != tmp) && (d != s)) {
508 // Can use d.
509 tmp1 = d;
510 tmp2 = tmp;
511 }
512 int simm16_rest = load_const_optimized(tmp1, x, tmp2, return_simm16_rest);
513 add(d, tmp1, s);
514 return simm16_rest;
515 }
516
517 #ifndef PRODUCT
518 // Test of ppc assembler.
519 void Assembler::test_asm() {
520 // PPC 1, section 3.3.8, Fixed-Point Arithmetic Instructions
521 addi( R0, R1, 10);
522 addis( R5, R2, 11);
523 addic_( R3, R31, 42);
524 subfic( R21, R12, 2112);
525 add( R3, R2, R1);
526 add_( R11, R22, R30);
527 subf( R7, R6, R5);
528 subf_( R8, R9, R4);
529 addc( R11, R12, R13);
530 addc_( R14, R14, R14);
531 subfc( R15, R16, R17);
532 subfc_( R18, R20, R19);
533 adde( R20, R22, R24);
534 adde_( R29, R27, R26);
|