< prev index next >

src/hotspot/cpu/aarch64/aarch64-asmtest.py

Print this page
rev 60615 : 8231441: Initial SVE backend support
Reviewed-by: adinn, pli
Contributed-by: joshua.zhu@arm.com, yang.zhang@arm.com, ningsheng.jian@arm.com


  51         else:
  52             return self.astr("r")
  53 
  class GeneralRegisterOrSp(Register):
      """Register operand numbered 0..31 in which number 31 is spelled "sp"."""

      def generate(self):
          """Draw a random register number in [0, 31] and return self."""
          self.number = random.randint(0, 31)
          return self

      def astr(self, prefix = ""):
          """Return "sp" for number 31, otherwise `prefix` followed by the number."""
          is_sp = (self.number == 31)
          return "sp" if is_sp else prefix + str(self.number)

      def __str__(self):
          """Return "sp" for number 31, otherwise "r" followed by the number."""
          # Number 31 goes through astr() with its default (empty) prefix;
          # every other number is rendered with the "r" prefix.
          return self.astr() if self.number == 31 else self.astr("r")
  70 











































  class FloatZero(Operand):
      """Operand that always renders as the floating-point constant zero."""

      def __str__(self):
          """Return the bare literal "0.0" (the form produced via str())."""
          zero_literal = "0.0"
          return zero_literal

      def astr(self, ignored):
          """Return the immediate form "#0.0"; the argument is not used."""
          zero_immediate = "#0.0"
          return zero_immediate
  78 
  class OperandFactory:
      """Maps one-character operand mode codes to the operand class to build."""

      # Mode code -> operand class.  'x'/'w' share GeneralRegister and
      # 's'/'d' share FloatRegister; 'z' selects the constant FloatZero.
      _modes = {
          'x' : GeneralRegister,
          'w' : GeneralRegister,
          's' : FloatRegister,
          'd' : FloatRegister,
          'z' : FloatZero,
      }

      @classmethod
      def create(cls, mode):
          """Return a new operand instance for `mode`.

          The instance is only constructed here; callers invoke generate()
          on it themselves.  Raises KeyError for an unknown mode code.
          """
          operand_class = OperandFactory._modes[mode]
          return operand_class()
  90 
  class ShiftKind:
      """Randomly chosen shift kind: one of "LSL", "LSR" or "ASR"."""

      def generate(self):
          """Pick one of the three shift kinds at random and return self."""
          kinds = ["LSL", "LSR", "ASR"]
          index = random.randint(0,2)
          self.kind = kinds[index]
          return self

      def cstr(self):
          """Return the shift kind chosen by generate()."""
          return self.kind
  99 
 100 class Instruction(object):
 101 
 102     def __init__(self, name):
 103         self._name = name
 104         self.isWord = name.endswith("w") | name.endswith("wi")
 105         self.asmRegPrefix = ["x", "w"][self.isWord]


 822         name, self.modes = args
 823         Instruction.__init__(self, name)
 824 
 def generate(self):
     """Create and generate one operand per register slot; return self.

     Slot i gets an operand of the kind named by self.modes[i].
     """
     operands = []
     for index in range(self.numRegs):
         operand = OperandFactory.create(self.modes[index])
         operands.append(operand.generate())
     self.reg = operands
     return self
 829 
 def cstr(self):
     """Return Instruction.cstr(self) followed by the comma-separated
     str() of every register and a closing ");"."""
     # One "%s" for the prefix, one for the first register, then ", %s"
     # for each remaining register.
     template = "%s%s" + ", %s" * (self.numRegs - 1) + ");"
     pieces = [Instruction.cstr(self)]
     for index in range(self.numRegs):
         pieces.append(str(self.reg[index]))
     return template % tuple(pieces)
 835     
 def astr(self):
     """Return Instruction.astr(self) followed by the comma-separated
     astr() of every register, each rendered with its own mode code."""
     template = "%s%s" + ", %s" * (self.numRegs - 1)
     pieces = [Instruction.astr(self)]
     for index in range(self.numRegs):
         # The mode code of slot i is passed to that register's astr().
         pieces.append(self.reg[index].astr(self.modes[index]))
     return template % tuple(pieces)
 841 






























































































 842 class LdStSIMDOp(Instruction):
 def __init__(self, args):
     """Unpack args = (name, regnum, arrangement, addresskind).

     NOTE(review): this constructor does not call Instruction.__init__,
     so it sets only the four attributes below.
     """
     name, regnum, arrangement, addresskind = args
     self._name = name
     self.regnum = regnum
     self.arrangement = arrangement
     self.addresskind = addresskind
 845 
 def generate(self):
     """Generate the address and first SIMD register; return self.

     For Address.post addressing the address offset is overwritten with
     regnum times a per-register byte count: the element size for the
     ld1r..ld4r forms, otherwise 8 for the "8B"/"4H"/"2S"/"1D"
     arrangements and 16 for every other arrangement.
     """
     self.address = Address().generate(self.addresskind, 0)
     self._firstSIMDreg = FloatRegister().generate()

     # Only the post-index form needs its offset adjusted.
     if self.addresskind != Address.post:
         return self

     if self._name in ("ld1r", "ld2r", "ld3r", "ld4r"):
         element_sizes = {"8B" : 1, "16B" : 1,
                          "4H" : 2, "8H" : 2,
                          "2S" : 4, "4S" : 4,
                          "1D" : 8, "2D" : 8}
         bytes_per_reg = element_sizes[self.arrangement]
     elif self.arrangement in ("8B", "4H", "2S", "1D"):
         bytes_per_reg = 8
     else:
         bytes_per_reg = 16

     self.address.offset = self.regnum * bytes_per_reg
     return self
 859 
 860     def cstr(self):
 861         buf = super(LdStSIMDOp, self).cstr() + str(self._firstSIMDreg)


1143                       ["ld4r", 4, "4H",  Address.post],
1144                       ["ld4r", 4, "2S",  Address.post_reg],
1145 ])
1146 
# Run generate() over SHA512SIMDOp with the four sha512* instruction names.
1147 generate(SHA512SIMDOp, ["sha512h", "sha512h2", "sha512su0", "sha512su1"])
1148 
# Hand-written cases passed to SpecialCases.  Each entry has three strings;
# they appear to be [name, C++ assembler call text, expected assembly text]
# -- confirm against the SpecialCases class, which is outside this view.
1149 generate(SpecialCases, [["ccmn",   "__ ccmn(zr, zr, 3u, Assembler::LE);",                "ccmn\txzr, xzr, #3, LE"],
1150                         ["ccmnw",  "__ ccmnw(zr, zr, 5u, Assembler::EQ);",               "ccmn\twzr, wzr, #5, EQ"],
1151                         ["ccmp",   "__ ccmp(zr, 1, 4u, Assembler::NE);",                 "ccmp\txzr, 1, #4, NE"],
1152                         ["ccmpw",  "__ ccmpw(zr, 2, 2, Assembler::GT);",                 "ccmp\twzr, 2, #2, GT"],
1153                         ["extr",   "__ extr(zr, zr, zr, 0);",                            "extr\txzr, xzr, xzr, 0"],
1154                         ["stlxp",  "__ stlxp(r0, zr, zr, sp);",                          "stlxp\tw0, xzr, xzr, [sp]"],
1155                         ["stlxpw", "__ stlxpw(r2, zr, zr, r3);",                         "stlxp\tw2, wzr, wzr, [x3]"],
1156                         ["stxp",   "__ stxp(r4, zr, zr, r5);",                           "stxp\tw4, xzr, xzr, [x5]"],
1157                         ["stxpw",  "__ stxpw(r6, zr, zr, sp);",                          "stxp\tw6, wzr, wzr, [sp]"],
1158                         ["dup",    "__ dup(v0, __ T16B, zr);",                           "dup\tv0.16b, wzr"],
1159                         ["mov",    "__ mov(v1, __ T1D, 0, zr);",                         "mov\tv1.d[0], xzr"],
1160                         ["mov",    "__ mov(v1, __ T2S, 1, zr);",                         "mov\tv1.s[1], wzr"],
1161                         ["mov",    "__ mov(v1, __ T4H, 2, zr);",                         "mov\tv1.h[2], wzr"],
1162                         ["mov",    "__ mov(v1, __ T8B, 3, zr);",                         "mov\tv1.b[3], wzr"],
1163                         ["ld1",    "__ ld1(v31, v0, __ T2D, Address(__ post(r1, r0)));", "ld1\t{v31.2d, v0.2d}, [x1], x0"]])



































1164 
# Emit the fmov-immediate cases.  For every literal below, print the C++
# call with the assembly text as a trailing comment, and append the same
# assembly line to outfile.
# NOTE(review): the loop variable `float` rebinds the builtin of the same
# name at module level for the rest of the script.
1165 print "\n// FloatImmediateOp"
1166 for float in ("2.0", "2.125", "4.0", "4.25", "8.0", "8.5", "16.0", "17.0", "0.125", 
1167               "0.1328125", "0.25", "0.265625", "0.5", "0.53125", "1.0", "1.0625", 
1168               "-2.0", "-2.125", "-4.0", "-4.25", "-8.0", "-8.5", "-16.0", "-17.0", 
1169               "-0.125", "-0.1328125", "-0.25", "-0.265625", "-0.5", "-0.53125", "-1.0", "-1.0625"):
1170     astr = "fmov d0, #" + float
1171     cstr = "__ fmovd(v0, " + float + ");"
1172     print "    %-50s //\t%s" % (cstr, astr)
1173     outfile.write("\t" + astr + "\n")
1174 
1175 # ARMv8.1A
# Generate LSEOp cases for both sizes ("x", "w") and every suffix
# ("", "a", "al", "l").  Each entry is [first name, second name, size,
# suffix]; the two names differ only for ldbic/ldclr and ldorr/ldset --
# presumably C++ method name versus assembler mnemonic; confirm against
# LSEOp, which is outside this view.
1176 for size in ("x", "w"):
1177     for suffix in ("", "a", "al", "l"):
1178         generate(LSEOp, [["swp", "swp", size, suffix],
1179                          ["ldadd", "ldadd", size, suffix],
1180                          ["ldbic", "ldclr", size, suffix],
1181                          ["ldeor", "ldeor", size, suffix],
1182                          ["ldorr", "ldset", size, suffix],
1183                          ["ldsmin", "ldsmin", size, suffix],
1184                          ["ldsmax", "ldsmax", size, suffix],
1185                          ["ldumin", "ldumin", size, suffix],
1186                          ["ldumax", "ldumax", size, suffix]]);
1187 











































# Print the C++ bind of label `forth`, write the matching "forth:" label
# to outfile, then close outfile so its contents are complete on disk.
1188 print "\n    __ bind(forth);"
1189 outfile.write("forth:\n")
1190 
1191 outfile.close()
1192 
1193 import subprocess
1194 import sys
1195 
# Assemble aarch64ops.s into aarch64ops.o; check_call raises
# CalledProcessError if the assembler exits with a non-zero status.
1196 # compile for 8.1 and sha2 because of lse atomics and sha512 crypto extension.
1197 subprocess.check_call([AARCH64_AS, "-march=armv8.1-a+sha2", "aarch64ops.s", "-o", "aarch64ops.o"])
1198 
# Wrap the objdump disassembly in a "/* ... */" comment.  stdout is flushed
# before the child runs, presumably so the "/*" already printed is not
# emitted after the child's output.
1199 print
1200 print "/*",
1201 sys.stdout.flush()
1202 subprocess.check_call([AARCH64_OBJDUMP, "-d", "aarch64ops.o"])
1203 print "*/"
1204 
# Extract only the .text section as a raw binary image.
1205 subprocess.check_call([AARCH64_OBJCOPY, "-O", "binary", "-j", ".text", "aarch64ops.o", "aarch64ops.bin"])
1206 
# aarch64ops.bin is the raw image written by objcopy ("-O binary"), so it
# must be opened in binary mode: text mode may translate line endings on
# some platforms, and on Python 3 it yields str, which bytearray() rejects.
# The name `bytes` (which shadows the builtin) is kept because the code
# that follows indexes it.
infile = open("aarch64ops.bin", "rb")
bytes = bytearray(infile.read())
1209 
1210 print
1211 print "  static const unsigned int insns[] ="
1212 print "  {"
1213 
1214 i = 0
1215 while i < len(bytes):
1216      print "    0x%02x%02x%02x%02x," % (bytes[i+3], bytes[i+2], bytes[i+1], bytes[i]),
1217      i += 4


  51         else:
  52             return self.astr("r")
  53 
  class GeneralRegisterOrSp(Register):
      """Register operand numbered 0..31 in which number 31 is spelled "sp"."""

      def generate(self):
          """Draw a random register number in [0, 31] and return self."""
          self.number = random.randint(0, 31)
          return self

      def astr(self, prefix = ""):
          """Return "sp" for number 31, otherwise `prefix` followed by the number."""
          is_sp = (self.number == 31)
          return "sp" if is_sp else prefix + str(self.number)

      def __str__(self):
          """Return "sp" for number 31, otherwise "r" followed by the number."""
          # Number 31 goes through astr() with its default (empty) prefix;
          # every other number is rendered with the "r" prefix.
          return self.astr() if self.number == 31 else self.astr("r")
  70 
  class SVEVectorRegister(FloatRegister):
      """FloatRegister variant whose str() form uses the "z" prefix."""

      def __str__(self):
          """Return self.astr("z")."""
          prefix = "z"
          return self.astr(prefix)
  74 
  class SVEPRegister(Register):
      """Register operand numbered 0..15 whose str() form uses the "p" prefix."""

      def generate(self):
          """Draw a random register number in [0, 15] and return self."""
          self.number = random.randint(0, 15)
          return self

      def __str__(self):
          """Return self.astr("p")."""
          prefix = "p"
          return self.astr(prefix)
  82 
  class SVEGoverningPRegister(Register):
      """Register operand numbered 0..7 whose str() form uses the "p" prefix."""

      def generate(self):
          """Draw a random register number in [0, 7] and return self."""
          self.number = random.randint(0, 7)
          return self

      def __str__(self):
          """Return self.astr("p")."""
          prefix = "p"
          return self.astr(prefix)
  89 
  class RegVariant(object):
      """Randomly chosen element-size variant, identified by a small integer.

      Numbers 0..4 correspond to b, h, s, d and q respectively.
      """

      # Variant number -> assembly suffix appended to a register name.
      _ASM_SUFFIXES = {0: ".b", 1: ".h", 2: ".s", 3: ".d", 4: ".q"}

      # Variant number -> spelling used in the generated C++ call.
      _CPP_NAMES = {0: "__ B", 1: "__ H", 2: "__ S", 3: "__ D", 4: "__ Q"}

      def __init__(self, low, high):
          """Pick a random variant number in the inclusive range [low, high]."""
          self.number = random.randint(low, high)

      def astr(self):
          """Return the assembly suffix, or None if the number has no entry."""
          return self._ASM_SUFFIXES.get(self.number)

      def cstr(self):
          """Return the C++ spelling, or None if the number has no entry."""
          return self._CPP_NAMES.get(self.number)
 113 
 class FloatZero(Operand):
     """Operand that always renders as the floating-point constant zero."""

     def __str__(self):
         """Return the bare literal "0.0" (the form produced via str())."""
         zero_literal = "0.0"
         return zero_literal

     def astr(self, ignored):
         """Return the immediate form "#0.0"; the argument is not used."""
         zero_immediate = "#0.0"
         return zero_immediate
 121 
 class OperandFactory:
     """Maps one-character operand mode codes to the operand class to build."""

     # Mode code -> operand class.  'x'/'w' share GeneralRegister and
     # 's'/'d' share FloatRegister; 'z' selects the constant FloatZero.
     # The codes are case-sensitive: lower-case 'p' and 'z' differ from
     # upper-case 'P' and 'Z'.
     _modes = {
         'x' : GeneralRegister,
         'w' : GeneralRegister,
         's' : FloatRegister,
         'd' : FloatRegister,
         'z' : FloatZero,
         'p' : SVEPRegister,
         'P' : SVEGoverningPRegister,
         'Z' : SVEVectorRegister,
     }

     @classmethod
     def create(cls, mode):
         """Return a new operand instance for `mode`.

         The instance is only constructed here; callers invoke generate()
         on it themselves.  Raises KeyError for an unknown mode code.
         """
         operand_class = OperandFactory._modes[mode]
         return operand_class()
 136 
 class ShiftKind:
     """Randomly chosen shift kind: one of "LSL", "LSR" or "ASR"."""

     def generate(self):
         """Pick one of the three shift kinds at random and return self."""
         kinds = ["LSL", "LSR", "ASR"]
         index = random.randint(0,2)
         self.kind = kinds[index]
         return self

     def cstr(self):
         """Return the shift kind chosen by generate()."""
         return self.kind
 145 
 146 class Instruction(object):
 147 
 148     def __init__(self, name):
 149         self._name = name
 150         self.isWord = name.endswith("w") | name.endswith("wi")
 151         self.asmRegPrefix = ["x", "w"][self.isWord]


 868         name, self.modes = args
 869         Instruction.__init__(self, name)
 870 
 def generate(self):
     """Create and generate one operand per register slot; return self.

     Slot i gets an operand of the kind named by self.modes[i].
     """
     operands = []
     for index in range(self.numRegs):
         operand = OperandFactory.create(self.modes[index])
         operands.append(operand.generate())
     self.reg = operands
     return self
 875 
 def cstr(self):
     """Return Instruction.cstr(self) followed by the comma-separated
     str() of every register and a closing ");"."""
     # One "%s" for the prefix, one for the first register, then ", %s"
     # for each remaining register.
     template = "%s%s" + ", %s" * (self.numRegs - 1) + ");"
     pieces = [Instruction.cstr(self)]
     for index in range(self.numRegs):
         pieces.append(str(self.reg[index]))
     return template % tuple(pieces)
 881     
 def astr(self):
     """Return Instruction.astr(self) followed by the comma-separated
     astr() of every register, each rendered with its own mode code."""
     template = "%s%s" + ", %s" * (self.numRegs - 1)
     pieces = [Instruction.astr(self)]
     for index in range(self.numRegs):
         # The mode code of slot i is passed to that register's astr().
         pieces.append(self.reg[index].astr(self.modes[index]))
     return template % tuple(pieces)
 887 
 class SVEVectorOp(Instruction):
     """Test case for an SVE vector instruction.

     `args` is [name, regTypes] or [name, regTypes, dnm].  regTypes holds
     one OperandFactory mode code per operand.  When dnm equals 'dn' the
     assembly text repeats the destination register as the first source.
     """

     def __init__(self, args):
         name, regTypes = args[0], args[1]

         # Operands are drawn here, in the constructor; generate() below
         # therefore has nothing left to do.
         self.reg = [OperandFactory.create(kind).generate() for kind in regTypes]
         self.numRegs = len(self.reg)

         # Predicated form: the first operand is not 'p' and the second
         # operand is a 'P' register.
         self._isPredicated = (regTypes[0] != "p" and regTypes[1] == 'P')
         self._merge = "/m" if self._isPredicated else ""

         isFloat = (name[0] == 'f')
         self._bitwiseop = (not isFloat and not self._isPredicated
                            and name in ("and", "eor", "orr"))

         # Lowest allowed RegVariant number: 2 (.s) for names starting
         # with 'f', 3 (.d only) for unpredicated and/eor/orr, else 0 (.b).
         if isFloat:
             lowest = 2
         elif self._bitwiseop:
             lowest = 3
         else:
             lowest = 0
         self._width = RegVariant(lowest, 3)

         self._dnm = args[2] if len(args) > 2 else None
         Instruction.__init__(self, name)

     def cstr(self):
         """Return the C++ call "__ sve_<name>(reg0[, width], reg1, ...);".

         The width argument is left out for the bitwise and/eor/orr forms.
         """
         operands = [str(self.reg[0])]
         if not self._bitwiseop:
             operands.append(str(self._width.cstr()))
         for index in range(1, self.numRegs):
             operands.append(str(self.reg[index]))
         return "__ sve_" + self._name + "(" + ", ".join(operands) + ");"

     def astr(self):
         """Return the assembly text for this case.

         Every vector register carries the width suffix.  In the
         predicated form the second operand carries "/m" instead of a
         width suffix.  With dnm == 'dn' the destination is repeated
         right before the remaining source registers.
         """
         suffix = self._width.astr()
         destination = str(self.reg[0]) + suffix

         operands = [destination]
         if self._isPredicated:
             operands.append(str(self.reg[1]) + self._merge)
             firstSource = 2
         else:
             firstSource = 1
         if self._dnm == 'dn':
             operands.append(destination)
         for index in range(firstSource, self.numRegs):
             operands.append(str(self.reg[index]) + suffix)

         return "%s%s" % (Instruction.astr(self), ", ".join(operands))

     def generate(self):
         """Return self; the operands were already drawn in __init__."""
         return self
 948 
 class SVEReductionOp(Instruction):
     """Test case for an SVE reduction instruction.

     `args` is [name, lowRegType]; lowRegType is the lowest RegVariant
     number that may be drawn (the highest is always 3).  The operands
     are an 's' register, a 'P' register and a 'Z' register, in that order.
     """

     def __init__(self, args):
         name, lowRegType = args[0], args[1]
         self.reg = []
         Instruction.__init__(self, name)
         for mode in ('s', 'P', 'Z'):
             self.reg.append(OperandFactory.create(mode).generate())
         self._width = RegVariant(lowRegType, 3)

     def cstr(self):
         """Return the C++ call "__ sve_<name>(dst, width, pred, src);"."""
         destination, predicate, source = self.reg[0], self.reg[1], self.reg[2]
         return "__ sve_%s(%s, %s, %s, %s);" % (self.name(),
                                               str(destination),
                                               self._width.cstr(),
                                               str(predicate),
                                               str(source))

     def astr(self):
         """Return the assembly text "<name> <dst>, <pred>, [<dst>, ]<src><width>".

         The destination is named "d<n>" for uaddv and otherwise uses the
         letter of the width suffix; fadda repeats the destination as an
         extra operand before the source.
         """
         mnemonic = self.name()
         suffix = self._width.astr()
         number = str(self.reg[0].number)

         if mnemonic == "uaddv":
             dstRegName = "d" + number
         else:
             # suffix looks like ".s"; its second character is the letter.
             dstRegName = suffix[1] + number

         operands = [dstRegName, str(self.reg[1])]
         if mnemonic == "fadda":
             operands.append(dstRegName)
         operands.append(str(self.reg[2]) + suffix)

         return "%s %s" % (mnemonic, ", ".join(operands))
 981 
 982 class LdStSIMDOp(Instruction):
 def __init__(self, args):
     """Unpack args = (name, regnum, arrangement, addresskind).

     NOTE(review): this constructor does not call Instruction.__init__,
     so it sets only the four attributes below.
     """
     name, regnum, arrangement, addresskind = args
     self._name = name
     self.regnum = regnum
     self.arrangement = arrangement
     self.addresskind = addresskind
 985 
 def generate(self):
     """Generate the address and first SIMD register; return self.

     For Address.post addressing the address offset is overwritten with
     regnum times a per-register byte count: the element size for the
     ld1r..ld4r forms, otherwise 8 for the "8B"/"4H"/"2S"/"1D"
     arrangements and 16 for every other arrangement.
     """
     self.address = Address().generate(self.addresskind, 0)
     self._firstSIMDreg = FloatRegister().generate()

     # Only the post-index form needs its offset adjusted.
     if self.addresskind != Address.post:
         return self

     if self._name in ("ld1r", "ld2r", "ld3r", "ld4r"):
         element_sizes = {"8B" : 1, "16B" : 1,
                          "4H" : 2, "8H" : 2,
                          "2S" : 4, "4S" : 4,
                          "1D" : 8, "2D" : 8}
         bytes_per_reg = element_sizes[self.arrangement]
     elif self.arrangement in ("8B", "4H", "2S", "1D"):
         bytes_per_reg = 8
     else:
         bytes_per_reg = 16

     self.address.offset = self.regnum * bytes_per_reg
     return self
 999 
1000     def cstr(self):
1001         buf = super(LdStSIMDOp, self).cstr() + str(self._firstSIMDreg)


1283                       ["ld4r", 4, "4H",  Address.post],
1284                       ["ld4r", 4, "2S",  Address.post_reg],
1285 ])
1286 
# Run generate() over SHA512SIMDOp with the four sha512* instruction names.
1287 generate(SHA512SIMDOp, ["sha512h", "sha512h2", "sha512su0", "sha512su1"])
1288 
# Hand-written cases passed to SpecialCases.  Each entry has three strings;
# they appear to be [name, C++ assembler call text, expected assembly text]
# -- confirm against the SpecialCases class, which is outside this view.
# The entries after the "SVE instructions" marker all call sve_* methods.
1289 generate(SpecialCases, [["ccmn",   "__ ccmn(zr, zr, 3u, Assembler::LE);",                "ccmn\txzr, xzr, #3, LE"],
1290                         ["ccmnw",  "__ ccmnw(zr, zr, 5u, Assembler::EQ);",               "ccmn\twzr, wzr, #5, EQ"],
1291                         ["ccmp",   "__ ccmp(zr, 1, 4u, Assembler::NE);",                 "ccmp\txzr, 1, #4, NE"],
1292                         ["ccmpw",  "__ ccmpw(zr, 2, 2, Assembler::GT);",                 "ccmp\twzr, 2, #2, GT"],
1293                         ["extr",   "__ extr(zr, zr, zr, 0);",                            "extr\txzr, xzr, xzr, 0"],
1294                         ["stlxp",  "__ stlxp(r0, zr, zr, sp);",                          "stlxp\tw0, xzr, xzr, [sp]"],
1295                         ["stlxpw", "__ stlxpw(r2, zr, zr, r3);",                         "stlxp\tw2, wzr, wzr, [x3]"],
1296                         ["stxp",   "__ stxp(r4, zr, zr, r5);",                           "stxp\tw4, xzr, xzr, [x5]"],
1297                         ["stxpw",  "__ stxpw(r6, zr, zr, sp);",                          "stxp\tw6, wzr, wzr, [sp]"],
1298                         ["dup",    "__ dup(v0, __ T16B, zr);",                           "dup\tv0.16b, wzr"],
1299                         ["mov",    "__ mov(v1, __ T1D, 0, zr);",                         "mov\tv1.d[0], xzr"],
1300                         ["mov",    "__ mov(v1, __ T2S, 1, zr);",                         "mov\tv1.s[1], wzr"],
1301                         ["mov",    "__ mov(v1, __ T4H, 2, zr);",                         "mov\tv1.h[2], wzr"],
1302                         ["mov",    "__ mov(v1, __ T8B, 3, zr);",                         "mov\tv1.b[3], wzr"],
1303                         ["ld1",    "__ ld1(v31, v0, __ T2D, Address(__ post(r1, r0)));", "ld1\t{v31.2d, v0.2d}, [x1], x0"],
1304                         # SVE instructions
1305                         ["cpy",    "__ sve_cpy(z0, __ S, p0, v1);",                      "mov\tz0.s, p0/m, s1"],
1306                         ["inc",    "__ sve_inc(r0, __ S);",                              "incw\tx0"],
1307                         ["dec",    "__ sve_dec(r1, __ H);",                              "dech\tx1"],
1308                         ["lsl",    "__ sve_lsl(z0, __ B, z1, 7);",                       "lsl\tz0.b, z1.b, #7"],
1309                         ["lsl",    "__ sve_lsl(z21, __ H, z1, 15);",                     "lsl\tz21.h, z1.h, #15"],
1310                         ["lsl",    "__ sve_lsl(z0, __ S, z1, 31);",                      "lsl\tz0.s, z1.s, #31"],
1311                         ["lsl",    "__ sve_lsl(z0, __ D, z1, 63);",                      "lsl\tz0.d, z1.d, #63"],
1312                         ["lsr",    "__ sve_lsr(z0, __ B, z1, 7);",                       "lsr\tz0.b, z1.b, #7"],
1313                         ["asr",    "__ sve_asr(z0, __ H, z11, 15);",                     "asr\tz0.h, z11.h, #15"],
1314                         ["lsr",    "__ sve_lsr(z30, __ S, z1, 31);",                     "lsr\tz30.s, z1.s, #31"],
1315                         ["asr",    "__ sve_asr(z0, __ D, z1, 63);",                      "asr\tz0.d, z1.d, #63"],
1316                         ["addvl",  "__ sve_addvl(sp, r0, 31);",                          "addvl\tsp, x0, #31"],
1317                         ["addpl",  "__ sve_addpl(r1, sp, -32);",                         "addpl\tx1, sp, -32"],
1318                         ["cntp",   "__ sve_cntp(r8, __ B, p0, p1);",                     "cntp\tx8, p0, p1.b"],
1319                         ["dup",    "__ sve_dup(z0, __ B, 127);",                         "dup\tz0.b, 127"],
1320                         ["dup",    "__ sve_dup(z1, __ H, -128);",                        "dup\tz1.h, -128"],
1321                         ["dup",    "__ sve_dup(z2, __ S, 32512);",                       "dup\tz2.s, 32512"],
1322                         ["dup",    "__ sve_dup(z7, __ D, -32768);",                      "dup\tz7.d, -32768"],
1323                         ["ld1b",   "__ sve_ld1b(z0, __ B, p0, Address(sp));",            "ld1b\t{z0.b}, p0/z, [sp]"],
1324                         ["ld1h",   "__ sve_ld1h(z10, __ H, p1, Address(sp, -8));",       "ld1h\t{z10.h}, p1/z, [sp, #-8, MUL VL]"],
1325                         ["ld1w",   "__ sve_ld1w(z20, __ S, p2, Address(r0, 7));",        "ld1w\t{z20.s}, p2/z, [x0, #7, MUL VL]"],
1326                         ["ld1b",   "__ sve_ld1b(z30, __ B, p3, Address(sp, r8));",       "ld1b\t{z30.b}, p3/z, [sp, x8]"],
1327                         ["ld1w",   "__ sve_ld1w(z0, __ S, p4, Address(sp, r28));",       "ld1w\t{z0.s}, p4/z, [sp, x28, LSL #2]"],
1328                         ["ld1d",   "__ sve_ld1d(z11, __ D, p5, Address(r0, r1));",       "ld1d\t{z11.d}, p5/z, [x0, x1, LSL #3]"],
1329                         ["st1b",   "__ sve_st1b(z22, __ B, p6, Address(sp));",           "st1b\t{z22.b}, p6, [sp]"],
1330                         ["st1b",   "__ sve_st1b(z31, __ B, p7, Address(sp, -8));",       "st1b\t{z31.b}, p7, [sp, #-8, MUL VL]"],
1331                         ["st1w",   "__ sve_st1w(z0, __ S, p1, Address(r0, 7));",         "st1w\t{z0.s}, p1, [x0, #7, MUL VL]"],
1332                         ["st1b",   "__ sve_st1b(z0, __ B, p2, Address(sp, r1));",        "st1b\t{z0.b}, p2, [sp, x1]"],
1333                         ["st1h",   "__ sve_st1h(z0, __ H, p3, Address(sp, r8));",        "st1h\t{z0.h}, p3, [sp, x8, LSL #1]"],
1334                         ["st1d",   "__ sve_st1d(z0, __ D, p4, Address(r0, r18));",       "st1d\t{z0.d}, p4, [x0, x18, LSL #3]"],
1335                         ["ldr",    "__ sve_ldr(z0, Address(sp));",                       "ldr\tz0, [sp]"],
1336                         ["ldr",    "__ sve_ldr(z31, Address(sp, -256));",                "ldr\tz31, [sp, #-256, MUL VL]"],
1337                         ["str",    "__ sve_str(z8, Address(r8, 255));",                  "str\tz8, [x8, #255, MUL VL]"],
1338 ])
1339 
# Emit the fmov-immediate cases.  For every literal below, print the C++
# call with the assembly text as a trailing comment, and append the same
# assembly line to outfile.
# NOTE(review): the loop variable `float` rebinds the builtin of the same
# name at module level for the rest of the script.
1340 print "\n// FloatImmediateOp"
1341 for float in ("2.0", "2.125", "4.0", "4.25", "8.0", "8.5", "16.0", "17.0", "0.125", 
1342               "0.1328125", "0.25", "0.265625", "0.5", "0.53125", "1.0", "1.0625", 
1343               "-2.0", "-2.125", "-4.0", "-4.25", "-8.0", "-8.5", "-16.0", "-17.0", 
1344               "-0.125", "-0.1328125", "-0.25", "-0.265625", "-0.5", "-0.53125", "-1.0", "-1.0625"):
1345     astr = "fmov d0, #" + float
1346     cstr = "__ fmovd(v0, " + float + ");"
1347     print "    %-50s //\t%s" % (cstr, astr)
1348     outfile.write("\t" + astr + "\n")
1349 
1350 # ARMv8.1A
# Generate LSEOp cases for both sizes ("x", "w") and every suffix
# ("", "a", "al", "l").  Each entry is [first name, second name, size,
# suffix]; the two names differ only for ldbic/ldclr and ldorr/ldset --
# presumably C++ method name versus assembler mnemonic; confirm against
# LSEOp, which is outside this view.
1351 for size in ("x", "w"):
1352     for suffix in ("", "a", "al", "l"):
1353         generate(LSEOp, [["swp", "swp", size, suffix],
1354                          ["ldadd", "ldadd", size, suffix],
1355                          ["ldbic", "ldclr", size, suffix],
1356                          ["ldeor", "ldeor", size, suffix],
1357                          ["ldorr", "ldset", size, suffix],
1358                          ["ldsmin", "ldsmin", size, suffix],
1359                          ["ldsmax", "ldsmax", size, suffix],
1360                          ["ldumin", "ldumin", size, suffix],
1361                          ["ldumax", "ldumax", size, suffix]]);
1362 
# SVEVectorOp cases.  Each entry is [name, operand kinds] with an optional
# third element "dn".  The kind string holds one OperandFactory mode code
# per operand ('Z' = SVEVectorRegister, 'P' = SVEGoverningPRegister).
# "dn" makes SVEVectorOp.astr() repeat the destination register as the
# first source operand.  "add", "sub" and the f* names appear in both
# "ZZZ" and "ZPZ" forms.
1363 generate(SVEVectorOp, [["add", "ZZZ"],
1364                        ["sub", "ZZZ"],
1365                        ["fadd", "ZZZ"],
1366                        ["fmul", "ZZZ"],
1367                        ["fsub", "ZZZ"],
1368                        ["abs", "ZPZ"],
1369                        ["add", "ZPZ", "dn"],
1370                        ["asr", "ZPZ", "dn"],
1371                        ["cnt", "ZPZ"],
1372                        ["lsl", "ZPZ", "dn"],
1373                        ["lsr", "ZPZ", "dn"],
1374                        ["mul", "ZPZ", "dn"],
1375                        ["neg", "ZPZ"],
1376                        ["not", "ZPZ"],
1377                        ["smax", "ZPZ", "dn"],
1378                        ["smin", "ZPZ", "dn"],
1379                        ["sub", "ZPZ", "dn"],
1380                        ["fabs", "ZPZ"],
1381                        ["fadd", "ZPZ", "dn"],
1382                        ["fdiv", "ZPZ", "dn"],
1383                        ["fmax", "ZPZ", "dn"],
1384                        ["fmin", "ZPZ", "dn"],
1385                        ["fmul", "ZPZ", "dn"],
1386                        ["fneg", "ZPZ"],
1387                        ["frintm", "ZPZ"],
1388                        ["frintn", "ZPZ"],
1389                        ["frintp", "ZPZ"],
1390                        ["fsqrt", "ZPZ"],
1391                        ["fsub", "ZPZ", "dn"],
1392                        ["fmla", "ZPZZ"],
1393                        ["fmls", "ZPZZ"],
1394                        ["fnmla", "ZPZZ"],
1395                        ["fnmls", "ZPZZ"],
1396                        ["mla", "ZPZZ"],
1397                        ["mls", "ZPZZ"],
1398                        ["and", "ZZZ"],
1399                        ["eor", "ZZZ"],
1400                        ["orr", "ZZZ"],
1401                       ])
1402 
# SVEReductionOp cases.  Each entry is [name, lowest RegVariant number]:
# 0 allows the .b/.h/.s/.d widths, 2 restricts the draw to .s/.d.
1403 generate(SVEReductionOp, [["andv", 0], ["orv", 0], ["eorv", 0], ["smaxv", 0], ["sminv", 0],
1404                           ["fminv", 2], ["fmaxv", 2], ["fadda", 2], ["uaddv", 0]])
1405 
# Print the C++ bind of label `forth`, write the matching "forth:" label
# to outfile, then close outfile so its contents are complete on disk.
1406 print "\n    __ bind(forth);"
1407 outfile.write("forth:\n")
1408 
1409 outfile.close()
1410 
1411 import subprocess
1412 import sys
1413 
# Assemble aarch64ops.s into aarch64ops.o; check_call raises
# CalledProcessError if the assembler exits with a non-zero status.
1414 # compile for sve with 8.1 and sha2 because of lse atomics and sha512 crypto extension.
1415 subprocess.check_call([AARCH64_AS, "-march=armv8.1-a+sha2+sve", "aarch64ops.s", "-o", "aarch64ops.o"])
1416 
# Wrap the objdump disassembly in a "/* ... */" comment.  stdout is flushed
# before the child runs, presumably so the "/*" already printed is not
# emitted after the child's output.
1417 print
1418 print "/*",
1419 sys.stdout.flush()
1420 subprocess.check_call([AARCH64_OBJDUMP, "-d", "aarch64ops.o"])
1421 print "*/"
1422 
# Extract only the .text section as a raw binary image.
1423 subprocess.check_call([AARCH64_OBJCOPY, "-O", "binary", "-j", ".text", "aarch64ops.o", "aarch64ops.bin"])
1424 
# aarch64ops.bin is the raw image written by objcopy ("-O binary"), so it
# must be opened in binary mode: text mode may translate line endings on
# some platforms, and on Python 3 it yields str, which bytearray() rejects.
# The name `bytes` (which shadows the builtin) is kept because the code
# that follows indexes it.
infile = open("aarch64ops.bin", "rb")
bytes = bytearray(infile.read())
1427 
1428 print
1429 print "  static const unsigned int insns[] ="
1430 print "  {"
1431 
1432 i = 0
1433 while i < len(bytes):
1434      print "    0x%02x%02x%02x%02x," % (bytes[i+3], bytes[i+2], bytes[i+1], bytes[i]),
1435      i += 4
< prev index next >