OpenJDK panama/dev: changeset 62810:1ee425a632ff (vector-unstable, tip)
manual merge with default
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/.gitattributes	Tue Sep 08 15:28:06 2020 +0800
@@ -0,0 +1,1 @@
+* -text
--- a/.hgtags	Wed Sep 02 20:33:29 2020 -0700
+++ b/.hgtags	Tue Sep 08 15:28:06 2020 +0800
@@ -661,3 +661,4 @@
 5c18d696c7ce724ca36df13933aa53f50e12b9e0 jdk-16+11
 fc8e62b399bd93d06e8d13dc3b384c450e853dcd jdk-16+12
 fd07cdb26fc70243ef23d688b545514f4ddf1c2b jdk-16+13
+36b29df125dc88f11657ce93b4998aa9ff5f5d41 jdk-16+14
--- a/make/hotspot/gensrc/GensrcAdlc.gmk	Wed Sep 02 20:33:29 2020 -0700
+++ b/make/hotspot/gensrc/GensrcAdlc.gmk	Tue Sep 08 15:28:06 2020 +0800
@@ -132,6 +132,7 @@
   ifeq ($(HOTSPOT_TARGET_CPU_ARCH), aarch64)
     AD_SRC_FILES += $(call uniq, $(wildcard $(foreach d, $(AD_SRC_ROOTS), \
       $d/cpu/$(HOTSPOT_TARGET_CPU_ARCH)/$(HOTSPOT_TARGET_CPU_ARCH)_neon.ad \
+      $d/cpu/$(HOTSPOT_TARGET_CPU_ARCH)/$(HOTSPOT_TARGET_CPU_ARCH)_sve.ad \
      )))
   endif
--- a/src/hotspot/cpu/aarch64/aarch64-asmtest.py	Wed Sep 02 20:33:29 2020 -0700
+++ b/src/hotspot/cpu/aarch64/aarch64-asmtest.py	Tue Sep 08 15:28:06 2020 +0800
@@ -71,6 +71,49 @@
     else:
         return self.astr("r")
 
+class SVEVectorRegister(FloatRegister):
+    def __str__(self):
+        return self.astr("z")
+
+class SVEPRegister(Register):
+    def __str__(self):
+        return self.astr("p")
+
+    def generate(self):
+        self.number = random.randint(0, 15)
+        return self
+
+class SVEGoverningPRegister(Register):
+    def __str__(self):
+        return self.astr("p")
+    def generate(self):
+        self.number = random.randint(0, 7)
+        return self
+
+class RegVariant(object):
+    def __init__(self, low, high):
+        self.number = random.randint(low, high)
+
+    def astr(self):
+        nameMap = {
+            0: ".b",
+            1: ".h",
+            2: ".s",
+            3: ".d",
+            4: ".q"
+        }
+        return nameMap.get(self.number)
+
+    def cstr(self):
+        nameMap = {
+            0: "__ B",
+            1: "__ H",
+            2: "__ S",
+            3: "__ D",
+            4: "__ Q"
+        }
+        return nameMap.get(self.number)
+
 class FloatZero(Operand):
 
     def __str__(self):
@@ -87,7 +130,10 @@
          'h' : FloatRegister,
          's' : FloatRegister,
          'd' : FloatRegister,
-         'z' : FloatZero}
+         'z' : FloatZero,
+         'p' : SVEPRegister,
+         'P' : SVEGoverningPRegister,
+         'Z' : SVEVectorRegister}
 
     @classmethod
     def create(cls, mode):
@@ -845,6 +891,100 @@
                 % tuple([Instruction.astr(self)] +
                         [(self.reg[i].astr(self.modes[i])) for i in range(self.numRegs)]))
 
+class SVEVectorOp(Instruction):
+    def __init__(self, args):
+        name = args[0]
+        regTypes = args[1]
+        regs = []
+        for c in regTypes:
+            regs.append(OperandFactory.create(c).generate())
+        self.reg = regs
+        self.numRegs = len(regs)
+        if regTypes[0] != "p" and regTypes[1] == 'P':
+            self._isPredicated = True
+            self._merge = "/m"
+        else:
+            self._isPredicated = False
+            self._merge = ""
+
+        self._bitwiseop = False
+        if name[0] == 'f':
+            self._width = RegVariant(2, 3)
+        elif not self._isPredicated and (name == "and" or name == "eor" or name == "orr"):
+            self._width = RegVariant(3, 3)
+            self._bitwiseop = True
+        else:
+            self._width = RegVariant(0, 3)
+        if len(args) > 2:
+            self._dnm = args[2]
+        else:
+            self._dnm = None
+        Instruction.__init__(self, name)
+
+    def cstr(self):
+        formatStr = "%s%s" + ''.join([", %s" for i in range(0, self.numRegs)] + [");"])
+        if self._bitwiseop:
+            width = []
+            formatStr = "%s%s" + ''.join([", %s" for i in range(1, self.numRegs)] + [");"])
+        else:
+            width = [self._width.cstr()]
+        return (formatStr
+                % tuple(["__ sve_" + self._name + "("] +
+                        [str(self.reg[0])] +
+                        width +
+                        [str(self.reg[i]) for i in range(1, self.numRegs)]))
+
+    def astr(self):
+        formatStr = "%s%s" + ''.join([", %s" for i in range(1, self.numRegs)])
+        if self._dnm == 'dn':
+            formatStr += ", %s"
+            dnReg = [str(self.reg[0]) + self._width.astr()]
+        else:
+            dnReg = []
+
+        if self._isPredicated:
+            restRegs = [str(self.reg[1]) + self._merge] + dnReg + [str(self.reg[i]) + self._width.astr() for i in range(2, self.numRegs)]
+        else:
+            restRegs = dnReg + [str(self.reg[i]) + self._width.astr() for i in range(1, self.numRegs)]
+        return (formatStr
                % tuple([Instruction.astr(self)] +
+                        [str(self.reg[0]) + self._width.astr()] +
+                        restRegs))
+
+    def generate(self):
+        return self
+
+class SVEReductionOp(Instruction):
+    def __init__(self, args):
+        name = args[0]
+        lowRegType = args[1]
+        self.reg = []
+        Instruction.__init__(self, name)
+        self.reg.append(OperandFactory.create('s').generate())
+        self.reg.append(OperandFactory.create('P').generate())
+        self.reg.append(OperandFactory.create('Z').generate())
+        self._width = RegVariant(lowRegType, 3)
+
+    def cstr(self):
+        return "__ sve_%s(%s, %s, %s, %s);" % (self.name(),
+                                               str(self.reg[0]),
+                                               self._width.cstr(),
+                                               str(self.reg[1]),
+                                               str(self.reg[2]))
+
+    def astr(self):
+        if self.name() == "uaddv":
+            dstRegName = "d" + str(self.reg[0].number)
+        else:
+            dstRegName = self._width.astr()[1] + str(self.reg[0].number)
+        formatStr = "%s %s, %s, %s"
+        if self.name() == "fadda":
+            formatStr += ", %s"
+            moreReg = [dstRegName]
+        else:
+            moreReg = []
+        return formatStr % tuple([self.name()] +
+                                 [dstRegName] +
+                                 [str(self.reg[1])] +
+                                 moreReg +
+                                 [str(self.reg[2]) + self._width.astr()])
+
 class LdStNEONOp(Instruction):
     def __init__(self, args):
         self._name, self.regnum, self.arrangement, self.addresskind = args
@@ -1311,7 +1451,42 @@
               ["mov", "__ mov(v1, __ T2S, 1, zr);", "mov\tv1.s[1], wzr"],
               ["mov", "__ mov(v1, __ T4H, 2, zr);", "mov\tv1.h[2], wzr"],
               ["mov", "__ mov(v1, __ T8B, 3, zr);", "mov\tv1.b[3], wzr"],
-              ["ld1", "__ ld1(v31, v0, __ T2D, Address(__ post(r1, r0)));", "ld1\t{v31.2d, v0.2d}, [x1], x0"]])
+              ["ld1", "__ ld1(v31, v0, __ T2D, Address(__ post(r1, r0)));", "ld1\t{v31.2d, v0.2d}, [x1], x0"],
+              # SVE instructions
+              ["cpy", "__ sve_cpy(z0, __ S, p0, v1);", "mov\tz0.s, p0/m, s1"],
+              ["inc", "__ sve_inc(r0, __ S);", "incw\tx0"],
+              ["dec", "__ sve_dec(r1, __ H);", "dech\tx1"],
+              ["lsl", "__ sve_lsl(z0, __ B, z1, 7);", "lsl\tz0.b, z1.b, #7"],
+              ["lsl", "__ sve_lsl(z21, __ H, z1, 15);", "lsl\tz21.h, z1.h, #15"],
+              ["lsl", "__ sve_lsl(z0, __ S, z1, 31);", "lsl\tz0.s, z1.s, #31"],
+              ["lsl", "__ sve_lsl(z0, __ D, z1, 63);", "lsl\tz0.d, z1.d, #63"],
+              ["lsr", "__ sve_lsr(z0, __ B, z1, 7);", "lsr\tz0.b, z1.b, #7"],
+              ["asr", "__ sve_asr(z0, __ H, z11, 15);", "asr\tz0.h, z11.h, #15"],
+              ["lsr", "__ sve_lsr(z30, __ S, z1, 31);", "lsr\tz30.s, z1.s, #31"],
+              ["asr", "__ sve_asr(z0, __ D, z1, 63);", "asr\tz0.d, z1.d, #63"],
+              ["addvl", "__ sve_addvl(sp, r0, 31);", "addvl\tsp, x0, #31"],
+              ["addpl", "__ sve_addpl(r1, sp, -32);", "addpl\tx1, sp, -32"],
+              ["cntp", "__ sve_cntp(r8, __ B, p0, p1);", "cntp\tx8, p0, p1.b"],
+              ["dup", "__ sve_dup(z0, __ B, 127);", "dup\tz0.b, 127"],
+              ["dup", "__ sve_dup(z1, __ H, -128);", "dup\tz1.h, -128"],
+              ["dup", "__ sve_dup(z2, __ S, 32512);", "dup\tz2.s, 32512"],
+              ["dup", "__ sve_dup(z7, __ D, -32768);", "dup\tz7.d, -32768"],
+              ["ld1b", "__ sve_ld1b(z0, __ B, p0, Address(sp));", "ld1b\t{z0.b}, p0/z, [sp]"],
+              ["ld1h", "__ sve_ld1h(z10, __ H, p1, Address(sp, -8));", "ld1h\t{z10.h}, p1/z, [sp, #-8, MUL VL]"],
+              ["ld1w", "__ sve_ld1w(z20, __ S, p2, Address(r0, 7));", "ld1w\t{z20.s}, p2/z, [x0, #7, MUL VL]"],
+              ["ld1b", "__ sve_ld1b(z30, __ B, p3, Address(sp, r8));", "ld1b\t{z30.b}, p3/z, [sp, x8]"],
+              ["ld1w", "__ sve_ld1w(z0, __ S, p4, Address(sp, r28));", "ld1w\t{z0.s}, p4/z, [sp, x28, LSL #2]"],
+              ["ld1d", "__ sve_ld1d(z11, __ D, p5, Address(r0, r1));", "ld1d\t{z11.d}, p5/z, [x0, x1, LSL #3]"],
+              ["st1b", "__ sve_st1b(z22, __ B, p6, Address(sp));", "st1b\t{z22.b}, p6, [sp]"],
+              ["st1b", "__ sve_st1b(z31, __ B, p7, Address(sp, -8));", "st1b\t{z31.b}, p7, [sp, #-8, MUL VL]"],
+              ["st1w", "__ sve_st1w(z0, __ S, p1, Address(r0, 7));", "st1w\t{z0.s}, p1, [x0, #7, MUL VL]"],
+              ["st1b", "__ sve_st1b(z0, __ B, p2, Address(sp, r1));", "st1b\t{z0.b}, p2, [sp, x1]"],
+              ["st1h", "__ sve_st1h(z0, __ H, p3, Address(sp, r8));", "st1h\t{z0.h}, p3, [sp, x8, LSL #1]"],
+              ["st1d", "__ sve_st1d(z0, __ D, p4, Address(r0, r18));", "st1d\t{z0.d}, p4, [x0, x18, LSL #3]"],
+              ["ldr", "__ sve_ldr(z0, Address(sp));", "ldr\tz0, [sp]"],
+              ["ldr", "__ sve_ldr(z31, Address(sp, -256));", "ldr\tz31, [sp, #-256, MUL VL]"],
+              ["str", "__ sve_str(z8, Address(r8, 255));", "str\tz8, [x8, #255, MUL VL]"],
+])
 
 print "\n// FloatImmediateOp"
 for float in ("2.0", "2.125", "4.0", "4.25", "8.0", "8.5", "16.0", "17.0", "0.125",
@@ -1336,18 +1511,59 @@
                        ["ldumin", "ldumin", size, suffix],
                        ["ldumax", "ldumax", size, suffix]]);
 
+generate(SVEVectorOp, [["add", "ZZZ"],
+                       ["sub", "ZZZ"],
+                       ["fadd", "ZZZ"],
+                       ["fmul", "ZZZ"],
+                       ["fsub", "ZZZ"],
+                       ["abs", "ZPZ"],
+                       ["add", "ZPZ", "dn"],
+                       ["asr", "ZPZ", "dn"],
+                       ["cnt", "ZPZ"],
+                       ["lsl", "ZPZ", "dn"],
+                       ["lsr", "ZPZ", "dn"],
+                       ["mul", "ZPZ", "dn"],
+                       ["neg", "ZPZ"],
+                       ["not", "ZPZ"],
+                       ["smax", "ZPZ", "dn"],
+                       ["smin", "ZPZ", "dn"],
+                       ["sub", "ZPZ", "dn"],
+                       ["fabs", "ZPZ"],
+                       ["fadd", "ZPZ", "dn"],
+                       ["fdiv", "ZPZ", "dn"],
+                       ["fmax", "ZPZ", "dn"],
+                       ["fmin", "ZPZ", "dn"],
+                       ["fmul", "ZPZ", "dn"],
+                       ["fneg", "ZPZ"],
+                       ["frintm", "ZPZ"],
+                       ["frintn", "ZPZ"],
+                       ["frintp", "ZPZ"],
+                       ["fsqrt", "ZPZ"],
+                       ["fsub", "ZPZ", "dn"],
+                       ["fmla", "ZPZZ"],
+                       ["fmls", "ZPZZ"],
+                       ["fnmla", "ZPZZ"],
+                       ["fnmls", "ZPZZ"],
+                       ["mla", "ZPZZ"],
+                       ["mls", "ZPZZ"],
+                       ["and", "ZZZ"],
+                       ["eor", "ZZZ"],
+                       ["orr", "ZZZ"],
+                      ])
+
+generate(SVEReductionOp, [["andv", 0], ["orv", 0], ["eorv", 0], ["smaxv", 0], ["sminv", 0],
+                          ["fminv", 2], ["fmaxv", 2], ["fadda", 2], ["uaddv", 0]])
+
 print "\n    __ bind(forth);"
 outfile.write("forth:\n")
 
 outfile.close()
 
-# compile for 8.1 and sha2 because of lse atomics and sha512 crypto extension.
-subprocess.check_call([AARCH64_AS, "-march=armv8.1-a+sha2", "aarch64ops.s", "-o", "aarch64ops.o"])
-output = subprocess.check_output([AARCH64_OBJDUMP, "-d", "aarch64ops.o"])
+# compile for sve with 8.1 and sha2 because of lse atomics and sha512 crypto extension.
+subprocess.check_call([AARCH64_AS, "-march=armv8.1-a+sha2+sve", "aarch64ops.s", "-o", "aarch64ops.o"])
 
 print
 print "/*"
-print output
 print "*/"
 
 subprocess.check_call([AARCH64_OBJCOPY, "-O", "binary", "-j", ".text", "aarch64ops.o", "aarch64ops.bin"])
--- a/src/hotspot/cpu/aarch64/aarch64.ad Wed Sep 02 20:33:29 2020 -0700 +++ b/src/hotspot/cpu/aarch64/aarch64.ad Tue Sep 08 15:28:06 2020 +0800 @@ -69,7 +69,7 @@ // // r0-r7,r10-r26 volatile (caller save) // r27-r32 system (no save, no allocate) -// r8-r9 invisible to the allocator (so we can use them as scratch regs) +// r8-r9 non-allocatable (so we can use them as scratch regs) // // as regards Java usage. we don't use any callee save registers // because this makes it difficult to de-optimise a frame (see comment @@ -94,6 +94,10 @@ reg_def R6_H ( SOC, SOC, Op_RegI, 6, r6->as_VMReg()->next() ); reg_def R7 ( SOC, SOC, Op_RegI, 7, r7->as_VMReg() ); reg_def R7_H ( SOC, SOC, Op_RegI, 7, r7->as_VMReg()->next() ); +reg_def R8 ( NS, SOC, Op_RegI, 8, r8->as_VMReg() ); // rscratch1, non-allocatable +reg_def R8_H ( NS, SOC, Op_RegI, 8, r8->as_VMReg()->next() ); +reg_def R9 ( NS, SOC, Op_RegI, 9, r9->as_VMReg() ); // rscratch2, non-allocatable +reg_def R9_H ( NS, SOC, Op_RegI, 9, r9->as_VMReg()->next() ); reg_def R10 ( SOC, SOC, Op_RegI, 10, r10->as_VMReg() ); reg_def R10_H ( SOC, SOC, Op_RegI, 10, r10->as_VMReg()->next()); reg_def R11 ( SOC, SOC, Op_RegI, 11, r11->as_VMReg() ); @@ -140,7 +144,7 @@ reg_def R31_H ( NS, NS, Op_RegI, 31, r31_sp->as_VMReg()->next()); // ---------------------------- -// Float/Double Registers +// Float/Double/Vector Registers // ---------------------------- // Double Registers @@ -161,165 +165,324 @@ // the platform ABI treats v8-v15 as callee save). float registers // v16-v31 are SOC as per the platform spec - reg_def V0 ( SOC, SOC, Op_RegF, 0, v0->as_VMReg() ); - reg_def V0_H ( SOC, SOC, Op_RegF, 0, v0->as_VMReg()->next() ); - reg_def V0_J ( SOC, SOC, Op_RegF, 0, v0->as_VMReg()->next(2) ); - reg_def V0_K ( SOC, SOC, Op_RegF, 0, v0->as_VMReg()->next(3) ); - - reg_def V1 ( SOC, SOC, Op_RegF, 1, v1->as_VMReg() ); - reg_def V1_H ( SOC, SOC, Op_RegF, 1, v1->as_VMReg()->next() ); - reg_def V1_J ( SOC, SOC, Op_RegF, 1, v1->as_VMReg()->next(2) ); - reg_def V1_K ( SOC, SOC, Op_RegF, 1, v1->as_VMReg()->next(3) ); - - reg_def V2 ( SOC, SOC, Op_RegF, 2, v2->as_VMReg() ); - reg_def V2_H ( SOC, SOC, Op_RegF, 2, v2->as_VMReg()->next() ); - reg_def V2_J ( SOC, SOC, Op_RegF, 2, v2->as_VMReg()->next(2) ); - reg_def V2_K ( SOC, SOC, Op_RegF, 2, v2->as_VMReg()->next(3) ); - - reg_def V3 ( SOC, SOC, Op_RegF, 3, v3->as_VMReg() ); - reg_def V3_H ( SOC, SOC, Op_RegF, 3, v3->as_VMReg()->next() ); - reg_def V3_J ( SOC, SOC, Op_RegF, 3, v3->as_VMReg()->next(2) ); - reg_def V3_K ( SOC, SOC, Op_RegF, 3, v3->as_VMReg()->next(3) ); - - reg_def V4 ( SOC, SOC, Op_RegF, 4, v4->as_VMReg() ); - reg_def V4_H ( SOC, SOC, Op_RegF, 4, v4->as_VMReg()->next() ); - reg_def V4_J ( SOC, SOC, Op_RegF, 4, v4->as_VMReg()->next(2) ); - reg_def V4_K ( SOC, SOC, Op_RegF, 4, v4->as_VMReg()->next(3) ); - - reg_def V5 ( SOC, SOC, Op_RegF, 5, v5->as_VMReg() ); - reg_def V5_H ( SOC, SOC, Op_RegF, 5, v5->as_VMReg()->next() ); - reg_def V5_J ( SOC, SOC, Op_RegF, 5, v5->as_VMReg()->next(2) ); - reg_def V5_K ( SOC, SOC, Op_RegF, 5, v5->as_VMReg()->next(3) ); - - reg_def V6 ( SOC, SOC, Op_RegF, 6, v6->as_VMReg() ); - reg_def V6_H ( SOC, SOC, Op_RegF, 6, v6->as_VMReg()->next() ); - reg_def V6_J ( SOC, SOC, Op_RegF, 6, v6->as_VMReg()->next(2) ); - reg_def V6_K ( SOC, SOC, Op_RegF, 6, v6->as_VMReg()->next(3) ); - - reg_def V7 ( SOC, SOC, Op_RegF, 7, v7->as_VMReg() ); - reg_def V7_H ( SOC, SOC, Op_RegF, 7, v7->as_VMReg()->next() ); - reg_def V7_J ( SOC, SOC, Op_RegF, 7, v7->as_VMReg()->next(2) ); - reg_def V7_K ( SOC, SOC, Op_RegF, 
7, v7->as_VMReg()->next(3) ); - - reg_def V8 ( SOC, SOC, Op_RegF, 8, v8->as_VMReg() ); - reg_def V8_H ( SOC, SOC, Op_RegF, 8, v8->as_VMReg()->next() ); - reg_def V8_J ( SOC, SOC, Op_RegF, 8, v8->as_VMReg()->next(2) ); - reg_def V8_K ( SOC, SOC, Op_RegF, 8, v8->as_VMReg()->next(3) ); - - reg_def V9 ( SOC, SOC, Op_RegF, 9, v9->as_VMReg() ); - reg_def V9_H ( SOC, SOC, Op_RegF, 9, v9->as_VMReg()->next() ); - reg_def V9_J ( SOC, SOC, Op_RegF, 9, v9->as_VMReg()->next(2) ); - reg_def V9_K ( SOC, SOC, Op_RegF, 9, v9->as_VMReg()->next(3) ); - - reg_def V10 ( SOC, SOC, Op_RegF, 10, v10->as_VMReg() ); - reg_def V10_H( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next() ); - reg_def V10_J( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(2)); - reg_def V10_K( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(3)); - - reg_def V11 ( SOC, SOC, Op_RegF, 11, v11->as_VMReg() ); - reg_def V11_H( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next() ); - reg_def V11_J( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(2)); - reg_def V11_K( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(3)); - - reg_def V12 ( SOC, SOC, Op_RegF, 12, v12->as_VMReg() ); - reg_def V12_H( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next() ); - reg_def V12_J( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(2)); - reg_def V12_K( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(3)); - - reg_def V13 ( SOC, SOC, Op_RegF, 13, v13->as_VMReg() ); - reg_def V13_H( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next() ); - reg_def V13_J( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(2)); - reg_def V13_K( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(3)); - - reg_def V14 ( SOC, SOC, Op_RegF, 14, v14->as_VMReg() ); - reg_def V14_H( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next() ); - reg_def V14_J( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(2)); - reg_def V14_K( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(3)); - - reg_def V15 ( SOC, SOC, Op_RegF, 15, v15->as_VMReg() ); - reg_def V15_H( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next() ); - reg_def V15_J( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(2)); - reg_def V15_K( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(3)); - - reg_def V16 ( SOC, SOC, Op_RegF, 16, v16->as_VMReg() ); - reg_def V16_H( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next() ); - reg_def V16_J( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(2)); - reg_def V16_K( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(3)); - - reg_def V17 ( SOC, SOC, Op_RegF, 17, v17->as_VMReg() ); - reg_def V17_H( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next() ); - reg_def V17_J( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(2)); - reg_def V17_K( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(3)); - - reg_def V18 ( SOC, SOC, Op_RegF, 18, v18->as_VMReg() ); - reg_def V18_H( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next() ); - reg_def V18_J( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(2)); - reg_def V18_K( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(3)); - - reg_def V19 ( SOC, SOC, Op_RegF, 19, v19->as_VMReg() ); - reg_def V19_H( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next() ); - reg_def V19_J( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(2)); - reg_def V19_K( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(3)); - - reg_def V20 ( SOC, SOC, Op_RegF, 20, v20->as_VMReg() ); - reg_def V20_H( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next() ); - reg_def V20_J( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(2)); - reg_def V20_K( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(3)); - - reg_def V21 ( SOC, SOC, Op_RegF, 21, v21->as_VMReg() ); - reg_def V21_H( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next() ); - reg_def V21_J( 
SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(2)); - reg_def V21_K( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(3)); - - reg_def V22 ( SOC, SOC, Op_RegF, 22, v22->as_VMReg() ); - reg_def V22_H( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next() ); - reg_def V22_J( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(2)); - reg_def V22_K( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(3)); - - reg_def V23 ( SOC, SOC, Op_RegF, 23, v23->as_VMReg() ); - reg_def V23_H( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next() ); - reg_def V23_J( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(2)); - reg_def V23_K( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(3)); - - reg_def V24 ( SOC, SOC, Op_RegF, 24, v24->as_VMReg() ); - reg_def V24_H( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next() ); - reg_def V24_J( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(2)); - reg_def V24_K( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(3)); - - reg_def V25 ( SOC, SOC, Op_RegF, 25, v25->as_VMReg() ); - reg_def V25_H( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next() ); - reg_def V25_J( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(2)); - reg_def V25_K( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(3)); - - reg_def V26 ( SOC, SOC, Op_RegF, 26, v26->as_VMReg() ); - reg_def V26_H( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next() ); - reg_def V26_J( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(2)); - reg_def V26_K( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(3)); - - reg_def V27 ( SOC, SOC, Op_RegF, 27, v27->as_VMReg() ); - reg_def V27_H( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next() ); - reg_def V27_J( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(2)); - reg_def V27_K( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(3)); - - reg_def V28 ( SOC, SOC, Op_RegF, 28, v28->as_VMReg() ); - reg_def V28_H( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next() ); - reg_def V28_J( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(2)); - reg_def V28_K( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(3)); - - reg_def V29 ( SOC, SOC, Op_RegF, 29, v29->as_VMReg() ); - reg_def V29_H( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next() ); - reg_def V29_J( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(2)); - reg_def V29_K( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(3)); - - reg_def V30 ( SOC, SOC, Op_RegF, 30, v30->as_VMReg() ); - reg_def V30_H( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next() ); - reg_def V30_J( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(2)); - reg_def V30_K( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(3)); - - reg_def V31 ( SOC, SOC, Op_RegF, 31, v31->as_VMReg() ); - reg_def V31_H( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next() ); - reg_def V31_J( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(2)); - reg_def V31_K( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(3)); +// For SVE vector registers, we simply extend vector register size to 8 +// 'logical' slots. This is nominally 256 bits but it actually covers +// all possible 'physical' SVE vector register lengths from 128 ~ 2048 +// bits. The 'physical' SVE vector register length is detected during +// startup, so the register allocator is able to identify the correct +// number of bytes needed for an SVE spill/unspill. +// Note that a vector register with 4 slots denotes a 128-bit NEON +// register allowing it to be distinguished from the corresponding SVE +// vector register when the SVE vector length is 128 bits. 
+ + reg_def V0 ( SOC, SOC, Op_RegF, 0, v0->as_VMReg() ); + reg_def V0_H ( SOC, SOC, Op_RegF, 0, v0->as_VMReg()->next() ); + reg_def V0_J ( SOC, SOC, Op_RegF, 0, v0->as_VMReg()->next(2) ); + reg_def V0_K ( SOC, SOC, Op_RegF, 0, v0->as_VMReg()->next(3) ); + reg_def V0_L ( SOC, SOC, Op_RegF, 0, v0->as_VMReg()->next(4) ); + reg_def V0_M ( SOC, SOC, Op_RegF, 0, v0->as_VMReg()->next(5) ); + reg_def V0_N ( SOC, SOC, Op_RegF, 0, v0->as_VMReg()->next(6) ); + reg_def V0_O ( SOC, SOC, Op_RegF, 0, v0->as_VMReg()->next(7) ); + + reg_def V1 ( SOC, SOC, Op_RegF, 1, v1->as_VMReg() ); + reg_def V1_H ( SOC, SOC, Op_RegF, 1, v1->as_VMReg()->next() ); + reg_def V1_J ( SOC, SOC, Op_RegF, 1, v1->as_VMReg()->next(2) ); + reg_def V1_K ( SOC, SOC, Op_RegF, 1, v1->as_VMReg()->next(3) ); + reg_def V1_L ( SOC, SOC, Op_RegF, 1, v1->as_VMReg()->next(4) ); + reg_def V1_M ( SOC, SOC, Op_RegF, 1, v1->as_VMReg()->next(5) ); + reg_def V1_N ( SOC, SOC, Op_RegF, 1, v1->as_VMReg()->next(6) ); + reg_def V1_O ( SOC, SOC, Op_RegF, 1, v1->as_VMReg()->next(7) ); + + reg_def V2 ( SOC, SOC, Op_RegF, 2, v2->as_VMReg() ); + reg_def V2_H ( SOC, SOC, Op_RegF, 2, v2->as_VMReg()->next() ); + reg_def V2_J ( SOC, SOC, Op_RegF, 2, v2->as_VMReg()->next(2) ); + reg_def V2_K ( SOC, SOC, Op_RegF, 2, v2->as_VMReg()->next(3) ); + reg_def V2_L ( SOC, SOC, Op_RegF, 2, v2->as_VMReg()->next(4) ); + reg_def V2_M ( SOC, SOC, Op_RegF, 2, v2->as_VMReg()->next(5) ); + reg_def V2_N ( SOC, SOC, Op_RegF, 2, v2->as_VMReg()->next(6) ); + reg_def V2_O ( SOC, SOC, Op_RegF, 2, v2->as_VMReg()->next(7) ); + + reg_def V3 ( SOC, SOC, Op_RegF, 3, v3->as_VMReg() ); + reg_def V3_H ( SOC, SOC, Op_RegF, 3, v3->as_VMReg()->next() ); + reg_def V3_J ( SOC, SOC, Op_RegF, 3, v3->as_VMReg()->next(2) ); + reg_def V3_K ( SOC, SOC, Op_RegF, 3, v3->as_VMReg()->next(3) ); + reg_def V3_L ( SOC, SOC, Op_RegF, 3, v3->as_VMReg()->next(4) ); + reg_def V3_M ( SOC, SOC, Op_RegF, 3, v3->as_VMReg()->next(5) ); + reg_def V3_N ( SOC, SOC, Op_RegF, 3, v3->as_VMReg()->next(6) ); + reg_def V3_O ( SOC, SOC, Op_RegF, 3, v3->as_VMReg()->next(7) ); + + reg_def V4 ( SOC, SOC, Op_RegF, 4, v4->as_VMReg() ); + reg_def V4_H ( SOC, SOC, Op_RegF, 4, v4->as_VMReg()->next() ); + reg_def V4_J ( SOC, SOC, Op_RegF, 4, v4->as_VMReg()->next(2) ); + reg_def V4_K ( SOC, SOC, Op_RegF, 4, v4->as_VMReg()->next(3) ); + reg_def V4_L ( SOC, SOC, Op_RegF, 4, v4->as_VMReg()->next(4) ); + reg_def V4_M ( SOC, SOC, Op_RegF, 4, v4->as_VMReg()->next(5) ); + reg_def V4_N ( SOC, SOC, Op_RegF, 4, v4->as_VMReg()->next(6) ); + reg_def V4_O ( SOC, SOC, Op_RegF, 4, v4->as_VMReg()->next(7) ); + + reg_def V5 ( SOC, SOC, Op_RegF, 5, v5->as_VMReg() ); + reg_def V5_H ( SOC, SOC, Op_RegF, 5, v5->as_VMReg()->next() ); + reg_def V5_J ( SOC, SOC, Op_RegF, 5, v5->as_VMReg()->next(2) ); + reg_def V5_K ( SOC, SOC, Op_RegF, 5, v5->as_VMReg()->next(3) ); + reg_def V5_L ( SOC, SOC, Op_RegF, 5, v5->as_VMReg()->next(4) ); + reg_def V5_M ( SOC, SOC, Op_RegF, 5, v5->as_VMReg()->next(5) ); + reg_def V5_N ( SOC, SOC, Op_RegF, 5, v5->as_VMReg()->next(6) ); + reg_def V5_O ( SOC, SOC, Op_RegF, 5, v5->as_VMReg()->next(7) ); + + reg_def V6 ( SOC, SOC, Op_RegF, 6, v6->as_VMReg() ); + reg_def V6_H ( SOC, SOC, Op_RegF, 6, v6->as_VMReg()->next() ); + reg_def V6_J ( SOC, SOC, Op_RegF, 6, v6->as_VMReg()->next(2) ); + reg_def V6_K ( SOC, SOC, Op_RegF, 6, v6->as_VMReg()->next(3) ); + reg_def V6_L ( SOC, SOC, Op_RegF, 6, v6->as_VMReg()->next(4) ); + reg_def V6_M ( SOC, SOC, Op_RegF, 6, v6->as_VMReg()->next(5) ); + reg_def V6_N ( SOC, SOC, Op_RegF, 6, 
v6->as_VMReg()->next(6) ); + reg_def V6_O ( SOC, SOC, Op_RegF, 6, v6->as_VMReg()->next(7) ); + + reg_def V7 ( SOC, SOC, Op_RegF, 7, v7->as_VMReg() ); + reg_def V7_H ( SOC, SOC, Op_RegF, 7, v7->as_VMReg()->next() ); + reg_def V7_J ( SOC, SOC, Op_RegF, 7, v7->as_VMReg()->next(2) ); + reg_def V7_K ( SOC, SOC, Op_RegF, 7, v7->as_VMReg()->next(3) ); + reg_def V7_L ( SOC, SOC, Op_RegF, 7, v7->as_VMReg()->next(4) ); + reg_def V7_M ( SOC, SOC, Op_RegF, 7, v7->as_VMReg()->next(5) ); + reg_def V7_N ( SOC, SOC, Op_RegF, 7, v7->as_VMReg()->next(6) ); + reg_def V7_O ( SOC, SOC, Op_RegF, 7, v7->as_VMReg()->next(7) ); + + reg_def V8 ( SOC, SOC, Op_RegF, 8, v8->as_VMReg() ); + reg_def V8_H ( SOC, SOC, Op_RegF, 8, v8->as_VMReg()->next() ); + reg_def V8_J ( SOC, SOC, Op_RegF, 8, v8->as_VMReg()->next(2) ); + reg_def V8_K ( SOC, SOC, Op_RegF, 8, v8->as_VMReg()->next(3) ); + reg_def V8_L ( SOC, SOC, Op_RegF, 8, v8->as_VMReg()->next(4) ); + reg_def V8_M ( SOC, SOC, Op_RegF, 8, v8->as_VMReg()->next(5) ); + reg_def V8_N ( SOC, SOC, Op_RegF, 8, v8->as_VMReg()->next(6) ); + reg_def V8_O ( SOC, SOC, Op_RegF, 8, v8->as_VMReg()->next(7) ); + + reg_def V9 ( SOC, SOC, Op_RegF, 9, v9->as_VMReg() ); + reg_def V9_H ( SOC, SOC, Op_RegF, 9, v9->as_VMReg()->next() ); + reg_def V9_J ( SOC, SOC, Op_RegF, 9, v9->as_VMReg()->next(2) ); + reg_def V9_K ( SOC, SOC, Op_RegF, 9, v9->as_VMReg()->next(3) ); + reg_def V9_L ( SOC, SOC, Op_RegF, 9, v9->as_VMReg()->next(4) ); + reg_def V9_M ( SOC, SOC, Op_RegF, 9, v9->as_VMReg()->next(5) ); + reg_def V9_N ( SOC, SOC, Op_RegF, 9, v9->as_VMReg()->next(6) ); + reg_def V9_O ( SOC, SOC, Op_RegF, 9, v9->as_VMReg()->next(7) ); + + reg_def V10 ( SOC, SOC, Op_RegF, 10, v10->as_VMReg() ); + reg_def V10_H ( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next() ); + reg_def V10_J ( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(2) ); + reg_def V10_K ( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(3) ); + reg_def V10_L ( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(4) ); + reg_def V10_M ( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(5) ); + reg_def V10_N ( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(6) ); + reg_def V10_O ( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(7) ); + + reg_def V11 ( SOC, SOC, Op_RegF, 11, v11->as_VMReg() ); + reg_def V11_H ( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next() ); + reg_def V11_J ( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(2) ); + reg_def V11_K ( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(3) ); + reg_def V11_L ( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(4) ); + reg_def V11_M ( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(5) ); + reg_def V11_N ( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(6) ); + reg_def V11_O ( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(7) ); + + reg_def V12 ( SOC, SOC, Op_RegF, 12, v12->as_VMReg() ); + reg_def V12_H ( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next() ); + reg_def V12_J ( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(2) ); + reg_def V12_K ( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(3) ); + reg_def V12_L ( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(4) ); + reg_def V12_M ( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(5) ); + reg_def V12_N ( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(6) ); + reg_def V12_O ( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(7) ); + + reg_def V13 ( SOC, SOC, Op_RegF, 13, v13->as_VMReg() ); + reg_def V13_H ( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next() ); + reg_def V13_J ( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(2) ); + reg_def V13_K ( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(3) ); + reg_def V13_L 
( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(4) ); + reg_def V13_M ( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(5) ); + reg_def V13_N ( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(6) ); + reg_def V13_O ( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(7) ); + + reg_def V14 ( SOC, SOC, Op_RegF, 14, v14->as_VMReg() ); + reg_def V14_H ( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next() ); + reg_def V14_J ( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(2) ); + reg_def V14_K ( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(3) ); + reg_def V14_L ( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(4) ); + reg_def V14_M ( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(5) ); + reg_def V14_N ( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(6) ); + reg_def V14_O ( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(7) ); + + reg_def V15 ( SOC, SOC, Op_RegF, 15, v15->as_VMReg() ); + reg_def V15_H ( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next() ); + reg_def V15_J ( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(2) ); + reg_def V15_K ( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(3) ); + reg_def V15_L ( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(4) ); + reg_def V15_M ( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(5) ); + reg_def V15_N ( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(6) ); + reg_def V15_O ( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(7) ); + + reg_def V16 ( SOC, SOC, Op_RegF, 16, v16->as_VMReg() ); + reg_def V16_H ( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next() ); + reg_def V16_J ( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(2) ); + reg_def V16_K ( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(3) ); + reg_def V16_L ( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(4) ); + reg_def V16_M ( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(5) ); + reg_def V16_N ( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(6) ); + reg_def V16_O ( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(7) ); + + reg_def V17 ( SOC, SOC, Op_RegF, 17, v17->as_VMReg() ); + reg_def V17_H ( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next() ); + reg_def V17_J ( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(2) ); + reg_def V17_K ( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(3) ); + reg_def V17_L ( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(4) ); + reg_def V17_M ( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(5) ); + reg_def V17_N ( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(6) ); + reg_def V17_O ( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(7) ); + + reg_def V18 ( SOC, SOC, Op_RegF, 18, v18->as_VMReg() ); + reg_def V18_H ( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next() ); + reg_def V18_J ( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(2) ); + reg_def V18_K ( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(3) ); + reg_def V18_L ( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(4) ); + reg_def V18_M ( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(5) ); + reg_def V18_N ( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(6) ); + reg_def V18_O ( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(7) ); + + reg_def V19 ( SOC, SOC, Op_RegF, 19, v19->as_VMReg() ); + reg_def V19_H ( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next() ); + reg_def V19_J ( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(2) ); + reg_def V19_K ( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(3) ); + reg_def V19_L ( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(4) ); + reg_def V19_M ( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(5) ); + reg_def V19_N ( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(6) ); + reg_def V19_O ( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(7) ); + + reg_def V20 ( SOC, SOC, Op_RegF, 20, 
v20->as_VMReg() ); + reg_def V20_H ( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next() ); + reg_def V20_J ( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(2) ); + reg_def V20_K ( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(3) ); + reg_def V20_L ( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(4) ); + reg_def V20_M ( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(5) ); + reg_def V20_N ( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(6) ); + reg_def V20_O ( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(7) ); + + reg_def V21 ( SOC, SOC, Op_RegF, 21, v21->as_VMReg() ); + reg_def V21_H ( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next() ); + reg_def V21_J ( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(2) ); + reg_def V21_K ( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(3) ); + reg_def V21_L ( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(4) ); + reg_def V21_M ( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(5) ); + reg_def V21_N ( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(6) ); + reg_def V21_O ( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(7) ); + + reg_def V22 ( SOC, SOC, Op_RegF, 22, v22->as_VMReg() ); + reg_def V22_H ( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next() ); + reg_def V22_J ( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(2) ); + reg_def V22_K ( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(3) ); + reg_def V22_L ( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(4) ); + reg_def V22_M ( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(5) ); + reg_def V22_N ( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(6) ); + reg_def V22_O ( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(7) ); + + reg_def V23 ( SOC, SOC, Op_RegF, 23, v23->as_VMReg() ); + reg_def V23_H ( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next() ); + reg_def V23_J ( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(2) ); + reg_def V23_K ( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(3) ); + reg_def V23_L ( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(4) ); + reg_def V23_M ( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(5) ); + reg_def V23_N ( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(6) ); + reg_def V23_O ( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(7) ); + + reg_def V24 ( SOC, SOC, Op_RegF, 24, v24->as_VMReg() ); + reg_def V24_H ( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next() ); + reg_def V24_J ( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(2) ); + reg_def V24_K ( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(3) ); + reg_def V24_L ( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(4) ); + reg_def V24_M ( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(5) ); + reg_def V24_N ( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(6) ); + reg_def V24_O ( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(7) ); + + reg_def V25 ( SOC, SOC, Op_RegF, 25, v25->as_VMReg() ); + reg_def V25_H ( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next() ); + reg_def V25_J ( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(2) ); + reg_def V25_K ( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(3) ); + reg_def V25_L ( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(4) ); + reg_def V25_M ( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(5) ); + reg_def V25_N ( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(6) ); + reg_def V25_O ( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(7) ); + + reg_def V26 ( SOC, SOC, Op_RegF, 26, v26->as_VMReg() ); + reg_def V26_H ( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next() ); + reg_def V26_J ( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(2) ); + reg_def V26_K ( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(3) ); + reg_def V26_L ( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(4) ); + 
reg_def V26_M ( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(5) ); + reg_def V26_N ( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(6) ); + reg_def V26_O ( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(7) ); + + reg_def V27 ( SOC, SOC, Op_RegF, 27, v27->as_VMReg() ); + reg_def V27_H ( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next() ); + reg_def V27_J ( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(2) ); + reg_def V27_K ( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(3) ); + reg_def V27_L ( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(4) ); + reg_def V27_M ( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(5) ); + reg_def V27_N ( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(6) ); + reg_def V27_O ( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(7) ); + + reg_def V28 ( SOC, SOC, Op_RegF, 28, v28->as_VMReg() ); + reg_def V28_H ( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next() ); + reg_def V28_J ( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(2) ); + reg_def V28_K ( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(3) ); + reg_def V28_L ( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(4) ); + reg_def V28_M ( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(5) ); + reg_def V28_N ( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(6) ); + reg_def V28_O ( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(7) ); + + reg_def V29 ( SOC, SOC, Op_RegF, 29, v29->as_VMReg() ); + reg_def V29_H ( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next() ); + reg_def V29_J ( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(2) ); + reg_def V29_K ( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(3) ); + reg_def V29_L ( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(4) ); + reg_def V29_M ( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(5) ); + reg_def V29_N ( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(6) ); + reg_def V29_O ( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(7) ); + + reg_def V30 ( SOC, SOC, Op_RegF, 30, v30->as_VMReg() ); + reg_def V30_H ( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next() ); + reg_def V30_J ( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(2) ); + reg_def V30_K ( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(3) ); + reg_def V30_L ( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(4) ); + reg_def V30_M ( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(5) ); + reg_def V30_N ( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(6) ); + reg_def V30_O ( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(7) ); + + reg_def V31 ( SOC, SOC, Op_RegF, 31, v31->as_VMReg() ); + reg_def V31_H ( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next() ); + reg_def V31_J ( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(2) ); + reg_def V31_K ( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(3) ); + reg_def V31_L ( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(4) ); + reg_def V31_M ( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(5) ); + reg_def V31_N ( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(6) ); + reg_def V31_O ( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(7) ); + + +// ---------------------------- +// SVE Predicate Registers +// ---------------------------- + reg_def P0 (SOC, SOC, Op_RegVMask, 0, p0->as_VMReg()); + reg_def P1 (SOC, SOC, Op_RegVMask, 1, p1->as_VMReg()); + reg_def P2 (SOC, SOC, Op_RegVMask, 2, p2->as_VMReg()); + reg_def P3 (SOC, SOC, Op_RegVMask, 3, p3->as_VMReg()); + reg_def P4 (SOC, SOC, Op_RegVMask, 4, p4->as_VMReg()); + reg_def P5 (SOC, SOC, Op_RegVMask, 5, p5->as_VMReg()); + reg_def P6 (SOC, SOC, Op_RegVMask, 6, p6->as_VMReg()); + reg_def P7 (SOC, SOC, Op_RegVMask, 7, p7->as_VMReg()); + reg_def P8 (SOC, SOC, Op_RegVMask, 8, p8->as_VMReg()); + reg_def P9 (SOC, SOC, 
Op_RegVMask, 9, p9->as_VMReg()); + reg_def P10 (SOC, SOC, Op_RegVMask, 10, p10->as_VMReg()); + reg_def P11 (SOC, SOC, Op_RegVMask, 11, p11->as_VMReg()); + reg_def P12 (SOC, SOC, Op_RegVMask, 12, p12->as_VMReg()); + reg_def P13 (SOC, SOC, Op_RegVMask, 13, p13->as_VMReg()); + reg_def P14 (SOC, SOC, Op_RegVMask, 14, p14->as_VMReg()); + reg_def P15 (SOC, SOC, Op_RegVMask, 15, p15->as_VMReg()); // ---------------------------- // Special Registers @@ -333,7 +496,6 @@ reg_def RFLAGS(SOC, SOC, 0, 32, VMRegImpl::Bad()); - // Specify priority of register selection within phases of register // allocation. Highest priority is first. A useful heuristic is to // give registers a low priority when they are required by machine @@ -381,50 +543,72 @@ R29, R29_H, // fp R30, R30_H, // lr R31, R31_H, // sp + R8, R8_H, // rscratch1 + R9, R9_H, // rscratch2 ); alloc_class chunk1( // no save - V16, V16_H, V16_J, V16_K, - V17, V17_H, V17_J, V17_K, - V18, V18_H, V18_J, V18_K, - V19, V19_H, V19_J, V19_K, - V20, V20_H, V20_J, V20_K, - V21, V21_H, V21_J, V21_K, - V22, V22_H, V22_J, V22_K, - V23, V23_H, V23_J, V23_K, - V24, V24_H, V24_J, V24_K, - V25, V25_H, V25_J, V25_K, - V26, V26_H, V26_J, V26_K, - V27, V27_H, V27_J, V27_K, - V28, V28_H, V28_J, V28_K, - V29, V29_H, V29_J, V29_K, - V30, V30_H, V30_J, V30_K, - V31, V31_H, V31_J, V31_K, + V16, V16_H, V16_J, V16_K, V16_L, V16_M, V16_N, V16_O, + V17, V17_H, V17_J, V17_K, V17_L, V17_M, V17_N, V17_O, + V18, V18_H, V18_J, V18_K, V18_L, V18_M, V18_N, V18_O, + V19, V19_H, V19_J, V19_K, V19_L, V19_M, V19_N, V19_O, + V20, V20_H, V20_J, V20_K, V20_L, V20_M, V20_N, V20_O, + V21, V21_H, V21_J, V21_K, V21_L, V21_M, V21_N, V21_O, + V22, V22_H, V22_J, V22_K, V22_L, V22_M, V22_N, V22_O, + V23, V23_H, V23_J, V23_K, V23_L, V23_M, V23_N, V23_O, + V24, V24_H, V24_J, V24_K, V24_L, V24_M, V24_N, V24_O, + V25, V25_H, V25_J, V25_K, V25_L, V25_M, V25_N, V25_O, + V26, V26_H, V26_J, V26_K, V26_L, V26_M, V26_N, V26_O, + V27, V27_H, V27_J, V27_K, V27_L, V27_M, V27_N, V27_O, + V28, V28_H, V28_J, V28_K, V28_L, V28_M, V28_N, V28_O, + V29, V29_H, V29_J, V29_K, V29_L, V29_M, V29_N, V29_O, + V30, V30_H, V30_J, V30_K, V30_L, V30_M, V30_N, V30_O, + V31, V31_H, V31_J, V31_K, V31_L, V31_M, V31_N, V31_O, // arg registers - V0, V0_H, V0_J, V0_K, - V1, V1_H, V1_J, V1_K, - V2, V2_H, V2_J, V2_K, - V3, V3_H, V3_J, V3_K, - V4, V4_H, V4_J, V4_K, - V5, V5_H, V5_J, V5_K, - V6, V6_H, V6_J, V6_K, - V7, V7_H, V7_J, V7_K, + V0, V0_H, V0_J, V0_K, V0_L, V0_M, V0_N, V0_O, + V1, V1_H, V1_J, V1_K, V1_L, V1_M, V1_N, V1_O, + V2, V2_H, V2_J, V2_K, V2_L, V2_M, V2_N, V2_O, + V3, V3_H, V3_J, V3_K, V3_L, V3_M, V3_N, V3_O, + V4, V4_H, V4_J, V4_K, V4_L, V4_M, V4_N, V4_O, + V5, V5_H, V5_J, V5_K, V5_L, V5_M, V5_N, V5_O, + V6, V6_H, V6_J, V6_K, V6_L, V6_M, V6_N, V6_O, + V7, V7_H, V7_J, V7_K, V7_L, V7_M, V7_N, V7_O, // non-volatiles - V8, V8_H, V8_J, V8_K, - V9, V9_H, V9_J, V9_K, - V10, V10_H, V10_J, V10_K, - V11, V11_H, V11_J, V11_K, - V12, V12_H, V12_J, V12_K, - V13, V13_H, V13_J, V13_K, - V14, V14_H, V14_J, V14_K, - V15, V15_H, V15_J, V15_K, -); - -alloc_class chunk2(RFLAGS); + V8, V8_H, V8_J, V8_K, V8_L, V8_M, V8_N, V8_O, + V9, V9_H, V9_J, V9_K, V9_L, V9_M, V9_N, V9_O, + V10, V10_H, V10_J, V10_K, V10_L, V10_M, V10_N, V10_O, + V11, V11_H, V11_J, V11_K, V11_L, V11_M, V11_N, V11_O, + V12, V12_H, V12_J, V12_K, V12_L, V12_M, V12_N, V12_O, + V13, V13_H, V13_J, V13_K, V13_L, V13_M, V13_N, V13_O, + V14, V14_H, V14_J, V14_K, V14_L, V14_M, V14_N, V14_O, + V15, V15_H, V15_J, V15_K, V15_L, V15_M, V15_N, V15_O, +); + +alloc_class chunk2 ( + P0, + 
P1, + P2, + P3, + P4, + P5, + P6, + P7, + + P8, + P9, + P10, + P11, + P12, + P13, + P14, + P15, +); + +alloc_class chunk3(RFLAGS); //----------Architecture Description Register Classes-------------------------- // Several register classes are automatically defined based upon information in @@ -708,6 +892,42 @@ V31, V31_H ); +// Class for all SVE vector registers. +reg_class vectora_reg ( + V0, V0_H, V0_J, V0_K, V0_L, V0_M, V0_N, V0_O, + V1, V1_H, V1_J, V1_K, V1_L, V1_M, V1_N, V1_O, + V2, V2_H, V2_J, V2_K, V2_L, V2_M, V2_N, V2_O, + V3, V3_H, V3_J, V3_K, V3_L, V3_M, V3_N, V3_O, + V4, V4_H, V4_J, V4_K, V4_L, V4_M, V4_N, V4_O, + V5, V5_H, V5_J, V5_K, V5_L, V5_M, V5_N, V5_O, + V6, V6_H, V6_J, V6_K, V6_L, V6_M, V6_N, V6_O, + V7, V7_H, V7_J, V7_K, V7_L, V7_M, V7_N, V7_O, + V8, V8_H, V8_J, V8_K, V8_L, V8_M, V8_N, V8_O, + V9, V9_H, V9_J, V9_K, V9_L, V9_M, V9_N, V9_O, + V10, V10_H, V10_J, V10_K, V10_L, V10_M, V10_N, V10_O, + V11, V11_H, V11_J, V11_K, V11_L, V11_M, V11_N, V11_O, + V12, V12_H, V12_J, V12_K, V12_L, V12_M, V12_N, V12_O, + V13, V13_H, V13_J, V13_K, V13_L, V13_M, V13_N, V13_O, + V14, V14_H, V14_J, V14_K, V14_L, V14_M, V14_N, V14_O, + V15, V15_H, V15_J, V15_K, V15_L, V15_M, V15_N, V15_O, + V16, V16_H, V16_J, V16_K, V16_L, V16_M, V16_N, V16_O, + V17, V17_H, V17_J, V17_K, V17_L, V17_M, V17_N, V17_O, + V18, V18_H, V18_J, V18_K, V18_L, V18_M, V18_N, V18_O, + V19, V19_H, V19_J, V19_K, V19_L, V19_M, V19_N, V19_O, + V20, V20_H, V20_J, V20_K, V20_L, V20_M, V20_N, V20_O, + V21, V21_H, V21_J, V21_K, V21_L, V21_M, V21_N, V21_O, + V22, V22_H, V22_J, V22_K, V22_L, V22_M, V22_N, V22_O, + V23, V23_H, V23_J, V23_K, V23_L, V23_M, V23_N, V23_O, + V24, V24_H, V24_J, V24_K, V24_L, V24_M, V24_N, V24_O, + V25, V25_H, V25_J, V25_K, V25_L, V25_M, V25_N, V25_O, + V26, V26_H, V26_J, V26_K, V26_L, V26_M, V26_N, V26_O, + V27, V27_H, V27_J, V27_K, V27_L, V27_M, V27_N, V27_O, + V28, V28_H, V28_J, V28_K, V28_L, V28_M, V28_N, V28_O, + V29, V29_H, V29_J, V29_K, V29_L, V29_M, V29_N, V29_O, + V30, V30_H, V30_J, V30_K, V30_L, V30_M, V30_N, V30_O, + V31, V31_H, V31_J, V31_K, V31_L, V31_M, V31_N, V31_O, +); + // Class for all 64bit vector registers reg_class vectord_reg( V0, V0_H, @@ -940,6 +1160,39 @@ V31, V31_H ); +// Class for all SVE predicate registers. +reg_class pr_reg ( + P0, + P1, + P2, + P3, + P4, + P5, + P6, + // P7, non-allocatable, preserved with all elements preset to TRUE. + P8, + P9, + P10, + P11, + P12, + P13, + P14, + P15 +); + +// Class for SVE governing predicate registers, which are used +// to determine the active elements of a predicated instruction. +reg_class gov_pr ( + P0, + P1, + P2, + P3, + P4, + P5, + P6, + // P7, non-allocatable, preserved with all elements preset to TRUE. +); + // Singleton class for condition codes reg_class int_flags(RFLAGS); @@ -1644,6 +1897,10 @@ __ bind(L_skip_barrier); } + if (UseSVE > 0 && C->max_vector_size() >= 16) { + __ reinitialize_ptrue(); + } + int bangsize = C->output()->bang_size_in_bytes(); if (C->output()->need_stack_bang(bangsize) && UseStackBanging) __ generate_stack_overflow_check(bangsize); @@ -1742,7 +1999,7 @@ // Figure out which register class each belongs in: rc_int, rc_float or // rc_stack. 
-enum RC { rc_bad, rc_int, rc_float, rc_stack };
+enum RC { rc_bad, rc_int, rc_float, rc_predicate, rc_stack };
 
 static enum RC rc_class(OptoReg::Name reg) {
 
@@ -1750,20 +2007,25 @@
     return rc_bad;
   }
 
-  // we have 30 int registers * 2 halves
-  // (rscratch1 and rscratch2 are omitted)
-  int slots_of_int_registers = RegisterImpl::max_slots_per_register * (RegisterImpl::number_of_registers - 2);
+  // we have 32 int registers * 2 halves
+  int slots_of_int_registers = RegisterImpl::max_slots_per_register * RegisterImpl::number_of_registers;
   if (reg < slots_of_int_registers) {
     return rc_int;
   }
 
-  // we have 32 float register * 4 halves
-  if (reg < slots_of_int_registers + FloatRegisterImpl::max_slots_per_register * FloatRegisterImpl::number_of_registers) {
+  // we have 32 float register * 8 halves
+  int slots_of_float_registers = FloatRegisterImpl::max_slots_per_register * FloatRegisterImpl::number_of_registers;
+  if (reg < slots_of_int_registers + slots_of_float_registers) {
     return rc_float;
   }
 
-  // Between float regs & stack is the flags regs.
+  int slots_of_predicate_registers = PRegisterImpl::max_slots_per_register * PRegisterImpl::number_of_registers;
+  if (reg < slots_of_int_registers + slots_of_float_registers + slots_of_predicate_registers) {
+    return rc_predicate;
+  }
+
+  // Between predicate regs & stack is the flags.
   assert(OptoReg::is_stack(reg), "blow up if spilling flags");
 
   return rc_stack;
@@ -1802,8 +2064,28 @@
   if (bottom_type()->isa_vect() != NULL) {
     uint ireg = ideal_reg();
-    assert(ireg == Op_VecD || ireg == Op_VecX, "must be 64 bit or 128 bit vector");
-    if (cbuf) {
+    if (ireg == Op_VecA && cbuf) {
+      C2_MacroAssembler _masm(cbuf);
+      int sve_vector_reg_size_in_bytes = Matcher::scalable_vector_reg_size(T_BYTE);
+      if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) {
+        // stack->stack
+        __ spill_copy_sve_vector_stack_to_stack(src_offset, dst_offset,
+                                                sve_vector_reg_size_in_bytes);
+      } else if (src_lo_rc == rc_float && dst_lo_rc == rc_stack) {
+        __ spill_sve_vector(as_FloatRegister(Matcher::_regEncode[src_lo]), ra_->reg2offset(dst_lo),
+                            sve_vector_reg_size_in_bytes);
+      } else if (src_lo_rc == rc_stack && dst_lo_rc == rc_float) {
+        __ unspill_sve_vector(as_FloatRegister(Matcher::_regEncode[dst_lo]), ra_->reg2offset(src_lo),
+                              sve_vector_reg_size_in_bytes);
+      } else if (src_lo_rc == rc_float && dst_lo_rc == rc_float) {
+        __ sve_orr(as_FloatRegister(Matcher::_regEncode[dst_lo]),
+                   as_FloatRegister(Matcher::_regEncode[src_lo]),
+                   as_FloatRegister(Matcher::_regEncode[src_lo]));
+      } else {
+        ShouldNotReachHere();
+      }
+    } else if (cbuf) {
+      assert(ireg == Op_VecD || ireg == Op_VecX, "must be 64 bit or 128 bit vector");
       C2_MacroAssembler _masm(cbuf);
       assert((src_lo_rc != rc_int && dst_lo_rc != rc_int), "sanity");
       if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) {
@@ -1821,12 +2103,12 @@
                  as_FloatRegister(Matcher::_regEncode[src_lo]));
       } else if (src_lo_rc == rc_float && dst_lo_rc == rc_stack) {
         __ spill(as_FloatRegister(Matcher::_regEncode[src_lo]),
-               ireg == Op_VecD ? __ D : __ Q,
-               ra_->reg2offset(dst_lo));
+                 ireg == Op_VecD ? __ D : __ Q,
+                 ra_->reg2offset(dst_lo));
       } else if (src_lo_rc == rc_stack && dst_lo_rc == rc_float) {
         __ unspill(as_FloatRegister(Matcher::_regEncode[dst_lo]),
-               ireg == Op_VecD ? __ D : __ Q,
-               ra_->reg2offset(src_lo));
+                   ireg == Op_VecD ? __ D : __ Q,
+                   ra_->reg2offset(src_lo));
       } else {
         ShouldNotReachHere();
       }
@@ -1911,9 +2193,24 @@
       st->print("%s", Matcher::regName[dst_lo]);
     }
     if (bottom_type()->isa_vect() != NULL) {
-      st->print("\t# vector spill size = %d", ideal_reg()==Op_VecD ? 64:128);
+      int vsize = 0;
+      switch (ideal_reg()) {
+      case Op_VecD:
+        vsize = 64;
+        break;
+      case Op_VecX:
+        vsize = 128;
+        break;
+      case Op_VecA:
+        vsize = Matcher::scalable_vector_reg_size(T_BYTE) * 8;
+        break;
+      default:
+        assert(false, "bad register type for spill");
+        ShouldNotReachHere();
+      }
+      st->print("\t# vector spill size = %d", vsize);
     } else {
-      st->print("\t# spill size = %d", is64 ? 64:32);
+      st->print("\t# spill size = %d", is64 ? 64 : 32);
     }
   }
 
@@ -2082,28 +2379,37 @@
   if (!match_rule_supported(opcode) || !vector_size_supported(bt, vlen)) {
     return false;
   }
-
-  // Special cases which require vector length
-  switch (opcode) {
-    case Op_MulAddVS2VI: {
-      if (vlen != 4) {
+  int bit_size = vlen * type2aelembytes(bt) * 8;
+  if (UseSVE == 0 && bit_size > 128) {
+    return false;
+  }
+  if (UseSVE > 0) {
+    return op_sve_supported(opcode);
+  } else { // NEON
+    // Special cases
+    switch (opcode) {
+    case Op_MulAddVS2VI:
+      if (bit_size < 128) {
         return false;
       }
      break;
-    }
+    case Op_MulVL:
+      return false;
     case Op_VectorLoadShuffle:
     case Op_VectorRearrange:
      if (vlen < 4) {
        return false;
      }
      break;
-  }
-
+    default:
+      break;
+    }
+  }
   return true; // Per default match rules are supported.
 }
 
 const bool Matcher::has_predicated_vectors(void) {
-  return false;
+  return UseSVE > 0;
 }
 
 bool Matcher::supports_vector_variable_shifts(void) {
@@ -2143,7 +2449,8 @@
 
 // Vector width in bytes.
 const int Matcher::vector_width_in_bytes(BasicType bt) {
-  int size = MIN2(16,(int)MaxVectorSize);
+  // The MaxVectorSize should have been set by detecting SVE max vector register size.
+  int size = MIN2((UseSVE > 0) ? 256 : 16, (int)MaxVectorSize);
   // Minimum 2 values in vector
   if (size < 2*type2aelembytes(bt)) size = 0;
   // But never < 4
@@ -2157,21 +2464,38 @@
 }
 
 const int Matcher::min_vector_size(const BasicType bt) {
   int max_size = max_vector_size(bt);
-  // Limit the vector size to 8 bytes
-  int size = 8 / type2aelembytes(bt);
-  if (bt == T_BYTE) {
-    // To support vector api shuffle/rearrange.
-    size = 4;
-  } else if (bt == T_BOOLEAN) {
-    // To support vector api load/store mask.
-    size = 2;
-  }
-  if (size < 2) size = 2;
-  return MIN2(size,max_size);
+  if ((UseSVE > 0) && (MaxVectorSize >= 16)) {
+    // Currently vector length less than SVE vector register size is not supported.
+    return max_size;
+  } else { // NEON
+    // Limit the vector size to 8 bytes
+    int size = 8 / type2aelembytes(bt);
+    if (bt == T_BYTE) {
+      // To support vector api shuffle/rearrange.
+      size = 4;
+    } else if (bt == T_BOOLEAN) {
+      // To support vector api load/store mask.
+      size = 2;
+    }
+    if (size < 2) size = 2;
+    return MIN2(size,max_size);
+  }
+}
+
+const bool Matcher::supports_scalable_vector() {
+  return UseSVE > 0;
+}
+
+// Actual max scalable vector register length.
+const int Matcher::scalable_vector_reg_size(const BasicType bt) {
+  return Matcher::max_vector_size(bt);
 }
 
 // Vector ideal reg.
 const uint Matcher::vector_ideal_reg(int len) {
+  if (UseSVE > 0 && 16 <= len && len <= 256) {
+    return Op_VecA;
+  }
  switch(len) {
    // For 16-bit/32-bit mask vector, reuse VecD.
    case 2:
@@ -3455,6 +3779,11 @@
     if (call == NULL) {
       ciEnv::current()->record_failure("CodeCache is full");
       return;
+    } else if (UseSVE > 0 && Compile::current()->max_vector_size() >= 16) {
+      // Only non uncommon_trap calls need to reinitialize ptrue.
+      if (uncommon_trap_request() == 0) {
+        __ reinitialize_ptrue();
+      }
     }
   %}
 
@@ -3465,6 +3794,8 @@
     if (call == NULL) {
       ciEnv::current()->record_failure("CodeCache is full");
       return;
+    } else if (UseSVE > 0 && Compile::current()->max_vector_size() >= 16) {
+      __ reinitialize_ptrue();
     }
   %}
 
@@ -3501,6 +3832,9 @@
       __ bind(retaddr);
       __ add(sp, sp, 2 * wordSize);
     }
+    if (UseSVE > 0 && Compile::current()->max_vector_size() >= 16) {
+      __ reinitialize_ptrue();
+    }
   %}
 
   enc_class aarch64_enc_rethrow() %{
@@ -3510,6 +3844,11 @@
 
   enc_class aarch64_enc_ret() %{
     C2_MacroAssembler _masm(&cbuf);
+#ifdef ASSERT
+    if (UseSVE > 0 && Compile::current()->max_vector_size() >= 16) {
+      __ verify_ptrue();
+    }
+#endif
     __ ret(lr);
   %}
 
@@ -4300,6 +4639,41 @@
   interface(CONST_INTER);
 %}
 
+// 8 bit signed value.
+operand immI8()
+%{
+  predicate(n->get_int() <= 127 && n->get_int() >= -128);
+  match(ConI);
+
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// 8 bit signed value (simm8), or #simm8 LSL 8.
+operand immI8_shift8()
+%{
+  predicate((n->get_int() <= 127 && n->get_int() >= -128) ||
+            (n->get_int() <= 32512 && n->get_int() >= -32768 && (n->get_int() & 0xff) == 0));
+  match(ConI);
+
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// 8 bit signed value (simm8), or #simm8 LSL 8.
+operand immL8_shift8()
+%{
+  predicate((n->get_long() <= 127 && n->get_long() >= -128) ||
+            (n->get_long() <= 32512 && n->get_long() >= -32768 && (n->get_long() & 0xff) == 0));
+  match(ConL);
+
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
 // 32 bit integer valid for add sub immediate
 operand immIAddSub()
 %{
@@ -4918,6 +5292,18 @@
   interface(REG_INTER);
 %}
 
+// Generic vector class. This will be used for
+// all vector operands, including NEON and SVE,
+// but currently only used for SVE VecA.
+operand vReg() +%{ + constraint(ALLOC_IN_RC(vectora_reg)); + match(VecA); + op_cost(0); + format %{ %} + interface(REG_INTER); +%} + operand vecD() %{ constraint(ALLOC_IN_RC(vectord_reg)); @@ -5226,6 +5612,15 @@ interface(REG_INTER); %} +operand pRegGov() +%{ + constraint(ALLOC_IN_RC(gov_pr)); + match(RegVMask); + op_cost(0); + format %{ %} + interface(REG_INTER); +%} + // Flags register, used as output of signed compare instructions // note that on AArch64 we also use this register as the output for @@ -13639,6 +14034,77 @@ ins_pipe(fp_uop_d); %} +instruct copySignD_reg(vRegD dst, vRegD src1, vRegD src2, vRegD zero) %{ + match(Set dst (CopySignD src1 (Binary src2 zero))); + effect(TEMP_DEF dst, USE src1, USE src2, USE zero); + format %{ "CopySignD $dst $src1 $src2" %} + ins_encode %{ + FloatRegister dst = as_FloatRegister($dst$$reg), + src1 = as_FloatRegister($src1$$reg), + src2 = as_FloatRegister($src2$$reg), + zero = as_FloatRegister($zero$$reg); + __ fnegd(dst, zero); + __ bsl(dst, __ T8B, src2, src1); + %} + ins_pipe(fp_uop_d); +%} + +instruct copySignF_reg(vRegF dst, vRegF src1, vRegF src2) %{ + match(Set dst (CopySignF src1 src2)); + effect(TEMP_DEF dst, USE src1, USE src2); + format %{ "CopySignF $dst $src1 $src2" %} + ins_encode %{ + FloatRegister dst = as_FloatRegister($dst$$reg), + src1 = as_FloatRegister($src1$$reg), + src2 = as_FloatRegister($src2$$reg); + __ movi(dst, __ T2S, 0x80, 24); + __ bsl(dst, __ T8B, src2, src1); + %} + ins_pipe(fp_uop_d); +%} + +instruct signumD_reg(vRegD dst, vRegD src, vRegD zero, vRegD one) %{ + match(Set dst (SignumD src (Binary zero one))); + effect(TEMP_DEF dst, USE src, USE zero, USE one); + format %{ "signumD $dst, $src" %} + ins_encode %{ + FloatRegister src = as_FloatRegister($src$$reg), + dst = as_FloatRegister($dst$$reg), + zero = as_FloatRegister($zero$$reg), + one = as_FloatRegister($one$$reg); + __ facgtd(dst, src, zero); // dst=0 for +-0.0 and NaN. 0xFFF..F otherwise + __ ushrd(dst, dst, 1); // dst=0 for +-0.0 and NaN. 0x7FF..F otherwise + // Bit selection instruction gets bit from "one" for each enabled bit in + // "dst", otherwise gets a bit from "src". For "src" that contains +-0.0 or + // NaN the whole "src" will be copied because "dst" is zero. For all other + // "src" values dst is 0x7FF..F, which means only the sign bit is copied + // from "src", and all other bits are copied from 1.0. + __ bsl(dst, __ T8B, one, src); + %} + ins_pipe(fp_uop_d); +%} + +instruct signumF_reg(vRegF dst, vRegF src, vRegF zero, vRegF one) %{ + match(Set dst (SignumF src (Binary zero one))); + effect(TEMP_DEF dst, USE src, USE zero, USE one); + format %{ "signumF $dst, $src" %} + ins_encode %{ + FloatRegister src = as_FloatRegister($src$$reg), + dst = as_FloatRegister($dst$$reg), + zero = as_FloatRegister($zero$$reg), + one = as_FloatRegister($one$$reg); + __ facgts(dst, src, zero); // dst=0 for +-0.0 and NaN. 0xFFF..F otherwise + __ ushr(dst, __ T2S, dst, 1); // dst=0 for +-0.0 and NaN. 0x7FF..F otherwise + // Bit selection instruction gets bit from "one" for each enabled bit in + // "dst", otherwise gets a bit from "src". For "src" that contains +-0.0 or + // NaN the whole "src" will be copied because "dst" is zero. For all other + // "src" values dst is 0x7FF..F, which means only the sign bit is copied + // from "src", and all other bits are copied from 1.0. 
+ __ bsl(dst, __ T8B, one, src); + %} + ins_pipe(fp_uop_d); +%} + // ============================================================================ // Logical Instructions @@ -16151,7 +16617,7 @@ // Load Vector (128 bits) instruct loadV16(vecX dst, vmem16 mem) %{ - predicate(n->as_LoadVector()->memory_size() == 16); + predicate(UseSVE == 0 && n->as_LoadVector()->memory_size() == 16); match(Set dst (LoadVector mem)); ins_cost(4 * INSN_COST); format %{ "ldrq $dst,$mem\t# vector (128 bits)" %} @@ -16207,7 +16673,7 @@ instruct replicate16B(vecX dst, iRegIorL2I src) %{ - predicate(n->as_Vector()->length() == 16); + predicate(UseSVE == 0 && n->as_Vector()->length() == 16); match(Set dst (ReplicateB src)); ins_cost(INSN_COST); format %{ "dup $dst, $src\t# vector (16B)" %} @@ -16232,7 +16698,7 @@ instruct replicate16B_imm(vecX dst, immI con) %{ - predicate(n->as_Vector()->length() == 16); + predicate(UseSVE == 0 && n->as_Vector()->length() == 16); match(Set dst (ReplicateB con)); ins_cost(INSN_COST); format %{ "movi $dst, $con\t# vector(16B)" %} @@ -16257,7 +16723,7 @@ instruct replicate8S(vecX dst, iRegIorL2I src) %{ - predicate(n->as_Vector()->length() == 8); + predicate(UseSVE == 0 && n->as_Vector()->length() == 8); match(Set dst (ReplicateS src)); ins_cost(INSN_COST); format %{ "dup $dst, $src\t# vector (8S)" %} @@ -16282,7 +16748,7 @@ instruct replicate8S_imm(vecX dst, immI con) %{ - predicate(n->as_Vector()->length() == 8); + predicate(UseSVE == 0 && n->as_Vector()->length() == 8); match(Set dst (ReplicateS con)); ins_cost(INSN_COST); format %{ "movi $dst, $con\t# vector(8H)" %} @@ -16306,7 +16772,7 @@ instruct replicate4I(vecX dst, iRegIorL2I src) %{ - predicate(n->as_Vector()->length() == 4); + predicate(UseSVE == 0 && n->as_Vector()->length() == 4); match(Set dst (ReplicateI src)); ins_cost(INSN_COST); format %{ "dup $dst, $src\t# vector (4I)" %} @@ -16330,7 +16796,7 @@ instruct replicate4I_imm(vecX dst, immI con) %{ - predicate(n->as_Vector()->length() == 4); + predicate(UseSVE == 0 && n->as_Vector()->length() == 4); match(Set dst (ReplicateI con)); ins_cost(INSN_COST); format %{ "movi $dst, $con\t# vector(4I)" %} @@ -16342,7 +16808,7 @@ instruct replicate2L(vecX dst, iRegL src) %{ - predicate(n->as_Vector()->length() == 2); + predicate(UseSVE == 0 && n->as_Vector()->length() == 2); match(Set dst (ReplicateL src)); ins_cost(INSN_COST); format %{ "dup $dst, $src\t# vector (2L)" %} @@ -16354,7 +16820,7 @@ instruct replicate2L_zero(vecX dst, immI0 zero) %{ - predicate(n->as_Vector()->length() == 2); + predicate(UseSVE == 0 && n->as_Vector()->length() == 2); match(Set dst (ReplicateI zero)); ins_cost(INSN_COST); format %{ "movi $dst, $zero\t# vector(4I)" %} @@ -16381,7 +16847,7 @@ instruct replicate4F(vecX dst, vRegF src) %{ - predicate(n->as_Vector()->length() == 4); + predicate(UseSVE == 0 && n->as_Vector()->length() == 4); match(Set dst (ReplicateF src)); ins_cost(INSN_COST); format %{ "dup $dst, $src\t# vector (4F)" %} @@ -16394,7 +16860,7 @@ instruct replicate2D(vecX dst, vRegD src) %{ - predicate(n->as_Vector()->length() == 2); + predicate(UseSVE == 0 && n->as_Vector()->length() == 2); match(Set dst (ReplicateD src)); ins_cost(INSN_COST); format %{ "dup $dst, $src\t# vector (2D)" %}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/hotspot/cpu/aarch64/aarch64_sve.ad Tue Sep 08 15:28:06 2020 +0800 @@ -0,0 +1,1637 @@ +// +// Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. +// Copyright (c) 2020, Arm Limited. All rights reserved. +// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +// +// This code is free software; you can redistribute it and/or modify it +// under the terms of the GNU General Public License version 2 only, as +// published by the Free Software Foundation. +// +// This code is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// version 2 for more details (a copy is included in the LICENSE file that +// accompanied this code). +// +// You should have received a copy of the GNU General Public License version +// 2 along with this work; if not, write to the Free Software Foundation, +// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +// +// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +// or visit www.oracle.com if you need additional information or have any +// questions. +// +// + +// This file is automatically generated by running "m4 aarch64_sve_ad.m4". Do not edit ---- + +// AArch64 SVE Architecture Description File + + +// 4 bit signed offset -- for predicated load/store + +operand vmemA_immIOffset4() +%{ + predicate(Address::offset_ok_for_sve_immed(n->get_int(), 4, + Matcher::scalable_vector_reg_size(T_BYTE))); + match(ConI); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +operand vmemA_immLOffset4() +%{ + predicate(Address::offset_ok_for_sve_immed(n->get_long(), 4, + Matcher::scalable_vector_reg_size(T_BYTE))); + match(ConL); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + + +operand vmemA_indOffI4(iRegP reg, vmemA_immIOffset4 off) +%{ + constraint(ALLOC_IN_RC(ptr_reg)); + match(AddP reg off); + op_cost(0); + format %{ "[$reg, $off, MUL VL]" %} + interface(MEMORY_INTER) %{ + base($reg); + index(0xffffffff); + scale(0x0); + disp($off); + %} +%} + +operand vmemA_indOffL4(iRegP reg, vmemA_immLOffset4 off) +%{ + constraint(ALLOC_IN_RC(ptr_reg)); + match(AddP reg off); + op_cost(0); + format %{ "[$reg, $off, MUL VL]" %} + interface(MEMORY_INTER) %{ + base($reg); + index(0xffffffff); + scale(0x0); + disp($off); + %} +%} + +opclass vmemA(indirect, vmemA_indOffI4, vmemA_indOffL4); + +source_hpp %{ + bool op_sve_supported(int opcode); +%} + +source %{ + + static inline BasicType vector_element_basic_type(const MachNode* n) { + const TypeVect* vt = n->bottom_type()->is_vect(); + return vt->element_basic_type(); + } + + static inline BasicType vector_element_basic_type(const MachNode* use, const MachOper* opnd) { + int def_idx = use->operand_index(opnd); + Node* def = use->in(def_idx); + const TypeVect* vt = def->bottom_type()->is_vect(); + return vt->element_basic_type(); + } + + typedef void (C2_MacroAssembler::* sve_mem_insn_predicate)(FloatRegister Rt, Assembler::SIMD_RegVariant T, + PRegister Pg, const Address &adr); + + // Predicated load/store, with optional ptrue to all elements of given predicate register. 
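// That is: the governing predicate pg (always ptrue from the callers below)
// enables every lane. The displacement is encoded in SVE's
// "[Xn, #imm, MUL VL]" form, where the signed 4-bit #imm counts whole vector
// registers: on a 256-bit (32-byte) implementation, disp = 64 becomes
// Address(base, 2), i.e. [base, #2, MUL VL].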
+ static void loadStoreA_predicate(C2_MacroAssembler masm, bool is_store, + FloatRegister reg, PRegister pg, BasicType bt, + int opcode, Register base, int index, int size, int disp) { + sve_mem_insn_predicate insn; + Assembler::SIMD_RegVariant type; + int esize = type2aelembytes(bt); + if (index == -1) { + assert(size == 0, "unsupported address mode: scale size = %d", size); + switch(esize) { + case 1: + insn = is_store ? &C2_MacroAssembler::sve_st1b : &C2_MacroAssembler::sve_ld1b; + type = Assembler::B; + break; + case 2: + insn = is_store ? &C2_MacroAssembler::sve_st1h : &C2_MacroAssembler::sve_ld1h; + type = Assembler::H; + break; + case 4: + insn = is_store ? &C2_MacroAssembler::sve_st1w : &C2_MacroAssembler::sve_ld1w; + type = Assembler::S; + break; + case 8: + insn = is_store ? &C2_MacroAssembler::sve_st1d : &C2_MacroAssembler::sve_ld1d; + type = Assembler::D; + break; + default: + assert(false, "unsupported"); + ShouldNotReachHere(); + } + (masm.*insn)(reg, type, pg, Address(base, disp / Matcher::scalable_vector_reg_size(T_BYTE))); + } else { + assert(false, "unimplemented"); + ShouldNotReachHere(); + } + } + + bool op_sve_supported(int opcode) { + switch (opcode) { + case Op_MulAddVS2VI: + // No multiply reduction instructions + case Op_MulReductionVD: + case Op_MulReductionVF: + case Op_MulReductionVI: + case Op_MulReductionVL: + // Others + case Op_Extract: + case Op_ExtractB: + case Op_ExtractC: + case Op_ExtractD: + case Op_ExtractF: + case Op_ExtractI: + case Op_ExtractL: + case Op_ExtractS: + case Op_ExtractUB: + return false; + default: + return true; + } + } + +%} + +definitions %{ + int_def SVE_COST (200, 200); +%} + + + + +// All SVE instructions + +// vector load/store + +// Use predicated vector load/store +instruct loadV(vReg dst, vmemA mem) %{ + predicate(UseSVE > 0 && n->as_LoadVector()->memory_size() >= 16); + match(Set dst (LoadVector mem)); + ins_cost(SVE_COST); + format %{ "sve_ldr $dst, $mem\t # vector (sve)" %} + ins_encode %{ + FloatRegister dst_reg = as_FloatRegister($dst$$reg); + loadStoreA_predicate(C2_MacroAssembler(&cbuf), false, dst_reg, ptrue, + vector_element_basic_type(this), $mem->opcode(), + as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); + %} + ins_pipe(pipe_slow); +%} + +instruct storeV(vReg src, vmemA mem) %{ + predicate(UseSVE > 0 && n->as_StoreVector()->memory_size() >= 16); + match(Set mem (StoreVector mem src)); + ins_cost(SVE_COST); + format %{ "sve_str $mem, $src\t # vector (sve)" %} + ins_encode %{ + FloatRegister src_reg = as_FloatRegister($src$$reg); + loadStoreA_predicate(C2_MacroAssembler(&cbuf), true, src_reg, ptrue, + vector_element_basic_type(this, $src), $mem->opcode(), + as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); + %} + ins_pipe(pipe_slow); +%} + + +// vector abs + +instruct vabsB(vReg dst, vReg src) %{ + predicate(UseSVE > 0 && n->as_Vector()->length() >= 16 && + n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); + match(Set dst (AbsVB src)); + ins_cost(SVE_COST); + format %{ "sve_abs $dst, $src\t# vector (sve) (B)" %} + ins_encode %{ + __ sve_abs(as_FloatRegister($dst$$reg), __ B, + ptrue, as_FloatRegister($src$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vabsS(vReg dst, vReg src) %{ + predicate(UseSVE > 0 && n->as_Vector()->length() >= 8 && + n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); + match(Set dst (AbsVS src)); + ins_cost(SVE_COST); + format %{ "sve_abs $dst, $src\t# vector (sve) (H)" %} + ins_encode %{ + __ sve_abs(as_FloatRegister($dst$$reg), __ H, 
+ ptrue, as_FloatRegister($src$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vabsI(vReg dst, vReg src) %{ + predicate(UseSVE > 0 && n->as_Vector()->length() >= 4 && + n->bottom_type()->is_vect()->element_basic_type() == T_INT); + match(Set dst (AbsVI src)); + ins_cost(SVE_COST); + format %{ "sve_abs $dst, $src\t# vector (sve) (S)" %} + ins_encode %{ + __ sve_abs(as_FloatRegister($dst$$reg), __ S, + ptrue, as_FloatRegister($src$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vabsL(vReg dst, vReg src) %{ + predicate(UseSVE > 0 && n->as_Vector()->length() >= 2 && + n->bottom_type()->is_vect()->element_basic_type() == T_LONG); + match(Set dst (AbsVL src)); + ins_cost(SVE_COST); + format %{ "sve_abs $dst, $src\t# vector (sve) (D)" %} + ins_encode %{ + __ sve_abs(as_FloatRegister($dst$$reg), __ D, + ptrue, as_FloatRegister($src$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vabsF(vReg dst, vReg src) %{ + predicate(UseSVE > 0 && n->as_Vector()->length() >= 4 && + n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); + match(Set dst (AbsVF src)); + ins_cost(SVE_COST); + format %{ "sve_fabs $dst, $src\t# vector (sve) (S)" %} + ins_encode %{ + __ sve_fabs(as_FloatRegister($dst$$reg), __ S, + ptrue, as_FloatRegister($src$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vabsD(vReg dst, vReg src) %{ + predicate(UseSVE > 0 && n->as_Vector()->length() >= 2 && + n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); + match(Set dst (AbsVD src)); + ins_cost(SVE_COST); + format %{ "sve_fabs $dst, $src\t# vector (sve) (D)" %} + ins_encode %{ + __ sve_fabs(as_FloatRegister($dst$$reg), __ D, + ptrue, as_FloatRegister($src$$reg)); + %} + ins_pipe(pipe_slow); +%} + +// vector add + +instruct vaddB(vReg dst, vReg src1, vReg src2) %{ + predicate(UseSVE > 0 && n->as_Vector()->length() >= 16); + match(Set dst (AddVB src1 src2)); + ins_cost(SVE_COST); + format %{ "sve_add $dst, $src1, $src2\t # vector (sve) (B)" %} + ins_encode %{ + __ sve_add(as_FloatRegister($dst$$reg), __ B, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vaddS(vReg dst, vReg src1, vReg src2) %{ + predicate(UseSVE > 0 && n->as_Vector()->length() >= 8); + match(Set dst (AddVS src1 src2)); + ins_cost(SVE_COST); + format %{ "sve_add $dst, $src1, $src2\t # vector (sve) (H)" %} + ins_encode %{ + __ sve_add(as_FloatRegister($dst$$reg), __ H, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vaddI(vReg dst, vReg src1, vReg src2) %{ + predicate(UseSVE > 0 && n->as_Vector()->length() >= 4); + match(Set dst (AddVI src1 src2)); + ins_cost(SVE_COST); + format %{ "sve_add $dst, $src1, $src2\t # vector (sve) (S)" %} + ins_encode %{ + __ sve_add(as_FloatRegister($dst$$reg), __ S, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vaddL(vReg dst, vReg src1, vReg src2) %{ + predicate(UseSVE > 0 && n->as_Vector()->length() >= 2); + match(Set dst (AddVL src1 src2)); + ins_cost(SVE_COST); + format %{ "sve_add $dst, $src1, $src2\t # vector (sve) (D)" %} + ins_encode %{ + __ sve_add(as_FloatRegister($dst$$reg), __ D, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vaddF(vReg dst, vReg src1, vReg src2) %{ + predicate(UseSVE > 0 && n->as_Vector()->length() >= 4); + match(Set dst (AddVF src1 src2)); + ins_cost(SVE_COST); + format %{ "sve_fadd $dst, $src1, $src2\t # vector (sve) (S)" %} + ins_encode %{ 
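// Note: SVE ADD and FADD have unpredicated three-operand vector encodings,
// so unlike the predicated abs/fabs rules above, no governing predicate
// (ptrue) is passed for the add group.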
+ __ sve_fadd(as_FloatRegister($dst$$reg), __ S, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vaddD(vReg dst, vReg src1, vReg src2) %{ + predicate(UseSVE > 0 && n->as_Vector()->length() >= 2); + match(Set dst (AddVD src1 src2)); + ins_cost(SVE_COST); + format %{ "sve_fadd $dst, $src1, $src2\t # vector (sve) (D)" %} + ins_encode %{ + __ sve_fadd(as_FloatRegister($dst$$reg), __ D, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(pipe_slow); +%} + +// vector and + +instruct vand(vReg dst, vReg src1, vReg src2) %{ + predicate(UseSVE > 0 && n->as_Vector()->length_in_bytes() >= 16); + match(Set dst (AndV src1 src2)); + ins_cost(SVE_COST); + format %{ "sve_and $dst, $src1, $src2\t# vector (sve)" %} + ins_encode %{ + __ sve_and(as_FloatRegister($dst$$reg), + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(pipe_slow); +%} + +// vector or + +instruct vor(vReg dst, vReg src1, vReg src2) %{ + predicate(UseSVE > 0 && n->as_Vector()->length_in_bytes() >= 16); + match(Set dst (OrV src1 src2)); + ins_cost(SVE_COST); + format %{ "sve_orr $dst, $src1, $src2\t# vector (sve)" %} + ins_encode %{ + __ sve_orr(as_FloatRegister($dst$$reg), + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(pipe_slow); +%} + +// vector xor + +instruct vxor(vReg dst, vReg src1, vReg src2) %{ + predicate(UseSVE > 0 && n->as_Vector()->length_in_bytes() >= 16); + match(Set dst (XorV src1 src2)); + ins_cost(SVE_COST); + format %{ "sve_eor $dst, $src1, $src2\t# vector (sve)" %} + ins_encode %{ + __ sve_eor(as_FloatRegister($dst$$reg), + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(pipe_slow); +%} + +// vector float div + +instruct vdivF(vReg dst_src1, vReg src2) %{ + predicate(UseSVE > 0 && n->as_Vector()->length() >= 4); + match(Set dst_src1 (DivVF dst_src1 src2)); + ins_cost(SVE_COST); + format %{ "sve_fdiv $dst_src1, $dst_src1, $src2\t# vector (sve) (S)" %} + ins_encode %{ + __ sve_fdiv(as_FloatRegister($dst_src1$$reg), __ S, + ptrue, as_FloatRegister($src2$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vdivD(vReg dst_src1, vReg src2) %{ + predicate(UseSVE > 0 && n->as_Vector()->length() >= 2); + match(Set dst_src1 (DivVD dst_src1 src2)); + ins_cost(SVE_COST); + format %{ "sve_fdiv $dst_src1, $dst_src1, $src2\t# vector (sve) (D)" %} + ins_encode %{ + __ sve_fdiv(as_FloatRegister($dst_src1$$reg), __ D, + ptrue, as_FloatRegister($src2$$reg)); + %} + ins_pipe(pipe_slow); +%} + +// vector max + +instruct vmaxF(vReg dst_src1, vReg src2) %{ + predicate(UseSVE > 0 && n->as_Vector()->length() >= 4 && + n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); + match(Set dst_src1 (MaxV dst_src1 src2)); + ins_cost(SVE_COST); + format %{ "sve_fmax $dst_src1, $dst_src1, $src2\t # vector (sve) (S)" %} + ins_encode %{ + __ sve_fmax(as_FloatRegister($dst_src1$$reg), __ S, + ptrue, as_FloatRegister($src2$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vmaxD(vReg dst_src1, vReg src2) %{ + predicate(UseSVE > 0 && n->as_Vector()->length() >= 2 && + n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); + match(Set dst_src1 (MaxV dst_src1 src2)); + ins_cost(SVE_COST); + format %{ "sve_fmax $dst_src1, $dst_src1, $src2\t # vector (sve) (D)" %} + ins_encode %{ + __ sve_fmax(as_FloatRegister($dst_src1$$reg), __ D, + ptrue, as_FloatRegister($src2$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vminF(vReg dst_src1, vReg src2) %{ + 
predicate(UseSVE > 0 && n->as_Vector()->length() >= 4 && + n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); + match(Set dst_src1 (MinV dst_src1 src2)); + ins_cost(SVE_COST); + format %{ "sve_fmin $dst_src1, $dst_src1, $src2\t # vector (sve) (S)" %} + ins_encode %{ + __ sve_fmin(as_FloatRegister($dst_src1$$reg), __ S, + ptrue, as_FloatRegister($src2$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vminD(vReg dst_src1, vReg src2) %{ + predicate(UseSVE > 0 && n->as_Vector()->length() >= 2 && + n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); + match(Set dst_src1 (MinV dst_src1 src2)); + ins_cost(SVE_COST); + format %{ "sve_fmin $dst_src1, $dst_src1, $src2\t # vector (sve) (D)" %} + ins_encode %{ + __ sve_fmin(as_FloatRegister($dst_src1$$reg), __ D, + ptrue, as_FloatRegister($src2$$reg)); + %} + ins_pipe(pipe_slow); +%} + +// vector fmla + +// dst_src1 = dst_src1 + src2 * src3 +instruct vfmlaF(vReg dst_src1, vReg src2, vReg src3) %{ + predicate(UseFMA && UseSVE > 0 && n->as_Vector()->length() >= 4); + match(Set dst_src1 (FmaVF dst_src1 (Binary src2 src3))); + ins_cost(SVE_COST); + format %{ "sve_fmla $dst_src1, $src2, $src3\t # vector (sve) (S)" %} + ins_encode %{ + __ sve_fmla(as_FloatRegister($dst_src1$$reg), __ S, + ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg)); + %} + ins_pipe(pipe_slow); +%} + +// dst_src1 = dst_src1 + src2 * src3 +instruct vfmlaD(vReg dst_src1, vReg src2, vReg src3) %{ + predicate(UseFMA && UseSVE > 0 && n->as_Vector()->length() >= 2); + match(Set dst_src1 (FmaVD dst_src1 (Binary src2 src3))); + ins_cost(SVE_COST); + format %{ "sve_fmla $dst_src1, $src2, $src3\t # vector (sve) (D)" %} + ins_encode %{ + __ sve_fmla(as_FloatRegister($dst_src1$$reg), __ D, + ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg)); + %} + ins_pipe(pipe_slow); +%} + +// vector fmls + +// dst_src1 = dst_src1 + -src2 * src3 +// dst_src1 = dst_src1 + src2 * -src3 +instruct vfmlsF(vReg dst_src1, vReg src2, vReg src3) %{ + predicate(UseFMA && UseSVE > 0 && n->as_Vector()->length() >= 4); + match(Set dst_src1 (FmaVF dst_src1 (Binary (NegVF src2) src3))); + match(Set dst_src1 (FmaVF dst_src1 (Binary src2 (NegVF src3)))); + ins_cost(SVE_COST); + format %{ "sve_fmls $dst_src1, $src2, $src3\t # vector (sve) (S)" %} + ins_encode %{ + __ sve_fmls(as_FloatRegister($dst_src1$$reg), __ S, + ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg)); + %} + ins_pipe(pipe_slow); +%} + +// dst_src1 = dst_src1 + -src2 * src3 +// dst_src1 = dst_src1 + src2 * -src3 +instruct vfmlsD(vReg dst_src1, vReg src2, vReg src3) %{ + predicate(UseFMA && UseSVE > 0 && n->as_Vector()->length() >= 2); + match(Set dst_src1 (FmaVD dst_src1 (Binary (NegVD src2) src3))); + match(Set dst_src1 (FmaVD dst_src1 (Binary src2 (NegVD src3)))); + ins_cost(SVE_COST); + format %{ "sve_fmls $dst_src1, $src2, $src3\t # vector (sve) (D)" %} + ins_encode %{ + __ sve_fmls(as_FloatRegister($dst_src1$$reg), __ D, + ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg)); + %} + ins_pipe(pipe_slow); +%} + +// vector fnmla + +// dst_src1 = -dst_src1 + -src2 * src3 +// dst_src1 = -dst_src1 + src2 * -src3 +instruct vfnmlaF(vReg dst_src1, vReg src2, vReg src3) %{ + predicate(UseFMA && UseSVE > 0 && n->as_Vector()->length() >= 4); + match(Set dst_src1 (FmaVF (NegVF dst_src1) (Binary (NegVF src2) src3))); + match(Set dst_src1 (FmaVF (NegVF dst_src1) (Binary src2 (NegVF src3)))); + ins_cost(SVE_COST); + format %{ "sve_fnmla $dst_src1, $src2, $src3\t # vector (sve) (S)" %} + 
ins_encode %{ + __ sve_fnmla(as_FloatRegister($dst_src1$$reg), __ S, + ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg)); + %} + ins_pipe(pipe_slow); +%} + +// dst_src1 = -dst_src1 + -src2 * src3 +// dst_src1 = -dst_src1 + src2 * -src3 +instruct vfnmlaD(vReg dst_src1, vReg src2, vReg src3) %{ + predicate(UseFMA && UseSVE > 0 && n->as_Vector()->length() >= 2); + match(Set dst_src1 (FmaVD (NegVD dst_src1) (Binary (NegVD src2) src3))); + match(Set dst_src1 (FmaVD (NegVD dst_src1) (Binary src2 (NegVD src3)))); + ins_cost(SVE_COST); + format %{ "sve_fnmla $dst_src1, $src2, $src3\t # vector (sve) (D)" %} + ins_encode %{ + __ sve_fnmla(as_FloatRegister($dst_src1$$reg), __ D, + ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg)); + %} + ins_pipe(pipe_slow); +%} + +// vector fnmls + +// dst_src1 = -dst_src1 + src2 * src3 +instruct vfnmlsF(vReg dst_src1, vReg src2, vReg src3) %{ + predicate(UseFMA && UseSVE > 0 && n->as_Vector()->length() >= 4); + match(Set dst_src1 (FmaVF (NegVF dst_src1) (Binary src2 src3))); + ins_cost(SVE_COST); + format %{ "sve_fnmls $dst_src1, $src2, $src3\t # vector (sve) (S)" %} + ins_encode %{ + __ sve_fnmls(as_FloatRegister($dst_src1$$reg), __ S, + ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg)); + %} + ins_pipe(pipe_slow); +%} + +// dst_src1 = -dst_src1 + src2 * src3 +instruct vfnmlsD(vReg dst_src1, vReg src2, vReg src3) %{ + predicate(UseFMA && UseSVE > 0 && n->as_Vector()->length() >= 2); + match(Set dst_src1 (FmaVD (NegVD dst_src1) (Binary src2 src3))); + ins_cost(SVE_COST); + format %{ "sve_fnmls $dst_src1, $src2, $src3\t # vector (sve) (D)" %} + ins_encode %{ + __ sve_fnmls(as_FloatRegister($dst_src1$$reg), __ D, + ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg)); + %} + ins_pipe(pipe_slow); +%} + +// vector mla + +// dst_src1 = dst_src1 + src2 * src3 +instruct vmlaB(vReg dst_src1, vReg src2, vReg src3) +%{ + predicate(UseSVE > 0 && n->as_Vector()->length() >= 16); + match(Set dst_src1 (AddVB dst_src1 (MulVB src2 src3))); + ins_cost(SVE_COST); + format %{ "sve_mla $dst_src1, src2, src3\t # vector (sve) (B)" %} + ins_encode %{ + __ sve_mla(as_FloatRegister($dst_src1$$reg), __ B, + ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg)); + %} + ins_pipe(pipe_slow); +%} + +// dst_src1 = dst_src1 + src2 * src3 +instruct vmlaS(vReg dst_src1, vReg src2, vReg src3) +%{ + predicate(UseSVE > 0 && n->as_Vector()->length() >= 8); + match(Set dst_src1 (AddVS dst_src1 (MulVS src2 src3))); + ins_cost(SVE_COST); + format %{ "sve_mla $dst_src1, src2, src3\t # vector (sve) (H)" %} + ins_encode %{ + __ sve_mla(as_FloatRegister($dst_src1$$reg), __ H, + ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg)); + %} + ins_pipe(pipe_slow); +%} + +// dst_src1 = dst_src1 + src2 * src3 +instruct vmlaI(vReg dst_src1, vReg src2, vReg src3) +%{ + predicate(UseSVE > 0 && n->as_Vector()->length() >= 4); + match(Set dst_src1 (AddVI dst_src1 (MulVI src2 src3))); + ins_cost(SVE_COST); + format %{ "sve_mla $dst_src1, src2, src3\t # vector (sve) (S)" %} + ins_encode %{ + __ sve_mla(as_FloatRegister($dst_src1$$reg), __ S, + ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg)); + %} + ins_pipe(pipe_slow); +%} + +// dst_src1 = dst_src1 + src2 * src3 +instruct vmlaL(vReg dst_src1, vReg src2, vReg src3) +%{ + predicate(UseSVE > 0 && n->as_Vector()->length() >= 2); + match(Set dst_src1 (AddVL dst_src1 (MulVL src2 src3))); + ins_cost(SVE_COST); + format %{ "sve_mla $dst_src1, src2, src3\t # vector (sve) 
(D)" %} + ins_encode %{ + __ sve_mla(as_FloatRegister($dst_src1$$reg), __ D, + ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg)); + %} + ins_pipe(pipe_slow); +%} + +// vector mls + +// dst_src1 = dst_src1 - src2 * src3 +instruct vmlsB(vReg dst_src1, vReg src2, vReg src3) +%{ + predicate(UseSVE > 0 && n->as_Vector()->length() >= 16); + match(Set dst_src1 (SubVB dst_src1 (MulVB src2 src3))); + ins_cost(SVE_COST); + format %{ "sve_mls $dst_src1, src2, src3\t # vector (sve) (B)" %} + ins_encode %{ + __ sve_mls(as_FloatRegister($dst_src1$$reg), __ B, + ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg)); + %} + ins_pipe(pipe_slow); +%} + +// dst_src1 = dst_src1 - src2 * src3 +instruct vmlsS(vReg dst_src1, vReg src2, vReg src3) +%{ + predicate(UseSVE > 0 && n->as_Vector()->length() >= 8); + match(Set dst_src1 (SubVS dst_src1 (MulVS src2 src3))); + ins_cost(SVE_COST); + format %{ "sve_mls $dst_src1, src2, src3\t # vector (sve) (H)" %} + ins_encode %{ + __ sve_mls(as_FloatRegister($dst_src1$$reg), __ H, + ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg)); + %} + ins_pipe(pipe_slow); +%} + +// dst_src1 = dst_src1 - src2 * src3 +instruct vmlsI(vReg dst_src1, vReg src2, vReg src3) +%{ + predicate(UseSVE > 0 && n->as_Vector()->length() >= 4); + match(Set dst_src1 (SubVI dst_src1 (MulVI src2 src3))); + ins_cost(SVE_COST); + format %{ "sve_mls $dst_src1, src2, src3\t # vector (sve) (S)" %} + ins_encode %{ + __ sve_mls(as_FloatRegister($dst_src1$$reg), __ S, + ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg)); + %} + ins_pipe(pipe_slow); +%} + +// dst_src1 = dst_src1 - src2 * src3 +instruct vmlsL(vReg dst_src1, vReg src2, vReg src3) +%{ + predicate(UseSVE > 0 && n->as_Vector()->length() >= 2); + match(Set dst_src1 (SubVL dst_src1 (MulVL src2 src3))); + ins_cost(SVE_COST); + format %{ "sve_mls $dst_src1, src2, src3\t # vector (sve) (D)" %} + ins_encode %{ + __ sve_mls(as_FloatRegister($dst_src1$$reg), __ D, + ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg)); + %} + ins_pipe(pipe_slow); +%} + + +// vector mul + +instruct vmulB(vReg dst_src1, vReg src2) %{ + predicate(UseSVE > 0 && n->as_Vector()->length() >= 16); + match(Set dst_src1 (MulVB dst_src1 src2)); + ins_cost(SVE_COST); + format %{ "sve_mul $dst_src1, $dst_src1, $src2\t # vector (sve) (B)" %} + ins_encode %{ + __ sve_mul(as_FloatRegister($dst_src1$$reg), __ B, + ptrue, as_FloatRegister($src2$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vmulS(vReg dst_src1, vReg src2) %{ + predicate(UseSVE > 0 && n->as_Vector()->length() >= 8); + match(Set dst_src1 (MulVS dst_src1 src2)); + ins_cost(SVE_COST); + format %{ "sve_mul $dst_src1, $dst_src1, $src2\t # vector (sve) (H)" %} + ins_encode %{ + __ sve_mul(as_FloatRegister($dst_src1$$reg), __ H, + ptrue, as_FloatRegister($src2$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vmulI(vReg dst_src1, vReg src2) %{ + predicate(UseSVE > 0 && n->as_Vector()->length() >= 4); + match(Set dst_src1 (MulVI dst_src1 src2)); + ins_cost(SVE_COST); + format %{ "sve_mul $dst_src1, $dst_src1, $src2\t # vector (sve) (S)" %} + ins_encode %{ + __ sve_mul(as_FloatRegister($dst_src1$$reg), __ S, + ptrue, as_FloatRegister($src2$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vmulL(vReg dst_src1, vReg src2) %{ + predicate(UseSVE > 0 && n->as_Vector()->length() >= 2); + match(Set dst_src1 (MulVL dst_src1 src2)); + ins_cost(SVE_COST); + format %{ "sve_mul $dst_src1, $dst_src1, $src2\t # vector (sve) (D)" %} + ins_encode %{ + __ 
sve_mul(as_FloatRegister($dst_src1$$reg), __ D, + ptrue, as_FloatRegister($src2$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vmulF(vReg dst, vReg src1, vReg src2) %{ + predicate(UseSVE > 0 && n->as_Vector()->length() >= 4); + match(Set dst (MulVF src1 src2)); + ins_cost(SVE_COST); + format %{ "sve_fmul $dst, $src1, $src2\t # vector (sve) (S)" %} + ins_encode %{ + __ sve_fmul(as_FloatRegister($dst$$reg), __ S, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vmulD(vReg dst, vReg src1, vReg src2) %{ + predicate(UseSVE > 0 && n->as_Vector()->length() >= 2); + match(Set dst (MulVD src1 src2)); + ins_cost(SVE_COST); + format %{ "sve_fmul $dst, $src1, $src2\t # vector (sve) (D)" %} + ins_encode %{ + __ sve_fmul(as_FloatRegister($dst$$reg), __ D, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(pipe_slow); +%} + +// vector fneg + +instruct vnegF(vReg dst, vReg src) %{ + predicate(UseSVE > 0 && n->as_Vector()->length_in_bytes() >= 16); + match(Set dst (NegVF src)); + ins_cost(SVE_COST); + format %{ "sve_fneg $dst, $src\t# vector (sve) (S)" %} + ins_encode %{ + __ sve_fneg(as_FloatRegister($dst$$reg), __ S, + ptrue, as_FloatRegister($src$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vnegD(vReg dst, vReg src) %{ + predicate(UseSVE > 0 && n->as_Vector()->length_in_bytes() >= 16); + match(Set dst (NegVD src)); + ins_cost(SVE_COST); + format %{ "sve_fneg $dst, $src\t# vector (sve) (D)" %} + ins_encode %{ + __ sve_fneg(as_FloatRegister($dst$$reg), __ D, + ptrue, as_FloatRegister($src$$reg)); + %} + ins_pipe(pipe_slow); +%} + +// popcount vector + +instruct vpopcountI(vReg dst, vReg src) %{ + predicate(UseSVE > 0 && n->as_Vector()->length() >= 4); + match(Set dst (PopCountVI src)); + format %{ "sve_cnt $dst, $src\t# vector (sve) (S)\n\t" %} + ins_encode %{ + __ sve_cnt(as_FloatRegister($dst$$reg), __ S, ptrue, as_FloatRegister($src$$reg)); + %} + ins_pipe(pipe_slow); +%} + +// vector add reduction + +instruct reduce_addI(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD tmp) %{ + predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() >= 16 && + (n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT)); + match(Set dst (AddReductionVI src1 src2)); + effect(TEMP_DEF dst, TEMP tmp); + ins_cost(SVE_COST); + format %{ "sve_uaddv $tmp, $src2\t# vector (sve) (S)\n\t" + "umov $dst, $tmp, S, 0\n\t" + "addw $dst, $dst, $src1\t # add reduction S" %} + ins_encode %{ + __ sve_uaddv(as_FloatRegister($tmp$$reg), __ S, + ptrue, as_FloatRegister($src2$$reg)); + __ umov($dst$$Register, as_FloatRegister($tmp$$reg), __ S, 0); + __ addw($dst$$Register, $dst$$Register, $src1$$Register); + %} + ins_pipe(pipe_slow); +%} + +instruct reduce_addL(iRegLNoSp dst, iRegL src1, vReg src2, vRegD tmp) %{ + predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() >= 16 && + (n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG)); + match(Set dst (AddReductionVL src1 src2)); + effect(TEMP_DEF dst, TEMP tmp); + ins_cost(SVE_COST); + format %{ "sve_uaddv $tmp, $src2\t# vector (sve) (D)\n\t" + "umov $dst, $tmp, D, 0\n\t" + "add $dst, $dst, $src1\t # add reduction D" %} + ins_encode %{ + __ sve_uaddv(as_FloatRegister($tmp$$reg), __ D, + ptrue, as_FloatRegister($src2$$reg)); + __ umov($dst$$Register, as_FloatRegister($tmp$$reg), __ D, 0); + __ add($dst$$Register, $dst$$Register, $src1$$Register); + %} + ins_pipe(pipe_slow); +%} + +instruct reduce_addF(vRegF src1_dst, vReg 
src2) %{
+ predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() >= 16);
+ match(Set src1_dst (AddReductionVF src1_dst src2));
+ ins_cost(SVE_COST);
+ format %{ "sve_fadda $src1_dst, $src1_dst, $src2\t# vector (sve) (S)" %}
+ ins_encode %{
+ __ sve_fadda(as_FloatRegister($src1_dst$$reg), __ S,
+ ptrue, as_FloatRegister($src2$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct reduce_addD(vRegD src1_dst, vReg src2) %{
+ predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() >= 16);
+ match(Set src1_dst (AddReductionVD src1_dst src2));
+ ins_cost(SVE_COST);
+ format %{ "sve_fadda $src1_dst, $src1_dst, $src2\t# vector (sve) (D)" %}
+ ins_encode %{
+ __ sve_fadda(as_FloatRegister($src1_dst$$reg), __ D,
+ ptrue, as_FloatRegister($src2$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+// vector max reduction
+
+instruct reduce_maxF(vRegF dst, vRegF src1, vReg src2) %{
+ predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT &&
+ n->in(2)->bottom_type()->is_vect()->length_in_bytes() >= 16);
+ match(Set dst (MaxReductionV src1 src2));
+ ins_cost(INSN_COST);
+ effect(TEMP_DEF dst);
+ format %{ "sve_fmaxv $dst, $src2 # vector (sve) (S)\n\t"
+ "fmaxs $dst, $dst, $src1\t # max reduction F" %}
+ ins_encode %{
+ __ sve_fmaxv(as_FloatRegister($dst$$reg), __ S,
+ ptrue, as_FloatRegister($src2$$reg));
+ __ fmaxs(as_FloatRegister($dst$$reg), as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct reduce_maxD(vRegD dst, vRegD src1, vReg src2) %{
+ predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE &&
+ n->in(2)->bottom_type()->is_vect()->length_in_bytes() >= 16);
+ match(Set dst (MaxReductionV src1 src2));
+ ins_cost(INSN_COST);
+ effect(TEMP_DEF dst);
+ format %{ "sve_fmaxv $dst, $src2 # vector (sve) (D)\n\t"
+ "fmaxd $dst, $dst, $src1\t # max reduction D" %}
+ ins_encode %{
+ __ sve_fmaxv(as_FloatRegister($dst$$reg), __ D,
+ ptrue, as_FloatRegister($src2$$reg));
+ __ fmaxd(as_FloatRegister($dst$$reg), as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+// vector min reduction
+
+instruct reduce_minF(vRegF dst, vRegF src1, vReg src2) %{
+ predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT &&
+ n->in(2)->bottom_type()->is_vect()->length_in_bytes() >= 16);
+ match(Set dst (MinReductionV src1 src2));
+ ins_cost(INSN_COST);
+ effect(TEMP_DEF dst);
+ format %{ "sve_fminv $dst, $src2 # vector (sve) (S)\n\t"
+ "fmins $dst, $dst, $src1\t # min reduction F" %}
+ ins_encode %{
+ __ sve_fminv(as_FloatRegister($dst$$reg), __ S,
+ ptrue, as_FloatRegister($src2$$reg));
+ __ fmins(as_FloatRegister($dst$$reg), as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct reduce_minD(vRegD dst, vRegD src1, vReg src2) %{
+ predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE &&
+ n->in(2)->bottom_type()->is_vect()->length_in_bytes() >= 16);
+ match(Set dst (MinReductionV src1 src2));
+ ins_cost(INSN_COST);
+ effect(TEMP_DEF dst);
+ format %{ "sve_fminv $dst, $src2 # vector (sve) (D)\n\t"
+ "fmind $dst, $dst, $src1\t # min reduction D" %}
+ ins_encode %{
+ __ sve_fminv(as_FloatRegister($dst$$reg), __ D,
+ ptrue, as_FloatRegister($src2$$reg));
+ __ fmind(as_FloatRegister($dst$$reg), as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+// vector Math.rint,
floor, ceil + +instruct vroundD(vReg dst, vReg src, immI rmode) %{ + predicate(UseSVE > 0 && n->as_Vector()->length() >= 2 && + n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); + match(Set dst (RoundDoubleModeV src rmode)); + format %{ "sve_frint $dst, $src, $rmode\t# vector (sve) (D)" %} + ins_encode %{ + switch ($rmode$$constant) { + case RoundDoubleModeNode::rmode_rint: + __ sve_frintn(as_FloatRegister($dst$$reg), __ D, + ptrue, as_FloatRegister($src$$reg)); + break; + case RoundDoubleModeNode::rmode_floor: + __ sve_frintm(as_FloatRegister($dst$$reg), __ D, + ptrue, as_FloatRegister($src$$reg)); + break; + case RoundDoubleModeNode::rmode_ceil: + __ sve_frintp(as_FloatRegister($dst$$reg), __ D, + ptrue, as_FloatRegister($src$$reg)); + break; + } + %} + ins_pipe(pipe_slow); +%} + +// vector replicate + +instruct replicateB(vReg dst, iRegIorL2I src) %{ + predicate(UseSVE > 0 && n->as_Vector()->length() >= 16); + match(Set dst (ReplicateB src)); + ins_cost(SVE_COST); + format %{ "sve_dup $dst, $src\t# vector (sve) (B)" %} + ins_encode %{ + __ sve_dup(as_FloatRegister($dst$$reg), __ B, as_Register($src$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct replicateS(vReg dst, iRegIorL2I src) %{ + predicate(UseSVE > 0 && n->as_Vector()->length() >= 8); + match(Set dst (ReplicateS src)); + ins_cost(SVE_COST); + format %{ "sve_dup $dst, $src\t# vector (sve) (H)" %} + ins_encode %{ + __ sve_dup(as_FloatRegister($dst$$reg), __ H, as_Register($src$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct replicateI(vReg dst, iRegIorL2I src) %{ + predicate(UseSVE > 0 && n->as_Vector()->length() >= 4); + match(Set dst (ReplicateI src)); + ins_cost(SVE_COST); + format %{ "sve_dup $dst, $src\t# vector (sve) (S)" %} + ins_encode %{ + __ sve_dup(as_FloatRegister($dst$$reg), __ S, as_Register($src$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct replicateL(vReg dst, iRegL src) %{ + predicate(UseSVE > 0 && n->as_Vector()->length() >= 2); + match(Set dst (ReplicateL src)); + ins_cost(SVE_COST); + format %{ "sve_dup $dst, $src\t# vector (sve) (D)" %} + ins_encode %{ + __ sve_dup(as_FloatRegister($dst$$reg), __ D, as_Register($src$$reg)); + %} + ins_pipe(pipe_slow); +%} + + +instruct replicateB_imm8(vReg dst, immI8 con) %{ + predicate(UseSVE > 0 && n->as_Vector()->length() >= 16); + match(Set dst (ReplicateB con)); + ins_cost(SVE_COST); + format %{ "sve_dup $dst, $con\t# vector (sve) (B)" %} + ins_encode %{ + __ sve_dup(as_FloatRegister($dst$$reg), __ B, $con$$constant); + %} + ins_pipe(pipe_slow); +%} + +instruct replicateS_imm8(vReg dst, immI8_shift8 con) %{ + predicate(UseSVE > 0 && n->as_Vector()->length() >= 8); + match(Set dst (ReplicateS con)); + ins_cost(SVE_COST); + format %{ "sve_dup $dst, $con\t# vector (sve) (H)" %} + ins_encode %{ + __ sve_dup(as_FloatRegister($dst$$reg), __ H, $con$$constant); + %} + ins_pipe(pipe_slow); +%} + +instruct replicateI_imm8(vReg dst, immI8_shift8 con) %{ + predicate(UseSVE > 0 && n->as_Vector()->length() >= 4); + match(Set dst (ReplicateI con)); + ins_cost(SVE_COST); + format %{ "sve_dup $dst, $con\t# vector (sve) (S)" %} + ins_encode %{ + __ sve_dup(as_FloatRegister($dst$$reg), __ S, $con$$constant); + %} + ins_pipe(pipe_slow); +%} + +instruct replicateL_imm8(vReg dst, immL8_shift8 con) %{ + predicate(UseSVE > 0 && n->as_Vector()->length() >= 2); + match(Set dst (ReplicateL con)); + ins_cost(SVE_COST); + format %{ "sve_dup $dst, $con\t# vector (sve) (D)" %} + ins_encode %{ + __ sve_dup(as_FloatRegister($dst$$reg), __ D, $con$$constant); + %} + 
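// The immI8_shift8 / immL8_shift8 operands used above mirror the immediate
// field of SVE DUP (immediate): an 8-bit signed value, optionally shifted
// left by 8, hence the -32768..32512 range with a zero low byte
// (e.g. 0x1200 = 18 << 8).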
ins_pipe(pipe_slow); +%} + + +instruct replicateF(vReg dst, vRegF src) %{ + predicate(UseSVE > 0 && n->as_Vector()->length() >= 4); + match(Set dst (ReplicateF src)); + ins_cost(SVE_COST); + format %{ "sve_cpy $dst, $src\t# vector (sve) (S)" %} + ins_encode %{ + __ sve_cpy(as_FloatRegister($dst$$reg), __ S, + ptrue, as_FloatRegister($src$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct replicateD(vReg dst, vRegD src) %{ + predicate(UseSVE > 0 && n->as_Vector()->length() >= 2); + match(Set dst (ReplicateD src)); + ins_cost(SVE_COST); + format %{ "sve_cpy $dst, $src\t# vector (sve) (D)" %} + ins_encode %{ + __ sve_cpy(as_FloatRegister($dst$$reg), __ D, + ptrue, as_FloatRegister($src$$reg)); + %} + ins_pipe(pipe_slow); +%} + +// vector shift + +instruct vasrB(vReg dst, vReg shift) %{ + predicate(UseSVE > 0 && n->as_Vector()->length() >= 16); + match(Set dst (RShiftVB dst shift)); + ins_cost(SVE_COST); + format %{ "sve_asr $dst, $dst, $shift\t# vector (sve) (B)" %} + ins_encode %{ + __ sve_asr(as_FloatRegister($dst$$reg), __ B, + ptrue, as_FloatRegister($shift$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vasrS(vReg dst, vReg shift) %{ + predicate(UseSVE > 0 && n->as_Vector()->length() >= 8); + match(Set dst (RShiftVS dst shift)); + ins_cost(SVE_COST); + format %{ "sve_asr $dst, $dst, $shift\t# vector (sve) (H)" %} + ins_encode %{ + __ sve_asr(as_FloatRegister($dst$$reg), __ H, + ptrue, as_FloatRegister($shift$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vasrI(vReg dst, vReg shift) %{ + predicate(UseSVE > 0 && n->as_Vector()->length() >= 4); + match(Set dst (RShiftVI dst shift)); + ins_cost(SVE_COST); + format %{ "sve_asr $dst, $dst, $shift\t# vector (sve) (S)" %} + ins_encode %{ + __ sve_asr(as_FloatRegister($dst$$reg), __ S, + ptrue, as_FloatRegister($shift$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vasrL(vReg dst, vReg shift) %{ + predicate(UseSVE > 0 && n->as_Vector()->length() >= 2); + match(Set dst (RShiftVL dst shift)); + ins_cost(SVE_COST); + format %{ "sve_asr $dst, $dst, $shift\t# vector (sve) (D)" %} + ins_encode %{ + __ sve_asr(as_FloatRegister($dst$$reg), __ D, + ptrue, as_FloatRegister($shift$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vlslB(vReg dst, vReg shift) %{ + predicate(UseSVE > 0 && n->as_Vector()->length() >= 16); + match(Set dst (LShiftVB dst shift)); + ins_cost(SVE_COST); + format %{ "sve_lsl $dst, $dst, $shift\t# vector (sve) (B)" %} + ins_encode %{ + __ sve_lsl(as_FloatRegister($dst$$reg), __ B, + ptrue, as_FloatRegister($shift$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vlslS(vReg dst, vReg shift) %{ + predicate(UseSVE > 0 && n->as_Vector()->length() >= 8); + match(Set dst (LShiftVS dst shift)); + ins_cost(SVE_COST); + format %{ "sve_lsl $dst, $dst, $shift\t# vector (sve) (H)" %} + ins_encode %{ + __ sve_lsl(as_FloatRegister($dst$$reg), __ H, + ptrue, as_FloatRegister($shift$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vlslI(vReg dst, vReg shift) %{ + predicate(UseSVE > 0 && n->as_Vector()->length() >= 4); + match(Set dst (LShiftVI dst shift)); + ins_cost(SVE_COST); + format %{ "sve_lsl $dst, $dst, $shift\t# vector (sve) (S)" %} + ins_encode %{ + __ sve_lsl(as_FloatRegister($dst$$reg), __ S, + ptrue, as_FloatRegister($shift$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vlslL(vReg dst, vReg shift) %{ + predicate(UseSVE > 0 && n->as_Vector()->length() >= 2); + match(Set dst (LShiftVL dst shift)); + ins_cost(SVE_COST); + format %{ "sve_lsl $dst, $dst, $shift\t# vector (sve) (D)" %} + ins_encode %{ + __ 
sve_lsl(as_FloatRegister($dst$$reg), __ D, + ptrue, as_FloatRegister($shift$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vlsrB(vReg dst, vReg shift) %{ + predicate(UseSVE > 0 && n->as_Vector()->length() >= 16); + match(Set dst (URShiftVB dst shift)); + ins_cost(SVE_COST); + format %{ "sve_lsr $dst, $dst, $shift\t# vector (sve) (B)" %} + ins_encode %{ + __ sve_lsr(as_FloatRegister($dst$$reg), __ B, + ptrue, as_FloatRegister($shift$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vlsrS(vReg dst, vReg shift) %{ + predicate(UseSVE > 0 && n->as_Vector()->length() >= 8); + match(Set dst (URShiftVS dst shift)); + ins_cost(SVE_COST); + format %{ "sve_lsr $dst, $dst, $shift\t# vector (sve) (H)" %} + ins_encode %{ + __ sve_lsr(as_FloatRegister($dst$$reg), __ H, + ptrue, as_FloatRegister($shift$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vlsrI(vReg dst, vReg shift) %{ + predicate(UseSVE > 0 && n->as_Vector()->length() >= 4); + match(Set dst (URShiftVI dst shift)); + ins_cost(SVE_COST); + format %{ "sve_lsr $dst, $dst, $shift\t# vector (sve) (S)" %} + ins_encode %{ + __ sve_lsr(as_FloatRegister($dst$$reg), __ S, + ptrue, as_FloatRegister($shift$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vlsrL(vReg dst, vReg shift) %{ + predicate(UseSVE > 0 && n->as_Vector()->length() >= 2); + match(Set dst (URShiftVL dst shift)); + ins_cost(SVE_COST); + format %{ "sve_lsr $dst, $dst, $shift\t# vector (sve) (D)" %} + ins_encode %{ + __ sve_lsr(as_FloatRegister($dst$$reg), __ D, + ptrue, as_FloatRegister($shift$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vasrB_imm(vReg dst, vReg src, immI shift) %{ + predicate(UseSVE > 0 && n->as_Vector()->length() >= 16); + match(Set dst (RShiftVB src shift)); + ins_cost(SVE_COST); + format %{ "sve_asr $dst, $src, $shift\t# vector (sve) (B)" %} + ins_encode %{ + int con = (int)$shift$$constant; + if (con == 0) { + __ sve_orr(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), + as_FloatRegister($src$$reg)); + return; + } + if (con >= 8) con = 7; + __ sve_asr(as_FloatRegister($dst$$reg), __ B, + as_FloatRegister($src$$reg), con); + %} + ins_pipe(pipe_slow); +%} + +instruct vasrS_imm(vReg dst, vReg src, immI shift) %{ + predicate(UseSVE > 0 && n->as_Vector()->length() >= 8); + match(Set dst (RShiftVS src shift)); + ins_cost(SVE_COST); + format %{ "sve_asr $dst, $src, $shift\t# vector (sve) (H)" %} + ins_encode %{ + int con = (int)$shift$$constant; + if (con == 0) { + __ sve_orr(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), + as_FloatRegister($src$$reg)); + return; + } + if (con >= 16) con = 15; + __ sve_asr(as_FloatRegister($dst$$reg), __ H, + as_FloatRegister($src$$reg), con); + %} + ins_pipe(pipe_slow); +%} + +instruct vasrI_imm(vReg dst, vReg src, immI shift) %{ + predicate(UseSVE > 0 && n->as_Vector()->length() >= 4); + match(Set dst (RShiftVI src shift)); + ins_cost(SVE_COST); + format %{ "sve_asr $dst, $src, $shift\t# vector (sve) (S)" %} + ins_encode %{ + int con = (int)$shift$$constant; + if (con == 0) { + __ sve_orr(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), + as_FloatRegister($src$$reg)); + return; + } + __ sve_asr(as_FloatRegister($dst$$reg), __ S, + as_FloatRegister($src$$reg), con); + %} + ins_pipe(pipe_slow); +%} + +instruct vasrL_imm(vReg dst, vReg src, immI shift) %{ + predicate(UseSVE > 0 && n->as_Vector()->length() >= 2); + match(Set dst (RShiftVL src shift)); + ins_cost(SVE_COST); + format %{ "sve_asr $dst, $src, $shift\t# vector (sve) (D)" %} + ins_encode %{ + int con = (int)$shift$$constant; + if 
(con == 0) { + __ sve_orr(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), + as_FloatRegister($src$$reg)); + return; + } + __ sve_asr(as_FloatRegister($dst$$reg), __ D, + as_FloatRegister($src$$reg), con); + %} + ins_pipe(pipe_slow); +%} + +instruct vlsrB_imm(vReg dst, vReg src, immI shift) %{ + predicate(UseSVE > 0 && n->as_Vector()->length() >= 16); + match(Set dst (URShiftVB src shift)); + ins_cost(SVE_COST); + format %{ "sve_lsr $dst, $src, $shift\t# vector (sve) (B)" %} + ins_encode %{ + int con = (int)$shift$$constant; + if (con == 0) { + __ sve_orr(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), + as_FloatRegister($src$$reg)); + return; + } + if (con >= 8) { + __ sve_eor(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), + as_FloatRegister($src$$reg)); + return; + } + __ sve_lsr(as_FloatRegister($dst$$reg), __ B, + as_FloatRegister($src$$reg), con); + %} + ins_pipe(pipe_slow); +%} + +instruct vlsrS_imm(vReg dst, vReg src, immI shift) %{ + predicate(UseSVE > 0 && n->as_Vector()->length() >= 8); + match(Set dst (URShiftVS src shift)); + ins_cost(SVE_COST); + format %{ "sve_lsr $dst, $src, $shift\t# vector (sve) (H)" %} + ins_encode %{ + int con = (int)$shift$$constant; + if (con == 0) { + __ sve_orr(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), + as_FloatRegister($src$$reg)); + return; + } + if (con >= 8) { + __ sve_eor(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), + as_FloatRegister($src$$reg)); + return; + } + __ sve_lsr(as_FloatRegister($dst$$reg), __ H, + as_FloatRegister($src$$reg), con); + %} + ins_pipe(pipe_slow); +%} + +instruct vlsrI_imm(vReg dst, vReg src, immI shift) %{ + predicate(UseSVE > 0 && n->as_Vector()->length() >= 4); + match(Set dst (URShiftVI src shift)); + ins_cost(SVE_COST); + format %{ "sve_lsr $dst, $src, $shift\t# vector (sve) (S)" %} + ins_encode %{ + int con = (int)$shift$$constant; + if (con == 0) { + __ sve_orr(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), + as_FloatRegister($src$$reg)); + return; + } + __ sve_lsr(as_FloatRegister($dst$$reg), __ S, + as_FloatRegister($src$$reg), con); + %} + ins_pipe(pipe_slow); +%} + +instruct vlsrL_imm(vReg dst, vReg src, immI shift) %{ + predicate(UseSVE > 0 && n->as_Vector()->length() >= 2); + match(Set dst (URShiftVL src shift)); + ins_cost(SVE_COST); + format %{ "sve_lsr $dst, $src, $shift\t# vector (sve) (D)" %} + ins_encode %{ + int con = (int)$shift$$constant; + if (con == 0) { + __ sve_orr(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), + as_FloatRegister($src$$reg)); + return; + } + __ sve_lsr(as_FloatRegister($dst$$reg), __ D, + as_FloatRegister($src$$reg), con); + %} + ins_pipe(pipe_slow); +%} + +instruct vlslB_imm(vReg dst, vReg src, immI shift) %{ + predicate(UseSVE > 0 && n->as_Vector()->length() >= 16); + match(Set dst (LShiftVB src shift)); + ins_cost(SVE_COST); + format %{ "sve_lsl $dst, $src, $shift\t# vector (sve) (B)" %} + ins_encode %{ + int con = (int)$shift$$constant; + if (con >= 8) { + __ sve_eor(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), + as_FloatRegister($src$$reg)); + return; + } + __ sve_lsl(as_FloatRegister($dst$$reg), __ B, + as_FloatRegister($src$$reg), con); + %} + ins_pipe(pipe_slow); +%} + +instruct vlslS_imm(vReg dst, vReg src, immI shift) %{ + predicate(UseSVE > 0 && n->as_Vector()->length() >= 8); + match(Set dst (LShiftVS src shift)); + ins_cost(SVE_COST); + format %{ "sve_lsl $dst, $src, $shift\t# vector (sve) (H)" %} + ins_encode %{ + int con = (int)$shift$$constant; + if (con >= 8) { + __ 
sve_eor(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), + as_FloatRegister($src$$reg)); + return; + } + __ sve_lsl(as_FloatRegister($dst$$reg), __ H, + as_FloatRegister($src$$reg), con); + %} + ins_pipe(pipe_slow); +%} + +instruct vlslI_imm(vReg dst, vReg src, immI shift) %{ + predicate(UseSVE > 0 && n->as_Vector()->length() >= 4); + match(Set dst (LShiftVI src shift)); + ins_cost(SVE_COST); + format %{ "sve_lsl $dst, $src, $shift\t# vector (sve) (S)" %} + ins_encode %{ + int con = (int)$shift$$constant; + __ sve_lsl(as_FloatRegister($dst$$reg), __ S, + as_FloatRegister($src$$reg), con); + %} + ins_pipe(pipe_slow); +%} + +instruct vlslL_imm(vReg dst, vReg src, immI shift) %{ + predicate(UseSVE > 0 && n->as_Vector()->length() >= 2); + match(Set dst (LShiftVL src shift)); + ins_cost(SVE_COST); + format %{ "sve_lsl $dst, $src, $shift\t# vector (sve) (D)" %} + ins_encode %{ + int con = (int)$shift$$constant; + __ sve_lsl(as_FloatRegister($dst$$reg), __ D, + as_FloatRegister($src$$reg), con); + %} + ins_pipe(pipe_slow); +%} + +instruct vshiftcntB(vReg dst, iRegIorL2I cnt) %{ + predicate(UseSVE > 0 && n->as_Vector()->length() >= 16 && + (n->bottom_type()->is_vect()->element_basic_type() == T_BYTE)); + match(Set dst (LShiftCntV cnt)); + match(Set dst (RShiftCntV cnt)); + format %{ "sve_dup $dst, $cnt\t# vector shift count (sve) (B)" %} + ins_encode %{ + __ sve_dup(as_FloatRegister($dst$$reg), __ B, as_Register($cnt$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vshiftcntS(vReg dst, iRegIorL2I cnt) %{ + predicate(UseSVE > 0 && n->as_Vector()->length() >= 8 && + (n->bottom_type()->is_vect()->element_basic_type() == T_SHORT || + (n->bottom_type()->is_vect()->element_basic_type() == T_CHAR))); + match(Set dst (LShiftCntV cnt)); + match(Set dst (RShiftCntV cnt)); + format %{ "sve_dup $dst, $cnt\t# vector shift count (sve) (H)" %} + ins_encode %{ + __ sve_dup(as_FloatRegister($dst$$reg), __ H, as_Register($cnt$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vshiftcntI(vReg dst, iRegIorL2I cnt) %{ + predicate(UseSVE > 0 && n->as_Vector()->length() >= 4 && + (n->bottom_type()->is_vect()->element_basic_type() == T_INT)); + match(Set dst (LShiftCntV cnt)); + match(Set dst (RShiftCntV cnt)); + format %{ "sve_dup $dst, $cnt\t# vector shift count (sve) (S)" %} + ins_encode %{ + __ sve_dup(as_FloatRegister($dst$$reg), __ S, as_Register($cnt$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vshiftcntL(vReg dst, iRegIorL2I cnt) %{ + predicate(UseSVE > 0 && n->as_Vector()->length() >= 2 && + (n->bottom_type()->is_vect()->element_basic_type() == T_LONG)); + match(Set dst (LShiftCntV cnt)); + match(Set dst (RShiftCntV cnt)); + format %{ "sve_dup $dst, $cnt\t# vector shift count (sve) (D)" %} + ins_encode %{ + __ sve_dup(as_FloatRegister($dst$$reg), __ D, as_Register($cnt$$reg)); + %} + ins_pipe(pipe_slow); +%} + +// vector sqrt + +instruct vsqrtF(vReg dst, vReg src) %{ + predicate(UseSVE > 0 && n->as_Vector()->length_in_bytes() >= 16); + match(Set dst (SqrtVF src)); + ins_cost(SVE_COST); + format %{ "sve_fsqrt $dst, $src\t# vector (sve) (S)" %} + ins_encode %{ + __ sve_fsqrt(as_FloatRegister($dst$$reg), __ S, + ptrue, as_FloatRegister($src$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vsqrtD(vReg dst, vReg src) %{ + predicate(UseSVE > 0 && n->as_Vector()->length_in_bytes() >= 16); + match(Set dst (SqrtVD src)); + ins_cost(SVE_COST); + format %{ "sve_fsqrt $dst, $src\t# vector (sve) (D)" %} + ins_encode %{ + __ sve_fsqrt(as_FloatRegister($dst$$reg), __ D, + ptrue, 
as_FloatRegister($src$$reg)); + %} + ins_pipe(pipe_slow); +%} + +// vector sub + +instruct vsubB(vReg dst, vReg src1, vReg src2) %{ + predicate(UseSVE > 0 && n->as_Vector()->length() >= 16); + match(Set dst (SubVB src1 src2)); + ins_cost(SVE_COST); + format %{ "sve_sub $dst, $src1, $src2\t # vector (sve) (B)" %} + ins_encode %{ + __ sve_sub(as_FloatRegister($dst$$reg), __ B, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vsubS(vReg dst, vReg src1, vReg src2) %{ + predicate(UseSVE > 0 && n->as_Vector()->length() >= 8); + match(Set dst (SubVS src1 src2)); + ins_cost(SVE_COST); + format %{ "sve_sub $dst, $src1, $src2\t # vector (sve) (H)" %} + ins_encode %{ + __ sve_sub(as_FloatRegister($dst$$reg), __ H, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vsubI(vReg dst, vReg src1, vReg src2) %{ + predicate(UseSVE > 0 && n->as_Vector()->length() >= 4); + match(Set dst (SubVI src1 src2)); + ins_cost(SVE_COST); + format %{ "sve_sub $dst, $src1, $src2\t # vector (sve) (S)" %} + ins_encode %{ + __ sve_sub(as_FloatRegister($dst$$reg), __ S, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vsubL(vReg dst, vReg src1, vReg src2) %{ + predicate(UseSVE > 0 && n->as_Vector()->length() >= 2); + match(Set dst (SubVL src1 src2)); + ins_cost(SVE_COST); + format %{ "sve_sub $dst, $src1, $src2\t # vector (sve) (D)" %} + ins_encode %{ + __ sve_sub(as_FloatRegister($dst$$reg), __ D, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vsubF(vReg dst, vReg src1, vReg src2) %{ + predicate(UseSVE > 0 && n->as_Vector()->length() >= 4); + match(Set dst (SubVF src1 src2)); + ins_cost(SVE_COST); + format %{ "sve_fsub $dst, $src1, $src2\t # vector (sve) (S)" %} + ins_encode %{ + __ sve_fsub(as_FloatRegister($dst$$reg), __ S, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vsubD(vReg dst, vReg src1, vReg src2) %{ + predicate(UseSVE > 0 && n->as_Vector()->length() >= 2); + match(Set dst (SubVD src1 src2)); + ins_cost(SVE_COST); + format %{ "sve_fsub $dst, $src1, $src2\t # vector (sve) (D)" %} + ins_encode %{ + __ sve_fsub(as_FloatRegister($dst$$reg), __ D, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(pipe_slow); +%} +
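One remark on the immediate-shift rules above before the m4 source: shift counts of zero and counts at or beyond the lane width are special-cased (a register copy via sve_orr, zeroing via sve_eor, or clamping the count) instead of being handed to the shift encoding directly. A scalar model of the byte-lane cases (illustrative only; assumes arithmetic right-shift of signed values, which C++20 guarantees):

#include <cassert>
#include <cstdint>

// Mirrors vasrB_imm / vlsrB_imm / vlslB_imm above, one lane at a time.
static int8_t asr_b(int8_t v, int con) {
  if (con >= 8) con = 7;     // clamp: shifting a byte by 7 already replicates the sign
  return (int8_t)(v >> con);
}
static uint8_t lsr_b(uint8_t v, int con) {
  if (con == 0) return v;    // emitted as sve_orr (register copy)
  if (con >= 8) return 0;    // emitted as sve_eor (zeroes the lanes)
  return (uint8_t)(v >> con);
}
static uint8_t lsl_b(uint8_t v, int con) {
  if (con >= 8) return 0;    // same sve_eor trick
  return (uint8_t)(v << con);
}

int main() {
  assert(asr_b(-128, 9) == -1);  // same result as shifting by 7
  assert(lsr_b(0x80, 9) == 0);
  assert(lsl_b(0x01, 8) == 0);
  return 0;
}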
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/hotspot/cpu/aarch64/aarch64_sve_ad.m4 Tue Sep 08 15:28:06 2020 +0800 @@ -0,0 +1,767 @@ +// +// Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. +// Copyright (c) 2020, Arm Limited. All rights reserved. +// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +// +// This code is free software; you can redistribute it and/or modify it +// under the terms of the GNU General Public License version 2 only, as +// published by the Free Software Foundation. +// +// This code is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// version 2 for more details (a copy is included in the LICENSE file that +// accompanied this code). +// +// You should have received a copy of the GNU General Public License version +// 2 along with this work; if not, write to the Free Software Foundation, +// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +// +// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +// or visit www.oracle.com if you need additional information or have any +// questions. +// +// + +dnl Generate the warning +// This file is automatically generated by running "m4 aarch64_sve_ad.m4". Do not edit ---- +dnl + +// AArch64 SVE Architecture Description File + +dnl +dnl OPERAND_VMEMORYA_IMMEDIATE_OFFSET($1, $2, $3 ) +dnl OPERAND_VMEMORYA_IMMEDIATE_OFFSET(imm_type_abbr, imm_type, imm_len) +define(`OPERAND_VMEMORYA_IMMEDIATE_OFFSET', ` +operand vmemA_imm$1Offset$3() +%{ + predicate(Address::offset_ok_for_sve_immed(n->get_$2(), $3, + Matcher::scalable_vector_reg_size(T_BYTE))); + match(Con$1); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%}') +dnl +// 4 bit signed offset -- for predicated load/store +OPERAND_VMEMORYA_IMMEDIATE_OFFSET(I, int, 4) +OPERAND_VMEMORYA_IMMEDIATE_OFFSET(L, long, 4) +dnl +dnl OPERAND_VMEMORYA_INDIRECT_OFFSET($1, $2 ) +dnl OPERAND_VMEMORYA_INDIRECT_OFFSET(imm_type_abbr, imm_len) +define(`OPERAND_VMEMORYA_INDIRECT_OFFSET', ` +operand vmemA_indOff$1$2(iRegP reg, vmemA_imm$1Offset$2 off) +%{ + constraint(ALLOC_IN_RC(ptr_reg)); + match(AddP reg off); + op_cost(0); + format %{ "[$reg, $off, MUL VL]" %} + interface(MEMORY_INTER) %{ + base($reg); + `index'(0xffffffff); + scale(0x0); + disp($off); + %} +%}') +dnl +OPERAND_VMEMORYA_INDIRECT_OFFSET(I, 4) +OPERAND_VMEMORYA_INDIRECT_OFFSET(L, 4) + +opclass vmemA(indirect, vmemA_indOffI4, vmemA_indOffL4); + +source_hpp %{ + bool op_sve_supported(int opcode); +%} + +source %{ + + static inline BasicType vector_element_basic_type(const MachNode* n) { + const TypeVect* vt = n->bottom_type()->is_vect(); + return vt->element_basic_type(); + } + + static inline BasicType vector_element_basic_type(const MachNode* use, const MachOper* opnd) { + int def_idx = use->operand_index(opnd); + Node* def = use->in(def_idx); + const TypeVect* vt = def->bottom_type()->is_vect(); + return vt->element_basic_type(); + } + + typedef void (C2_MacroAssembler::* sve_mem_insn_predicate)(FloatRegister Rt, Assembler::SIMD_RegVariant T, + PRegister Pg, const Address &adr); + + // Predicated load/store, with optional ptrue to all elements of given predicate register. 
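// The typedef above declares a pointer-to-member-function type: 'insn' below
// selects the lane-width-specific emitter at runtime and is invoked as
// (masm.*insn)(...). Minimal usage sketch:
//   sve_mem_insn_predicate insn = &C2_MacroAssembler::sve_ld1b;
//   (masm.*insn)(reg, Assembler::B, pg, addr);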
+  static void loadStoreA_predicate(C2_MacroAssembler masm, bool is_store,
+                                   FloatRegister reg, PRegister pg, BasicType bt,
+                                   int opcode, Register base, int index, int size, int disp) {
+    sve_mem_insn_predicate insn;
+    Assembler::SIMD_RegVariant type;
+    int esize = type2aelembytes(bt);
+    if (index == -1) {
+      assert(size == 0, "unsupported address mode: scale size = %d", size);
+      switch(esize) {
+      case 1:
+        insn = is_store ? &C2_MacroAssembler::sve_st1b : &C2_MacroAssembler::sve_ld1b;
+        type = Assembler::B;
+        break;
+      case 2:
+        insn = is_store ? &C2_MacroAssembler::sve_st1h : &C2_MacroAssembler::sve_ld1h;
+        type = Assembler::H;
+        break;
+      case 4:
+        insn = is_store ? &C2_MacroAssembler::sve_st1w : &C2_MacroAssembler::sve_ld1w;
+        type = Assembler::S;
+        break;
+      case 8:
+        insn = is_store ? &C2_MacroAssembler::sve_st1d : &C2_MacroAssembler::sve_ld1d;
+        type = Assembler::D;
+        break;
+      default:
+        assert(false, "unsupported");
+        ShouldNotReachHere();
+      }
+      (masm.*insn)(reg, type, pg, Address(base, disp / Matcher::scalable_vector_reg_size(T_BYTE)));
+    } else {
+      assert(false, "unimplemented");
+      ShouldNotReachHere();
+    }
+  }
+
+  bool op_sve_supported(int opcode) {
+    switch (opcode) {
+      case Op_MulAddVS2VI:
+      // No multiply reduction instructions
+      case Op_MulReductionVD:
+      case Op_MulReductionVF:
+      case Op_MulReductionVI:
+      case Op_MulReductionVL:
+      // Others
+      case Op_Extract:
+      case Op_ExtractB:
+      case Op_ExtractC:
+      case Op_ExtractD:
+      case Op_ExtractF:
+      case Op_ExtractI:
+      case Op_ExtractL:
+      case Op_ExtractS:
+      case Op_ExtractUB:
+        return false;
+      default:
+        return true;
+    }
+  }
+
+%}
+
+definitions %{
+  int_def SVE_COST (200, 200);
+%}
+
+
+dnl
+dnl ELEMENT_SHORT_CHAR($1, $2)
+dnl ELEMENT_SHORT_CHAR(etype, node)
+define(`ELEMENT_SHORT_CHAR',`ifelse(`$1', `T_SHORT',
+`($2->bottom_type()->is_vect()->element_basic_type() == T_SHORT ||
+($2->bottom_type()->is_vect()->element_basic_type() == T_CHAR))',
+`($2->bottom_type()->is_vect()->element_basic_type() == $1)')')
+dnl
+
+// All SVE instructions
+
+// vector load/store
+
+// Use predicated vector load/store
+instruct loadV(vReg dst, vmemA mem) %{
+  predicate(UseSVE > 0 && n->as_LoadVector()->memory_size() >= 16);
+  match(Set dst (LoadVector mem));
+  ins_cost(SVE_COST);
+  format %{ "sve_ldr $dst, $mem\t # vector (sve)" %}
+  ins_encode %{
+    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
+    loadStoreA_predicate(C2_MacroAssembler(&cbuf), false, dst_reg, ptrue,
+                         vector_element_basic_type(this), $mem->opcode(),
+                         as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct storeV(vReg src, vmemA mem) %{
+  predicate(UseSVE > 0 && n->as_StoreVector()->memory_size() >= 16);
+  match(Set mem (StoreVector mem src));
+  ins_cost(SVE_COST);
+  format %{ "sve_str $mem, $src\t # vector (sve)" %}
+  ins_encode %{
+    FloatRegister src_reg = as_FloatRegister($src$$reg);
+    loadStoreA_predicate(C2_MacroAssembler(&cbuf), true, src_reg, ptrue,
+                         vector_element_basic_type(this, $src), $mem->opcode(),
+                         as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+dnl
+dnl UNARY_OP_TRUE_PREDICATE_ETYPE($1, $2, $3, $4, $5, $6 )
+dnl UNARY_OP_TRUE_PREDICATE_ETYPE(insn_name, op_name, element_type, size, min_vec_len, insn)
+define(`UNARY_OP_TRUE_PREDICATE_ETYPE', `
+instruct $1(vReg dst, vReg src) %{
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= $5 &&
+            n->bottom_type()->is_vect()->element_basic_type() == $3);
+  match(Set dst ($2 src));
+  ins_cost(SVE_COST);
+  format %{ "$6
$dst, $src\t# vector (sve) ($4)" %} + ins_encode %{ + __ $6(as_FloatRegister($dst$$reg), __ $4, + ptrue, as_FloatRegister($src$$reg)); + %} + ins_pipe(pipe_slow); +%}')dnl + +// vector abs +UNARY_OP_TRUE_PREDICATE_ETYPE(vabsB, AbsVB, T_BYTE, B, 16, sve_abs) +UNARY_OP_TRUE_PREDICATE_ETYPE(vabsS, AbsVS, T_SHORT, H, 8, sve_abs) +UNARY_OP_TRUE_PREDICATE_ETYPE(vabsI, AbsVI, T_INT, S, 4, sve_abs) +UNARY_OP_TRUE_PREDICATE_ETYPE(vabsL, AbsVL, T_LONG, D, 2, sve_abs) +UNARY_OP_TRUE_PREDICATE_ETYPE(vabsF, AbsVF, T_FLOAT, S, 4, sve_fabs) +UNARY_OP_TRUE_PREDICATE_ETYPE(vabsD, AbsVD, T_DOUBLE, D, 2, sve_fabs) +dnl +dnl BINARY_OP_UNPREDICATED($1, $2 $3, $4 $5 ) +dnl BINARY_OP_UNPREDICATED(insn_name, op_name, size, min_vec_len, insn) +define(`BINARY_OP_UNPREDICATED', ` +instruct $1(vReg dst, vReg src1, vReg src2) %{ + predicate(UseSVE > 0 && n->as_Vector()->length() >= $4); + match(Set dst ($2 src1 src2)); + ins_cost(SVE_COST); + format %{ "$5 $dst, $src1, $src2\t # vector (sve) ($3)" %} + ins_encode %{ + __ $5(as_FloatRegister($dst$$reg), __ $3, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(pipe_slow); +%}')dnl + +// vector add +BINARY_OP_UNPREDICATED(vaddB, AddVB, B, 16, sve_add) +BINARY_OP_UNPREDICATED(vaddS, AddVS, H, 8, sve_add) +BINARY_OP_UNPREDICATED(vaddI, AddVI, S, 4, sve_add) +BINARY_OP_UNPREDICATED(vaddL, AddVL, D, 2, sve_add) +BINARY_OP_UNPREDICATED(vaddF, AddVF, S, 4, sve_fadd) +BINARY_OP_UNPREDICATED(vaddD, AddVD, D, 2, sve_fadd) +dnl +dnl BINARY_OP_UNSIZED($1, $2, $3, $4 ) +dnl BINARY_OP_UNSIZED(insn_name, op_name, min_vec_len, insn) +define(`BINARY_OP_UNSIZED', ` +instruct $1(vReg dst, vReg src1, vReg src2) %{ + predicate(UseSVE > 0 && n->as_Vector()->length_in_bytes() >= $3); + match(Set dst ($2 src1 src2)); + ins_cost(SVE_COST); + format %{ "$4 $dst, $src1, $src2\t# vector (sve)" %} + ins_encode %{ + __ $4(as_FloatRegister($dst$$reg), + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(pipe_slow); +%}')dnl + +// vector and +BINARY_OP_UNSIZED(vand, AndV, 16, sve_and) + +// vector or +BINARY_OP_UNSIZED(vor, OrV, 16, sve_orr) + +// vector xor +BINARY_OP_UNSIZED(vxor, XorV, 16, sve_eor) +dnl +dnl VDIVF($1, $2 , $3 ) +dnl VDIVF(name_suffix, size, min_vec_len) +define(`VDIVF', ` +instruct vdiv$1(vReg dst_src1, vReg src2) %{ + predicate(UseSVE > 0 && n->as_Vector()->length() >= $3); + match(Set dst_src1 (DivV$1 dst_src1 src2)); + ins_cost(SVE_COST); + format %{ "sve_fdiv $dst_src1, $dst_src1, $src2\t# vector (sve) ($2)" %} + ins_encode %{ + __ sve_fdiv(as_FloatRegister($dst_src1$$reg), __ $2, + ptrue, as_FloatRegister($src2$$reg)); + %} + ins_pipe(pipe_slow); +%}')dnl + +// vector float div +VDIVF(F, S, 4) +VDIVF(D, D, 2) + +dnl +dnl BINARY_OP_TRUE_PREDICATE_ETYPE($1, $2, $3, $4, $5, $6 ) +dnl BINARY_OP_TRUE_PREDICATE_ETYPE(insn_name, op_name, element_type, size, min_vec_len, insn) +define(`BINARY_OP_TRUE_PREDICATE_ETYPE', ` +instruct $1(vReg dst_src1, vReg src2) %{ + predicate(UseSVE > 0 && n->as_Vector()->length() >= $5 && + n->bottom_type()->is_vect()->element_basic_type() == $3); + match(Set dst_src1 ($2 dst_src1 src2)); + ins_cost(SVE_COST); + format %{ "$6 $dst_src1, $dst_src1, $src2\t # vector (sve) ($4)" %} + ins_encode %{ + __ $6(as_FloatRegister($dst_src1$$reg), __ $4, + ptrue, as_FloatRegister($src2$$reg)); + %} + ins_pipe(pipe_slow); +%}')dnl +dnl +// vector max +BINARY_OP_TRUE_PREDICATE_ETYPE(vmaxF, MaxV, T_FLOAT, S, 4, sve_fmax) +BINARY_OP_TRUE_PREDICATE_ETYPE(vmaxD, MaxV, T_DOUBLE, D, 2, sve_fmax) 
+BINARY_OP_TRUE_PREDICATE_ETYPE(vminF, MinV, T_FLOAT, S, 4, sve_fmin)
+BINARY_OP_TRUE_PREDICATE_ETYPE(vminD, MinV, T_DOUBLE, D, 2, sve_fmin)
+
+dnl
+dnl VFMLA($1 $2 $3 )
+dnl VFMLA(name_suffix, size, min_vec_len)
+define(`VFMLA', `
+// dst_src1 = dst_src1 + src2 * src3
+instruct vfmla$1(vReg dst_src1, vReg src2, vReg src3) %{
+  predicate(UseFMA && UseSVE > 0 && n->as_Vector()->length() >= $3);
+  match(Set dst_src1 (FmaV$1 dst_src1 (Binary src2 src3)));
+  ins_cost(SVE_COST);
+  format %{ "sve_fmla $dst_src1, $src2, $src3\t # vector (sve) ($2)" %}
+  ins_encode %{
+    __ sve_fmla(as_FloatRegister($dst_src1$$reg), __ $2,
+                ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}')dnl
+dnl
+// vector fmla
+VFMLA(F, S, 4)
+VFMLA(D, D, 2)
+
+dnl
+dnl VFMLS($1 $2 $3 )
+dnl VFMLS(name_suffix, size, min_vec_len)
+define(`VFMLS', `
+// dst_src1 = dst_src1 + -src2 * src3
+// dst_src1 = dst_src1 + src2 * -src3
+instruct vfmls$1(vReg dst_src1, vReg src2, vReg src3) %{
+  predicate(UseFMA && UseSVE > 0 && n->as_Vector()->length() >= $3);
+  match(Set dst_src1 (FmaV$1 dst_src1 (Binary (NegV$1 src2) src3)));
+  match(Set dst_src1 (FmaV$1 dst_src1 (Binary src2 (NegV$1 src3))));
+  ins_cost(SVE_COST);
+  format %{ "sve_fmls $dst_src1, $src2, $src3\t # vector (sve) ($2)" %}
+  ins_encode %{
+    __ sve_fmls(as_FloatRegister($dst_src1$$reg), __ $2,
+                ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}')dnl
+dnl
+// vector fmls
+VFMLS(F, S, 4)
+VFMLS(D, D, 2)
+
+dnl
+dnl VFNMLA($1 $2 $3 )
+dnl VFNMLA(name_suffix, size, min_vec_len)
+define(`VFNMLA', `
+// dst_src1 = -dst_src1 + -src2 * src3
+// dst_src1 = -dst_src1 + src2 * -src3
+instruct vfnmla$1(vReg dst_src1, vReg src2, vReg src3) %{
+  predicate(UseFMA && UseSVE > 0 && n->as_Vector()->length() >= $3);
+  match(Set dst_src1 (FmaV$1 (NegV$1 dst_src1) (Binary (NegV$1 src2) src3)));
+  match(Set dst_src1 (FmaV$1 (NegV$1 dst_src1) (Binary src2 (NegV$1 src3))));
+  ins_cost(SVE_COST);
+  format %{ "sve_fnmla $dst_src1, $src2, $src3\t # vector (sve) ($2)" %}
+  ins_encode %{
+    __ sve_fnmla(as_FloatRegister($dst_src1$$reg), __ $2,
+                 ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}')dnl
+dnl
+// vector fnmla
+VFNMLA(F, S, 4)
+VFNMLA(D, D, 2)
+
+dnl
+dnl VFNMLS($1 $2 $3 )
+dnl VFNMLS(name_suffix, size, min_vec_len)
+define(`VFNMLS', `
+// dst_src1 = -dst_src1 + src2 * src3
+instruct vfnmls$1(vReg dst_src1, vReg src2, vReg src3) %{
+  predicate(UseFMA && UseSVE > 0 && n->as_Vector()->length() >= $3);
+  match(Set dst_src1 (FmaV$1 (NegV$1 dst_src1) (Binary src2 src3)));
+  ins_cost(SVE_COST);
+  format %{ "sve_fnmls $dst_src1, $src2, $src3\t # vector (sve) ($2)" %}
+  ins_encode %{
+    __ sve_fnmls(as_FloatRegister($dst_src1$$reg), __ $2,
+                 ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}')dnl
+dnl
+// vector fnmls
+VFNMLS(F, S, 4)
+VFNMLS(D, D, 2)
+
+dnl
+dnl VMLA($1 $2 $3 )
+dnl VMLA(name_suffix, size, min_vec_len)
+define(`VMLA', `
+// dst_src1 = dst_src1 + src2 * src3
+instruct vmla$1(vReg dst_src1, vReg src2, vReg src3)
+%{
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= $3);
+  match(Set dst_src1 (AddV$1 dst_src1 (MulV$1 src2 src3)));
+  ins_cost(SVE_COST);
+  format %{ "sve_mla $dst_src1, $src2, $src3\t # vector (sve) ($2)" %}
+  ins_encode %{
+    __ sve_mla(as_FloatRegister($dst_src1$$reg), __ $2,
+               ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}')dnl
+dnl
+// vector mla
+VMLA(B, B, 16)
+VMLA(S, H, 8)
+VMLA(I, S, 4)
+VMLA(L, D, 2)
+
+dnl
+dnl VMLS($1 $2 $3 )
+dnl VMLS(name_suffix, size, min_vec_len)
+define(`VMLS', `
+// dst_src1 = dst_src1 - src2 * src3
+instruct vmls$1(vReg dst_src1, vReg src2, vReg src3)
+%{
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= $3);
+  match(Set dst_src1 (SubV$1 dst_src1 (MulV$1 src2 src3)));
+  ins_cost(SVE_COST);
+  format %{ "sve_mls $dst_src1, $src2, $src3\t # vector (sve) ($2)" %}
+  ins_encode %{
+    __ sve_mls(as_FloatRegister($dst_src1$$reg), __ $2,
+               ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}')dnl
+dnl
+// vector mls
+VMLS(B, B, 16)
+VMLS(S, H, 8)
+VMLS(I, S, 4)
+VMLS(L, D, 2)
+
+dnl
+dnl BINARY_OP_TRUE_PREDICATE($1, $2, $3, $4, $5 )
+dnl BINARY_OP_TRUE_PREDICATE(insn_name, op_name, size, min_vec_len, insn)
+define(`BINARY_OP_TRUE_PREDICATE', `
+instruct $1(vReg dst_src1, vReg src2) %{
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= $4);
+  match(Set dst_src1 ($2 dst_src1 src2));
+  ins_cost(SVE_COST);
+  format %{ "$5 $dst_src1, $dst_src1, $src2\t # vector (sve) ($3)" %}
+  ins_encode %{
+    __ $5(as_FloatRegister($dst_src1$$reg), __ $3,
+          ptrue, as_FloatRegister($src2$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}')dnl
+
+// vector mul
+BINARY_OP_TRUE_PREDICATE(vmulB, MulVB, B, 16, sve_mul)
+BINARY_OP_TRUE_PREDICATE(vmulS, MulVS, H, 8, sve_mul)
+BINARY_OP_TRUE_PREDICATE(vmulI, MulVI, S, 4, sve_mul)
+BINARY_OP_TRUE_PREDICATE(vmulL, MulVL, D, 2, sve_mul)
+BINARY_OP_UNPREDICATED(vmulF, MulVF, S, 4, sve_fmul)
+BINARY_OP_UNPREDICATED(vmulD, MulVD, D, 2, sve_fmul)
+
+dnl
+dnl UNARY_OP_TRUE_PREDICATE($1, $2, $3, $4, $5 )
+dnl UNARY_OP_TRUE_PREDICATE(insn_name, op_name, size, min_vec_bytes, insn)
+define(`UNARY_OP_TRUE_PREDICATE', `
+instruct $1(vReg dst, vReg src) %{
+  predicate(UseSVE > 0 && n->as_Vector()->length_in_bytes() >= $4);
+  match(Set dst ($2 src));
+  ins_cost(SVE_COST);
+  format %{ "$5 $dst, $src\t# vector (sve) ($3)" %}
+  ins_encode %{
+    __ $5(as_FloatRegister($dst$$reg), __ $3,
+          ptrue, as_FloatRegister($src$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}')dnl
+dnl
+// vector fneg
+UNARY_OP_TRUE_PREDICATE(vnegF, NegVF, S, 16, sve_fneg)
+UNARY_OP_TRUE_PREDICATE(vnegD, NegVD, D, 16, sve_fneg)
+
+// popcount vector
+
+instruct vpopcountI(vReg dst, vReg src) %{
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= 4);
+  match(Set dst (PopCountVI src));
+  format %{ "sve_cnt $dst, $src\t# vector (sve) (S)\n\t" %}
+  ins_encode %{
+    __ sve_cnt(as_FloatRegister($dst$$reg), __ S, ptrue, as_FloatRegister($src$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+dnl
+dnl REDUCE_ADD($1, $2, $3, $4, $5, $6, $7 )
+dnl REDUCE_ADD(insn_name, op_name, reg_dst, reg_src, size, elem_type, insn1)
+define(`REDUCE_ADD', `
+instruct $1($3 dst, $4 src1, vReg src2, vRegD tmp) %{
+  predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() >= 16 &&
+            ELEMENT_SHORT_CHAR($6, n->in(2)));
+  match(Set dst ($2 src1 src2));
+  effect(TEMP_DEF dst, TEMP tmp);
+  ins_cost(SVE_COST);
+  format %{ "sve_uaddv $tmp, $src2\t# vector (sve) ($5)\n\t"
+            "umov $dst, $tmp, $5, 0\n\t"
+            "$7 $dst, $dst, $src1\t # add reduction $5" %}
+  ins_encode %{
+    __ sve_uaddv(as_FloatRegister($tmp$$reg), __ $5,
+                 ptrue, as_FloatRegister($src2$$reg));
+    __ umov($dst$$Register, as_FloatRegister($tmp$$reg), __ $5, 0);
+    __ $7($dst$$Register, $dst$$Register, $src1$$Register);
+  %}
+  ins_pipe(pipe_slow);
+%}')dnl
+dnl
+dnl REDUCE_ADDF($1, $2, $3, $4 )
+dnl REDUCE_ADDF(insn_name, op_name, reg_dst,
size) +define(`REDUCE_ADDF', ` +instruct $1($3 src1_dst, vReg src2) %{ + predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() >= 16); + match(Set src1_dst ($2 src1_dst src2)); + ins_cost(SVE_COST); + format %{ "sve_fadda $src1_dst, $src1_dst, $src2\t# vector (sve) ($4)" %} + ins_encode %{ + __ sve_fadda(as_FloatRegister($src1_dst$$reg), __ $4, + ptrue, as_FloatRegister($src2$$reg)); + %} + ins_pipe(pipe_slow); +%}')dnl +dnl +// vector add reduction +REDUCE_ADD(reduce_addI, AddReductionVI, iRegINoSp, iRegIorL2I, S, T_INT, addw) +REDUCE_ADD(reduce_addL, AddReductionVL, iRegLNoSp, iRegL, D, T_LONG, add) +REDUCE_ADDF(reduce_addF, AddReductionVF, vRegF, S) +REDUCE_ADDF(reduce_addD, AddReductionVD, vRegD, D) + +dnl +dnl REDUCE_FMINMAX($1, $2, $3, $4, $5 ) +dnl REDUCE_FMINMAX(min_max, name_suffix, element_type, size, reg_src_dst) +define(`REDUCE_FMINMAX', ` +instruct reduce_$1$2($5 dst, $5 src1, vReg src2) %{ + predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == $3 && + n->in(2)->bottom_type()->is_vect()->length_in_bytes() >= 16); + match(Set dst (translit($1, `m', `M')ReductionV src1 src2)); + ins_cost(INSN_COST); + effect(TEMP_DEF dst); + format %{ "sve_f$1v $dst, $src2 # vector (sve) (S)\n\t" + "f$1s $dst, $dst, $src1\t # $1 reduction $2" %} + ins_encode %{ + __ sve_f$1v(as_FloatRegister($dst$$reg), __ $4, + ptrue, as_FloatRegister($src2$$reg)); + __ f`$1'translit($4, `SD', `sd')(as_FloatRegister($dst$$reg), as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg)); + %} + ins_pipe(pipe_slow); +%}')dnl +// vector max reduction +REDUCE_FMINMAX(max, F, T_FLOAT, S, vRegF) +REDUCE_FMINMAX(max, D, T_DOUBLE, D, vRegD) + +// vector min reduction +REDUCE_FMINMAX(min, F, T_FLOAT, S, vRegF) +REDUCE_FMINMAX(min, D, T_DOUBLE, D, vRegD) + +// vector Math.rint, floor, ceil + +instruct vroundD(vReg dst, vReg src, immI rmode) %{ + predicate(UseSVE > 0 && n->as_Vector()->length() >= 2 && + n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); + match(Set dst (RoundDoubleModeV src rmode)); + format %{ "sve_frint $dst, $src, $rmode\t# vector (sve) (D)" %} + ins_encode %{ + switch ($rmode$$constant) { + case RoundDoubleModeNode::rmode_rint: + __ sve_frintn(as_FloatRegister($dst$$reg), __ D, + ptrue, as_FloatRegister($src$$reg)); + break; + case RoundDoubleModeNode::rmode_floor: + __ sve_frintm(as_FloatRegister($dst$$reg), __ D, + ptrue, as_FloatRegister($src$$reg)); + break; + case RoundDoubleModeNode::rmode_ceil: + __ sve_frintp(as_FloatRegister($dst$$reg), __ D, + ptrue, as_FloatRegister($src$$reg)); + break; + } + %} + ins_pipe(pipe_slow); +%} +dnl +dnl REPLICATE($1, $2, $3, $4, $5 ) +dnl REPLICATE(insn_name, op_name, reg_src, size, min_vec_len) +define(`REPLICATE', ` +instruct $1(vReg dst, $3 src) %{ + predicate(UseSVE > 0 && n->as_Vector()->length() >= $5); + match(Set dst ($2 src)); + ins_cost(SVE_COST); + format %{ "sve_dup $dst, $src\t# vector (sve) ($4)" %} + ins_encode %{ + __ sve_dup(as_FloatRegister($dst$$reg), __ $4, as_Register($src$$reg)); + %} + ins_pipe(pipe_slow); +%}')dnl +dnl +dnl REPLICATE_IMM8($1, $2, $3, $4, $5 ) +dnl REPLICATE_IMM8(insn_name, op_name, imm_type, size, min_vec_len) +define(`REPLICATE_IMM8', ` +instruct $1(vReg dst, $3 con) %{ + predicate(UseSVE > 0 && n->as_Vector()->length() >= $5); + match(Set dst ($2 con)); + ins_cost(SVE_COST); + format %{ "sve_dup $dst, $con\t# vector (sve) ($4)" %} + ins_encode %{ + __ sve_dup(as_FloatRegister($dst$$reg), __ $4, $con$$constant); + %} + ins_pipe(pipe_slow); +%}')dnl 
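+dnl
+dnl Not part of the patch, for illustration only: what one invocation of the
+dnl macro above expands to when this file is run through m4 (the actual
+dnl REPLICATE_IMM8 invocations appear a little further down). Derived by
+dnl hand from the define; the generated aarch64_sve.ad is authoritative.
+dnl
+dnl   REPLICATE_IMM8(replicateB_imm8, ReplicateB, immI8, B, 16)  expands to:
+dnl
+dnl   instruct replicateB_imm8(vReg dst, immI8 con) %{
+dnl     predicate(UseSVE > 0 && n->as_Vector()->length() >= 16);
+dnl     match(Set dst (ReplicateB con));
+dnl     ins_cost(SVE_COST);
+dnl     format %{ "sve_dup $dst, $con\t# vector (sve) (B)" %}
+dnl     ins_encode %{
+dnl       __ sve_dup(as_FloatRegister($dst$$reg), __ B, $con$$constant);
+dnl     %}
+dnl     ins_pipe(pipe_slow);
+dnl   %}
+dnl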
+dnl +dnl FREPLICATE($1, $2, $3, $4, $5 ) +dnl FREPLICATE(insn_name, op_name, reg_src, size, min_vec_len) +define(`FREPLICATE', ` +instruct $1(vReg dst, $3 src) %{ + predicate(UseSVE > 0 && n->as_Vector()->length() >= $5); + match(Set dst ($2 src)); + ins_cost(SVE_COST); + format %{ "sve_cpy $dst, $src\t# vector (sve) ($4)" %} + ins_encode %{ + __ sve_cpy(as_FloatRegister($dst$$reg), __ $4, + ptrue, as_FloatRegister($src$$reg)); + %} + ins_pipe(pipe_slow); +%}')dnl + +// vector replicate +REPLICATE(replicateB, ReplicateB, iRegIorL2I, B, 16) +REPLICATE(replicateS, ReplicateS, iRegIorL2I, H, 8) +REPLICATE(replicateI, ReplicateI, iRegIorL2I, S, 4) +REPLICATE(replicateL, ReplicateL, iRegL, D, 2) + +REPLICATE_IMM8(replicateB_imm8, ReplicateB, immI8, B, 16) +REPLICATE_IMM8(replicateS_imm8, ReplicateS, immI8_shift8, H, 8) +REPLICATE_IMM8(replicateI_imm8, ReplicateI, immI8_shift8, S, 4) +REPLICATE_IMM8(replicateL_imm8, ReplicateL, immL8_shift8, D, 2) + +FREPLICATE(replicateF, ReplicateF, vRegF, S, 4) +FREPLICATE(replicateD, ReplicateD, vRegD, D, 2) +dnl +dnl VSHIFT_TRUE_PREDICATE($1, $2, $3, $4, $5 ) +dnl VSHIFT_TRUE_PREDICATE(insn_name, op_name, size, min_vec_len, insn) +define(`VSHIFT_TRUE_PREDICATE', ` +instruct $1(vReg dst, vReg shift) %{ + predicate(UseSVE > 0 && n->as_Vector()->length() >= $4); + match(Set dst ($2 dst shift)); + ins_cost(SVE_COST); + format %{ "$5 $dst, $dst, $shift\t# vector (sve) ($3)" %} + ins_encode %{ + __ $5(as_FloatRegister($dst$$reg), __ $3, + ptrue, as_FloatRegister($shift$$reg)); + %} + ins_pipe(pipe_slow); +%}')dnl +dnl +dnl VSHIFT_IMM_UNPREDICATE($1, $2, $3, $4, $5 ) +dnl VSHIFT_IMM_UNPREDICATE(insn_name, op_name, size, min_vec_len, insn) +define(`VSHIFT_IMM_UNPREDICATE', ` +instruct $1(vReg dst, vReg src, immI shift) %{ + predicate(UseSVE > 0 && n->as_Vector()->length() >= $4); + match(Set dst ($2 src shift)); + ins_cost(SVE_COST); + format %{ "$5 $dst, $src, $shift\t# vector (sve) ($3)" %} + ins_encode %{ + int con = (int)$shift$$constant;dnl +ifelse(eval(index(`$1', `vasr') == 0 || index(`$1', `vlsr') == 0), 1, ` + if (con == 0) { + __ sve_orr(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), + as_FloatRegister($src$$reg)); + return; + }')dnl +ifelse(eval(index(`$1', `vasr') == 0), 1, `ifelse(eval(index(`$3', `B') == 0), 1, ` + if (con >= 8) con = 7;')ifelse(eval(index(`$3', `H') == 0), 1, ` + if (con >= 16) con = 15;')')dnl +ifelse(eval((index(`$1', `vlsl') == 0 || index(`$1', `vlsr') == 0) && (index(`$3', `B') == 0 || index(`$3', `H') == 0)), 1, ` + if (con >= 8) { + __ sve_eor(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), + as_FloatRegister($src$$reg)); + return; + }') + __ $5(as_FloatRegister($dst$$reg), __ $3, + as_FloatRegister($src$$reg), con); + %} + ins_pipe(pipe_slow); +%}')dnl +dnl +dnl VSHIFT_COUNT($1, $2, $3, $4 ) +dnl VSHIFT_COUNT(insn_name, size, min_vec_len, type) +define(`VSHIFT_COUNT', ` +instruct $1(vReg dst, iRegIorL2I cnt) %{ + predicate(UseSVE > 0 && n->as_Vector()->length() >= $3 && + ELEMENT_SHORT_CHAR($4, n)); + match(Set dst (LShiftCntV cnt)); + match(Set dst (RShiftCntV cnt)); + format %{ "sve_dup $dst, $cnt\t# vector shift count (sve) ($2)" %} + ins_encode %{ + __ sve_dup(as_FloatRegister($dst$$reg), __ $2, as_Register($cnt$$reg)); + %} + ins_pipe(pipe_slow); +%}')dnl + +// vector shift +VSHIFT_TRUE_PREDICATE(vasrB, RShiftVB, B, 16, sve_asr) +VSHIFT_TRUE_PREDICATE(vasrS, RShiftVS, H, 8, sve_asr) +VSHIFT_TRUE_PREDICATE(vasrI, RShiftVI, S, 4, sve_asr) +VSHIFT_TRUE_PREDICATE(vasrL, RShiftVL, D, 2, sve_asr) 
+VSHIFT_TRUE_PREDICATE(vlslB, LShiftVB, B, 16, sve_lsl) +VSHIFT_TRUE_PREDICATE(vlslS, LShiftVS, H, 8, sve_lsl) +VSHIFT_TRUE_PREDICATE(vlslI, LShiftVI, S, 4, sve_lsl) +VSHIFT_TRUE_PREDICATE(vlslL, LShiftVL, D, 2, sve_lsl) +VSHIFT_TRUE_PREDICATE(vlsrB, URShiftVB, B, 16, sve_lsr) +VSHIFT_TRUE_PREDICATE(vlsrS, URShiftVS, H, 8, sve_lsr) +VSHIFT_TRUE_PREDICATE(vlsrI, URShiftVI, S, 4, sve_lsr) +VSHIFT_TRUE_PREDICATE(vlsrL, URShiftVL, D, 2, sve_lsr) +VSHIFT_IMM_UNPREDICATE(vasrB_imm, RShiftVB, B, 16, sve_asr) +VSHIFT_IMM_UNPREDICATE(vasrS_imm, RShiftVS, H, 8, sve_asr) +VSHIFT_IMM_UNPREDICATE(vasrI_imm, RShiftVI, S, 4, sve_asr) +VSHIFT_IMM_UNPREDICATE(vasrL_imm, RShiftVL, D, 2, sve_asr) +VSHIFT_IMM_UNPREDICATE(vlsrB_imm, URShiftVB, B, 16, sve_lsr) +VSHIFT_IMM_UNPREDICATE(vlsrS_imm, URShiftVS, H, 8, sve_lsr) +VSHIFT_IMM_UNPREDICATE(vlsrI_imm, URShiftVI, S, 4, sve_lsr) +VSHIFT_IMM_UNPREDICATE(vlsrL_imm, URShiftVL, D, 2, sve_lsr) +VSHIFT_IMM_UNPREDICATE(vlslB_imm, LShiftVB, B, 16, sve_lsl) +VSHIFT_IMM_UNPREDICATE(vlslS_imm, LShiftVS, H, 8, sve_lsl) +VSHIFT_IMM_UNPREDICATE(vlslI_imm, LShiftVI, S, 4, sve_lsl) +VSHIFT_IMM_UNPREDICATE(vlslL_imm, LShiftVL, D, 2, sve_lsl) +VSHIFT_COUNT(vshiftcntB, B, 16, T_BYTE) +VSHIFT_COUNT(vshiftcntS, H, 8, T_SHORT) +VSHIFT_COUNT(vshiftcntI, S, 4, T_INT) +VSHIFT_COUNT(vshiftcntL, D, 2, T_LONG) + +// vector sqrt +UNARY_OP_TRUE_PREDICATE(vsqrtF, SqrtVF, S, 16, sve_fsqrt) +UNARY_OP_TRUE_PREDICATE(vsqrtD, SqrtVD, D, 16, sve_fsqrt) + +// vector sub +BINARY_OP_UNPREDICATED(vsubB, SubVB, B, 16, sve_sub) +BINARY_OP_UNPREDICATED(vsubS, SubVS, H, 8, sve_sub) +BINARY_OP_UNPREDICATED(vsubI, SubVI, S, 4, sve_sub) +BINARY_OP_UNPREDICATED(vsubL, SubVL, D, 2, sve_sub) +BINARY_OP_UNPREDICATED(vsubF, SubVF, S, 4, sve_fsub) +BINARY_OP_UNPREDICATED(vsubD, SubVD, D, 2, sve_fsub) +
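+dnl
+dnl Cross-reference, not part of the patch: each invocation above expands to
+dnl one instruct in the generated aarch64_sve.ad. For example,
+dnl BINARY_OP_UNPREDICATED(vsubB, SubVB, B, 16, sve_sub) yields the vsubB
+dnl rule visible in the aarch64_sve.ad hunk earlier in this changeset:
+dnl
+dnl   instruct vsubB(vReg dst, vReg src1, vReg src2) %{
+dnl     predicate(UseSVE > 0 && n->as_Vector()->length() >= 16);
+dnl     match(Set dst (SubVB src1 src2));
+dnl     ...
+dnl     __ sve_sub(as_FloatRegister($dst$$reg), __ B, ...);
+dnl   %}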
--- a/src/hotspot/cpu/aarch64/assembler_aarch64.cpp Wed Sep 02 20:33:29 2020 -0700 +++ b/src/hotspot/cpu/aarch64/assembler_aarch64.cpp Tue Sep 08 15:28:06 2020 +0800 @@ -756,7 +756,7 @@ __ fcmge(v18, __ T2D, v19, v20); // fcmge v18.2D, v19.2D, v20.2D // SHA512SIMDOp - __ sha512h(v13, __ T2D, v4, v28); // sha512h q13, q4, v28.2D + __ sha512h(v13, __ T2D, v4, v28); // sha512h q13, q4, v28.2D __ sha512h2(v23, __ T2D, v21, v25); // sha512h2 q23, q21, v25.2D __ sha512su0(v24, __ T2D, v3); // sha512su0 v24.2D, v3.2D __ sha512su1(v23, __ T2D, v26, v23); // sha512su1 v23.2D, v26.2D, v23.2D @@ -777,6 +777,39 @@ __ mov(v1, __ T4H, 2, zr); // mov v1.h[2], wzr __ mov(v1, __ T8B, 3, zr); // mov v1.b[3], wzr __ ld1(v31, v0, __ T2D, Address(__ post(r1, r0))); // ld1 {v31.2d, v0.2d}, [x1], x0 + __ sve_cpy(z0, __ S, p0, v1); // mov z0.s, p0/m, s1 + __ sve_inc(r0, __ S); // incw x0 + __ sve_dec(r1, __ H); // dech x1 + __ sve_lsl(z0, __ B, z1, 7); // lsl z0.b, z1.b, #7 + __ sve_lsl(z21, __ H, z1, 15); // lsl z21.h, z1.h, #15 + __ sve_lsl(z0, __ S, z1, 31); // lsl z0.s, z1.s, #31 + __ sve_lsl(z0, __ D, z1, 63); // lsl z0.d, z1.d, #63 + __ sve_lsr(z0, __ B, z1, 7); // lsr z0.b, z1.b, #7 + __ sve_asr(z0, __ H, z11, 15); // asr z0.h, z11.h, #15 + __ sve_lsr(z30, __ S, z1, 31); // lsr z30.s, z1.s, #31 + __ sve_asr(z0, __ D, z1, 63); // asr z0.d, z1.d, #63 + __ sve_addvl(sp, r0, 31); // addvl sp, x0, #31 + __ sve_addpl(r1, sp, -32); // addpl x1, sp, -32 + __ sve_cntp(r8, __ B, p0, p1); // cntp x8, p0, p1.b + __ sve_dup(z0, __ B, 127); // dup z0.b, 127 + __ sve_dup(z1, __ H, -128); // dup z1.h, -128 + __ sve_dup(z2, __ S, 32512); // dup z2.s, 32512 + __ sve_dup(z7, __ D, -32768); // dup z7.d, -32768 + __ sve_ld1b(z0, __ B, p0, Address(sp)); // ld1b {z0.b}, p0/z, [sp] + __ sve_ld1h(z10, __ H, p1, Address(sp, -8)); // ld1h {z10.h}, p1/z, [sp, #-8, MUL VL] + __ sve_ld1w(z20, __ S, p2, Address(r0, 7)); // ld1w {z20.s}, p2/z, [x0, #7, MUL VL] + __ sve_ld1b(z30, __ B, p3, Address(sp, r8)); // ld1b {z30.b}, p3/z, [sp, x8] + __ sve_ld1w(z0, __ S, p4, Address(sp, r28)); // ld1w {z0.s}, p4/z, [sp, x28, LSL #2] + __ sve_ld1d(z11, __ D, p5, Address(r0, r1)); // ld1d {z11.d}, p5/z, [x0, x1, LSL #3] + __ sve_st1b(z22, __ B, p6, Address(sp)); // st1b {z22.b}, p6, [sp] + __ sve_st1b(z31, __ B, p7, Address(sp, -8)); // st1b {z31.b}, p7, [sp, #-8, MUL VL] + __ sve_st1w(z0, __ S, p1, Address(r0, 7)); // st1w {z0.s}, p1, [x0, #7, MUL VL] + __ sve_st1b(z0, __ B, p2, Address(sp, r1)); // st1b {z0.b}, p2, [sp, x1] + __ sve_st1h(z0, __ H, p3, Address(sp, r8)); // st1h {z0.h}, p3, [sp, x8, LSL #1] + __ sve_st1d(z0, __ D, p4, Address(r0, r18)); // st1d {z0.d}, p4, [x0, x18, LSL #3] + __ sve_ldr(z0, Address(sp)); // ldr z0, [sp] + __ sve_ldr(z31, Address(sp, -256)); // ldr z31, [sp, #-256, MUL VL] + __ sve_str(z8, Address(r8, 255)); // str z8, [x8, #255, MUL VL] // FloatImmediateOp __ fmovd(v0, 2.0); // fmov d0, #2.0 @@ -829,17 +862,17 @@ __ ldbica(Assembler::xword, r28, r30, sp); // ldclra x28, x30, [sp] __ ldeora(Assembler::xword, r1, r26, r28); // ldeora x1, x26, [x28] __ ldorra(Assembler::xword, r4, r30, r4); // ldseta x4, x30, [x4] - __ ldsmina(Assembler::xword, r6, r30, r26); // ldsmina x6, x30, [x26] - __ ldsmaxa(Assembler::xword, r18, r9, r8); // ldsmaxa x18, x9, [x8] - __ ldumina(Assembler::xword, r12, r0, r20); // ldumina x12, x0, [x20] - __ ldumaxa(Assembler::xword, r1, r24, r2); // ldumaxa x1, x24, [x2] + __ ldsmina(Assembler::xword, r6, r30, r26); // ldsmina x6, x30, [x26] + __ ldsmaxa(Assembler::xword, r18, r9, r8); // ldsmaxa 
x18, x9, [x8] + __ ldumina(Assembler::xword, r12, r0, r20); // ldumina x12, x0, [x20] + __ ldumaxa(Assembler::xword, r1, r24, r2); // ldumaxa x1, x24, [x2] // LSEOp __ swpal(Assembler::xword, r0, r9, r24); // swpal x0, x9, [x24] - __ ldaddal(Assembler::xword, r26, r16, r30); // ldaddal x26, x16, [x30] - __ ldbical(Assembler::xword, r3, r10, r23); // ldclral x3, x10, [x23] - __ ldeoral(Assembler::xword, r10, r4, r18); // ldeoral x10, x4, [x18] - __ ldorral(Assembler::xword, r2, r11, r8); // ldsetal x2, x11, [x8] + __ ldaddal(Assembler::xword, r26, r16, r30); // ldaddal x26, x16, [x30] + __ ldbical(Assembler::xword, r3, r10, r23); // ldclral x3, x10, [x23] + __ ldeoral(Assembler::xword, r10, r4, r18); // ldeoral x10, x4, [x18] + __ ldorral(Assembler::xword, r2, r11, r8); // ldsetal x2, x11, [x8] __ ldsminal(Assembler::xword, r10, r15, r17); // ldsminal x10, x15, [x17] __ ldsmaxal(Assembler::xword, r2, r10, r12); // ldsmaxal x2, x10, [x12] __ lduminal(Assembler::xword, r12, r15, r13); // lduminal x12, x15, [x13] @@ -851,10 +884,10 @@ __ ldbicl(Assembler::xword, r18, r21, r16); // ldclrl x18, x21, [x16] __ ldeorl(Assembler::xword, r18, r11, r21); // ldeorl x18, x11, [x21] __ ldorrl(Assembler::xword, r23, r12, r26); // ldsetl x23, x12, [x26] - __ ldsminl(Assembler::xword, r23, r28, r14); // ldsminl x23, x28, [x14] - __ ldsmaxl(Assembler::xword, r11, r24, r1); // ldsmaxl x11, x24, [x1] - __ lduminl(Assembler::xword, r12, zr, r10); // lduminl x12, xzr, [x10] - __ ldumaxl(Assembler::xword, r16, r7, r2); // ldumaxl x16, x7, [x2] + __ ldsminl(Assembler::xword, r23, r28, r14); // ldsminl x23, x28, [x14] + __ ldsmaxl(Assembler::xword, r11, r24, r1); // ldsmaxl x11, x24, [x1] + __ lduminl(Assembler::xword, r12, zr, r10); // lduminl x12, xzr, [x10] + __ ldumaxl(Assembler::xword, r16, r7, r2); // ldumaxl x16, x7, [x2] // LSEOp __ swp(Assembler::word, r3, r13, r19); // swp w3, w13, [x19] @@ -873,17 +906,17 @@ __ ldbica(Assembler::word, r29, r22, sp); // ldclra w29, w22, [sp] __ ldeora(Assembler::word, r19, zr, r5); // ldeora w19, wzr, [x5] __ ldorra(Assembler::word, r14, r18, sp); // ldseta w14, w18, [sp] - __ ldsmina(Assembler::word, r18, r27, r20); // ldsmina w18, w27, [x20] - __ ldsmaxa(Assembler::word, r16, r12, r11); // ldsmaxa w16, w12, [x11] - __ ldumina(Assembler::word, r9, r6, r30); // ldumina w9, w6, [x30] - __ ldumaxa(Assembler::word, r17, r27, r28); // ldumaxa w17, w27, [x28] + __ ldsmina(Assembler::word, r18, r27, r20); // ldsmina w18, w27, [x20] + __ ldsmaxa(Assembler::word, r16, r12, r11); // ldsmaxa w16, w12, [x11] + __ ldumina(Assembler::word, r9, r6, r30); // ldumina w9, w6, [x30] + __ ldumaxa(Assembler::word, r17, r27, r28); // ldumaxa w17, w27, [x28] // LSEOp __ swpal(Assembler::word, r30, r7, r10); // swpal w30, w7, [x10] - __ ldaddal(Assembler::word, r20, r10, r4); // ldaddal w20, w10, [x4] - __ ldbical(Assembler::word, r24, r17, r17); // ldclral w24, w17, [x17] - __ ldeoral(Assembler::word, r22, r3, r29); // ldeoral w22, w3, [x29] - __ ldorral(Assembler::word, r15, r22, r19); // ldsetal w15, w22, [x19] + __ ldaddal(Assembler::word, r20, r10, r4); // ldaddal w20, w10, [x4] + __ ldbical(Assembler::word, r24, r17, r17); // ldclral w24, w17, [x17] + __ ldeoral(Assembler::word, r22, r3, r29); // ldeoral w22, w3, [x29] + __ ldorral(Assembler::word, r15, r22, r19); // ldsetal w15, w22, [x19] __ ldsminal(Assembler::word, r19, r22, r2); // ldsminal w19, w22, [x2] __ ldsmaxal(Assembler::word, r15, r6, r12); // ldsmaxal w15, w6, [x12] __ lduminal(Assembler::word, r16, r11, r13); // lduminal 
w16, w11, [x13] @@ -895,697 +928,65 @@ __ ldbicl(Assembler::word, r13, r10, r21); // ldclrl w13, w10, [x21] __ ldeorl(Assembler::word, r29, r27, r12); // ldeorl w29, w27, [x12] __ ldorrl(Assembler::word, r27, r3, r1); // ldsetl w27, w3, [x1] - __ ldsminl(Assembler::word, zr, r24, r19); // ldsminl wzr, w24, [x19] - __ ldsmaxl(Assembler::word, r17, r9, r28); // ldsmaxl w17, w9, [x28] - __ lduminl(Assembler::word, r27, r15, r7); // lduminl w27, w15, [x7] - __ ldumaxl(Assembler::word, r21, r23, sp); // ldumaxl w21, w23, [sp] + __ ldsminl(Assembler::word, zr, r24, r19); // ldsminl wzr, w24, [x19] + __ ldsmaxl(Assembler::word, r17, r9, r28); // ldsmaxl w17, w9, [x28] + __ lduminl(Assembler::word, r27, r15, r7); // lduminl w27, w15, [x7] + __ ldumaxl(Assembler::word, r21, r23, sp); // ldumaxl w21, w23, [sp] + +// SVEVectorOp + __ sve_add(z24, __ D, z2, z30); // add z24.d, z2.d, z30.d + __ sve_sub(z18, __ S, z10, z22); // sub z18.s, z10.s, z22.s + __ sve_fadd(z2, __ D, z17, z0); // fadd z2.d, z17.d, z0.d + __ sve_fmul(z25, __ D, z22, z2); // fmul z25.d, z22.d, z2.d + __ sve_fsub(z12, __ D, z3, z27); // fsub z12.d, z3.d, z27.d + __ sve_abs(z28, __ B, p4, z26); // abs z28.b, p4/m, z26.b + __ sve_add(z9, __ B, p7, z18); // add z9.b, p7/m, z9.b, z18.b + __ sve_asr(z4, __ H, p1, z15); // asr z4.h, p1/m, z4.h, z15.h + __ sve_cnt(z22, __ D, p2, z2); // cnt z22.d, p2/m, z2.d + __ sve_lsl(z20, __ D, p7, z5); // lsl z20.d, p7/m, z20.d, z5.d + __ sve_lsr(z0, __ B, p4, z14); // lsr z0.b, p4/m, z0.b, z14.b + __ sve_mul(z25, __ S, p2, z27); // mul z25.s, p2/m, z25.s, z27.s + __ sve_neg(z26, __ S, p6, z24); // neg z26.s, p6/m, z24.s + __ sve_not(z0, __ S, p1, z6); // not z0.s, p1/m, z6.s + __ sve_smax(z0, __ B, p1, z15); // smax z0.b, p1/m, z0.b, z15.b + __ sve_smin(z9, __ H, p1, z5); // smin z9.h, p1/m, z9.h, z5.h + __ sve_sub(z27, __ S, p1, z20); // sub z27.s, p1/m, z27.s, z20.s + __ sve_fabs(z20, __ S, p1, z10); // fabs z20.s, p1/m, z10.s + __ sve_fadd(z16, __ D, p7, z6); // fadd z16.d, p7/m, z16.d, z6.d + __ sve_fdiv(z2, __ D, p3, z29); // fdiv z2.d, p3/m, z2.d, z29.d + __ sve_fmax(z2, __ D, p6, z22); // fmax z2.d, p6/m, z2.d, z22.d + __ sve_fmin(z14, __ D, p3, z27); // fmin z14.d, p3/m, z14.d, z27.d + __ sve_fmul(z23, __ S, p1, z2); // fmul z23.s, p1/m, z23.s, z2.s + __ sve_fneg(z10, __ D, p4, z10); // fneg z10.d, p4/m, z10.d + __ sve_frintm(z22, __ D, p3, z3); // frintm z22.d, p3/m, z3.d + __ sve_frintn(z16, __ D, p1, z1); // frintn z16.d, p1/m, z1.d + __ sve_frintp(z16, __ S, p4, z12); // frintp z16.s, p4/m, z12.s + __ sve_fsqrt(z12, __ S, p0, z16); // fsqrt z12.s, p0/m, z16.s + __ sve_fsub(z20, __ S, p5, z5); // fsub z20.s, p5/m, z20.s, z5.s + __ sve_fmla(z7, __ D, p4, z12, z27); // fmla z7.d, p4/m, z12.d, z27.d + __ sve_fmls(z16, __ S, p1, z2, z28); // fmls z16.s, p1/m, z2.s, z28.s + __ sve_fnmla(z4, __ S, p1, z17, z19); // fnmla z4.s, p1/m, z17.s, z19.s + __ sve_fnmls(z12, __ D, p5, z8, z24); // fnmls z12.d, p5/m, z8.d, z24.d + __ sve_mla(z18, __ B, p0, z10, z23); // mla z18.b, p0/m, z10.b, z23.b + __ sve_mls(z19, __ B, p7, z13, z16); // mls z19.b, p7/m, z13.b, z16.b + __ sve_and(z0, z7, z14); // and z0.d, z7.d, z14.d + __ sve_eor(z25, z8, z10); // eor z25.d, z8.d, z10.d + __ sve_orr(z20, z22, z27); // orr z20.d, z22.d, z27.d + +// SVEReductionOp + __ sve_andv(v3, __ S, p3, z18); // andv s3, p3, z18.s + __ sve_orv(v7, __ B, p1, z28); // orv b7, p1, z28.b + __ sve_eorv(v0, __ S, p2, z16); // eorv s0, p2, z16.s + __ sve_smaxv(v22, __ H, p1, z15); // smaxv h22, p1, z15.h + __ sve_sminv(v22, __ B, p2, 
z25); // sminv b22, p2, z25.b + __ sve_fminv(v30, __ D, p4, z13); // fminv d30, p4, z13.d + __ sve_fmaxv(v11, __ S, p0, z13); // fmaxv s11, p0, z13.s + __ sve_fadda(v20, __ S, p4, z25); // fadda s20, p4, s20, z25.s + __ sve_uaddv(v4, __ H, p1, z17); // uaddv d4, p1, z17.h __ bind(forth); /* - -aarch64ops.o: file format elf64-littleaarch64 - - -Disassembly of section .text: - -0000000000000000 <back>: - 0: 8b0d82fa add x26, x23, x13, lsl #32 - 4: cb49970c sub x12, x24, x9, lsr #37 - 8: ab889dfc adds x28, x15, x8, asr #39 - c: eb9ee787 subs x7, x28, x30, asr #57 - 10: 0b9b3ec9 add w9, w22, w27, asr #15 - 14: 4b9279a3 sub w3, w13, w18, asr #30 - 18: 2b88474e adds w14, w26, w8, asr #17 - 1c: 6b8c56c0 subs w0, w22, w12, asr #21 - 20: 8a1a51e0 and x0, x15, x26, lsl #20 - 24: aa11f4ba orr x26, x5, x17, lsl #61 - 28: ca0281b8 eor x24, x13, x2, lsl #32 - 2c: ea918c7c ands x28, x3, x17, asr #35 - 30: 0a5d4a19 and w25, w16, w29, lsr #18 - 34: 2a4b264d orr w13, w18, w11, lsr #9 - 38: 4a523ca5 eor w5, w5, w18, lsr #15 - 3c: 6a9b6ae2 ands w2, w23, w27, asr #26 - 40: 8a70b79b bic x27, x28, x16, lsr #45 - 44: aaba9728 orn x8, x25, x26, asr #37 - 48: ca6dfe3d eon x29, x17, x13, lsr #63 - 4c: ea627f1c bics x28, x24, x2, lsr #31 - 50: 0aa70f53 bic w19, w26, w7, asr #3 - 54: 2aaa0f06 orn w6, w24, w10, asr #3 - 58: 4a6176a4 eon w4, w21, w1, lsr #29 - 5c: 6a604eb0 bics w16, w21, w0, lsr #19 - 60: 1105ed91 add w17, w12, #0x17b - 64: 3100583e adds w30, w1, #0x16 - 68: 5101f8bd sub w29, w5, #0x7e - 6c: 710f0306 subs w6, w24, #0x3c0 - 70: 9101a1a0 add x0, x13, #0x68 - 74: b10a5cc8 adds x8, x6, #0x297 - 78: d10810aa sub x10, x5, #0x204 - 7c: f10fd061 subs x1, x3, #0x3f4 - 80: 120cb166 and w6, w11, #0xfff1fff1 - 84: 321764bc orr w28, w5, #0xfffffe07 - 88: 52174681 eor w1, w20, #0x7fffe00 - 8c: 720c0247 ands w7, w18, #0x100000 - 90: 9241018e and x14, x12, #0x8000000000000000 - 94: b25a2969 orr x9, x11, #0x1ffc000000000 - 98: d278b411 eor x17, x0, #0x3fffffffffff00 - 9c: f26aad01 ands x1, x8, #0xffffffffffc00003 - a0: 14000000 b a0 <back+0xa0> - a4: 17ffffd7 b 0 <back> - a8: 14000279 b a8c <forth> - ac: 94000000 bl ac <back+0xac> - b0: 97ffffd4 bl 0 <back> - b4: 94000276 bl a8c <forth> - b8: 3400000a cbz w10, b8 <back+0xb8> - bc: 34fffa2a cbz w10, 0 <back> - c0: 34004e6a cbz w10, a8c <forth> - c4: 35000008 cbnz w8, c4 <back+0xc4> - c8: 35fff9c8 cbnz w8, 0 <back> - cc: 35004e08 cbnz w8, a8c <forth> - d0: b400000b cbz x11, d0 <back+0xd0> - d4: b4fff96b cbz x11, 0 <back> - d8: b4004dab cbz x11, a8c <forth> - dc: b500001d cbnz x29, dc <back+0xdc> - e0: b5fff91d cbnz x29, 0 <back> - e4: b5004d5d cbnz x29, a8c <forth> - e8: 10000013 adr x19, e8 <back+0xe8> - ec: 10fff8b3 adr x19, 0 <back> - f0: 10004cf3 adr x19, a8c <forth> - f4: 90000013 adrp x19, 0 <back> - f8: 36300016 tbz w22, #6, f8 <back+0xf8> - fc: 3637f836 tbz w22, #6, 0 <back> - 100: 36304c76 tbz w22, #6, a8c <forth> - 104: 3758000c tbnz w12, #11, 104 <back+0x104> - 108: 375ff7cc tbnz w12, #11, 0 <back> - 10c: 37584c0c tbnz w12, #11, a8c <forth> - 110: 128313a0 mov w0, #0xffffe762 // #-6302 - 114: 528a32c7 mov w7, #0x5196 // #20886 - 118: 7289173b movk w27, #0x48b9 - 11c: 92ab3acc mov x12, #0xffffffffa629ffff // #-1507196929 - 120: d2a0bf94 mov x20, #0x5fc0000 // #100401152 - 124: f2c285e8 movk x8, #0x142f, lsl #32 - 128: 9358722f sbfx x15, x17, #24, #5 - 12c: 330e652f bfxil w15, w9, #14, #12 - 130: 53067f3b lsr w27, w25, #6 - 134: 93577c53 sbfx x19, x2, #23, #9 - 138: b34a1aac bfi x12, x21, #54, #7 - 13c: d35a4016 ubfiz x22, x0, #38, #17 - 140: 13946c63 extr w3, 
w3, w20, #27 - 144: 93c3dbc8 extr x8, x30, x3, #54 - 148: 54000000 b.eq 148 <back+0x148> // b.none - 14c: 54fff5a0 b.eq 0 <back> // b.none - 150: 540049e0 b.eq a8c <forth> // b.none - 154: 54000001 b.ne 154 <back+0x154> // b.any - 158: 54fff541 b.ne 0 <back> // b.any - 15c: 54004981 b.ne a8c <forth> // b.any - 160: 54000002 b.cs 160 <back+0x160> // b.hs, b.nlast - 164: 54fff4e2 b.cs 0 <back> // b.hs, b.nlast - 168: 54004922 b.cs a8c <forth> // b.hs, b.nlast - 16c: 54000002 b.cs 16c <back+0x16c> // b.hs, b.nlast - 170: 54fff482 b.cs 0 <back> // b.hs, b.nlast - 174: 540048c2 b.cs a8c <forth> // b.hs, b.nlast - 178: 54000003 b.cc 178 <back+0x178> // b.lo, b.ul, b.last - 17c: 54fff423 b.cc 0 <back> // b.lo, b.ul, b.last - 180: 54004863 b.cc a8c <forth> // b.lo, b.ul, b.last - 184: 54000003 b.cc 184 <back+0x184> // b.lo, b.ul, b.last - 188: 54fff3c3 b.cc 0 <back> // b.lo, b.ul, b.last - 18c: 54004803 b.cc a8c <forth> // b.lo, b.ul, b.last - 190: 54000004 b.mi 190 <back+0x190> // b.first - 194: 54fff364 b.mi 0 <back> // b.first - 198: 540047a4 b.mi a8c <forth> // b.first - 19c: 54000005 b.pl 19c <back+0x19c> // b.nfrst - 1a0: 54fff305 b.pl 0 <back> // b.nfrst - 1a4: 54004745 b.pl a8c <forth> // b.nfrst - 1a8: 54000006 b.vs 1a8 <back+0x1a8> - 1ac: 54fff2a6 b.vs 0 <back> - 1b0: 540046e6 b.vs a8c <forth> - 1b4: 54000007 b.vc 1b4 <back+0x1b4> - 1b8: 54fff247 b.vc 0 <back> - 1bc: 54004687 b.vc a8c <forth> - 1c0: 54000008 b.hi 1c0 <back+0x1c0> // b.pmore - 1c4: 54fff1e8 b.hi 0 <back> // b.pmore - 1c8: 54004628 b.hi a8c <forth> // b.pmore - 1cc: 54000009 b.ls 1cc <back+0x1cc> // b.plast - 1d0: 54fff189 b.ls 0 <back> // b.plast - 1d4: 540045c9 b.ls a8c <forth> // b.plast - 1d8: 5400000a b.ge 1d8 <back+0x1d8> // b.tcont - 1dc: 54fff12a b.ge 0 <back> // b.tcont - 1e0: 5400456a b.ge a8c <forth> // b.tcont - 1e4: 5400000b b.lt 1e4 <back+0x1e4> // b.tstop - 1e8: 54fff0cb b.lt 0 <back> // b.tstop - 1ec: 5400450b b.lt a8c <forth> // b.tstop - 1f0: 5400000c b.gt 1f0 <back+0x1f0> - 1f4: 54fff06c b.gt 0 <back> - 1f8: 540044ac b.gt a8c <forth> - 1fc: 5400000d b.le 1fc <back+0x1fc> - 200: 54fff00d b.le 0 <back> - 204: 5400444d b.le a8c <forth> - 208: 5400000e b.al 208 <back+0x208> - 20c: 54ffefae b.al 0 <back> - 210: 540043ee b.al a8c <forth> - 214: 5400000f b.nv 214 <back+0x214> - 218: 54ffef4f b.nv 0 <back> - 21c: 5400438f b.nv a8c <forth> - 220: d40658e1 svc #0x32c7 - 224: d4014d22 hvc #0xa69 - 228: d4046543 smc #0x232a - 22c: d4273f60 brk #0x39fb - 230: d44cad80 hlt #0x656c - 234: d503201f nop - 238: d69f03e0 eret - 23c: d6bf03e0 drps - 240: d5033fdf isb - 244: d5033e9f dsb st - 248: d50332bf dmb oshst - 24c: d61f0200 br x16 - 250: d63f0280 blr x20 - 254: c80a7d1b stxr w10, x27, [x8] - 258: c800fea1 stlxr w0, x1, [x21] - 25c: c85f7fb1 ldxr x17, [x29] - 260: c85fff9d ldaxr x29, [x28] - 264: c89ffee1 stlr x1, [x23] - 268: c8dffe95 ldar x21, [x20] - 26c: 88167e7b stxr w22, w27, [x19] - 270: 880bfcd0 stlxr w11, w16, [x6] - 274: 885f7c12 ldxr w18, [x0] - 278: 885ffd44 ldaxr w4, [x10] - 27c: 889ffed8 stlr w24, [x22] - 280: 88dffe6a ldar w10, [x19] - 284: 48017fc5 stxrh w1, w5, [x30] - 288: 4808fe2c stlxrh w8, w12, [x17] - 28c: 485f7dc9 ldxrh w9, [x14] - 290: 485ffc27 ldaxrh w7, [x1] - 294: 489ffe05 stlrh w5, [x16] - 298: 48dffd82 ldarh w2, [x12] - 29c: 080a7c6c stxrb w10, w12, [x3] - 2a0: 081cff4e stlxrb w28, w14, [x26] - 2a4: 085f7d5e ldxrb w30, [x10] - 2a8: 085ffeae ldaxrb w14, [x21] - 2ac: 089ffd2d stlrb w13, [x9] - 2b0: 08dfff76 ldarb w22, [x27] - 2b4: c87f4d7c ldxp x28, x19, [x11] - 2b8: c87fcc5e ldaxp x30, x19, 
[x2] - 2bc: c8220417 stxp w2, x23, x1, [x0] - 2c0: c82cb5f0 stlxp w12, x16, x13, [x15] - 2c4: 887f55b2 ldxp w18, w21, [x13] - 2c8: 887ff90b ldaxp w11, w30, [x8] - 2cc: 88382c2d stxp w24, w13, w11, [x1] - 2d0: 883aedb5 stlxp w26, w21, w27, [x13] - 2d4: f819928b stur x11, [x20, #-103] - 2d8: b803e21c stur w28, [x16, #62] - 2dc: 381f713b sturb w27, [x9, #-9] - 2e0: 781ce322 sturh w2, [x25, #-50] - 2e4: f850f044 ldur x4, [x2, #-241] - 2e8: b85e129e ldur w30, [x20, #-31] - 2ec: 385e92f2 ldurb w18, [x23, #-23] - 2f0: 785ff35d ldurh w29, [x26, #-1] - 2f4: 39801921 ldrsb x1, [x9, #6] - 2f8: 7881318b ldursh x11, [x12, #19] - 2fc: 78dce02b ldursh w11, [x1, #-50] - 300: b8829313 ldursw x19, [x24, #41] - 304: fc45f318 ldur d24, [x24, #95] - 308: bc5d50af ldur s15, [x5, #-43] - 30c: fc001375 stur d21, [x27, #1] - 310: bc1951b7 stur s23, [x13, #-107] - 314: f8008c0b str x11, [x0, #8]! - 318: b801dc03 str w3, [x0, #29]! - 31c: 38009dcb strb w11, [x14, #9]! - 320: 781fdf1d strh w29, [x24, #-3]! - 324: f8570e2d ldr x13, [x17, #-144]! - 328: b85faecc ldr w12, [x22, #-6]! - 32c: 385f6d8d ldrb w13, [x12, #-10]! - 330: 785ebea0 ldrh w0, [x21, #-21]! - 334: 38804cf7 ldrsb x23, [x7, #4]! - 338: 789cbce3 ldrsh x3, [x7, #-53]! - 33c: 78df9cbc ldrsh w28, [x5, #-7]! - 340: b89eed38 ldrsw x24, [x9, #-18]! - 344: fc40cd6e ldr d14, [x11, #12]! - 348: bc5bdd93 ldr s19, [x12, #-67]! - 34c: fc103c14 str d20, [x0, #-253]! - 350: bc040c08 str s8, [x0, #64]! - 354: f81a2784 str x4, [x28], #-94 - 358: b81ca4ec str w12, [x7], #-54 - 35c: 381e855b strb w27, [x10], #-24 - 360: 7801b506 strh w6, [x8], #27 - 364: f853654e ldr x14, [x10], #-202 - 368: b85d74b0 ldr w16, [x5], #-41 - 36c: 384095c2 ldrb w2, [x14], #9 - 370: 785ec5bc ldrh w28, [x13], #-20 - 374: 389e15a9 ldrsb x9, [x13], #-31 - 378: 789dc703 ldrsh x3, [x24], #-36 - 37c: 78c06474 ldrsh w20, [x3], #6 - 380: b89ff667 ldrsw x7, [x19], #-1 - 384: fc57e51e ldr d30, [x8], #-130 - 388: bc4155f9 ldr s25, [x15], #21 - 38c: fc05a6ee str d14, [x23], #90 - 390: bc1df408 str s8, [x0], #-33 - 394: f835da4a str x10, [x18, w21, sxtw #3] - 398: b836d9a4 str w4, [x13, w22, sxtw #2] - 39c: 3833580d strb w13, [x0, w19, uxtw #0] - 3a0: 7826cb6c strh w12, [x27, w6, sxtw] - 3a4: f8706900 ldr x0, [x8, x16] - 3a8: b87ae880 ldr w0, [x4, x26, sxtx] - 3ac: 3865db2e ldrb w14, [x25, w5, sxtw #0] - 3b0: 78724889 ldrh w9, [x4, w18, uxtw] - 3b4: 38a7789b ldrsb x27, [x4, x7, lsl #0] - 3b8: 78beca2f ldrsh x15, [x17, w30, sxtw] - 3bc: 78f6c810 ldrsh w16, [x0, w22, sxtw] - 3c0: b8bef956 ldrsw x22, [x10, x30, sxtx #2] - 3c4: fc6afabd ldr d29, [x21, x10, sxtx #3] - 3c8: bc734963 ldr s3, [x11, w19, uxtw] - 3cc: fc3d5b8d str d13, [x28, w29, uxtw #3] - 3d0: bc25fbb7 str s23, [x29, x5, sxtx #2] - 3d4: f9189d05 str x5, [x8, #12600] - 3d8: b91ecb1d str w29, [x24, #7880] - 3dc: 39187a33 strb w19, [x17, #1566] - 3e0: 791f226d strh w13, [x19, #3984] - 3e4: f95aa2f3 ldr x19, [x23, #13632] - 3e8: b9587bb7 ldr w23, [x29, #6264] - 3ec: 395f7176 ldrb w22, [x11, #2012] - 3f0: 795d9143 ldrh w3, [x10, #3784] - 3f4: 399e7e08 ldrsb x8, [x16, #1951] - 3f8: 799a2697 ldrsh x23, [x20, #3346] - 3fc: 79df3422 ldrsh w2, [x1, #3994] - 400: b99c2624 ldrsw x4, [x17, #7204] - 404: fd5c2374 ldr d20, [x27, #14400] - 408: bd5fa1d9 ldr s25, [x14, #8096] - 40c: fd1d595a str d26, [x10, #15024] - 410: bd1b1869 str s9, [x3, #6936] - 414: 580033db ldr x27, a8c <forth> - 418: 1800000b ldr w11, 418 <back+0x418> - 41c: f8945060 prfum pldl1keep, [x3, #-187] - 420: d8000000 prfm pldl1keep, 420 <back+0x420> - 424: f8ae6ba0 prfm pldl1keep, [x29, x14] - 
428: f99a0080 prfm pldl1keep, [x4, #13312] - 42c: 1a070035 adc w21, w1, w7 - 430: 3a0700a8 adcs w8, w5, w7 - 434: 5a0e0367 sbc w7, w27, w14 - 438: 7a11009b sbcs w27, w4, w17 - 43c: 9a000380 adc x0, x28, x0 - 440: ba1e030c adcs x12, x24, x30 - 444: da0f0320 sbc x0, x25, x15 - 448: fa030301 sbcs x1, x24, x3 - 44c: 0b340b12 add w18, w24, w20, uxtb #2 - 450: 2b2a278d adds w13, w28, w10, uxth #1 - 454: cb22aa0f sub x15, x16, w2, sxth #2 - 458: 6b2d29bd subs w29, w13, w13, uxth #2 - 45c: 8b2cce8c add x12, x20, w12, sxtw #3 - 460: ab2b877e adds x30, x27, w11, sxtb #1 - 464: cb21c8ee sub x14, x7, w1, sxtw #2 - 468: eb3ba47d subs x29, x3, w27, sxth #1 - 46c: 3a4d400e ccmn w0, w13, #0xe, mi // mi = first - 470: 7a5232c6 ccmp w22, w18, #0x6, cc // cc = lo, ul, last - 474: ba5e624e ccmn x18, x30, #0xe, vs - 478: fa53814c ccmp x10, x19, #0xc, hi // hi = pmore - 47c: 3a52d8c2 ccmn w6, #0x12, #0x2, le - 480: 7a4d8924 ccmp w9, #0xd, #0x4, hi // hi = pmore - 484: ba4b3aab ccmn x21, #0xb, #0xb, cc // cc = lo, ul, last - 488: fa4d7882 ccmp x4, #0xd, #0x2, vc - 48c: 1a96804c csel w12, w2, w22, hi // hi = pmore - 490: 1a912618 csinc w24, w16, w17, cs // cs = hs, nlast - 494: 5a90b0e6 csinv w6, w7, w16, lt // lt = tstop - 498: 5a96976b csneg w11, w27, w22, ls // ls = plast - 49c: 9a9db06a csel x10, x3, x29, lt // lt = tstop - 4a0: 9a9b374c csinc x12, x26, x27, cc // cc = lo, ul, last - 4a4: da95c14f csinv x15, x10, x21, gt - 4a8: da89c6fe csneg x30, x23, x9, gt - 4ac: 5ac0015e rbit w30, w10 - 4b0: 5ac005fd rev16 w29, w15 - 4b4: 5ac00bdd rev w29, w30 - 4b8: 5ac012b9 clz w25, w21 - 4bc: 5ac01404 cls w4, w0 - 4c0: dac002b2 rbit x18, x21 - 4c4: dac0061d rev16 x29, x16 - 4c8: dac00a95 rev32 x21, x20 - 4cc: dac00e66 rev x6, x19 - 4d0: dac0107e clz x30, x3 - 4d4: dac01675 cls x21, x19 - 4d8: 1ac00b0b udiv w11, w24, w0 - 4dc: 1ace0f3b sdiv w27, w25, w14 - 4e0: 1ad221c3 lsl w3, w14, w18 - 4e4: 1ad825e7 lsr w7, w15, w24 - 4e8: 1ad92a3c asr w28, w17, w25 - 4ec: 1adc2f42 ror w2, w26, w28 - 4f0: 9ada0b25 udiv x5, x25, x26 - 4f4: 9ad20e1b sdiv x27, x16, x18 - 4f8: 9acc22a6 lsl x6, x21, x12 - 4fc: 9acc2480 lsr x0, x4, x12 - 500: 9adc2a3b asr x27, x17, x28 - 504: 9ad22c5c ror x28, x2, x18 - 508: 9bce7dea umulh x10, x15, x14 - 50c: 9b597c6e smulh x14, x3, x25 - 510: 1b0e166f madd w15, w19, w14, w5 - 514: 1b1ae490 msub w16, w4, w26, w25 - 518: 9b023044 madd x4, x2, x2, x12 - 51c: 9b089e3d msub x29, x17, x8, x7 - 520: 9b391083 smaddl x3, w4, w25, x4 - 524: 9b24c73a smsubl x26, w25, w4, x17 - 528: 9bb15f40 umaddl x0, w26, w17, x23 - 52c: 9bbcc6af umsubl x15, w21, w28, x17 - 530: 1e23095b fmul s27, s10, s3 - 534: 1e3918e0 fdiv s0, s7, s25 - 538: 1e2f28c9 fadd s9, s6, s15 - 53c: 1e2a39fd fsub s29, s15, s10 - 540: 1e270a22 fmul s2, s17, s7 - 544: 1e77096b fmul d11, d11, d23 - 548: 1e771ba7 fdiv d7, d29, d23 - 54c: 1e6b2b6e fadd d14, d27, d11 - 550: 1e78388b fsub d11, d4, d24 - 554: 1e6e09ec fmul d12, d15, d14 - 558: 1f1c3574 fmadd s20, s11, s28, s13 - 55c: 1f17f98b fmsub s11, s12, s23, s30 - 560: 1f2935da fnmadd s26, s14, s9, s13 - 564: 1f2574ea fnmadd s10, s7, s5, s29 - 568: 1f4b306f fmadd d15, d3, d11, d12 - 56c: 1f5ec7cf fmsub d15, d30, d30, d17 - 570: 1f6f3e93 fnmadd d19, d20, d15, d15 - 574: 1f6226a9 fnmadd d9, d21, d2, d9 - 578: 1e2040fb fmov s27, s7 - 57c: 1e20c3dd fabs s29, s30 - 580: 1e214031 fneg s17, s1 - 584: 1e21c0c2 fsqrt s2, s6 - 588: 1e22c06a fcvt d10, s3 - 58c: 1e604178 fmov d24, d11 - 590: 1e60c027 fabs d7, d1 - 594: 1e61400b fneg d11, d0 - 598: 1e61c243 fsqrt d3, d18 - 59c: 1e6240dc fcvt s28, d6 - 5a0: 1e3800d6 
fcvtzs w22, s6 - 5a4: 9e380360 fcvtzs x0, s27 - 5a8: 1e78005a fcvtzs w26, d2 - 5ac: 9e7800e5 fcvtzs x5, d7 - 5b0: 1e22017c scvtf s28, w11 - 5b4: 9e2201b9 scvtf s25, x13 - 5b8: 1e6202eb scvtf d11, w23 - 5bc: 9e620113 scvtf d19, x8 - 5c0: 1e2602b2 fmov w18, s21 - 5c4: 9e660299 fmov x25, d20 - 5c8: 1e270253 fmov s19, w18 - 5cc: 9e6703a2 fmov d2, x29 - 5d0: 1e2822c0 fcmp s22, s8 - 5d4: 1e7322a0 fcmp d21, d19 - 5d8: 1e202288 fcmp s20, #0.0 - 5dc: 1e602168 fcmp d11, #0.0 - 5e0: 293c19f4 stp w20, w6, [x15, #-32] - 5e4: 2966387b ldp w27, w14, [x3, #-208] - 5e8: 69762971 ldpsw x17, x10, [x11, #-80] - 5ec: a9041dc7 stp x7, x7, [x14, #64] - 5f0: a9475c0c ldp x12, x23, [x0, #112] - 5f4: 29b61ccd stp w13, w7, [x6, #-80]! - 5f8: 29ee405e ldp w30, w16, [x2, #-144]! - 5fc: 69ee0744 ldpsw x4, x1, [x26, #-144]! - 600: a9843977 stp x23, x14, [x11, #64]! - 604: a9f46ebd ldp x29, x27, [x21, #-192]! - 608: 28ba16b6 stp w22, w5, [x21], #-48 - 60c: 28fc44db ldp w27, w17, [x6], #-32 - 610: 68f61831 ldpsw x17, x6, [x1], #-80 - 614: a8b352ad stp x13, x20, [x21], #-208 - 618: a8c56d5e ldp x30, x27, [x10], #80 - 61c: 28024565 stnp w5, w17, [x11, #16] - 620: 2874134e ldnp w14, w4, [x26, #-96] - 624: a8027597 stnp x23, x29, [x12, #32] - 628: a87b1aa0 ldnp x0, x6, [x21, #-80] - 62c: 0c40734f ld1 {v15.8b}, [x26] - 630: 4cdfa177 ld1 {v23.16b, v24.16b}, [x11], #32 - 634: 0cc76ee8 ld1 {v8.1d-v10.1d}, [x23], x7 - 638: 4cdf2733 ld1 {v19.8h-v22.8h}, [x25], #64 - 63c: 0d40c23d ld1r {v29.8b}, [x17] - 640: 4ddfcaf8 ld1r {v24.4s}, [x23], #4 - 644: 0dd9ccaa ld1r {v10.1d}, [x5], x25 - 648: 4c408d52 ld2 {v18.2d, v19.2d}, [x10] - 64c: 0cdf85ec ld2 {v12.4h, v13.4h}, [x15], #16 - 650: 4d60c259 ld2r {v25.16b, v26.16b}, [x18] - 654: 0dffcbc1 ld2r {v1.2s, v2.2s}, [x30], #8 - 658: 4de9ce50 ld2r {v16.2d, v17.2d}, [x18], x9 - 65c: 4cc24999 ld3 {v25.4s-v27.4s}, [x12], x2 - 660: 0c404a7a ld3 {v26.2s-v28.2s}, [x19] - 664: 4d40e6af ld3r {v15.8h-v17.8h}, [x21] - 668: 4ddfe9b9 ld3r {v25.4s-v27.4s}, [x13], #12 - 66c: 0dddef8e ld3r {v14.1d-v16.1d}, [x28], x29 - 670: 4cdf07b1 ld4 {v17.8h-v20.8h}, [x29], #64 - 674: 0cc000fb ld4 {v27.8b-v30.8b}, [x7], x0 - 678: 0d60e258 ld4r {v24.8b-v27.8b}, [x18] - 67c: 0dffe740 ld4r {v0.4h-v3.4h}, [x26], #8 - 680: 0de2eb2c ld4r {v12.2s-v15.2s}, [x25], x2 - 684: 0e31baf6 addv b22, v23.8b - 688: 4e31bb9b addv b27, v28.16b - 68c: 0e71b8a4 addv h4, v5.4h - 690: 4e71b907 addv h7, v8.8h - 694: 4eb1b8e6 addv s6, v7.4s - 698: 0e30a841 smaxv b1, v2.8b - 69c: 4e30ab7a smaxv b26, v27.16b - 6a0: 0e70aa0f smaxv h15, v16.4h - 6a4: 4e70a862 smaxv h2, v3.8h - 6a8: 4eb0a9cd smaxv s13, v14.4s - 6ac: 6e30f9cd fmaxv s13, v14.4s - 6b0: 0e31ab38 sminv b24, v25.8b - 6b4: 4e31ab17 sminv b23, v24.16b - 6b8: 0e71a8a4 sminv h4, v5.4h - 6bc: 4e71aa93 sminv h19, v20.8h - 6c0: 4eb1aa0f sminv s15, v16.4s - 6c4: 6eb0f820 fminv s0, v1.4s - 6c8: 0e20b8a4 abs v4.8b, v5.8b - 6cc: 4e20bab4 abs v20.16b, v21.16b - 6d0: 0e60b98b abs v11.4h, v12.4h - 6d4: 4e60bbdd abs v29.8h, v30.8h - 6d8: 0ea0ba0f abs v15.2s, v16.2s - 6dc: 4ea0bad5 abs v21.4s, v22.4s - 6e0: 4ee0b8a4 abs v4.2d, v5.2d - 6e4: 0ea0f9ee fabs v14.2s, v15.2s - 6e8: 4ea0faf6 fabs v22.4s, v23.4s - 6ec: 4ee0fb59 fabs v25.2d, v26.2d - 6f0: 2ea0f8e6 fneg v6.2s, v7.2s - 6f4: 6ea0f9ac fneg v12.4s, v13.4s - 6f8: 6ee0f9ee fneg v14.2d, v15.2d - 6fc: 2ea1f9cd fsqrt v13.2s, v14.2s - 700: 6ea1f9ee fsqrt v14.4s, v15.4s - 704: 6ee1f949 fsqrt v9.2d, v10.2d - 708: 2e205b59 mvn v25.8b, v26.8b - 70c: 6e205bbc mvn v28.16b, v29.16b - 710: 0e2c1d6a and v10.8b, v11.8b, v12.8b - 714: 4e351e93 and v19.16b, v20.16b, v21.16b 
- 718: 0ead1d8b orr v11.8b, v12.8b, v13.8b - 71c: 4eb31e51 orr v17.16b, v18.16b, v19.16b - 720: 2e371ed5 eor v21.8b, v22.8b, v23.8b - 724: 6e311e0f eor v15.16b, v16.16b, v17.16b - 728: 0e3686b4 add v20.8b, v21.8b, v22.8b - 72c: 4e398717 add v23.16b, v24.16b, v25.16b - 730: 0e7c877a add v26.4h, v27.4h, v28.4h - 734: 4e6784c5 add v5.8h, v6.8h, v7.8h - 738: 0ea884e6 add v6.2s, v7.2s, v8.2s - 73c: 4eb1860f add v15.4s, v16.4s, v17.4s - 740: 4ef1860f add v15.2d, v16.2d, v17.2d - 744: 0e3bd759 fadd v25.2s, v26.2s, v27.2s - 748: 4e32d630 fadd v16.4s, v17.4s, v18.4s - 74c: 4e7dd79b fadd v27.2d, v28.2d, v29.2d - 750: 2e3a8738 sub v24.8b, v25.8b, v26.8b - 754: 6e31860f sub v15.16b, v16.16b, v17.16b - 758: 2e7b8759 sub v25.4h, v26.4h, v27.4h - 75c: 6e7085ee sub v14.8h, v15.8h, v16.8h - 760: 2eac856a sub v10.2s, v11.2s, v12.2s - 764: 6eaf85cd sub v13.4s, v14.4s, v15.4s - 768: 6ef085ee sub v14.2d, v15.2d, v16.2d - 76c: 0eb6d6b4 fsub v20.2s, v21.2s, v22.2s - 770: 4ea3d441 fsub v1.4s, v2.4s, v3.4s - 774: 4ef8d6f6 fsub v22.2d, v23.2d, v24.2d - 778: 0e209ffe mul v30.8b, v31.8b, v0.8b - 77c: 4e309dee mul v14.16b, v15.16b, v16.16b - 780: 0e649c62 mul v2.4h, v3.4h, v4.4h - 784: 4e689ce6 mul v6.8h, v7.8h, v8.8h - 788: 0ea59c83 mul v3.2s, v4.2s, v5.2s - 78c: 4ea99d07 mul v7.4s, v8.4s, v9.4s - 790: 2e3adf38 fmul v24.2s, v25.2s, v26.2s - 794: 6e22dc20 fmul v0.4s, v1.4s, v2.4s - 798: 6e7ddf9b fmul v27.2d, v28.2d, v29.2d - 79c: 0e7f97dd mla v29.4h, v30.4h, v31.4h - 7a0: 4e6794c5 mla v5.8h, v6.8h, v7.8h - 7a4: 0ea794c5 mla v5.2s, v6.2s, v7.2s - 7a8: 4ebf97dd mla v29.4s, v30.4s, v31.4s - 7ac: 0e2dcd8b fmla v11.2s, v12.2s, v13.2s - 7b0: 4e3bcf59 fmla v25.4s, v26.4s, v27.4s - 7b4: 4e62cc20 fmla v0.2d, v1.2d, v2.2d - 7b8: 2e6097fe mls v30.4h, v31.4h, v0.4h - 7bc: 6e629420 mls v0.8h, v1.8h, v2.8h - 7c0: 2eb49672 mls v18.2s, v19.2s, v20.2s - 7c4: 6ebe97bc mls v28.4s, v29.4s, v30.4s - 7c8: 0ebbcf59 fmls v25.2s, v26.2s, v27.2s - 7cc: 4eabcd49 fmls v9.4s, v10.4s, v11.4s - 7d0: 4efbcf59 fmls v25.2d, v26.2d, v27.2d - 7d4: 2e2efdac fdiv v12.2s, v13.2s, v14.2s - 7d8: 6e31fe0f fdiv v15.4s, v16.4s, v17.4s - 7dc: 6e6dfd8b fdiv v11.2d, v12.2d, v13.2d - 7e0: 0e2c656a smax v10.8b, v11.8b, v12.8b - 7e4: 4e346672 smax v18.16b, v19.16b, v20.16b - 7e8: 0e7a6738 smax v24.4h, v25.4h, v26.4h - 7ec: 4e7766d5 smax v21.8h, v22.8h, v23.8h - 7f0: 0eb96717 smax v23.2s, v24.2s, v25.2s - 7f4: 4ea26420 smax v0.4s, v1.4s, v2.4s - 7f8: 0e32f630 fmax v16.2s, v17.2s, v18.2s - 7fc: 4e2cf56a fmax v10.4s, v11.4s, v12.4s - 800: 4e68f4e6 fmax v6.2d, v7.2d, v8.2d - 804: 0e3e6fbc smin v28.8b, v29.8b, v30.8b - 808: 4e286ce6 smin v6.16b, v7.16b, v8.16b - 80c: 0e676cc5 smin v5.4h, v6.4h, v7.4h - 810: 4e676cc5 smin v5.8h, v6.8h, v7.8h - 814: 0eb66eb4 smin v20.2s, v21.2s, v22.2s - 818: 4eb46e72 smin v18.4s, v19.4s, v20.4s - 81c: 0eb1f60f fmin v15.2s, v16.2s, v17.2s - 820: 4eb4f672 fmin v18.4s, v19.4s, v20.4s - 824: 4efff7dd fmin v29.2d, v30.2d, v31.2d - 828: 2e3c8f7a cmeq v26.8b, v27.8b, v28.8b - 82c: 6e3e8fbc cmeq v28.16b, v29.16b, v30.16b - 830: 2e638c41 cmeq v1.4h, v2.4h, v3.4h - 834: 6e7d8f9b cmeq v27.8h, v28.8h, v29.8h - 838: 2ea28c20 cmeq v0.2s, v1.2s, v2.2s - 83c: 6eb68eb4 cmeq v20.4s, v21.4s, v22.4s - 840: 6efe8fbc cmeq v28.2d, v29.2d, v30.2d - 844: 0e31e60f fcmeq v15.2s, v16.2s, v17.2s - 848: 4e2ee5ac fcmeq v12.4s, v13.4s, v14.4s - 84c: 4e6ce56a fcmeq v10.2d, v11.2d, v12.2d - 850: 0e3e37bc cmgt v28.8b, v29.8b, v30.8b - 854: 4e3e37bc cmgt v28.16b, v29.16b, v30.16b - 858: 0e753693 cmgt v19.4h, v20.4h, v21.4h - 85c: 4e7836f6 cmgt v22.8h, v23.8h, v24.8h - 860: 
0eac356a cmgt v10.2s, v11.2s, v12.2s - 864: 4ea634a4 cmgt v4.4s, v5.4s, v6.4s - 868: 4ee037fe cmgt v30.2d, v31.2d, v0.2d - 86c: 2eb6e6b4 fcmgt v20.2s, v21.2s, v22.2s - 870: 6eaae528 fcmgt v8.4s, v9.4s, v10.4s - 874: 6ee0e7fe fcmgt v30.2d, v31.2d, v0.2d - 878: 0e343e72 cmge v18.8b, v19.8b, v20.8b - 87c: 4e2c3d6a cmge v10.16b, v11.16b, v12.16b - 880: 0e7d3f9b cmge v27.4h, v28.4h, v29.4h - 884: 4e643c62 cmge v2.8h, v3.8h, v4.8h - 888: 0eba3f38 cmge v24.2s, v25.2s, v26.2s - 88c: 4ea63ca4 cmge v4.4s, v5.4s, v6.4s - 890: 4ee53c83 cmge v3.2d, v4.2d, v5.2d - 894: 2e2ae528 fcmge v8.2s, v9.2s, v10.2s - 898: 6e38e6f6 fcmge v22.4s, v23.4s, v24.4s - 89c: 6e74e672 fcmge v18.2d, v19.2d, v20.2d - 8a0: ce7c808d sha512h q13, q4, v28.2d - 8a4: ce7986b7 sha512h2 q23, q21, v25.2d - 8a8: cec08078 sha512su0 v24.2d, v3.2d - 8ac: ce778b57 sha512su1 v23.2d, v26.2d, v23.2d - 8b0: ba5fd3e3 ccmn xzr, xzr, #0x3, le - 8b4: 3a5f03e5 ccmn wzr, wzr, #0x5, eq // eq = none - 8b8: fa411be4 ccmp xzr, #0x1, #0x4, ne // ne = any - 8bc: 7a42cbe2 ccmp wzr, #0x2, #0x2, gt - 8c0: 93df03ff ror xzr, xzr, #0 - 8c4: c820ffff stlxp w0, xzr, xzr, [sp] - 8c8: 8822fc7f stlxp w2, wzr, wzr, [x3] - 8cc: c8247cbf stxp w4, xzr, xzr, [x5] - 8d0: 88267fff stxp w6, wzr, wzr, [sp] - 8d4: 4e010fe0 dup v0.16b, wzr - 8d8: 4e081fe1 mov v1.d[0], xzr - 8dc: 4e0c1fe1 mov v1.s[1], wzr - 8e0: 4e0a1fe1 mov v1.h[2], wzr - 8e4: 4e071fe1 mov v1.b[3], wzr - 8e8: 4cc0ac3f ld1 {v31.2d, v0.2d}, [x1], x0 - 8ec: 1e601000 fmov d0, #2.000000000000000000e+00 - 8f0: 1e603000 fmov d0, #2.125000000000000000e+00 - 8f4: 1e621000 fmov d0, #4.000000000000000000e+00 - 8f8: 1e623000 fmov d0, #4.250000000000000000e+00 - 8fc: 1e641000 fmov d0, #8.000000000000000000e+00 - 900: 1e643000 fmov d0, #8.500000000000000000e+00 - 904: 1e661000 fmov d0, #1.600000000000000000e+01 - 908: 1e663000 fmov d0, #1.700000000000000000e+01 - 90c: 1e681000 fmov d0, #1.250000000000000000e-01 - 910: 1e683000 fmov d0, #1.328125000000000000e-01 - 914: 1e6a1000 fmov d0, #2.500000000000000000e-01 - 918: 1e6a3000 fmov d0, #2.656250000000000000e-01 - 91c: 1e6c1000 fmov d0, #5.000000000000000000e-01 - 920: 1e6c3000 fmov d0, #5.312500000000000000e-01 - 924: 1e6e1000 fmov d0, #1.000000000000000000e+00 - 928: 1e6e3000 fmov d0, #1.062500000000000000e+00 - 92c: 1e701000 fmov d0, #-2.000000000000000000e+00 - 930: 1e703000 fmov d0, #-2.125000000000000000e+00 - 934: 1e721000 fmov d0, #-4.000000000000000000e+00 - 938: 1e723000 fmov d0, #-4.250000000000000000e+00 - 93c: 1e741000 fmov d0, #-8.000000000000000000e+00 - 940: 1e743000 fmov d0, #-8.500000000000000000e+00 - 944: 1e761000 fmov d0, #-1.600000000000000000e+01 - 948: 1e763000 fmov d0, #-1.700000000000000000e+01 - 94c: 1e781000 fmov d0, #-1.250000000000000000e-01 - 950: 1e783000 fmov d0, #-1.328125000000000000e-01 - 954: 1e7a1000 fmov d0, #-2.500000000000000000e-01 - 958: 1e7a3000 fmov d0, #-2.656250000000000000e-01 - 95c: 1e7c1000 fmov d0, #-5.000000000000000000e-01 - 960: 1e7c3000 fmov d0, #-5.312500000000000000e-01 - 964: 1e7e1000 fmov d0, #-1.000000000000000000e+00 - 968: 1e7e3000 fmov d0, #-1.062500000000000000e+00 - 96c: f82f8075 swp x15, x21, [x3] - 970: f8380328 ldadd x24, x8, [x25] - 974: f8341230 ldclr x20, x16, [x17] - 978: f8222001 ldeor x2, x1, [x0] - 97c: f8383064 ldset x24, x4, [x3] - 980: f82c539f stsmin x12, [x28] - 984: f82a405a ldsmax x10, x26, [x2] - 988: f82c73f2 ldumin x12, x18, [sp] - 98c: f82163ad ldumax x1, x13, [x29] - 990: f8a08193 swpa x0, x19, [x12] - 994: f8b101b6 ldadda x17, x22, [x13] - 998: f8bc13fe ldclra x28, x30, [sp] - 99c: 
f8a1239a ldeora x1, x26, [x28] - 9a0: f8a4309e ldseta x4, x30, [x4] - 9a4: f8a6535e ldsmina x6, x30, [x26] - 9a8: f8b24109 ldsmaxa x18, x9, [x8] - 9ac: f8ac7280 ldumina x12, x0, [x20] - 9b0: f8a16058 ldumaxa x1, x24, [x2] - 9b4: f8e08309 swpal x0, x9, [x24] - 9b8: f8fa03d0 ldaddal x26, x16, [x30] - 9bc: f8e312ea ldclral x3, x10, [x23] - 9c0: f8ea2244 ldeoral x10, x4, [x18] - 9c4: f8e2310b ldsetal x2, x11, [x8] - 9c8: f8ea522f ldsminal x10, x15, [x17] - 9cc: f8e2418a ldsmaxal x2, x10, [x12] - 9d0: f8ec71af lduminal x12, x15, [x13] - 9d4: f8e26287 ldumaxal x2, x7, [x20] - 9d8: f87a8090 swpl x26, x16, [x4] - 9dc: f8620184 ldaddl x2, x4, [x12] - 9e0: f8721215 ldclrl x18, x21, [x16] - 9e4: f87222ab ldeorl x18, x11, [x21] - 9e8: f877334c ldsetl x23, x12, [x26] - 9ec: f87751dc ldsminl x23, x28, [x14] - 9f0: f86b4038 ldsmaxl x11, x24, [x1] - 9f4: f86c715f stuminl x12, [x10] - 9f8: f8706047 ldumaxl x16, x7, [x2] - 9fc: b823826d swp w3, w13, [x19] - a00: b8310070 ldadd w17, w16, [x3] - a04: b82113cb ldclr w1, w11, [x30] - a08: b82521e8 ldeor w5, w8, [x15] - a0c: b83d301e ldset w29, w30, [x0] - a10: b8345287 ldsmin w20, w7, [x20] - a14: b83742bc ldsmax w23, w28, [x21] - a18: b83b70b9 ldumin w27, w25, [x5] - a1c: b8216217 ldumax w1, w23, [x16] - a20: b8bf8185 swpa wzr, w5, [x12] - a24: b8a901fc ldadda w9, w28, [x15] - a28: b8bd13f6 ldclra w29, w22, [sp] - a2c: b8b320bf ldeora w19, wzr, [x5] - a30: b8ae33f2 ldseta w14, w18, [sp] - a34: b8b2529b ldsmina w18, w27, [x20] - a38: b8b0416c ldsmaxa w16, w12, [x11] - a3c: b8a973c6 ldumina w9, w6, [x30] - a40: b8b1639b ldumaxa w17, w27, [x28] - a44: b8fe8147 swpal w30, w7, [x10] - a48: b8f4008a ldaddal w20, w10, [x4] - a4c: b8f81231 ldclral w24, w17, [x17] - a50: b8f623a3 ldeoral w22, w3, [x29] - a54: b8ef3276 ldsetal w15, w22, [x19] - a58: b8f35056 ldsminal w19, w22, [x2] - a5c: b8ef4186 ldsmaxal w15, w6, [x12] - a60: b8f071ab lduminal w16, w11, [x13] - a64: b8f763c1 ldumaxal w23, w1, [x30] - a68: b8738225 swpl w19, w5, [x17] - a6c: b86202d0 ldaddl w2, w16, [x22] - a70: b86d12aa ldclrl w13, w10, [x21] - a74: b87d219b ldeorl w29, w27, [x12] - a78: b87b3023 ldsetl w27, w3, [x1] - a7c: b87f5278 ldsminl wzr, w24, [x19] - a80: b8714389 ldsmaxl w17, w9, [x28] - a84: b87b70ef lduminl w27, w15, [x7] - a88: b87563f7 ldumaxl w21, w23, [sp] - */ static const unsigned int insns[] = @@ -1600,30 +1001,30 @@ 0x9101a1a0, 0xb10a5cc8, 0xd10810aa, 0xf10fd061, 0x120cb166, 0x321764bc, 0x52174681, 0x720c0247, 0x9241018e, 0xb25a2969, 0xd278b411, 0xf26aad01, - 0x14000000, 0x17ffffd7, 0x14000279, 0x94000000, - 0x97ffffd4, 0x94000276, 0x3400000a, 0x34fffa2a, - 0x34004e6a, 0x35000008, 0x35fff9c8, 0x35004e08, - 0xb400000b, 0xb4fff96b, 0xb4004dab, 0xb500001d, - 0xb5fff91d, 0xb5004d5d, 0x10000013, 0x10fff8b3, - 0x10004cf3, 0x90000013, 0x36300016, 0x3637f836, - 0x36304c76, 0x3758000c, 0x375ff7cc, 0x37584c0c, + 0x14000000, 0x17ffffd7, 0x140002c9, 0x94000000, + 0x97ffffd4, 0x940002c6, 0x3400000a, 0x34fffa2a, + 0x3400586a, 0x35000008, 0x35fff9c8, 0x35005808, + 0xb400000b, 0xb4fff96b, 0xb40057ab, 0xb500001d, + 0xb5fff91d, 0xb500575d, 0x10000013, 0x10fff8b3, + 0x100056f3, 0x90000013, 0x36300016, 0x3637f836, + 0x36305676, 0x3758000c, 0x375ff7cc, 0x3758560c, 0x128313a0, 0x528a32c7, 0x7289173b, 0x92ab3acc, 0xd2a0bf94, 0xf2c285e8, 0x9358722f, 0x330e652f, 0x53067f3b, 0x93577c53, 0xb34a1aac, 0xd35a4016, 0x13946c63, 0x93c3dbc8, 0x54000000, 0x54fff5a0, - 0x540049e0, 0x54000001, 0x54fff541, 0x54004981, - 0x54000002, 0x54fff4e2, 0x54004922, 0x54000002, - 0x54fff482, 0x540048c2, 0x54000003, 0x54fff423, - 
0x54004863, 0x54000003, 0x54fff3c3, 0x54004803, - 0x54000004, 0x54fff364, 0x540047a4, 0x54000005, - 0x54fff305, 0x54004745, 0x54000006, 0x54fff2a6, - 0x540046e6, 0x54000007, 0x54fff247, 0x54004687, - 0x54000008, 0x54fff1e8, 0x54004628, 0x54000009, - 0x54fff189, 0x540045c9, 0x5400000a, 0x54fff12a, - 0x5400456a, 0x5400000b, 0x54fff0cb, 0x5400450b, - 0x5400000c, 0x54fff06c, 0x540044ac, 0x5400000d, - 0x54fff00d, 0x5400444d, 0x5400000e, 0x54ffefae, - 0x540043ee, 0x5400000f, 0x54ffef4f, 0x5400438f, + 0x540053e0, 0x54000001, 0x54fff541, 0x54005381, + 0x54000002, 0x54fff4e2, 0x54005322, 0x54000002, + 0x54fff482, 0x540052c2, 0x54000003, 0x54fff423, + 0x54005263, 0x54000003, 0x54fff3c3, 0x54005203, + 0x54000004, 0x54fff364, 0x540051a4, 0x54000005, + 0x54fff305, 0x54005145, 0x54000006, 0x54fff2a6, + 0x540050e6, 0x54000007, 0x54fff247, 0x54005087, + 0x54000008, 0x54fff1e8, 0x54005028, 0x54000009, + 0x54fff189, 0x54004fc9, 0x5400000a, 0x54fff12a, + 0x54004f6a, 0x5400000b, 0x54fff0cb, 0x54004f0b, + 0x5400000c, 0x54fff06c, 0x54004eac, 0x5400000d, + 0x54fff00d, 0x54004e4d, 0x5400000e, 0x54ffefae, + 0x54004dee, 0x5400000f, 0x54ffef4f, 0x54004d8f, 0xd40658e1, 0xd4014d22, 0xd4046543, 0xd4273f60, 0xd44cad80, 0xd503201f, 0xd69f03e0, 0xd6bf03e0, 0xd5033fdf, 0xd5033e9f, 0xd50332bf, 0xd61f0200, @@ -1655,7 +1056,7 @@ 0x791f226d, 0xf95aa2f3, 0xb9587bb7, 0x395f7176, 0x795d9143, 0x399e7e08, 0x799a2697, 0x79df3422, 0xb99c2624, 0xfd5c2374, 0xbd5fa1d9, 0xfd1d595a, - 0xbd1b1869, 0x580033db, 0x1800000b, 0xf8945060, + 0xbd1b1869, 0x58003ddb, 0x1800000b, 0xf8945060, 0xd8000000, 0xf8ae6ba0, 0xf99a0080, 0x1a070035, 0x3a0700a8, 0x5a0e0367, 0x7a11009b, 0x9a000380, 0xba1e030c, 0xda0f0320, 0xfa030301, 0x0b340b12, @@ -1732,33 +1133,53 @@ 0xba5fd3e3, 0x3a5f03e5, 0xfa411be4, 0x7a42cbe2, 0x93df03ff, 0xc820ffff, 0x8822fc7f, 0xc8247cbf, 0x88267fff, 0x4e010fe0, 0x4e081fe1, 0x4e0c1fe1, - 0x4e0a1fe1, 0x4e071fe1, 0x4cc0ac3f, 0x1e601000, - 0x1e603000, 0x1e621000, 0x1e623000, 0x1e641000, - 0x1e643000, 0x1e661000, 0x1e663000, 0x1e681000, - 0x1e683000, 0x1e6a1000, 0x1e6a3000, 0x1e6c1000, - 0x1e6c3000, 0x1e6e1000, 0x1e6e3000, 0x1e701000, - 0x1e703000, 0x1e721000, 0x1e723000, 0x1e741000, - 0x1e743000, 0x1e761000, 0x1e763000, 0x1e781000, - 0x1e783000, 0x1e7a1000, 0x1e7a3000, 0x1e7c1000, - 0x1e7c3000, 0x1e7e1000, 0x1e7e3000, 0xf82f8075, - 0xf8380328, 0xf8341230, 0xf8222001, 0xf8383064, - 0xf82c539f, 0xf82a405a, 0xf82c73f2, 0xf82163ad, - 0xf8a08193, 0xf8b101b6, 0xf8bc13fe, 0xf8a1239a, - 0xf8a4309e, 0xf8a6535e, 0xf8b24109, 0xf8ac7280, - 0xf8a16058, 0xf8e08309, 0xf8fa03d0, 0xf8e312ea, - 0xf8ea2244, 0xf8e2310b, 0xf8ea522f, 0xf8e2418a, - 0xf8ec71af, 0xf8e26287, 0xf87a8090, 0xf8620184, - 0xf8721215, 0xf87222ab, 0xf877334c, 0xf87751dc, - 0xf86b4038, 0xf86c715f, 0xf8706047, 0xb823826d, - 0xb8310070, 0xb82113cb, 0xb82521e8, 0xb83d301e, - 0xb8345287, 0xb83742bc, 0xb83b70b9, 0xb8216217, - 0xb8bf8185, 0xb8a901fc, 0xb8bd13f6, 0xb8b320bf, - 0xb8ae33f2, 0xb8b2529b, 0xb8b0416c, 0xb8a973c6, - 0xb8b1639b, 0xb8fe8147, 0xb8f4008a, 0xb8f81231, - 0xb8f623a3, 0xb8ef3276, 0xb8f35056, 0xb8ef4186, - 0xb8f071ab, 0xb8f763c1, 0xb8738225, 0xb86202d0, - 0xb86d12aa, 0xb87d219b, 0xb87b3023, 0xb87f5278, - 0xb8714389, 0xb87b70ef, 0xb87563f7, + 0x4e0a1fe1, 0x4e071fe1, 0x4cc0ac3f, 0x05a08020, + 0x04b0e3e0, 0x0470e7e1, 0x042f9c20, 0x043f9c35, + 0x047f9c20, 0x04ff9c20, 0x04299420, 0x04319160, + 0x0461943e, 0x04a19020, 0x042053ff, 0x047f5401, + 0x25208028, 0x2538cfe0, 0x2578d001, 0x25b8efe2, + 0x25f8f007, 0xa400a3e0, 0xa4a8a7ea, 0xa547a814, + 0xa4084ffe, 0xa55c53e0, 0xa5e1540b, 
0xe400fbf6, + 0xe408ffff, 0xe547e400, 0xe4014be0, 0xe4a84fe0, + 0xe5f25000, 0x858043e0, 0x85a043ff, 0xe59f5d08, + 0x1e601000, 0x1e603000, 0x1e621000, 0x1e623000, + 0x1e641000, 0x1e643000, 0x1e661000, 0x1e663000, + 0x1e681000, 0x1e683000, 0x1e6a1000, 0x1e6a3000, + 0x1e6c1000, 0x1e6c3000, 0x1e6e1000, 0x1e6e3000, + 0x1e701000, 0x1e703000, 0x1e721000, 0x1e723000, + 0x1e741000, 0x1e743000, 0x1e761000, 0x1e763000, + 0x1e781000, 0x1e783000, 0x1e7a1000, 0x1e7a3000, + 0x1e7c1000, 0x1e7c3000, 0x1e7e1000, 0x1e7e3000, + 0xf82f8075, 0xf8380328, 0xf8341230, 0xf8222001, + 0xf8383064, 0xf82c539f, 0xf82a405a, 0xf82c73f2, + 0xf82163ad, 0xf8a08193, 0xf8b101b6, 0xf8bc13fe, + 0xf8a1239a, 0xf8a4309e, 0xf8a6535e, 0xf8b24109, + 0xf8ac7280, 0xf8a16058, 0xf8e08309, 0xf8fa03d0, + 0xf8e312ea, 0xf8ea2244, 0xf8e2310b, 0xf8ea522f, + 0xf8e2418a, 0xf8ec71af, 0xf8e26287, 0xf87a8090, + 0xf8620184, 0xf8721215, 0xf87222ab, 0xf877334c, + 0xf87751dc, 0xf86b4038, 0xf86c715f, 0xf8706047, + 0xb823826d, 0xb8310070, 0xb82113cb, 0xb82521e8, + 0xb83d301e, 0xb8345287, 0xb83742bc, 0xb83b70b9, + 0xb8216217, 0xb8bf8185, 0xb8a901fc, 0xb8bd13f6, + 0xb8b320bf, 0xb8ae33f2, 0xb8b2529b, 0xb8b0416c, + 0xb8a973c6, 0xb8b1639b, 0xb8fe8147, 0xb8f4008a, + 0xb8f81231, 0xb8f623a3, 0xb8ef3276, 0xb8f35056, + 0xb8ef4186, 0xb8f071ab, 0xb8f763c1, 0xb8738225, + 0xb86202d0, 0xb86d12aa, 0xb87d219b, 0xb87b3023, + 0xb87f5278, 0xb8714389, 0xb87b70ef, 0xb87563f7, + 0x04fe0058, 0x04b60552, 0x65c00222, 0x65c20ad9, + 0x65db046c, 0x0416b35c, 0x04001e49, 0x045085e4, + 0x04daa856, 0x04d39cb4, 0x041191c0, 0x04900b79, + 0x0497bb1a, 0x049ea4c0, 0x040805e0, 0x044a04a9, + 0x0481069b, 0x049ca554, 0x65c09cd0, 0x65cd8fa2, + 0x65c69ac2, 0x65c78f6e, 0x65828457, 0x04ddb14a, + 0x65c2ac76, 0x65c0a430, 0x6581b190, 0x658da20c, + 0x658194b4, 0x65fb1187, 0x65bc2450, 0x65b34624, + 0x65f8750c, 0x04174152, 0x04107db3, 0x042e30e0, + 0x04aa3119, 0x047b32d4, 0x049a2e43, 0x04182787, + 0x04992a00, 0x044825f6, 0x040a2b36, 0x65c731be, + 0x658621ab, 0x65983334, 0x04412624, }; // END Generated code -- do not edit
--- a/src/hotspot/cpu/aarch64/assembler_aarch64.hpp Wed Sep 02 20:33:29 2020 -0700 +++ b/src/hotspot/cpu/aarch64/assembler_aarch64.hpp Tue Sep 08 15:28:06 2020 +0800 @@ -139,6 +139,9 @@ // Java stack pointer REGISTER_DECLARATION(Register, esp, r20); +// Preserved predicate register with all elements set TRUE. +REGISTER_DECLARATION(PRegister, ptrue, p7); + #define assert_cond(ARG1) assert(ARG1, #ARG1) namespace asm_util { @@ -273,6 +276,14 @@ f(r->encoding_nocheck(), lsb + 4, lsb); } + void prf(PRegister r, int lsb) { + f(r->encoding_nocheck(), lsb + 3, lsb); + } + + void pgrf(PRegister r, int lsb) { + f(r->encoding_nocheck(), lsb + 2, lsb); + } + unsigned get(int msb = 31, int lsb = 0) { int nbits = msb - lsb + 1; unsigned mask = ((1U << nbits) - 1) << lsb; @@ -561,6 +572,18 @@ void lea(MacroAssembler *, Register) const; static bool offset_ok_for_immed(int64_t offset, uint shift); + + static bool offset_ok_for_sve_immed(long offset, int shift, int vl /* sve vector length */) { + if (offset % vl == 0) { + // Convert address offset into sve imm offset (MUL VL). + int sve_offset = offset / vl; + if (((-(1 << (shift - 1))) <= sve_offset) && (sve_offset < (1 << (shift - 1)))) { + // sve_offset can be encoded + return true; + } + } + return false; + } }; // Convience classes @@ -684,6 +707,12 @@ void rf(FloatRegister reg, int lsb) { current->rf(reg, lsb); } + void prf(PRegister reg, int lsb) { + current->prf(reg, lsb); + } + void pgrf(PRegister reg, int lsb) { + current->pgrf(reg, lsb); + } void fixed(unsigned value, unsigned mask) { current->fixed(value, mask); } @@ -2104,6 +2133,21 @@ #undef INSN #undef INSN1 +// Floating-point compare. 3-registers versions (scalar). +#define INSN(NAME, sz, e) \ + void NAME(FloatRegister Vd, FloatRegister Vn, FloatRegister Vm) { \ + starti; \ + f(0b01111110, 31, 24), f(e, 23), f(sz, 22), f(1, 21), rf(Vm, 16); \ + f(0b111011, 15, 10), rf(Vn, 5), rf(Vd, 0); \ + } \ + + INSN(facged, 1, 0); // facge-double + INSN(facges, 0, 0); // facge-single + INSN(facgtd, 1, 1); // facgt-double + INSN(facgts, 0, 1); // facgt-single + +#undef INSN + // Floating-point Move (immediate) private: unsigned pack(double value); @@ -2554,20 +2598,17 @@ f(sidx<<(int)T, 14, 11), f(1, 10), rf(Vn, 5), rf(Vd, 0); } - void umov(Register Rd, FloatRegister Vn, SIMD_RegVariant T, int idx) { - starti; - f(0, 31), f(T==D ? 1:0, 30), f(0b001110000, 29, 21); - f(((idx<<1)|1)<<(int)T, 20, 16), f(0b001111, 15, 10); - rf(Vn, 5), rf(Rd, 0); +#define INSN(NAME, op) \ + void NAME(Register Rd, FloatRegister Vn, SIMD_RegVariant T, int idx) { \ + starti; \ + f(0, 31), f(T==D ? 1:0, 30), f(0b001110000, 29, 21); \ + f(((idx<<1)|1)<<(int)T, 20, 16), f(op, 15, 10); \ + rf(Vn, 5), rf(Rd, 0); \ } - void smov(Register Rd, FloatRegister Vn, SIMD_RegVariant T, int idx) { - starti; - f(0, 31), f(T==D ? 1:0, 30), f(0b001110000, 29, 21); - f(((idx<<1)|1)<<(int)T, 20, 16), f(0b001011, 15, 10); - rf(Vn, 5), rf(Rd, 0); - } - + INSN(umov, 0b001111); + INSN(smov, 0b001011); +#undef INSN #define INSN(NAME, opc, opc2, isSHR) \ void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, int shift){ \ @@ -2598,6 +2639,20 @@ #undef INSN +#define INSN(NAME, opc, opc2, isSHR) \ + void NAME(FloatRegister Vd, FloatRegister Vn, int shift){ \ + starti; \ + int encodedShift = isSHR ? 
128 - shift : 64 + shift; \ + f(0b01, 31, 30), f(opc, 29), f(0b111110, 28, 23), \ + f(encodedShift, 22, 16); f(opc2, 15, 10), rf(Vn, 5), rf(Vd, 0); \ + } + + INSN(shld, 0, 0b010101, /* isSHR = */ false); + INSN(sshrd, 0, 0b000001, /* isSHR = */ true); + INSN(ushrd, 1, 0b000001, /* isSHR = */ true); + +#undef INSN + private: void _xshll(sign_kind sign, FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, SIMD_Arrangement Tb, int shift) { starti; @@ -2814,7 +2869,7 @@ #undef INSN -void ext(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, FloatRegister Vm, int index) + void ext(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, FloatRegister Vm, int index) { starti; assert(T == T8B || T == T16B, "invalid arrangement"); @@ -2824,6 +2879,292 @@ f(0, 10), rf(Vn, 5), rf(Vd, 0); } +// SVE arithmetics - unpredicated +#define INSN(NAME, opcode) \ + void NAME(FloatRegister Zd, SIMD_RegVariant T, FloatRegister Zn, FloatRegister Zm) { \ + starti; \ + assert(T != Q, "invalid register variant"); \ + f(0b00000100, 31, 24), f(T, 23, 22), f(1, 21), \ + rf(Zm, 16), f(0, 15, 13), f(opcode, 12, 10), rf(Zn, 5), rf(Zd, 0); \ + } + INSN(sve_add, 0b000); + INSN(sve_sub, 0b001); +#undef INSN + +// SVE floating-point arithmetic - unpredicated +#define INSN(NAME, opcode) \ + void NAME(FloatRegister Zd, SIMD_RegVariant T, FloatRegister Zn, FloatRegister Zm) { \ + starti; \ + assert(T == S || T == D, "invalid register variant"); \ + f(0b01100101, 31, 24), f(T, 23, 22), f(0, 21), \ + rf(Zm, 16), f(0, 15, 13), f(opcode, 12, 10), rf(Zn, 5), rf(Zd, 0); \ + } + + INSN(sve_fadd, 0b000); + INSN(sve_fmul, 0b010); + INSN(sve_fsub, 0b001); +#undef INSN + +private: + void sve_predicate_reg_insn(unsigned op24, unsigned op13, + FloatRegister Zd_or_Vd, SIMD_RegVariant T, + PRegister Pg, FloatRegister Zn_or_Vn) { + starti; + f(op24, 31, 24), f(T, 23, 22), f(op13, 21, 13); + pgrf(Pg, 10), rf(Zn_or_Vn, 5), rf(Zd_or_Vd, 0); + } + +public: + +// SVE integer arithmetics - predicate +#define INSN(NAME, op1, op2) \ + void NAME(FloatRegister Zdn_or_Zd_or_Vd, SIMD_RegVariant T, PRegister Pg, FloatRegister Znm_or_Vn) { \ + assert(T != Q, "invalid register variant"); \ + sve_predicate_reg_insn(op1, op2, Zdn_or_Zd_or_Vd, T, Pg, Znm_or_Vn); \ + } + + INSN(sve_abs, 0b00000100, 0b010110101); // vector abs, unary + INSN(sve_add, 0b00000100, 0b000000000); // vector add + INSN(sve_andv, 0b00000100, 0b011010001); // bitwise and reduction to scalar + INSN(sve_asr, 0b00000100, 0b010000100); // vector arithmetic shift right + INSN(sve_cnt, 0b00000100, 0b011010101) // count non-zero bits + INSN(sve_cpy, 0b00000101, 0b100000100); // copy scalar to each active vector element + INSN(sve_eorv, 0b00000100, 0b011001001); // bitwise xor reduction to scalar + INSN(sve_lsl, 0b00000100, 0b010011100); // vector logical shift left + INSN(sve_lsr, 0b00000100, 0b010001100); // vector logical shift right + INSN(sve_mul, 0b00000100, 0b010000000); // vector mul + INSN(sve_neg, 0b00000100, 0b010111101); // vector neg, unary + INSN(sve_not, 0b00000100, 0b011110101); // bitwise invert vector, unary + INSN(sve_orv, 0b00000100, 0b011000001); // bitwise or reduction to scalar + INSN(sve_smax, 0b00000100, 0b001000000); // signed maximum vectors + INSN(sve_smaxv, 0b00000100, 0b001000001); // signed maximum reduction to scalar + INSN(sve_smin, 0b00000100, 0b001010000); // signed minimum vectors + INSN(sve_sminv, 0b00000100, 0b001010001); // signed minimum reduction to scalar + INSN(sve_sub, 0b00000100, 0b000001000); // vector sub + INSN(sve_uaddv, 0b00000100, 
0b000001001); // unsigned add reduction to scalar +#undef INSN + +// SVE floating-point arithmetics - predicate +#define INSN(NAME, op1, op2) \ + void NAME(FloatRegister Zd_or_Zdn_or_Vd, SIMD_RegVariant T, PRegister Pg, FloatRegister Zn_or_Zm) { \ + assert(T == S || T == D, "invalid register variant"); \ + sve_predicate_reg_insn(op1, op2, Zd_or_Zdn_or_Vd, T, Pg, Zn_or_Zm); \ + } + + INSN(sve_fabs, 0b00000100, 0b011100101); + INSN(sve_fadd, 0b01100101, 0b000000100); + INSN(sve_fadda, 0b01100101, 0b011000001); // add strictly-ordered reduction to scalar Vd + INSN(sve_fdiv, 0b01100101, 0b001101100); + INSN(sve_fmax, 0b01100101, 0b000110100); // floating-point maximum + INSN(sve_fmaxv, 0b01100101, 0b000110001); // floating-point maximum recursive reduction to scalar + INSN(sve_fmin, 0b01100101, 0b000111100); // floating-point minimum + INSN(sve_fminv, 0b01100101, 0b000111001); // floating-point minimum recursive reduction to scalar + INSN(sve_fmul, 0b01100101, 0b000010100); + INSN(sve_fneg, 0b00000100, 0b011101101); + INSN(sve_frintm, 0b01100101, 0b000010101); // floating-point round to integral value, toward minus infinity + INSN(sve_frintn, 0b01100101, 0b000000101); // floating-point round to integral value, nearest with ties to even + INSN(sve_frintp, 0b01100101, 0b000001101); // floating-point round to integral value, toward plus infinity + INSN(sve_fsqrt, 0b01100101, 0b001101101); + INSN(sve_fsub, 0b01100101, 0b000001100); +#undef INSN + + // SVE multiple-add/sub - predicated +#define INSN(NAME, op0, op1, op2) \ + void NAME(FloatRegister Zda, SIMD_RegVariant T, PRegister Pg, FloatRegister Zn, FloatRegister Zm) { \ + starti; \ + assert(T != Q, "invalid size"); \ + f(op0, 31, 24), f(T, 23, 22), f(op1, 21), rf(Zm, 16); \ + f(op2, 15, 13), pgrf(Pg, 10), rf(Zn, 5), rf(Zda, 0); \ + } + + INSN(sve_fmla, 0b01100101, 1, 0b000); // floating-point fused multiply-add: Zda = Zda + Zn * Zm + INSN(sve_fmls, 0b01100101, 1, 0b001); // floating-point fused multiply-subtract: Zda = Zda + -Zn * Zm + INSN(sve_fnmla, 0b01100101, 1, 0b010); // floating-point negated fused multiply-add: Zda = -Zda + -Zn * Zm + INSN(sve_fnmls, 0b01100101, 1, 0b011); // floating-point negated fused multiply-subtract: Zda = -Zda + Zn * Zm + INSN(sve_mla, 0b00000100, 0, 0b010); // multiply-add: Zda = Zda + Zn*Zm + INSN(sve_mls, 0b00000100, 0, 0b011); // multiply-subtract: Zda = Zda + -Zn*Zm +#undef INSN + +// SVE bitwise logical - unpredicated +#define INSN(NAME, opc) \ + void NAME(FloatRegister Zd, FloatRegister Zn, FloatRegister Zm) { \ + starti; \ + f(0b00000100, 31, 24), f(opc, 23, 22), f(1, 21), \ + rf(Zm, 16), f(0b001100, 15, 10), rf(Zn, 5), rf(Zd, 0); \ + } + INSN(sve_and, 0b00); + INSN(sve_eor, 0b10); + INSN(sve_orr, 0b01); +#undef INSN + +// SVE shift immediate - unpredicated +#define INSN(NAME, opc, isSHR) \ + void NAME(FloatRegister Zd, SIMD_RegVariant T, FloatRegister Zn, int shift) { \ + starti; \ + /* The encodings for the tszh:tszl:imm3 fields (bits 23:22 20:19 18:16) \ + * for shift right is calculated as: \ + * 0001 xxx B, shift = 16 - UInt(tszh:tszl:imm3) \ + * 001x xxx H, shift = 32 - UInt(tszh:tszl:imm3) \ + * 01xx xxx S, shift = 64 - UInt(tszh:tszl:imm3) \ + * 1xxx xxx D, shift = 128 - UInt(tszh:tszl:imm3) \ + * for shift left is calculated as: \ + * 0001 xxx B, shift = UInt(tszh:tszl:imm3) - 8 \ + * 001x xxx H, shift = UInt(tszh:tszl:imm3) - 16 \ + * 01xx xxx S, shift = UInt(tszh:tszl:imm3) - 32 \ + * 1xxx xxx D, shift = UInt(tszh:tszl:imm3) - 64 \ + */ \ + assert(T != Q, "Invalid register variant"); \ + if 
(isSHR) { \ + assert(((1 << (T + 3)) >= shift) && (shift > 0) , "Invalid shift value"); \ + } else { \ + assert(((1 << (T + 3)) > shift) && (shift >= 0) , "Invalid shift value"); \ + } \ + int cVal = (1 << ((T + 3) + (isSHR ? 1 : 0))); \ + int encodedShift = isSHR ? cVal - shift : cVal + shift; \ + int tszh = encodedShift >> 5; \ + int tszl_imm = encodedShift & 0x1f; \ + f(0b00000100, 31, 24); \ + f(tszh, 23, 22), f(1,21), f(tszl_imm, 20, 16); \ + f(0b100, 15, 13), f(opc, 12, 10), rf(Zn, 5), rf(Zd, 0); \ + } + + INSN(sve_asr, 0b100, /* isSHR = */ true); + INSN(sve_lsl, 0b111, /* isSHR = */ false); + INSN(sve_lsr, 0b101, /* isSHR = */ true); +#undef INSN + +private: + + // Scalar base + immediate index + void sve_ld_st1(FloatRegister Zt, Register Xn, int imm, PRegister Pg, + SIMD_RegVariant T, int op1, int type, int op2) { + starti; + assert_cond(T >= type); + f(op1, 31, 25), f(type, 24, 23), f(T, 22, 21); + f(0, 20), sf(imm, 19, 16), f(op2, 15, 13); + pgrf(Pg, 10), srf(Xn, 5), rf(Zt, 0); + } + + // Scalar base + scalar index + void sve_ld_st1(FloatRegister Zt, Register Xn, Register Xm, PRegister Pg, + SIMD_RegVariant T, int op1, int type, int op2) { + starti; + assert_cond(T >= type); + f(op1, 31, 25), f(type, 24, 23), f(T, 22, 21); + rf(Xm, 16), f(op2, 15, 13); + pgrf(Pg, 10), srf(Xn, 5), rf(Zt, 0); + } + + void sve_ld_st1(FloatRegister Zt, PRegister Pg, + SIMD_RegVariant T, const Address &a, + int op1, int type, int imm_op2, int scalar_op2) { + switch (a.getMode()) { + case Address::base_plus_offset: + sve_ld_st1(Zt, a.base(), a.offset(), Pg, T, op1, type, imm_op2); + break; + case Address::base_plus_offset_reg: + sve_ld_st1(Zt, a.base(), a.index(), Pg, T, op1, type, scalar_op2); + break; + default: + ShouldNotReachHere(); + } + } + +public: + +// SVE load/store - predicated +#define INSN(NAME, op1, type, imm_op2, scalar_op2) \ + void NAME(FloatRegister Zt, SIMD_RegVariant T, PRegister Pg, const Address &a) { \ + assert(T != Q, "invalid register variant"); \ + sve_ld_st1(Zt, Pg, T, a, op1, type, imm_op2, scalar_op2); \ + } + + INSN(sve_ld1b, 0b1010010, 0b00, 0b101, 0b010); + INSN(sve_st1b, 0b1110010, 0b00, 0b111, 0b010); + INSN(sve_ld1h, 0b1010010, 0b01, 0b101, 0b010); + INSN(sve_st1h, 0b1110010, 0b01, 0b111, 0b010); + INSN(sve_ld1w, 0b1010010, 0b10, 0b101, 0b010); + INSN(sve_st1w, 0b1110010, 0b10, 0b111, 0b010); + INSN(sve_ld1d, 0b1010010, 0b11, 0b101, 0b010); + INSN(sve_st1d, 0b1110010, 0b11, 0b111, 0b010); +#undef INSN + +// SVE load/store - unpredicated +#define INSN(NAME, op1) \ + void NAME(FloatRegister Zt, const Address &a) { \ + starti; \ + assert(a.index() == noreg, "invalid address variant"); \ + f(op1, 31, 29), f(0b0010110, 28, 22), sf(a.offset() >> 3, 21, 16), \ + f(0b010, 15, 13), f(a.offset() & 0x7, 12, 10), srf(a.base(), 5), rf(Zt, 0); \ + } + + INSN(sve_ldr, 0b100); // LDR (vector) + INSN(sve_str, 0b111); // STR (vector) +#undef INSN + +#define INSN(NAME, op) \ + void NAME(Register Xd, Register Xn, int imm6) { \ + starti; \ + f(0b000001000, 31, 23), f(op, 22, 21); \ + srf(Xn, 16), f(0b01010, 15, 11), sf(imm6, 10, 5), srf(Xd, 0); \ + } + + INSN(sve_addvl, 0b01); + INSN(sve_addpl, 0b11); +#undef INSN + +// SVE inc/dec register by element count +#define INSN(NAME, op) \ + void NAME(Register Xdn, SIMD_RegVariant T, unsigned imm4 = 1, int pattern = 0b11111) { \ + starti; \ + assert(T != Q, "invalid size"); \ + f(0b00000100,31, 24), f(T, 23, 22), f(0b11, 21, 20); \ + f(imm4 - 1, 19, 16), f(0b11100, 15, 11), f(op, 10), f(pattern, 9, 5), rf(Xdn, 0); \ + } + + INSN(sve_inc, 0); + 
INSN(sve_dec, 1); +#undef INSN + + // SVE predicate count + void sve_cntp(Register Xd, SIMD_RegVariant T, PRegister Pg, PRegister Pn) { + starti; + assert(T != Q, "invalid size"); + f(0b00100101, 31, 24), f(T, 23, 22), f(0b10000010, 21, 14); + prf(Pg, 10), f(0, 9), prf(Pn, 5), rf(Xd, 0); + } + + // SVE dup scalar + void sve_dup(FloatRegister Zd, SIMD_RegVariant T, Register Rn) { + starti; + assert(T != Q, "invalid size"); + f(0b00000101, 31, 24), f(T, 23, 22), f(0b100000001110, 21, 10); + srf(Rn, 5), rf(Zd, 0); + } + + // SVE dup imm + void sve_dup(FloatRegister Zd, SIMD_RegVariant T, int imm8) { + starti; + assert(T != Q, "invalid size"); + int sh = 0; + if (imm8 <= 127 && imm8 >= -128) { + sh = 0; + } else if (T != B && imm8 <= 32512 && imm8 >= -32768 && (imm8 & 0xff) == 0) { + sh = 1; + imm8 = (imm8 >> 8); + } else { + guarantee(false, "invalid immediate"); + } + f(0b00100101, 31, 24), f(T, 23, 22), f(0b11100011, 21, 14); + f(sh, 13), sf(imm8, 12, 5), rf(Zd, 0); + } + + void sve_ptrue(PRegister pd, SIMD_RegVariant esize, int pattern = 0b11111) { + starti; + f(0b00100101, 31, 24), f(esize, 23, 22), f(0b011000111000, 21, 10); + f(pattern, 9, 5), f(0b0, 4), prf(pd, 0); + } + Assembler(CodeBuffer* code) : AbstractAssembler(code) { }
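The tszh:tszl:imm3 arithmetic documented in the sve_asr/sve_lsl/sve_lsr comment above is easy to sanity-check in isolation. The sketch below is a standalone model (the helper name is assumed, not part of the changeset) that reproduces the cVal/encodedShift computation from the INSN macro:

#include <cassert>

// Model of the SVE shift-immediate encoding above.
// T is the SIMD_RegVariant ordinal: B=0, H=1, S=2, D=3.
static int sve_shift_encoding(int T, int shift, bool isSHR) {
  int cVal = 1 << ((T + 3) + (isSHR ? 1 : 0)); // 16/32/64/128 for SHR, 8/16/32/64 for SHL
  return isSHR ? cVal - shift : cVal + shift;  // the 7-bit tszh:tszl:imm3 value
}

int main() {
  // lsr Zd.S, Zn.S, #3: cVal = 64, encoding 64 - 3 = 61 = 0b0111101 ("01xx xxx" => S)
  assert(sve_shift_encoding(2, 3, true) == 61);
  // lsl Zd.D, Zn.D, #1: cVal = 64, encoding 64 + 1 = 65 = 0b1000001 ("1xxx xxx" => D)
  assert(sve_shift_encoding(3, 1, false) == 65);
  return 0;
}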
--- a/src/hotspot/cpu/aarch64/gc/z/zBarrierSetAssembler_aarch64.cpp Wed Sep 02 20:33:29 2020 -0700 +++ b/src/hotspot/cpu/aarch64/gc/z/zBarrierSetAssembler_aarch64.cpp Tue Sep 08 15:28:06 2020 +0800 @@ -431,8 +431,12 @@ ZSetupArguments setup_arguments(masm, stub); __ mov(rscratch1, stub->slow_path()); __ blr(rscratch1); + if (UseSVE > 0) { + // Reinitialize the ptrue predicate register, in case the external runtime + // call clobbers ptrue reg, as we may return to SVE compiled code. + __ reinitialize_ptrue(); + } } - // Stub exit __ b(*stub->continuation()); }
--- a/src/hotspot/cpu/aarch64/globals_aarch64.hpp Wed Sep 02 20:33:29 2020 -0700 +++ b/src/hotspot/cpu/aarch64/globals_aarch64.hpp Tue Sep 08 15:28:06 2020 +0800 @@ -99,6 +99,9 @@ "Avoid generating unaligned memory accesses") \ product(bool, UseLSE, false, \ "Use LSE instructions") \ + product(uint, UseSVE, 0, \ + "Highest supported SVE instruction set version") \ + range(0, 2) \ product(bool, UseBlockZeroing, true, \ "Use DC ZVA for block zeroing") \ product(intx, BlockZeroingLowLimit, 256, \
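Since UseSVE is a product flag with range 0-2 (0 = off, 1 = SVE, 2 = SVE2), it can be set directly on the command line, e.g. java -XX:UseSVE=1 -XX:MaxVectorSize=32 VectorKernel (the class name is a placeholder); the interaction with MaxVectorSize is validated in vm_version_aarch64.cpp further down.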
--- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp Wed Sep 02 20:33:29 2020 -0700 +++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp Tue Sep 08 15:28:06 2020 +0800 @@ -2117,9 +2117,16 @@ } // Push lots of registers in the bit set supplied. Don't push sp. -// Return the number of words pushed +// Return the number of dwords pushed int MacroAssembler::push_fp(unsigned int bitset, Register stack) { int words_pushed = 0; + bool use_sve = false; + int sve_vector_size_in_bytes = 0; + +#ifdef COMPILER2 + use_sve = Matcher::supports_scalable_vector(); + sve_vector_size_in_bytes = Matcher::scalable_vector_reg_size(T_BYTE); +#endif // Scan bitset to accumulate register pairs unsigned char regs[32]; @@ -2134,9 +2141,19 @@ return 0; } + // SVE + if (use_sve && sve_vector_size_in_bytes > 16) { + sub(stack, stack, sve_vector_size_in_bytes * count); + for (int i = 0; i < count; i++) { + sve_str(as_FloatRegister(regs[i]), Address(stack, i)); + } + return count * sve_vector_size_in_bytes / 8; + } + + // NEON if (count == 1) { strq(as_FloatRegister(regs[0]), Address(pre(stack, -wordSize * 2))); - return 1; + return 2; } bool odd = (count & 1) == 1; @@ -2157,12 +2174,19 @@ } assert(words_pushed == count, "oops, pushed(%d) != count(%d)", words_pushed, count); - return count; -} - + return count * 2; +} + +// Return the number of dwords popped int MacroAssembler::pop_fp(unsigned int bitset, Register stack) { int words_pushed = 0; - + bool use_sve = false; + int sve_vector_size_in_bytes = 0; + +#ifdef COMPILER2 + use_sve = Matcher::supports_scalable_vector(); + sve_vector_size_in_bytes = Matcher::scalable_vector_reg_size(T_BYTE); +#endif // Scan bitset to accumulate register pairs unsigned char regs[32]; int count = 0; @@ -2176,9 +2200,19 @@ return 0; } + // SVE + if (use_sve && sve_vector_size_in_bytes > 16) { + for (int i = count - 1; i >= 0; i--) { + sve_ldr(as_FloatRegister(regs[i]), Address(stack, i)); + } + add(stack, stack, sve_vector_size_in_bytes * count); + return count * sve_vector_size_in_bytes / 8; + } + + // NEON if (count == 1) { ldrq(as_FloatRegister(regs[0]), Address(post(stack, wordSize * 2))); - return 1; + return 2; } bool odd = (count & 1) == 1; @@ -2199,7 +2233,7 @@ assert(words_pushed == count, "oops, pushed(%d) != count(%d)", words_pushed, count); - return count; + return count * 2; } #ifdef ASSERT @@ -2647,23 +2681,39 @@ pop(RegSet::range(r0, r18) - RegSet::of(rscratch1, rscratch2) - exclude, sp); } -void MacroAssembler::push_CPU_state(bool save_vectors) { - int step = (save_vectors ? 8 : 4) * wordSize; +void MacroAssembler::push_CPU_state(bool save_vectors, bool use_sve, + int sve_vector_size_in_bytes) { push(0x3fffffff, sp); // integer registers except lr & sp - mov(rscratch1, -step); - sub(sp, sp, step); - for (int i = 28; i >= 4; i -= 4) { - st1(as_FloatRegister(i), as_FloatRegister(i+1), as_FloatRegister(i+2), - as_FloatRegister(i+3), save_vectors ? T2D : T1D, Address(post(sp, rscratch1))); + if (save_vectors && use_sve && sve_vector_size_in_bytes > 16) { + sub(sp, sp, sve_vector_size_in_bytes * FloatRegisterImpl::number_of_registers); + for (int i = 0; i < FloatRegisterImpl::number_of_registers; i++) { + sve_str(as_FloatRegister(i), Address(sp, i)); + } + } else { + int step = (save_vectors ? 8 : 4) * wordSize; + mov(rscratch1, -step); + sub(sp, sp, step); + for (int i = 28; i >= 4; i -= 4) { + st1(as_FloatRegister(i), as_FloatRegister(i+1), as_FloatRegister(i+2), + as_FloatRegister(i+3), save_vectors ?
T2D : T1D, Address(post(sp, rscratch1))); + } + st1(v0, v1, v2, v3, save_vectors ? T2D : T1D, sp); } - st1(v0, v1, v2, v3, save_vectors ? T2D : T1D, sp); -} - -void MacroAssembler::pop_CPU_state(bool restore_vectors) { - int step = (restore_vectors ? 8 : 4) * wordSize; - for (int i = 0; i <= 28; i += 4) - ld1(as_FloatRegister(i), as_FloatRegister(i+1), as_FloatRegister(i+2), - as_FloatRegister(i+3), restore_vectors ? T2D : T1D, Address(post(sp, step))); +} + +void MacroAssembler::pop_CPU_state(bool restore_vectors, bool use_sve, + int sve_vector_size_in_bytes) { + if (restore_vectors && use_sve && sve_vector_size_in_bytes > 16) { + for (int i = FloatRegisterImpl::number_of_registers - 1; i >= 0; i--) { + sve_ldr(as_FloatRegister(i), Address(sp, i)); + } + add(sp, sp, sve_vector_size_in_bytes * FloatRegisterImpl::number_of_registers); + } else { + int step = (restore_vectors ? 8 : 4) * wordSize; + for (int i = 0; i <= 28; i += 4) + ld1(as_FloatRegister(i), as_FloatRegister(i+1), as_FloatRegister(i+2), + as_FloatRegister(i+3), restore_vectors ? T2D : T1D, Address(post(sp, step))); + } pop(0x3fffffff, sp); // integer registers except lr & sp } @@ -2712,6 +2762,21 @@ return Address(base, offset); } +Address MacroAssembler::sve_spill_address(int sve_reg_size_in_bytes, int offset, Register tmp) { + assert(offset >= 0, "spill to negative address?"); + + Register base = sp; + + // An immediate offset in the range 0 to 255 which is multiplied + // by the current vector or predicate register size in bytes. + if (offset % sve_reg_size_in_bytes == 0 && offset < ((1<<8)*sve_reg_size_in_bytes)) { + return Address(base, offset / sve_reg_size_in_bytes); + } + + add(tmp, base, offset); + return Address(tmp); +} + // Checks whether offset is aligned. // Returns true if it is, else false. bool MacroAssembler::merge_alignment_check(Register base, @@ -5221,3 +5286,24 @@ membar(Assembler::AnyAny); } } + +void MacroAssembler::verify_sve_vector_length() { + Label verify_ok; + assert(UseSVE > 0, "should only be used for SVE"); + movw(rscratch1, zr); + sve_inc(rscratch1, B); + subsw(zr, rscratch1, VM_Version::get_initial_sve_vector_length()); + br(EQ, verify_ok); + stop("Error: SVE vector length has changed since jvm startup"); + bind(verify_ok); +} + +void MacroAssembler::verify_ptrue() { + Label verify_ok; + assert(UseSVE > 0, "should only be used for SVE"); + sve_cntp(rscratch1, B, ptrue, ptrue); // get true elements count. + sve_dec(rscratch1, B); + cbz(rscratch1, verify_ok); + stop("Error: the preserved predicate register (p7) elements are not all true"); + bind(verify_ok); +}
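The verify_ptrue() check works by counting: sve_cntp with ptrue as both the governing and the tested predicate yields the number of active B-sized (byte) lanes, and sve_dec then subtracts the machine's byte-lane count, i.e. the vector length in bytes, so the result is zero exactly when every lane of p7 is true. A standalone model of that arithmetic, assuming a 256-bit (32-byte) vector length:

#include <cassert>

int main() {
  const int vl_bytes = 32;        // assumed SVE vector length in bytes
  int counted = vl_bytes;         // sve_cntp(.., B, ptrue, ptrue): all lanes true
  counted -= vl_bytes;            // sve_dec(rscratch1, B): subtract byte-lane count
  assert(counted == 0);           // nonzero would trigger the stop() above

  int clobbered = 16;             // e.g. p7 reset to half the lanes by external code
  assert(clobbered - vl_bytes != 0); // detected by the same subtraction
  return 0;
}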
--- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp Wed Sep 02 20:33:29 2020 -0700 +++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp Tue Sep 08 15:28:06 2020 +0800 @@ -873,8 +873,10 @@ DEBUG_ONLY(void verify_heapbase(const char* msg);) - void push_CPU_state(bool save_vectors = false); - void pop_CPU_state(bool restore_vectors = false) ; + void push_CPU_state(bool save_vectors = false, bool use_sve = false, + int sve_vector_size_in_bytes = 0); + void pop_CPU_state(bool restore_vectors = false, bool use_sve = false, + int sve_vector_size_in_bytes = 0); // Round up to a power of two void round_to(Register reg, int modulus); @@ -954,6 +956,11 @@ Address argument_address(RegisterOrConstant arg_slot, int extra_slot_offset = 0); + void verify_sve_vector_length(); + void reinitialize_ptrue() { + sve_ptrue(ptrue, B); + } + void verify_ptrue(); // Debugging @@ -1303,6 +1310,7 @@ // Returns an address on the stack which is reachable with a ldr/str of size // Uses rscratch2 if the address is not directly reachable Address spill_address(int size, int offset, Register tmp=rscratch2); + Address sve_spill_address(int sve_reg_size_in_bytes, int offset, Register tmp=rscratch2); bool merge_alignment_check(Register base, size_t size, int64_t cur_offset, int64_t prev_offset) const; @@ -1326,6 +1334,9 @@ void spill(FloatRegister Vx, SIMD_RegVariant T, int offset) { str(Vx, T, spill_address(1 << (int)T, offset)); } + void spill_sve_vector(FloatRegister Zx, int offset, int vector_reg_size_in_bytes) { + sve_str(Zx, sve_spill_address(vector_reg_size_in_bytes, offset)); + } void unspill(Register Rx, bool is64, int offset) { if (is64) { ldr(Rx, spill_address(8, offset)); @@ -1336,6 +1347,9 @@ void unspill(FloatRegister Vx, SIMD_RegVariant T, int offset) { ldr(Vx, T, spill_address(1 << (int)T, offset)); } + void unspill_sve_vector(FloatRegister Zx, int offset, int vector_reg_size_in_bytes) { + sve_ldr(Zx, sve_spill_address(vector_reg_size_in_bytes, offset)); + } void spill_copy128(int src_offset, int dst_offset, Register tmp1=rscratch1, Register tmp2=rscratch2) { if (src_offset < 512 && (src_offset & 7) == 0 && @@ -1349,7 +1363,15 @@ spill(tmp1, true, dst_offset+8); } } - + void spill_copy_sve_vector_stack_to_stack(int src_offset, int dst_offset, + int sve_vec_reg_size_in_bytes) { + assert(sve_vec_reg_size_in_bytes % 16 == 0, "unexpected sve vector reg size"); + for (int i = 0; i < sve_vec_reg_size_in_bytes / 16; i++) { + spill_copy128(src_offset, dst_offset); + src_offset += 16; + dst_offset += 16; + } + } void cache_wb(Address line); void cache_wbsync(bool is_pre); };
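The sve_spill_address() helper declared here (defined in macroAssembler_aarch64.cpp above) picks between the scaled-immediate form of SVE LDR/STR, where the Address offset is a register-size multiple encoded as a MUL VL immediate, and an explicit add into a temp register. A standalone model of the decision, assuming a 32-byte vector register:

#include <cassert>

// Mirrors the condition in sve_spill_address(): the offset must be a
// multiple of the register size and the scaled index must fit in 0..255.
static bool fits_scaled_immediate(int sve_reg_size_in_bytes, int offset) {
  return offset % sve_reg_size_in_bytes == 0 &&
         offset < (1 << 8) * sve_reg_size_in_bytes;
}

int main() {
  assert(fits_scaled_immediate(32, 64));        // Address(sp, 64 / 32), i.e. #2, MUL VL
  assert(!fits_scaled_immediate(32, 40));       // unaligned: add(tmp, sp, 40) instead
  assert(!fits_scaled_immediate(32, 256 * 32)); // out of range: also via the tmp register
  return 0;
}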
--- a/src/hotspot/cpu/aarch64/register_aarch64.cpp Wed Sep 02 20:33:29 2020 -0700 +++ b/src/hotspot/cpu/aarch64/register_aarch64.cpp Tue Sep 08 15:28:06 2020 +0800 @@ -1,6 +1,6 @@ /* - * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2000, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -33,6 +33,9 @@ = ConcreteRegisterImpl::max_gpr + FloatRegisterImpl::number_of_registers * FloatRegisterImpl::max_slots_per_register; +const int ConcreteRegisterImpl::max_pr + = ConcreteRegisterImpl::max_fpr + PRegisterImpl::number_of_registers; + const char* RegisterImpl::name() const { const char* names[number_of_registers] = { "c_rarg0", "c_rarg1", "c_rarg2", "c_rarg3", "c_rarg4", "c_rarg5", "c_rarg6", "c_rarg7", @@ -54,3 +57,11 @@ }; return is_valid() ? names[encoding()] : "noreg"; } + +const char* PRegisterImpl::name() const { + const char* names[number_of_registers] = { + "p0", "p1", "p2", "p3", "p4", "p5", "p6", "p7", + "p8", "p9", "p10", "p11", "p12", "p13", "p14", "p15" + }; + return is_valid() ? names[encoding()] : "noreg"; +}
--- a/src/hotspot/cpu/aarch64/register_aarch64.hpp Wed Sep 02 20:33:29 2020 -0700 +++ b/src/hotspot/cpu/aarch64/register_aarch64.hpp Tue Sep 08 15:28:06 2020 +0800 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2000, 2020, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * @@ -129,9 +129,10 @@ public: enum { number_of_registers = 32, - max_slots_per_register = 4, + max_slots_per_register = 8, save_slots_per_register = 2, - extra_save_slots_per_register = max_slots_per_register - save_slots_per_register + slots_per_neon_register = 4, + extra_save_slots_per_neon_register = slots_per_neon_register - save_slots_per_register }; // construction @@ -187,6 +188,88 @@ CONSTANT_REGISTER_DECLARATION(FloatRegister, v30 , (30)); CONSTANT_REGISTER_DECLARATION(FloatRegister, v31 , (31)); +// SVE vector registers, shared with the SIMD&FP v0-v31. Vn maps to Zn[127:0]. +CONSTANT_REGISTER_DECLARATION(FloatRegister, z0 , ( 0)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, z1 , ( 1)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, z2 , ( 2)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, z3 , ( 3)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, z4 , ( 4)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, z5 , ( 5)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, z6 , ( 6)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, z7 , ( 7)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, z8 , ( 8)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, z9 , ( 9)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, z10 , (10)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, z11 , (11)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, z12 , (12)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, z13 , (13)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, z14 , (14)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, z15 , (15)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, z16 , (16)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, z17 , (17)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, z18 , (18)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, z19 , (19)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, z20 , (20)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, z21 , (21)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, z22 , (22)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, z23 , (23)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, z24 , (24)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, z25 , (25)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, z26 , (26)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, z27 , (27)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, z28 , (28)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, z29 , (29)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, z30 , (30)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, z31 , (31)); + + +class PRegisterImpl; +typedef PRegisterImpl* PRegister; +inline PRegister as_PRegister(int encoding) { + return (PRegister)(intptr_t)encoding; +} + +// The implementation of predicate registers for the architecture +class PRegisterImpl: public AbstractRegisterImpl { + public: + enum { + number_of_registers = 16, + max_slots_per_register = 1 + }; + + // construction + inline friend PRegister as_PRegister(int encoding); + + VMReg as_VMReg(); + + // derived registers, offsets, and addresses + PRegister successor() const { return as_PRegister(encoding() + 1); } + + // accessors + int encoding() const { 
assert(is_valid(), "invalid register"); return (intptr_t)this; } + int encoding_nocheck() const { return (intptr_t)this; } + bool is_valid() const { return 0 <= (intptr_t)this && (intptr_t)this < number_of_registers; } + const char* name() const; +}; + +// The predicate registers of SVE. +CONSTANT_REGISTER_DECLARATION(PRegister, p0, ( 0)); +CONSTANT_REGISTER_DECLARATION(PRegister, p1, ( 1)); +CONSTANT_REGISTER_DECLARATION(PRegister, p2, ( 2)); +CONSTANT_REGISTER_DECLARATION(PRegister, p3, ( 3)); +CONSTANT_REGISTER_DECLARATION(PRegister, p4, ( 4)); +CONSTANT_REGISTER_DECLARATION(PRegister, p5, ( 5)); +CONSTANT_REGISTER_DECLARATION(PRegister, p6, ( 6)); +CONSTANT_REGISTER_DECLARATION(PRegister, p7, ( 7)); +CONSTANT_REGISTER_DECLARATION(PRegister, p8, ( 8)); +CONSTANT_REGISTER_DECLARATION(PRegister, p9, ( 9)); +CONSTANT_REGISTER_DECLARATION(PRegister, p10, (10)); +CONSTANT_REGISTER_DECLARATION(PRegister, p11, (11)); +CONSTANT_REGISTER_DECLARATION(PRegister, p12, (12)); +CONSTANT_REGISTER_DECLARATION(PRegister, p13, (13)); +CONSTANT_REGISTER_DECLARATION(PRegister, p14, (14)); +CONSTANT_REGISTER_DECLARATION(PRegister, p15, (15)); + // Need to know the total number of registers of all sorts for SharedInfo. // Define a class that exports it. class ConcreteRegisterImpl : public AbstractRegisterImpl { @@ -199,12 +282,14 @@ number_of_registers = (RegisterImpl::max_slots_per_register * RegisterImpl::number_of_registers + FloatRegisterImpl::max_slots_per_register * FloatRegisterImpl::number_of_registers + + PRegisterImpl::max_slots_per_register * PRegisterImpl::number_of_registers + 1) // flags }; // added to make it compile static const int max_gpr; static const int max_fpr; + static const int max_pr; }; // A set of registers
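As with the existing Register and FloatRegister types, a PRegister carries no storage: the pointer's integer value is the encoding, so as_PRegister() is just a cast and the accessors above decode it. A quick illustration (assumes the declarations from this header):

#include <cassert>

int main() {
  PRegister p = as_PRegister(3);
  assert(p->is_valid());                   // 0 <= 3 < number_of_registers (16)
  assert(p->encoding() == 3);              // the pointer value is the encoding
  assert(p->successor()->encoding() == 4); // p4 follows p3
  return 0;
}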
--- a/src/hotspot/cpu/aarch64/register_definitions_aarch64.cpp Wed Sep 02 20:33:29 2020 -0700 +++ b/src/hotspot/cpu/aarch64/register_definitions_aarch64.cpp Tue Sep 08 15:28:06 2020 +0800 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2002, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2002, 2020, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014, Red Hat Inc. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * @@ -154,3 +154,55 @@ REGISTER_DEFINITION(Register, rheapbase); REGISTER_DEFINITION(Register, r31_sp); + +REGISTER_DEFINITION(FloatRegister, z0); +REGISTER_DEFINITION(FloatRegister, z1); +REGISTER_DEFINITION(FloatRegister, z2); +REGISTER_DEFINITION(FloatRegister, z3); +REGISTER_DEFINITION(FloatRegister, z4); +REGISTER_DEFINITION(FloatRegister, z5); +REGISTER_DEFINITION(FloatRegister, z6); +REGISTER_DEFINITION(FloatRegister, z7); +REGISTER_DEFINITION(FloatRegister, z8); +REGISTER_DEFINITION(FloatRegister, z9); +REGISTER_DEFINITION(FloatRegister, z10); +REGISTER_DEFINITION(FloatRegister, z11); +REGISTER_DEFINITION(FloatRegister, z12); +REGISTER_DEFINITION(FloatRegister, z13); +REGISTER_DEFINITION(FloatRegister, z14); +REGISTER_DEFINITION(FloatRegister, z15); +REGISTER_DEFINITION(FloatRegister, z16); +REGISTER_DEFINITION(FloatRegister, z17); +REGISTER_DEFINITION(FloatRegister, z18); +REGISTER_DEFINITION(FloatRegister, z19); +REGISTER_DEFINITION(FloatRegister, z20); +REGISTER_DEFINITION(FloatRegister, z21); +REGISTER_DEFINITION(FloatRegister, z22); +REGISTER_DEFINITION(FloatRegister, z23); +REGISTER_DEFINITION(FloatRegister, z24); +REGISTER_DEFINITION(FloatRegister, z25); +REGISTER_DEFINITION(FloatRegister, z26); +REGISTER_DEFINITION(FloatRegister, z27); +REGISTER_DEFINITION(FloatRegister, z28); +REGISTER_DEFINITION(FloatRegister, z29); +REGISTER_DEFINITION(FloatRegister, z30); +REGISTER_DEFINITION(FloatRegister, z31); + +REGISTER_DEFINITION(PRegister, p0); +REGISTER_DEFINITION(PRegister, p1); +REGISTER_DEFINITION(PRegister, p2); +REGISTER_DEFINITION(PRegister, p3); +REGISTER_DEFINITION(PRegister, p4); +REGISTER_DEFINITION(PRegister, p5); +REGISTER_DEFINITION(PRegister, p6); +REGISTER_DEFINITION(PRegister, p7); +REGISTER_DEFINITION(PRegister, p8); +REGISTER_DEFINITION(PRegister, p9); +REGISTER_DEFINITION(PRegister, p10); +REGISTER_DEFINITION(PRegister, p11); +REGISTER_DEFINITION(PRegister, p12); +REGISTER_DEFINITION(PRegister, p13); +REGISTER_DEFINITION(PRegister, p14); +REGISTER_DEFINITION(PRegister, p15); + +REGISTER_DEFINITION(PRegister, ptrue);
--- a/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp Wed Sep 02 20:33:29 2020 -0700 +++ b/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp Tue Sep 08 15:28:06 2020 +0800 @@ -115,11 +115,28 @@ }; OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors) { + bool use_sve = false; + int sve_vector_size_in_bytes = 0; + int sve_vector_size_in_slots = 0; + +#ifdef COMPILER2 + use_sve = Matcher::supports_scalable_vector(); + sve_vector_size_in_bytes = Matcher::scalable_vector_reg_size(T_BYTE); + sve_vector_size_in_slots = Matcher::scalable_vector_reg_size(T_FLOAT); +#endif + #if COMPILER2_OR_JVMCI if (save_vectors) { + int vect_words = 0; + int extra_save_slots_per_register = 0; // Save upper half of vector registers - int vect_words = FloatRegisterImpl::number_of_registers * FloatRegisterImpl::extra_save_slots_per_register / - VMRegImpl::slots_per_word; + if (use_sve) { + extra_save_slots_per_register = sve_vector_size_in_slots - FloatRegisterImpl::save_slots_per_register; + } else { + extra_save_slots_per_register = FloatRegisterImpl::extra_save_slots_per_neon_register; + } + vect_words = FloatRegisterImpl::number_of_registers * extra_save_slots_per_register / + VMRegImpl::slots_per_word; additional_frame_words += vect_words; } #else @@ -138,7 +155,7 @@ // Save Integer and Float registers. __ enter(); - __ push_CPU_state(save_vectors); + __ push_CPU_state(save_vectors, use_sve, sve_vector_size_in_bytes); // Set an oopmap for the call site. This oopmap will map all // oop-registers and debug-info registers as callee-saved. This @@ -162,8 +179,13 @@ for (int i = 0; i < FloatRegisterImpl::number_of_registers; i++) { FloatRegister r = as_FloatRegister(i); - int sp_offset = save_vectors ? (FloatRegisterImpl::max_slots_per_register * i) : - (FloatRegisterImpl::save_slots_per_register * i); + int sp_offset = 0; + if (save_vectors) { + sp_offset = use_sve ? (sve_vector_size_in_slots * i) : + (FloatRegisterImpl::slots_per_neon_register * i); + } else { + sp_offset = FloatRegisterImpl::save_slots_per_register * i; + } oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset), r->as_VMReg()); } @@ -172,10 +194,15 @@ } void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_vectors) { -#if !COMPILER2_OR_JVMCI +#ifdef COMPILER2 + __ pop_CPU_state(restore_vectors, Matcher::supports_scalable_vector(), + Matcher::scalable_vector_reg_size(T_BYTE)); +#else +#if !INCLUDE_JVMCI assert(!restore_vectors, "vectors are generated only by C2 and JVMCI"); #endif __ pop_CPU_state(restore_vectors); +#endif __ leave(); } @@ -1842,6 +1869,11 @@ // Force this write out before the read below __ dmb(Assembler::ISH); + if (UseSVE > 0) { + // Make sure that jni code does not change SVE vector length. + __ verify_sve_vector_length(); + } + // check for safepoint operation in progress and/or pending suspend requests Label safepoint_in_progress, safepoint_in_progress_done; { @@ -2774,6 +2806,12 @@ __ maybe_isb(); __ membar(Assembler::LoadLoad | Assembler::LoadStore); + if (UseSVE > 0 && save_vectors) { + // Reinitialize the ptrue predicate register, in case the external runtime + // call clobbers ptrue reg, as we may return to SVE compiled code. + __ reinitialize_ptrue(); + } + __ ldr(rscratch1, Address(rthread, Thread::pending_exception_offset())); __ cbz(rscratch1, noException);
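The new vect_words sizing in save_live_registers() above is easiest to check with concrete numbers. For a 256-bit SVE machine, scalable_vector_reg_size(T_FLOAT) is 8 slots per register, save_slots_per_register is 2 (the low 64 bits are always saved), and VMRegImpl::slots_per_word is 2 on a 64-bit VM; all of the values below are assumptions for illustration, not JVM code:

// 256-bit SVE example of the save-area sizing in save_live_registers().
const int fp_registers             = 32;
const int sve_vector_size_in_slots = 8;  // 32 bytes / 4-byte slots
const int save_slots_per_register  = 2;  // lower 64 bits always saved
const int slots_per_word           = 2;  // VMRegImpl on a 64-bit VM (assumed)

const int extra_save_slots = sve_vector_size_in_slots - save_slots_per_register; // 6
const int vect_words = fp_registers * extra_save_slots / slots_per_word;         // 96
// => 96 additional frame words (768 bytes) on top of the non-vector save area.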
--- a/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp Wed Sep 02 20:33:29 2020 -0700 +++ b/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp Tue Sep 08 15:28:06 2020 +0800 @@ -488,6 +488,11 @@ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), rthread, c_rarg1); + if (UseSVE > 0) { + // Reinitialize the ptrue predicate register, in case the external runtime + // call clobbers ptrue reg, as we may return to SVE compiled code. + __ reinitialize_ptrue(); + } // we should not really care that lr is no longer the callee // address. we saved the value the handler needs in r19 so we can // just copy it to r3. however, the C2 handler will push its own @@ -5028,6 +5033,12 @@ __ reset_last_Java_frame(true); __ maybe_isb(); + if (UseSVE > 0) { + // Reinitialize the ptrue predicate register, in case the external runtime + // call clobbers ptrue reg, as we may return to SVE compiled code. + __ reinitialize_ptrue(); + } + __ leave(); // check for pending exceptions
--- a/src/hotspot/cpu/aarch64/templateInterpreterGenerator_aarch64.cpp Wed Sep 02 20:33:29 2020 -0700 +++ b/src/hotspot/cpu/aarch64/templateInterpreterGenerator_aarch64.cpp Tue Sep 08 15:28:06 2020 +0800 @@ -1372,6 +1372,11 @@ __ push(dtos); __ push(ltos); + if (UseSVE > 0) { + // Make sure that jni code does not change SVE vector length. + __ verify_sve_vector_length(); + } + // change thread state __ mov(rscratch1, _thread_in_native_trans); __ lea(rscratch2, Address(rthread, JavaThread::thread_state_offset()));
--- a/src/hotspot/cpu/aarch64/vm_version_aarch64.cpp Wed Sep 02 20:33:29 2020 -0700 +++ b/src/hotspot/cpu/aarch64/vm_version_aarch64.cpp Tue Sep 08 15:28:06 2020 +0800 @@ -32,12 +32,14 @@ #include "runtime/os.hpp" #include "runtime/stubCodeGenerator.hpp" #include "runtime/vm_version.hpp" +#include "utilities/formatBuffer.hpp" #include "utilities/macros.hpp" #include OS_HEADER_INLINE(os) +#include <asm/hwcap.h> #include <sys/auxv.h> -#include <asm/hwcap.h> +#include <sys/prctl.h> #ifndef HWCAP_AES #define HWCAP_AES (1<<3) @@ -67,6 +69,20 @@ #define HWCAP_SHA512 (1 << 21) #endif +#ifndef HWCAP_SVE +#define HWCAP_SVE (1 << 22) +#endif + +#ifndef HWCAP2_SVE2 +#define HWCAP2_SVE2 (1 << 1) +#endif + +#ifndef PR_SVE_GET_VL +// For old toolchains which do not have SVE related macros defined. +#define PR_SVE_SET_VL 50 +#define PR_SVE_GET_VL 51 +#endif + int VM_Version::_cpu; int VM_Version::_model; int VM_Version::_model2; @@ -74,6 +90,7 @@ int VM_Version::_revision; int VM_Version::_stepping; bool VM_Version::_dcpop; +int VM_Version::_initial_sve_vector_length; VM_Version::PsrInfo VM_Version::_psr_info = { 0, }; static BufferBlob* stub_blob; @@ -116,7 +133,6 @@ } }; - void VM_Version::get_processor_features() { _supports_cx8 = true; _supports_atomic_getset4 = true; @@ -167,6 +183,7 @@ } uint64_t auxv = getauxval(AT_HWCAP); + uint64_t auxv2 = getauxval(AT_HWCAP2); char buf[512]; @@ -277,6 +294,12 @@ } } + if (_cpu == CPU_ARM) { + if (FLAG_IS_DEFAULT(UseSignumIntrinsic)) { + FLAG_SET_DEFAULT(UseSignumIntrinsic, true); + } + } + if (_cpu == CPU_ARM && (_model == 0xd07 || _model2 == 0xd07)) _features |= CPU_STXR_PREFETCH; // If an olde style /proc/cpuinfo (cpu_lines == 1) then if _model is an A57 (0xd07) // we assume the worst and assume we could be on a big little system and have @@ -292,6 +315,8 @@ if (auxv & HWCAP_SHA2) strcat(buf, ", sha256"); if (auxv & HWCAP_SHA512) strcat(buf, ", sha512"); if (auxv & HWCAP_ATOMICS) strcat(buf, ", lse"); + if (auxv & HWCAP_SVE) strcat(buf, ", sve"); + if (auxv2 & HWCAP2_SVE2) strcat(buf, ", sve2"); _features_string = os::strdup(buf); @@ -431,6 +456,18 @@ FLAG_SET_DEFAULT(UseBlockZeroing, false); } + if (auxv & HWCAP_SVE) { + if (FLAG_IS_DEFAULT(UseSVE)) { + FLAG_SET_DEFAULT(UseSVE, (auxv2 & HWCAP2_SVE2) ? 2 : 1); + } + if (UseSVE > 0) { + _initial_sve_vector_length = prctl(PR_SVE_GET_VL); + } + } else if (UseSVE > 0) { + warning("UseSVE specified, but not supported on current CPU. Disabling SVE."); + FLAG_SET_DEFAULT(UseSVE, 0); + } + // This machine allows unaligned memory accesses if (FLAG_IS_DEFAULT(UseUnalignedAccesses)) { FLAG_SET_DEFAULT(UseUnalignedAccesses, true); @@ -465,12 +502,47 @@ UseMontgomerySquareIntrinsic = true; } - int min_vector_size = 8; + if (UseSVE > 0) { + if (FLAG_IS_DEFAULT(MaxVectorSize)) { + MaxVectorSize = _initial_sve_vector_length; + } else if (MaxVectorSize < 16) { + warning("SVE does not support vector length less than 16 bytes. Disabling SVE."); + UseSVE = 0; + } else if ((MaxVectorSize % 16) == 0 && is_power_of_2(MaxVectorSize)) { + int new_vl = prctl(PR_SVE_SET_VL, MaxVectorSize); + _initial_sve_vector_length = new_vl; + // If MaxVectorSize is larger than system largest supported SVE vector length, above prctl() + // call will set task vector length to the system largest supported value. So, we also update + // MaxVectorSize to that largest supported value. 
+ if (new_vl < 0) { + vm_exit_during_initialization( + err_msg("Current system does not support SVE vector length for MaxVectorSize: %d", + (int)MaxVectorSize)); + } else if (new_vl != MaxVectorSize) { + warning("Current system only supports max SVE vector length %d. Set MaxVectorSize to %d", + new_vl, new_vl); + } + MaxVectorSize = new_vl; + } else { + vm_exit_during_initialization(err_msg("Unsupported MaxVectorSize: %d", (int)MaxVectorSize)); + } + } - if (!FLAG_IS_DEFAULT(MaxVectorSize)) { - if (MaxVectorSize < min_vector_size) { - warning("MaxVectorSize must be at least %i on this platform", min_vector_size); - FLAG_SET_DEFAULT(MaxVectorSize, min_vector_size); + if (UseSVE == 0) { // NEON + int min_vector_size = 8; + int max_vector_size = 16; + if (!FLAG_IS_DEFAULT(MaxVectorSize)) { + if (!is_power_of_2(MaxVectorSize)) { + vm_exit_during_initialization(err_msg("Unsupported MaxVectorSize: %d", (int)MaxVectorSize)); + } else if (MaxVectorSize < min_vector_size) { + warning("MaxVectorSize must be at least %i on this platform", min_vector_size); + FLAG_SET_DEFAULT(MaxVectorSize, min_vector_size); + } else if (MaxVectorSize > max_vector_size) { + warning("MaxVectorSize must be at most %i on this platform", max_vector_size); + FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size); + } + } else { + FLAG_SET_DEFAULT(MaxVectorSize, 16); } }
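The vector-length probe above goes through the Linux prctl(2) SVE interface. A minimal standalone probe in the same spirit (a sketch, not part of the changeset; the kernel documents the length in the low 16 bits of the return value, PR_SVE_VL_LEN_MASK, while the HotSpot code uses the raw return value):

#include <stdio.h>
#include <sys/prctl.h>

#ifndef PR_SVE_GET_VL
#define PR_SVE_GET_VL 51  // same fallback as the macro definitions above
#endif

int main() {
  int ret = prctl(PR_SVE_GET_VL);
  if (ret < 0) {
    printf("SVE not supported by this kernel/CPU\n");
    return 1;
  }
  printf("SVE vector length: %d bytes\n", ret & 0xffff); // mask off flag bits
  return 0;
}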
--- a/src/hotspot/cpu/aarch64/vm_version_aarch64.hpp Wed Sep 02 20:33:29 2020 -0700 +++ b/src/hotspot/cpu/aarch64/vm_version_aarch64.hpp Tue Sep 08 15:28:06 2020 +0800 @@ -41,6 +41,8 @@ static int _revision; static int _stepping; static bool _dcpop; + static int _initial_sve_vector_length; + struct PsrInfo { uint32_t dczid_el0; uint32_t ctr_el0; @@ -106,6 +108,7 @@ static int cpu_variant() { return _variant; } static int cpu_revision() { return _revision; } static bool supports_dcpop() { return _dcpop; } + static int get_initial_sve_vector_length() { return _initial_sve_vector_length; }; static ByteSize dczid_el0_offset() { return byte_offset_of(PsrInfo, dczid_el0); } static ByteSize ctr_el0_offset() { return byte_offset_of(PsrInfo, ctr_el0); } static bool is_zva_enabled() {
--- a/src/hotspot/cpu/aarch64/vmreg_aarch64.inline.hpp Wed Sep 02 20:33:29 2020 -0700 +++ b/src/hotspot/cpu/aarch64/vmreg_aarch64.inline.hpp Tue Sep 08 15:28:06 2020 +0800 @@ -1,6 +1,6 @@ /* - * Copyright (c) 2006, 2019, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2006, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -36,4 +36,8 @@ ConcreteRegisterImpl::max_gpr); } +inline VMReg PRegisterImpl::as_VMReg() { + return VMRegImpl::as_VMReg(encoding() + ConcreteRegisterImpl::max_fpr); +} + #endif // CPU_AARCH64_VMREG_AARCH64_INLINE_HPP
--- a/src/hotspot/cpu/arm/arm.ad Wed Sep 02 20:33:29 2020 -0700 +++ b/src/hotspot/cpu/arm/arm.ad Tue Sep 08 15:28:06 2020 +0800 @@ -1010,6 +1010,14 @@ return MaxVectorSize; } +const bool Matcher::supports_scalable_vector() { + return false; +} + +const int Matcher::scalable_vector_reg_size(const BasicType bt) { + return -1; +} + // Vector ideal reg corresponding to specified size in bytes const uint Matcher::vector_ideal_reg(int size) { assert(MaxVectorSize >= size, "");
--- a/src/hotspot/cpu/ppc/ppc.ad Wed Sep 02 20:33:29 2020 -0700 +++ b/src/hotspot/cpu/ppc/ppc.ad Tue Sep 08 15:28:06 2020 +0800 @@ -2383,6 +2383,14 @@ return max_vector_size(bt); // Same as max. } +const bool Matcher::supports_scalable_vector() { + return false; +} + +const int Matcher::scalable_vector_reg_size(const BasicType bt) { + return -1; +} + // PPC implementation uses VSX load/store instructions (if // SuperwordUseVSX) which support 4 byte but not arbitrary alignment const bool Matcher::misaligned_vectors_ok() {
--- a/src/hotspot/cpu/s390/s390.ad Wed Sep 02 20:33:29 2020 -0700 +++ b/src/hotspot/cpu/s390/s390.ad Tue Sep 08 15:28:06 2020 +0800 @@ -1614,6 +1614,14 @@ return max_vector_size(bt); // Same as max. } +const bool Matcher::supports_scalable_vector() { + return false; +} + +const int Matcher::scalable_vector_reg_size(const BasicType bt) { + return -1; +} + // z/Architecture does support misaligned store/load at minimal extra cost. const bool Matcher::misaligned_vectors_ok() { return true;
--- a/src/hotspot/cpu/x86/x86.ad Wed Sep 02 20:33:29 2020 -0700 +++ b/src/hotspot/cpu/x86/x86.ad Tue Sep 08 15:28:06 2020 +0800 @@ -1,5 +1,5 @@ // -// Copyright (c) 2011, 2019, Oracle and/or its affiliates. All rights reserved. +// Copyright (c) 2011, 2020, Oracle and/or its affiliates. All rights reserved. // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. // // This code is free software; you can redistribute it and/or modify it @@ -1872,6 +1872,14 @@ return MIN2(size,max_size); } +const bool Matcher::supports_scalable_vector() { + return false; +} + +const int Matcher::scalable_vector_reg_size(const BasicType bt) { + return -1; +} + // Vector ideal reg corresponding to specified size in bytes const uint Matcher::vector_ideal_reg(int size) { assert(MaxVectorSize >= size, "");
--- a/src/hotspot/cpu/x86/x86_64.ad Wed Sep 02 20:33:29 2020 -0700 +++ b/src/hotspot/cpu/x86/x86_64.ad Tue Sep 08 15:28:06 2020 +0800 @@ -2834,7 +2834,7 @@ RAX_H_num // Op_RegL }; // Excluded flags and vector registers. - assert(ARRAY_SIZE(hi) == _last_machine_leaf - 6, "missing type"); + assert(ARRAY_SIZE(hi) == _last_machine_leaf - 8, "missing type"); return OptoRegPair(hi[ideal_reg], lo[ideal_reg]); %} %}
--- a/src/hotspot/os/linux/cgroupSubsystem_linux.cpp Wed Sep 02 20:33:29 2020 -0700 +++ b/src/hotspot/os/linux/cgroupSubsystem_linux.cpp Tue Sep 08 15:28:06 2020 +0800 @@ -267,7 +267,7 @@ // as to avoid memory stomping of the _mount_path pointer later on in the cgroup v1 // block in the hybrid case. // - if (is_cgroupsV2 && sscanf(p, "%*d %*d %*d:%*d %*s %s %*[^-]- %s cgroup2 %*s", tmp_mount_point, tmp_fs_type) == 2) { + if (is_cgroupsV2 && sscanf(p, "%*d %*d %*d:%*d %*s %s %*[^-]- %s %*s %*s", tmp_mount_point, tmp_fs_type) == 2) { // we likely have an early match return (e.g. cgroup fs match), be sure we have cgroup2 as fstype if (!cgroupv2_mount_point_found && strcmp("cgroup2", tmp_fs_type) == 0) { cgroupv2_mount_point_found = true; @@ -289,7 +289,7 @@ * Example for host: * 34 28 0:29 / /sys/fs/cgroup/memory rw,nosuid,nodev,noexec,relatime shared:16 - cgroup cgroup rw,memory */ - if (sscanf(p, "%*d %*d %*d:%*d %s %s %*[^-]- %s cgroup %s", tmproot, tmpmount, tmp_fs_type, tmpcgroups) == 4) { + if (sscanf(p, "%*d %*d %*d:%*d %s %s %*[^-]- %s %*s %s", tmproot, tmpmount, tmp_fs_type, tmpcgroups) == 4) { if (strcmp("cgroup", tmp_fs_type) != 0) { // Skip cgroup2 fs lines on hybrid or unified hierarchy. continue;
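The sscanf change above stops requiring a literal "cgroup2"/"cgroup" token after the filesystem type and instead skips the mount source with %*s, while the filesystem type itself is still scanned into tmp_fs_type and verified with strcmp afterwards. Below is a minimal standalone sketch of that parse-then-compare pattern; the sample mountinfo line and buffer sizes are illustrative, not HotSpot code.

#include <cstdio>
#include <cstring>

int main() {
  // Hypothetical /proc/self/mountinfo line; "-" separates the optional fields.
  const char* line = "34 28 0:29 / /sys/fs/cgroup ro - cgroup2 cgroup2 rw";
  char mount_point[256];
  char fs_type[256];
  // "%*[^-]- " skips everything up to the separator, as in the real format string.
  if (sscanf(line, "%*d %*d %*d:%*d %*s %255s %*[^-]- %255s %*s %*s",
             mount_point, fs_type) == 2) {
    if (strcmp("cgroup2", fs_type) == 0) {  // decide v1 vs v2 after parsing
      printf("cgroup v2 mounted at %s\n", mount_point);
    }
  }
  return 0;
}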
--- a/src/hotspot/share/adlc/archDesc.cpp Wed Sep 02 20:33:29 2020 -0700 +++ b/src/hotspot/share/adlc/archDesc.cpp Tue Sep 08 15:28:06 2020 +0800 @@ -1,5 +1,5 @@ // -// Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved. +// Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. // // This code is free software; you can redistribute it and/or modify it @@ -934,6 +934,7 @@ // Match Vector types. if (strncmp(idealOp, "Vec",3)==0) { switch(last_char) { + case 'A': return "TypeVect::VECTA"; case 'S': return "TypeVect::VECTS"; case 'D': return "TypeVect::VECTD"; case 'X': return "TypeVect::VECTX"; @@ -944,6 +945,10 @@ } } + if (strncmp(idealOp, "RegVMask", 8) == 0) { + return "Type::BOTTOM"; + } + // !!!!! switch(last_char) { case 'I': return "TypeInt::INT";
--- a/src/hotspot/share/adlc/formssel.cpp Wed Sep 02 20:33:29 2020 -0700 +++ b/src/hotspot/share/adlc/formssel.cpp Tue Sep 08 15:28:06 2020 +0800 @@ -3943,6 +3943,8 @@ strcmp(opType,"RegL")==0 || strcmp(opType,"RegF")==0 || strcmp(opType,"RegD")==0 || + strcmp(opType,"RegVMask")==0 || + strcmp(opType,"VecA")==0 || strcmp(opType,"VecS")==0 || strcmp(opType,"VecD")==0 || strcmp(opType,"VecX")==0 ||
--- a/src/hotspot/share/ci/ciEnv.cpp Wed Sep 02 20:33:29 2020 -0700 +++ b/src/hotspot/share/ci/ciEnv.cpp Tue Sep 08 15:28:06 2020 +0800 @@ -761,7 +761,7 @@ InstanceKlass* accessor_klass = accessor->get_instanceKlass(); Klass* holder_klass = holder->get_Klass(); Method* dest_method; - LinkInfo link_info(holder_klass, name, sig, accessor_klass, LinkInfo::AccessCheck::required, tag); + LinkInfo link_info(holder_klass, name, sig, accessor_klass, LinkInfo::AccessCheck::required, LinkInfo::LoaderConstraintCheck::required, tag); switch (bc) { case Bytecodes::_invokestatic: dest_method =
--- a/src/hotspot/share/ci/ciMethod.cpp Wed Sep 02 20:33:29 2020 -0700 +++ b/src/hotspot/share/ci/ciMethod.cpp Tue Sep 08 15:28:06 2020 +0800 @@ -809,7 +809,8 @@ Symbol* h_signature = signature()->get_symbol(); LinkInfo link_info(resolved, h_name, h_signature, caller_klass, - check_access ? LinkInfo::AccessCheck::required : LinkInfo::AccessCheck::skip); + check_access ? LinkInfo::AccessCheck::required : LinkInfo::AccessCheck::skip, + check_access ? LinkInfo::LoaderConstraintCheck::required : LinkInfo::LoaderConstraintCheck::skip); Method* m = NULL; // Only do exact lookup if receiver klass has been linked. Otherwise, // the vtable has not been setup, and the LinkResolver will fail.
--- a/src/hotspot/share/classfile/javaClasses.hpp Wed Sep 02 20:33:29 2020 -0700 +++ b/src/hotspot/share/classfile/javaClasses.hpp Tue Sep 08 15:28:06 2020 +0800 @@ -1135,7 +1135,11 @@ MN_NESTMATE_CLASS = 0x00000001, MN_HIDDEN_CLASS = 0x00000002, MN_STRONG_LOADER_LINK = 0x00000004, - MN_ACCESS_VM_ANNOTATIONS = 0x00000008 + MN_ACCESS_VM_ANNOTATIONS = 0x00000008, + // Lookup modes + MN_MODULE_MODE = 0x00000010, + MN_UNCONDITIONAL_MODE = 0x00000020, + MN_TRUSTED_MODE = -1 }; // Accessors for code generation:
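For orientation, a small self-contained sketch of how the new MemberName lookup-mode bits compose and are queried; the constant values come from the patch, while the combination tested is purely illustrative.

#include <cstdio>

enum {
  MN_NESTMATE_CLASS        = 0x00000001,
  MN_HIDDEN_CLASS          = 0x00000002,
  MN_STRONG_LOADER_LINK    = 0x00000004,
  MN_ACCESS_VM_ANNOTATIONS = 0x00000008,
  // Lookup modes
  MN_MODULE_MODE           = 0x00000010,
  MN_UNCONDITIONAL_MODE    = 0x00000020,
  MN_TRUSTED_MODE          = -1  // all bits set: every mode allowed
};

int main() {
  int lookup_modes = MN_MODULE_MODE | MN_UNCONDITIONAL_MODE;
  printf("module=%d unconditional=%d trusted=%d\n",
         (lookup_modes & MN_MODULE_MODE) != 0,
         (lookup_modes & MN_UNCONDITIONAL_MODE) != 0,
         lookup_modes == MN_TRUSTED_MODE);
  return 0;
}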
--- a/src/hotspot/share/classfile/systemDictionary.cpp Wed Sep 02 20:33:29 2020 -0700 +++ b/src/hotspot/share/classfile/systemDictionary.cpp Tue Sep 08 15:28:06 2020 +0800 @@ -2830,7 +2830,7 @@ // There's special logic on JDK side to handle them // (see MethodHandles.linkMethodHandleConstant() and MethodHandles.findVirtualForMH()). } else { - MethodHandles::resolve_MemberName(mname, caller, /*speculative_resolve*/false, CHECK_(empty)); + MethodHandles::resolve_MemberName(mname, caller, 0, false /*speculative_resolve*/, CHECK_(empty)); } // After method/field resolution succeeded, it's safe to resolve MH signature as well.
--- a/src/hotspot/share/classfile/vmSymbols.cpp Wed Sep 02 20:33:29 2020 -0700 +++ b/src/hotspot/share/classfile/vmSymbols.cpp Tue Sep 08 15:28:06 2020 +0800 @@ -815,6 +815,14 @@ case vmIntrinsics::_isWhitespace: if (!UseCharacterCompareIntrinsics) return true; break; + case vmIntrinsics::_dcopySign: + case vmIntrinsics::_fcopySign: + if (!InlineMathNatives || !UseCopySignIntrinsic) return true; + break; + case vmIntrinsics::_dsignum: + case vmIntrinsics::_fsignum: + if (!InlineMathNatives || !UseSignumIntrinsic) return true; + break; #endif // COMPILER2 default: return false;
--- a/src/hotspot/share/classfile/vmSymbols.hpp Wed Sep 02 20:33:29 2020 -0700 +++ b/src/hotspot/share/classfile/vmSymbols.hpp Tue Sep 08 15:28:06 2020 +0800 @@ -789,6 +789,8 @@ do_name(negateExact_name,"negateExact") \ do_name(subtractExact_name,"subtractExact") \ do_name(fma_name, "fma") \ + do_name(copySign_name, "copySign") \ + do_name(signum_name,"signum") \ \ do_intrinsic(_dabs, java_lang_Math, abs_name, double_double_signature, F_S) \ do_intrinsic(_fabs, java_lang_Math, abs_name, float_float_signature, F_S) \ @@ -827,6 +829,10 @@ do_intrinsic(_minF, java_lang_Math, min_name, float2_float_signature, F_S) \ do_intrinsic(_maxD, java_lang_Math, max_name, double2_double_signature, F_S) \ do_intrinsic(_minD, java_lang_Math, min_name, double2_double_signature, F_S) \ + do_intrinsic(_dcopySign, java_lang_Math, copySign_name, double2_double_signature, F_S) \ + do_intrinsic(_fcopySign, java_lang_Math, copySign_name, float2_float_signature, F_S) \ + do_intrinsic(_dsignum, java_lang_Math, signum_name, double_double_signature, F_S) \ + do_intrinsic(_fsignum, java_lang_Math, signum_name, float_float_signature, F_S) \ \ do_intrinsic(_floatToRawIntBits, java_lang_Float, floatToRawIntBits_name, float_int_signature, F_S) \ do_name( floatToRawIntBits_name, "floatToRawIntBits") \
--- a/src/hotspot/share/interpreter/linkResolver.cpp Wed Sep 02 20:33:29 2020 -0700 +++ b/src/hotspot/share/interpreter/linkResolver.cpp Tue Sep 08 15:28:06 2020 +0800 @@ -241,6 +241,7 @@ // Coming from the constant pool always checks access _check_access = true; + _check_loader_constraints = true; } LinkInfo::LinkInfo(const constantPoolHandle& pool, int index, TRAPS) { @@ -256,17 +257,20 @@ // Coming from the constant pool always checks access _check_access = true; + _check_loader_constraints = true; } #ifndef PRODUCT void LinkInfo::print() { ResourceMark rm; - tty->print_cr("Link resolved_klass=%s name=%s signature=%s current_klass=%s check_access=%s", + tty->print_cr("Link resolved_klass=%s name=%s signature=%s current_klass=%s check_access=%s check_loader_constraints=%s", _resolved_klass->name()->as_C_string(), _name->as_C_string(), _signature->as_C_string(), _current_klass == NULL ? "(none)" : _current_klass->name()->as_C_string(), - _check_access ? "true" : "false"); + _check_access ? "true" : "false", + _check_loader_constraints ? "true" : "false"); + } #endif // PRODUCT //------------------------------------------------------------------------------------------------------------------------ @@ -795,7 +799,8 @@ resolved_method->method_holder(), resolved_method, CHECK_NULL); - + } + if (link_info.check_loader_constraints()) { // check loader constraints check_method_loader_constraints(link_info, resolved_method, "method", CHECK_NULL); } @@ -891,7 +896,8 @@ resolved_method->method_holder(), resolved_method, CHECK_NULL); - + } + if (link_info.check_loader_constraints()) { check_method_loader_constraints(link_info, resolved_method, "interface method", CHECK_NULL); } @@ -1055,7 +1061,7 @@ } } - if ((sel_klass != current_klass) && (current_klass != NULL)) { + if (link_info.check_loader_constraints() && (sel_klass != current_klass) && (current_klass != NULL)) { check_field_loader_constraints(field, sig, current_klass, sel_klass, CHECK); } @@ -1089,7 +1095,8 @@ // Use updated LinkInfo to reresolve with resolved method holder LinkInfo new_info(resolved_klass, link_info.name(), link_info.signature(), link_info.current_klass(), - link_info.check_access() ? LinkInfo::AccessCheck::required : LinkInfo::AccessCheck::skip); + link_info.check_access() ? LinkInfo::AccessCheck::required : LinkInfo::AccessCheck::skip, + link_info.check_loader_constraints() ? LinkInfo::LoaderConstraintCheck::required : LinkInfo::LoaderConstraintCheck::skip); resolved_method = linktime_resolve_static_method(new_info, CHECK); } @@ -1250,7 +1257,7 @@ ss.print("'"); THROW_MSG(vmSymbols::java_lang_AbstractMethodError(), ss.as_string()); // check loader constraints if found a different method - } else if (sel_method() != resolved_method()) { + } else if (link_info.check_loader_constraints() && sel_method() != resolved_method()) { check_method_loader_constraints(link_info, sel_method, "method", CHECK); } }
--- a/src/hotspot/share/interpreter/linkResolver.hpp Wed Sep 02 20:33:29 2020 -0700 +++ b/src/hotspot/share/interpreter/linkResolver.hpp Tue Sep 08 15:28:06 2020 +0800 @@ -141,10 +141,12 @@ Klass* _current_klass; // class that owns the constant pool methodHandle _current_method; // sending method bool _check_access; + bool _check_loader_constraints; constantTag _tag; public: enum class AccessCheck { required, skip }; + enum class LoaderConstraintCheck { required, skip }; LinkInfo(const constantPoolHandle& pool, int index, const methodHandle& current_method, TRAPS); LinkInfo(const constantPoolHandle& pool, int index, TRAPS); @@ -152,33 +154,38 @@ // Condensed information from other call sites within the vm. LinkInfo(Klass* resolved_klass, Symbol* name, Symbol* signature, Klass* current_klass, AccessCheck check_access = AccessCheck::required, + LoaderConstraintCheck check_loader_constraints = LoaderConstraintCheck::required, constantTag tag = JVM_CONSTANT_Invalid) : _name(name), _signature(signature), _resolved_klass(resolved_klass), _current_klass(current_klass), _current_method(methodHandle()), - _check_access(check_access == AccessCheck::required), _tag(tag) {} + _check_access(check_access == AccessCheck::required), + _check_loader_constraints(check_loader_constraints == LoaderConstraintCheck::required), _tag(tag) {} LinkInfo(Klass* resolved_klass, Symbol* name, Symbol* signature, const methodHandle& current_method, AccessCheck check_access = AccessCheck::required, + LoaderConstraintCheck check_loader_constraints = LoaderConstraintCheck::required, constantTag tag = JVM_CONSTANT_Invalid) : _name(name), _signature(signature), _resolved_klass(resolved_klass), _current_klass(current_method->method_holder()), _current_method(current_method), - _check_access(check_access == AccessCheck::required), _tag(tag) {} + _check_access(check_access == AccessCheck::required), + _check_loader_constraints(check_loader_constraints == LoaderConstraintCheck::required), _tag(tag) {} + // Case where we just find the method and don't check access against the current class LinkInfo(Klass* resolved_klass, Symbol*name, Symbol* signature) : _name(name), _signature(signature), _resolved_klass(resolved_klass), _current_klass(NULL), _current_method(methodHandle()), - _check_access(false), _tag(JVM_CONSTANT_Invalid) {} + _check_access(false), _check_loader_constraints(false), _tag(JVM_CONSTANT_Invalid) {} // accessors - Symbol* name() const { return _name; } - Symbol* signature() const { return _signature; } - Klass* resolved_klass() const { return _resolved_klass; } - Klass* current_klass() const { return _current_klass; } - Method* current_method() const { return _current_method(); } - constantTag tag() const { return _tag; } - bool check_access() const { return _check_access; } - + Symbol* name() const { return _name; } + Symbol* signature() const { return _signature; } + Klass* resolved_klass() const { return _resolved_klass; } + Klass* current_klass() const { return _current_klass; } + Method* current_method() const { return _current_method(); } + constantTag tag() const { return _tag; } + bool check_access() const { return _check_access; } + bool check_loader_constraints() const { return _check_loader_constraints; } void print() PRODUCT_RETURN; };
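The LinkInfo change follows one pattern throughout: each boolean toggle gets its own enum class so that call sites stay self-describing instead of passing bare true/false positionally. A minimal standalone sketch of the shape of that pattern, with assumed names rather than the HotSpot class:

#include <iostream>

class Resolver {
 public:
  enum class AccessCheck { required, skip };
  enum class LoaderConstraintCheck { required, skip };

  Resolver(AccessCheck ac = AccessCheck::required,
           LoaderConstraintCheck lc = LoaderConstraintCheck::required)
    : _check_access(ac == AccessCheck::required),
      _check_loader_constraints(lc == LoaderConstraintCheck::required) {}

  bool check_access() const             { return _check_access; }
  bool check_loader_constraints() const { return _check_loader_constraints; }

 private:
  bool _check_access;
  bool _check_loader_constraints;
};

int main() {
  // Reads unambiguously at the call site, unlike Resolver(true, false):
  Resolver r(Resolver::AccessCheck::required,
             Resolver::LoaderConstraintCheck::skip);
  std::cout << r.check_access() << " " << r.check_loader_constraints() << "\n";
  return 0;
}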
--- a/src/hotspot/share/jfr/recorder/stacktrace/jfrStackTrace.cpp Wed Sep 02 20:33:29 2020 -0700 +++ b/src/hotspot/share/jfr/recorder/stacktrace/jfrStackTrace.cpp Tue Sep 08 15:28:06 2020 +0800 @@ -180,6 +180,7 @@ u4 count = 0; _reached_root = true; + _hash = 1; while (!st.at_end()) { if (count >= _max_frames) { _reached_root = false; @@ -201,7 +202,9 @@ } const int lineno = method->line_number_from_bci(bci); // Can we determine if it's inlined? - _hash = (_hash << 2) + (unsigned int)(((size_t)mid >> 2) + (bci << 4) + type); + _hash = (_hash * 31) + mid; + _hash = (_hash * 31) + bci; + _hash = (_hash * 31) + type; _frames[count] = JfrStackFrame(mid, bci, type, lineno, method->method_holder()); st.samples_next(); count++;
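The old expression packed method id, bci and frame type into one shift-and-add step, and _hash was never reset before the loop. The new code seeds _hash with 1 and folds each field in separately with a Java-style multiply-by-31 polynomial hash. A standalone sketch of that accumulation with made-up frame values:

#include <cstdint>
#include <cstdio>

int main() {
  struct Frame { uint64_t mid; int bci; int type; };
  Frame frames[] = { {1001, 7, 0}, {1002, 42, 1} };  // hypothetical frames
  unsigned int hash = 1;  // seed, mirroring the added "_hash = 1;"
  for (const Frame& f : frames) {
    hash = (hash * 31) + (unsigned int)f.mid;  // the real mid is a traceid
    hash = (hash * 31) + f.bci;
    hash = (hash * 31) + f.type;
  }
  printf("stack trace hash: %u\n", hash);
  return 0;
}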
--- a/src/hotspot/share/jvmci/jvmciJavaClasses.cpp Wed Sep 02 20:33:29 2020 -0700 +++ b/src/hotspot/share/jvmci/jvmciJavaClasses.cpp Tue Sep 08 15:28:06 2020 +0800 @@ -90,7 +90,7 @@ #ifndef PRODUCT static void check_resolve_method(const char* call_type, Klass* resolved_klass, Symbol* method_name, Symbol* method_signature, TRAPS) { Method* method; - LinkInfo link_info(resolved_klass, method_name, method_signature, NULL, LinkInfo::AccessCheck::skip); + LinkInfo link_info(resolved_klass, method_name, method_signature, NULL, LinkInfo::AccessCheck::skip, LinkInfo::LoaderConstraintCheck::skip); if (strcmp(call_type, "call_static") == 0) { method = LinkResolver::resolve_static_call_or_null(link_info); } else if (strcmp(call_type, "call_virtual") == 0) {
--- a/src/hotspot/share/jvmci/jvmciRuntime.cpp Wed Sep 02 20:33:29 2020 -0700 +++ b/src/hotspot/share/jvmci/jvmciRuntime.cpp Tue Sep 08 15:28:06 2020 +0800 @@ -1327,7 +1327,7 @@ assert(check_klass_accessibility(accessor, holder), "holder not accessible"); Method* dest_method; - LinkInfo link_info(holder, name, sig, accessor, LinkInfo::AccessCheck::required, tag); + LinkInfo link_info(holder, name, sig, accessor, LinkInfo::AccessCheck::required, LinkInfo::LoaderConstraintCheck::required, tag); switch (bc) { case Bytecodes::_invokestatic: dest_method =
--- a/src/hotspot/share/opto/c2compiler.cpp Wed Sep 02 20:33:29 2020 -0700 +++ b/src/hotspot/share/opto/c2compiler.cpp Tue Sep 08 15:28:06 2020 +0800 @@ -458,6 +458,18 @@ case vmIntrinsics::_floor: if (!Matcher::match_rule_supported(Op_RoundDoubleMode)) return false; break; + case vmIntrinsics::_dcopySign: + if (!Matcher::match_rule_supported(Op_CopySignD)) return false; + break; + case vmIntrinsics::_fcopySign: + if (!Matcher::match_rule_supported(Op_CopySignF)) return false; + break; + case vmIntrinsics::_dsignum: + if (!Matcher::match_rule_supported(Op_SignumD)) return false; + break; + case vmIntrinsics::_fsignum: + if (!Matcher::match_rule_supported(Op_SignumF)) return false; + break; case vmIntrinsics::_hashCode: case vmIntrinsics::_identityHashCode: case vmIntrinsics::_getClass:
--- a/src/hotspot/share/opto/chaitin.cpp Wed Sep 02 20:33:29 2020 -0700 +++ b/src/hotspot/share/opto/chaitin.cpp Tue Sep 08 15:28:06 2020 +0800 @@ -77,6 +77,7 @@ if( _is_oop ) tty->print("Oop "); if( _is_float ) tty->print("Float "); if( _is_vector ) tty->print("Vector "); + if( _is_scalable ) tty->print("Scalable "); if( _was_spilled1 ) tty->print("Spilled "); if( _was_spilled2 ) tty->print("Spilled2 "); if( _direct_conflict ) tty->print("Direct_conflict "); @@ -644,7 +645,15 @@ // Live ranges record the highest register in their mask. // We want the low register for the AD file writer's convenience. OptoReg::Name hi = lrg.reg(); // Get hi register - OptoReg::Name lo = OptoReg::add(hi, (1-lrg.num_regs())); // Find lo + int num_regs = lrg.num_regs(); + if (lrg.is_scalable() && OptoReg::is_stack(hi)) { + // For scalable vector registers, num_regs is RegMask::SlotsPerVecA (the + // reg-mask granularity) while they are allocated in physical registers. + // If they are allocated on the stack, we need the actual num_regs, + // which reflects the physical length of scalable registers. + num_regs = lrg.scalable_reg_slots(); + } + OptoReg::Name lo = OptoReg::add(hi, (1-num_regs)); // Find lo // We have to use pair [lo,lo+1] even for wide vectors because // the rest of code generation works only with pairs. It is safe // since for registers encoding only 'lo' is used. @@ -802,8 +811,19 @@ // Check for vector live range (only if vector register is used). // On SPARC vector uses RegD which could be misaligned so it is not // processed as a vector in RA. - if (RegMask::is_vector(ireg)) + if (RegMask::is_vector(ireg)) { lrg._is_vector = 1; + if (ireg == Op_VecA) { + assert(Matcher::supports_scalable_vector(), "scalable vector should be supported"); + lrg._is_scalable = 1; + // For a scalable vector allocated in a physical register, num_regs is + // RegMask::SlotsPerVecA (the reg-mask granularity), which may not be + // the actual physical register size. If it is allocated on the stack, + // we need the actual physical length of the scalable vector register. 
+ lrg.set_scalable_reg_slots(Matcher::scalable_vector_reg_size(T_FLOAT)); + } + } assert(n_type->isa_vect() == NULL || lrg._is_vector || ireg == Op_RegD || ireg == Op_RegL, "vector must be in vector registers"); @@ -905,6 +925,13 @@ lrg.set_num_regs(1); lrg.set_reg_pressure(1); break; + case Op_VecA: + assert(Matcher::supports_scalable_vector(), "does not support scalable vector"); + assert(RegMask::num_registers(Op_VecA) == RegMask::SlotsPerVecA, "sanity"); + assert(lrgmask.is_aligned_sets(RegMask::SlotsPerVecA), "vector should be aligned"); + lrg.set_num_regs(RegMask::SlotsPerVecA); + lrg.set_reg_pressure(1); + break; case Op_VecS: assert(Matcher::vector_size_supported(T_BYTE,4), "sanity"); assert(RegMask::num_registers(Op_VecS) == RegMask::SlotsPerVecS, "sanity"); @@ -1305,6 +1332,46 @@ return false; } +static OptoReg::Name find_first_set(LRG &lrg, RegMask mask, int chunk) { + int num_regs = lrg.num_regs(); + OptoReg::Name assigned = mask.find_first_set(lrg, num_regs); + + if (lrg.is_scalable()) { + // a physical register is found + if (chunk == 0 && OptoReg::is_reg(assigned)) { + return assigned; + } + + // find available stack slots for scalable register + if (lrg._is_vector) { + num_regs = lrg.scalable_reg_slots(); + // if actual scalable vector register is exactly SlotsPerVecA * 32 bits + if (num_regs == RegMask::SlotsPerVecA) { + return assigned; + } + + // mask has been cleared out by clear_to_sets(SlotsPerVecA) before choose_color, but it + // does not work for scalable size. We have to find adjacent scalable_reg_slots() bits + // instead of SlotsPerVecA bits. + assigned = mask.find_first_set(lrg, num_regs); // find highest valid reg + while (OptoReg::is_valid(assigned) && RegMask::can_represent(assigned)) { + // Verify the found reg has scalable_reg_slots() bits set. + if (mask.is_valid_reg(assigned, num_regs)) { + return assigned; + } else { + // Remove more for each iteration + mask.Remove(assigned - num_regs + 1); // Unmask the lowest reg + mask.clear_to_sets(RegMask::SlotsPerVecA); // Align by SlotsPerVecA bits + assigned = mask.find_first_set(lrg, num_regs); + } + } + return OptoReg::Bad; // will cause chunk change, and retry next chunk + } + } + + return assigned; +} + // Choose a color using the biasing heuristic OptoReg::Name PhaseChaitin::bias_color( LRG &lrg, int chunk ) { @@ -1338,7 +1405,7 @@ RegMask tempmask = lrg.mask(); tempmask.AND(lrgs(copy_lrg).mask()); tempmask.clear_to_sets(lrg.num_regs()); - OptoReg::Name reg = tempmask.find_first_set(lrg.num_regs()); + OptoReg::Name reg = find_first_set(lrg, tempmask, chunk); if (OptoReg::is_valid(reg)) return reg; } @@ -1347,7 +1414,7 @@ // If no bias info exists, just go with the register selection ordering if (lrg._is_vector || lrg.num_regs() == 2) { // Find an aligned set - return OptoReg::add(lrg.mask().find_first_set(lrg.num_regs()),chunk); + return OptoReg::add(find_first_set(lrg, lrg.mask(), chunk), chunk); } // CNC - Fun hack. Alternate 1st and 2nd selection. 
Enables post-allocate @@ -1402,7 +1469,6 @@ LRG *lrg = &lrgs(lidx); _simplified = lrg->_next; - #ifndef PRODUCT if (trace_spilling()) { ttyLocker ttyl; @@ -1484,7 +1550,6 @@ // Bump register mask up to next stack chunk chunk += RegMask::CHUNK_SIZE; lrg->Set_All(); - goto retry_next_chunk; } @@ -1509,12 +1574,21 @@ int n_regs = lrg->num_regs(); assert(!lrg->_is_vector || !lrg->_fat_proj, "sanity"); if (n_regs == 1 || !lrg->_fat_proj) { - assert(!lrg->_is_vector || n_regs <= RegMask::SlotsPerVecZ, "sanity"); + if (Matcher::supports_scalable_vector()) { + assert(!lrg->_is_vector || n_regs <= RegMask::SlotsPerVecA, "sanity"); + } else { + assert(!lrg->_is_vector || n_regs <= RegMask::SlotsPerVecZ, "sanity"); + } lrg->Clear(); // Clear the mask lrg->Insert(reg); // Set regmask to match selected reg // For vectors and pairs, also insert the low bit of the pair - for (int i = 1; i < n_regs; i++) + // We always choose the high bit, then mask the low bits by register size + if (lrg->is_scalable() && OptoReg::is_stack(lrg->reg())) { // stack + n_regs = lrg->scalable_reg_slots(); + } + for (int i = 1; i < n_regs; i++) { lrg->Insert(OptoReg::add(reg,-i)); + } lrg->set_mask_size(n_regs); } else { // Else fatproj // mask must be equal to fatproj bits, by definition
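The core of the find_first_set() helper added above is a search for scalable_reg_slots() adjacent free stack slots when the mask granularity (SlotsPerVecA) does not match the real register length. A simplified, self-contained model of that adjacent-bits search over one 32-bit word; the real code walks RegMask chunks and re-aligns to SlotsPerVecA between retries:

#include <cstdint>
#include <cstdio>

// Find `size` adjacent set bits in `mask` and return the highest bit of the
// first such run, or -1 if none exists (find_first_set likewise reports the
// highest register number of the set).
static int find_adjacent_set(uint32_t mask, int size) {
  for (int lo = 0; lo + size <= 32; lo++) {
    uint32_t run = ((size == 32) ? ~0u : ((1u << size) - 1u)) << lo;
    if ((mask & run) == run) {
      return lo + size - 1;
    }
  }
  return -1;
}

int main() {
  uint32_t mask = 0x3F0;  // slots 4..9 are free
  printf("%d\n", find_adjacent_set(mask, 6));  // a 6-slot spill fits; prints 9
  return 0;
}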
--- a/src/hotspot/share/opto/chaitin.hpp Wed Sep 02 20:33:29 2020 -0700 +++ b/src/hotspot/share/opto/chaitin.hpp Tue Sep 08 15:28:06 2020 +0800 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -114,7 +114,9 @@ _msize_valid=1; if (_is_vector) { assert(!_fat_proj, "sanity"); - assert(_mask.is_aligned_sets(_num_regs), "mask is not aligned, adjacent sets"); + if (!(_is_scalable && OptoReg::is_stack(_reg))) { + assert(_mask.is_aligned_sets(_num_regs), "mask is not aligned, adjacent sets"); + } } else if (_num_regs == 2 && !_fat_proj) { assert(_mask.is_aligned_pairs(), "mask is not aligned, adjacent pairs"); } @@ -137,14 +139,37 @@ void Remove( OptoReg::Name reg ) { _mask.Remove(reg); debug_only(_msize_valid=0;) } void clear_to_sets() { _mask.clear_to_sets(_num_regs); debug_only(_msize_valid=0;) } +private: // Number of registers this live range uses when it colors -private: uint16_t _num_regs; // 2 for Longs and Doubles, 1 for all else // except _num_regs is kill count for fat_proj + + // For a scalable register, num_regs may not be the actual physical register size. + // We need the actual physical length of a scalable register when it is spilled. + // The size of one slot is 32 bits. + uint _scalable_reg_slots; // Actual scalable register length, in slots. + // Meaningful only when _is_scalable is true. public: int num_regs() const { return _num_regs; } void set_num_regs( int reg ) { assert( _num_regs == reg || !_num_regs, "" ); _num_regs = reg; } + uint scalable_reg_slots() { return _scalable_reg_slots; } + void set_scalable_reg_slots(uint slots) { + assert(_is_scalable, "scalable register"); + assert(slots > 0, "slots of scalable register is not valid"); + _scalable_reg_slots = slots; + } + + bool is_scalable() { +#ifdef ASSERT + if (_is_scalable) { + // Should only be a vector for now, but it could also be a RegVMask in the future. + assert(_is_vector && (_num_regs == RegMask::SlotsPerVecA), "unexpected scalable reg"); + } +#endif + return _is_scalable; + } + private: // Number of physical registers this live range uses when it colors // Architecture and register-set dependent @@ -170,6 +195,8 @@ uint _is_oop:1, // Live-range holds an oop _is_float:1, // True if in float registers _is_vector:1, // True if in vector registers + _is_scalable:1, // True if register size is scalable + // e.g. Arm SVE vector/predicate registers. _was_spilled1:1, // True if prior spilling on def _was_spilled2:1, // True if twice prior spilling on def _is_bound:1, // live range starts life with no
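A toy model, with assumed values, of the two sizes an LRG now tracks: num_regs stays at the mask granularity (SlotsPerVecA) while a scalable vector lives in a register, and scalable_reg_slots() supplies the hardware slot count once it is spilled. The 16 slots below model a hypothetical 512-bit SVE implementation (16 x 32-bit slots):

#include <cstdio>

struct ToyLRG {
  bool is_scalable;
  bool on_stack;
  int  num_regs;            // mask granularity, e.g. RegMask::SlotsPerVecA == 8
  int  scalable_reg_slots;  // actual hardware slots, e.g. 16 for 512-bit SVE
  int  spill_slots() const {
    return (is_scalable && on_stack) ? scalable_reg_slots : num_regs;
  }
};

int main() {
  ToyLRG in_reg  {true, false, 8, 16};
  ToyLRG spilled {true, true,  8, 16};
  printf("in register: %d slots, spilled: %d slots\n",
         in_reg.spill_slots(), spilled.spill_slots());
  return 0;
}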
--- a/src/hotspot/share/opto/classes.hpp Wed Sep 02 20:33:29 2020 -0700 +++ b/src/hotspot/share/opto/classes.hpp Tue Sep 08 15:28:06 2020 +0800 @@ -292,6 +292,10 @@ shmacro(ShenandoahEnqueueBarrier) shmacro(ShenandoahLoadReferenceBarrier) macro(SCMemProj) +macro(CopySignD) +macro(CopySignF) +macro(SignumD) +macro(SignumF) macro(SqrtD) macro(SqrtF) macro(Start)
--- a/src/hotspot/share/opto/intrinsicnode.cpp Wed Sep 02 20:33:29 2020 -0700 +++ b/src/hotspot/share/opto/intrinsicnode.cpp Tue Sep 08 15:28:06 2020 +0800 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2020, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -98,3 +98,17 @@ return bottom_type(); } +//------------------------------CopySign----------------------------------------- +CopySignDNode* CopySignDNode::make(PhaseGVN& gvn, Node* in1, Node* in2) { + return new CopySignDNode(in1, in2, gvn.makecon(TypeD::ZERO)); +} + +//------------------------------Signum------------------------------------------- +SignumDNode* SignumDNode::make(PhaseGVN& gvn, Node* in) { + return new SignumDNode(in, gvn.makecon(TypeD::ZERO), gvn.makecon(TypeD::ONE)); +} + +SignumFNode* SignumFNode::make(PhaseGVN& gvn, Node* in) { + return new SignumFNode(in, gvn.makecon(TypeF::ZERO), gvn.makecon(TypeF::ONE)); +} +
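For reference, what the new Signum node computes, written as a plain function rather than as the node implementation. The make() helper above pre-loads the 0.0 and 1.0 constants as extra inputs so the backend can materialize them; semantically the result mirrors Java's Math.signum, where zero and NaN pass through and everything else collapses to +/-1.0 with the input's sign:

#include <cmath>
#include <cstdio>

static double signum(double x) {
  // Math.signum semantics: return the argument itself for +/-0.0 and NaN,
  // otherwise 1.0 with the sign of the argument.
  return (x == 0.0 || std::isnan(x)) ? x : std::copysign(1.0, x);
}

int main() {
  printf("%.1f %.1f %.1f\n", signum(-3.5), signum(0.0), signum(42.0));
  return 0;
}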
--- a/src/hotspot/share/opto/intrinsicnode.hpp Wed Sep 02 20:33:29 2020 -0700 +++ b/src/hotspot/share/opto/intrinsicnode.hpp Tue Sep 08 15:28:06 2020 +0800 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2020, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -27,6 +27,7 @@ #include "opto/node.hpp" #include "opto/opcodes.hpp" +#include "opto/connode.hpp" //----------------------PartialSubtypeCheckNode-------------------------------- @@ -216,4 +217,44 @@ virtual uint ideal_reg() const { return Op_RegI; } }; +//------------------------------CopySign----------------------------------------- +class CopySignDNode : public Node { + protected: + CopySignDNode(Node* in1, Node* in2, Node* in3) : Node(0, in1, in2, in3) {} + public: + static CopySignDNode* make(PhaseGVN& gvn, Node* in1, Node* in2); + virtual int Opcode() const; + const Type* bottom_type() const { return TypeLong::DOUBLE; } + virtual uint ideal_reg() const { return Op_RegD; } +}; + +class CopySignFNode : public Node { + public: + CopySignFNode(Node* in1, Node* in2) : Node(0, in1, in2) {} + virtual int Opcode() const; + const Type* bottom_type() const { return TypeLong::FLOAT; } + virtual uint ideal_reg() const { return Op_RegF; } +}; + +//------------------------------Signum------------------------------------------- +class SignumDNode : public Node { + protected: + SignumDNode(Node* in1, Node* in2, Node* in3) : Node(0, in1, in2, in3) {} + public: + static SignumDNode* make(PhaseGVN& gvn, Node* in); + virtual int Opcode() const; + virtual const Type* bottom_type() const { return Type::DOUBLE; } + virtual uint ideal_reg() const { return Op_RegD; } +}; + +class SignumFNode : public Node { + protected: + SignumFNode(Node* in1, Node* in2, Node* in3) : Node(0, in1, in2, in3) {} + public: + static SignumFNode* make(PhaseGVN& gvn, Node* in); + virtual int Opcode() const; + virtual const Type* bottom_type() const { return Type::FLOAT; } + virtual uint ideal_reg() const { return Op_RegF; } +}; + #endif // SHARE_OPTO_INTRINSICNODE_HPP
--- a/src/hotspot/share/opto/library_call.cpp Wed Sep 02 20:33:29 2020 -0700 +++ b/src/hotspot/share/opto/library_call.cpp Tue Sep 08 15:28:06 2020 +0800 @@ -264,7 +264,11 @@ case vmIntrinsics::_dexp: case vmIntrinsics::_dlog: case vmIntrinsics::_dlog10: - case vmIntrinsics::_dpow: return inline_math_native(intrinsic_id()); + case vmIntrinsics::_dpow: + case vmIntrinsics::_dcopySign: + case vmIntrinsics::_fcopySign: + case vmIntrinsics::_dsignum: + case vmIntrinsics::_fsignum: return inline_math_native(intrinsic_id()); case vmIntrinsics::_min: case vmIntrinsics::_max: return inline_min_max(intrinsic_id()); @@ -1567,6 +1571,8 @@ case vmIntrinsics::_ceil: n = RoundDoubleModeNode::make(_gvn, arg, RoundDoubleModeNode::rmode_ceil); break; case vmIntrinsics::_floor: n = RoundDoubleModeNode::make(_gvn, arg, RoundDoubleModeNode::rmode_floor); break; case vmIntrinsics::_rint: n = RoundDoubleModeNode::make(_gvn, arg, RoundDoubleModeNode::rmode_rint); break; + case vmIntrinsics::_dcopySign: n = CopySignDNode::make(_gvn, arg, round_double_node(argument(2))); break; + case vmIntrinsics::_dsignum: n = SignumDNode::make(_gvn, arg); break; default: fatal_unexpected_iid(id); break; } set_result(_gvn.transform(n)); @@ -1584,6 +1590,8 @@ case vmIntrinsics::_fabs: n = new AbsFNode( arg); break; case vmIntrinsics::_iabs: n = new AbsINode( arg); break; case vmIntrinsics::_labs: n = new AbsLNode( arg); break; + case vmIntrinsics::_fcopySign: n = new CopySignFNode(arg, argument(1)); break; + case vmIntrinsics::_fsignum: n = SignumFNode::make(_gvn, arg); break; default: fatal_unexpected_iid(id); break; } set_result(_gvn.transform(n)); @@ -1668,6 +1676,11 @@ } #undef FN_PTR + case vmIntrinsics::_dcopySign: return inline_double_math(id); + case vmIntrinsics::_fcopySign: return inline_math(id); + case vmIntrinsics::_dsignum: return inline_double_math(id); + case vmIntrinsics::_fsignum: return inline_math(id); + // These intrinsics are not yet correctly implemented case vmIntrinsics::_datan2: return false;
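Similarly, the copySign semantics that the new _dcopySign/_fcopySign intrinsics accelerate can be seen through C's copysign, which agrees with Java's Math.copySign on ordinary values (a usage sketch, not the intrinsic itself):

#include <cmath>
#include <cstdio>

int main() {
  printf("%.1f\n", std::copysign(3.0, -0.5));   // magnitude 3.0, sign of -0.5 -> -3.0
  printf("%.1f\n", std::copysign(-2.0, 10.0));  // magnitude 2.0, sign of 10.0 ->  2.0
  return 0;
}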
--- a/src/hotspot/share/opto/matcher.cpp Wed Sep 02 20:33:29 2020 -0700 +++ b/src/hotspot/share/opto/matcher.cpp Tue Sep 08 15:28:06 2020 +0800 @@ -88,6 +88,7 @@ idealreg2spillmask [Op_RegF] = NULL; idealreg2spillmask [Op_RegD] = NULL; idealreg2spillmask [Op_RegP] = NULL; + idealreg2spillmask [Op_VecA] = NULL; idealreg2spillmask [Op_VecS] = NULL; idealreg2spillmask [Op_VecD] = NULL; idealreg2spillmask [Op_VecX] = NULL; @@ -101,6 +102,7 @@ idealreg2debugmask [Op_RegF] = NULL; idealreg2debugmask [Op_RegD] = NULL; idealreg2debugmask [Op_RegP] = NULL; + idealreg2debugmask [Op_VecA] = NULL; idealreg2debugmask [Op_VecS] = NULL; idealreg2debugmask [Op_VecD] = NULL; idealreg2debugmask [Op_VecX] = NULL; @@ -114,6 +116,7 @@ idealreg2mhdebugmask[Op_RegF] = NULL; idealreg2mhdebugmask[Op_RegD] = NULL; idealreg2mhdebugmask[Op_RegP] = NULL; + idealreg2mhdebugmask[Op_VecA] = NULL; idealreg2mhdebugmask[Op_VecS] = NULL; idealreg2mhdebugmask[Op_VecD] = NULL; idealreg2mhdebugmask[Op_VecX] = NULL; @@ -427,7 +430,7 @@ return rms; } -#define NOF_STACK_MASKS (3*11) +#define NOF_STACK_MASKS (3*12) // Create the initial stack mask used by values spilling to the stack. // Disallow any debug info in outgoing argument areas by setting the @@ -463,23 +466,26 @@ idealreg2mhdebugmask[Op_RegD] = &rms[16]; idealreg2mhdebugmask[Op_RegP] = &rms[17]; - idealreg2spillmask [Op_VecS] = &rms[18]; - idealreg2spillmask [Op_VecD] = &rms[19]; - idealreg2spillmask [Op_VecX] = &rms[20]; - idealreg2spillmask [Op_VecY] = &rms[21]; - idealreg2spillmask [Op_VecZ] = &rms[22]; + idealreg2spillmask [Op_VecA] = &rms[18]; + idealreg2spillmask [Op_VecS] = &rms[19]; + idealreg2spillmask [Op_VecD] = &rms[20]; + idealreg2spillmask [Op_VecX] = &rms[21]; + idealreg2spillmask [Op_VecY] = &rms[22]; + idealreg2spillmask [Op_VecZ] = &rms[23]; - idealreg2debugmask [Op_VecS] = &rms[23]; - idealreg2debugmask [Op_VecD] = &rms[24]; - idealreg2debugmask [Op_VecX] = &rms[25]; - idealreg2debugmask [Op_VecY] = &rms[26]; - idealreg2debugmask [Op_VecZ] = &rms[27]; + idealreg2debugmask [Op_VecA] = &rms[24]; + idealreg2debugmask [Op_VecS] = &rms[25]; + idealreg2debugmask [Op_VecD] = &rms[26]; + idealreg2debugmask [Op_VecX] = &rms[27]; + idealreg2debugmask [Op_VecY] = &rms[28]; + idealreg2debugmask [Op_VecZ] = &rms[29]; - idealreg2mhdebugmask[Op_VecS] = &rms[28]; - idealreg2mhdebugmask[Op_VecD] = &rms[29]; - idealreg2mhdebugmask[Op_VecX] = &rms[30]; - idealreg2mhdebugmask[Op_VecY] = &rms[31]; - idealreg2mhdebugmask[Op_VecZ] = &rms[32]; + idealreg2mhdebugmask[Op_VecA] = &rms[30]; + idealreg2mhdebugmask[Op_VecS] = &rms[31]; + idealreg2mhdebugmask[Op_VecD] = &rms[32]; + idealreg2mhdebugmask[Op_VecX] = &rms[33]; + idealreg2mhdebugmask[Op_VecY] = &rms[34]; + idealreg2mhdebugmask[Op_VecZ] = &rms[35]; OptoReg::Name i; @@ -506,6 +512,7 @@ // Keep spill masks aligned. aligned_stack_mask.clear_to_pairs(); assert(aligned_stack_mask.is_AllStack(), "should be infinite stack"); + RegMask scalable_stack_mask = aligned_stack_mask; *idealreg2spillmask[Op_RegP] = *idealreg2regmask[Op_RegP]; #ifdef _LP64 @@ -591,28 +598,47 @@ *idealreg2spillmask[Op_VecZ] = RegMask::Empty; } - if (UseFPUForSpilling) { - // This mask logic assumes that the spill operations are - // symmetric and that the registers involved are the same size. - // On sparc for instance we may have to use 64 bit moves will - // kill 2 registers when used with F0-F31. 
- idealreg2spillmask[Op_RegI]->OR(*idealreg2regmask[Op_RegF]); - idealreg2spillmask[Op_RegF]->OR(*idealreg2regmask[Op_RegI]); + if (Matcher::supports_scalable_vector()) { + int k = 1; + OptoReg::Name in = OptoReg::add(_in_arg_limit, -1); + // Exclude last input arg stack slots to avoid spilling vector register there, + // otherwise vector spills could stomp over stack slots in caller frame. + for (; (in >= init_in) && (k < scalable_vector_reg_size(T_FLOAT)); k++) { + scalable_stack_mask.Remove(in); + in = OptoReg::add(in, -1); + } + + // For VecA + scalable_stack_mask.clear_to_sets(RegMask::SlotsPerVecA); + assert(scalable_stack_mask.is_AllStack(), "should be infinite stack"); + *idealreg2spillmask[Op_VecA] = *idealreg2regmask[Op_VecA]; + idealreg2spillmask[Op_VecA]->OR(scalable_stack_mask); + } else { + *idealreg2spillmask[Op_VecA] = RegMask::Empty; + } + + if (UseFPUForSpilling) { + // This mask logic assumes that the spill operations are + // symmetric and that the registers involved are the same size. + // On sparc for instance we may have to use 64 bit moves will + // kill 2 registers when used with F0-F31. + idealreg2spillmask[Op_RegI]->OR(*idealreg2regmask[Op_RegF]); + idealreg2spillmask[Op_RegF]->OR(*idealreg2regmask[Op_RegI]); #ifdef _LP64 - idealreg2spillmask[Op_RegN]->OR(*idealreg2regmask[Op_RegF]); - idealreg2spillmask[Op_RegL]->OR(*idealreg2regmask[Op_RegD]); - idealreg2spillmask[Op_RegD]->OR(*idealreg2regmask[Op_RegL]); - idealreg2spillmask[Op_RegP]->OR(*idealreg2regmask[Op_RegD]); + idealreg2spillmask[Op_RegN]->OR(*idealreg2regmask[Op_RegF]); + idealreg2spillmask[Op_RegL]->OR(*idealreg2regmask[Op_RegD]); + idealreg2spillmask[Op_RegD]->OR(*idealreg2regmask[Op_RegL]); + idealreg2spillmask[Op_RegP]->OR(*idealreg2regmask[Op_RegD]); #else - idealreg2spillmask[Op_RegP]->OR(*idealreg2regmask[Op_RegF]); + idealreg2spillmask[Op_RegP]->OR(*idealreg2regmask[Op_RegF]); #ifdef ARM - // ARM has support for moving 64bit values between a pair of - // integer registers and a double register - idealreg2spillmask[Op_RegL]->OR(*idealreg2regmask[Op_RegD]); - idealreg2spillmask[Op_RegD]->OR(*idealreg2regmask[Op_RegL]); + // ARM has support for moving 64bit values between a pair of + // integer registers and a double register + idealreg2spillmask[Op_RegL]->OR(*idealreg2regmask[Op_RegD]); + idealreg2spillmask[Op_RegD]->OR(*idealreg2regmask[Op_RegL]); #endif #endif - } + } // Make up debug masks. Any spill slot plus callee-save (SOE) registers. 
// Caller-save (SOC, AS) registers are assumed to be trashable by the various @@ -624,6 +650,7 @@ *idealreg2debugmask [Op_RegD] = *idealreg2spillmask[Op_RegD]; *idealreg2debugmask [Op_RegP] = *idealreg2spillmask[Op_RegP]; + *idealreg2debugmask [Op_VecA] = *idealreg2spillmask[Op_VecA]; *idealreg2debugmask [Op_VecS] = *idealreg2spillmask[Op_VecS]; *idealreg2debugmask [Op_VecD] = *idealreg2spillmask[Op_VecD]; *idealreg2debugmask [Op_VecX] = *idealreg2spillmask[Op_VecX]; @@ -637,6 +664,7 @@ *idealreg2mhdebugmask[Op_RegD] = *idealreg2spillmask[Op_RegD]; *idealreg2mhdebugmask[Op_RegP] = *idealreg2spillmask[Op_RegP]; + *idealreg2mhdebugmask[Op_VecA] = *idealreg2spillmask[Op_VecA]; *idealreg2mhdebugmask[Op_VecS] = *idealreg2spillmask[Op_VecS]; *idealreg2mhdebugmask[Op_VecD] = *idealreg2spillmask[Op_VecD]; *idealreg2mhdebugmask[Op_VecX] = *idealreg2spillmask[Op_VecX]; @@ -656,6 +684,7 @@ idealreg2debugmask[Op_RegD]->SUBTRACT(*caller_save_mask); idealreg2debugmask[Op_RegP]->SUBTRACT(*caller_save_mask); + idealreg2debugmask[Op_VecA]->SUBTRACT(*caller_save_mask); idealreg2debugmask[Op_VecS]->SUBTRACT(*caller_save_mask); idealreg2debugmask[Op_VecD]->SUBTRACT(*caller_save_mask); idealreg2debugmask[Op_VecX]->SUBTRACT(*caller_save_mask); @@ -669,6 +698,7 @@ idealreg2mhdebugmask[Op_RegD]->SUBTRACT(*mh_caller_save_mask); idealreg2mhdebugmask[Op_RegP]->SUBTRACT(*mh_caller_save_mask); + idealreg2mhdebugmask[Op_VecA]->SUBTRACT(*mh_caller_save_mask); idealreg2mhdebugmask[Op_VecS]->SUBTRACT(*mh_caller_save_mask); idealreg2mhdebugmask[Op_VecD]->SUBTRACT(*mh_caller_save_mask); idealreg2mhdebugmask[Op_VecX]->SUBTRACT(*mh_caller_save_mask); @@ -929,6 +959,7 @@ idealreg2regmask[Op_RegF] = regmask_for_ideal_register(Op_RegF, ret); idealreg2regmask[Op_RegD] = regmask_for_ideal_register(Op_RegD, ret); idealreg2regmask[Op_RegL] = regmask_for_ideal_register(Op_RegL, ret); + idealreg2regmask[Op_VecA] = regmask_for_ideal_register(Op_VecA, ret); idealreg2regmask[Op_VecS] = regmask_for_ideal_register(Op_VecS, ret); idealreg2regmask[Op_VecD] = regmask_for_ideal_register(Op_VecD, ret); idealreg2regmask[Op_VecX] = regmask_for_ideal_register(Op_VecX, ret); @@ -1614,7 +1645,6 @@ } } - // Call DFA to match this node, and return svec->DFA( n->Opcode(), n ); @@ -2323,6 +2353,23 @@ n->del_req(3); break; } + case Op_MulAddS2I: { + Node* pair1 = new BinaryNode(n->in(1), n->in(2)); + Node* pair2 = new BinaryNode(n->in(3), n->in(4)); + n->set_req(1, pair1); + n->set_req(2, pair2); + n->del_req(4); + n->del_req(3); + break; + } + case Op_CopySignD: + case Op_SignumF: + case Op_SignumD: { + Node* pair = new BinaryNode(n->in(2), n->in(3)); + n->set_req(2, pair); + n->del_req(3); + break; + } case Op_VectorBlend: case Op_VectorInsert: { Node* pair = new BinaryNode(n->in(1), n->in(2)); @@ -2337,23 +2384,14 @@ n->del_req(MemNode::ValueIn+1); break; } - case Op_MulAddS2I: { - Node* pair1 = new BinaryNode(n->in(1), n->in(2)); - Node* pair2 = new BinaryNode(n->in(3), n->in(4)); - n->set_req(1, pair1); - n->set_req(2, pair2); - n->del_req(4); - n->del_req(3); - break; - } case Op_VectorMaskCmp: { n->set_req(1, new BinaryNode(n->in(1), n->in(2))); n->set_req(2, n->in(3)); n->del_req(3); break; - } default: break; + } } } @@ -2483,7 +2521,7 @@ const RegMask* Matcher::regmask_for_ideal_register(uint ideal_reg, Node* ret) { const Type* t = Type::mreg2type[ideal_reg]; if (t == NULL) { - assert(ideal_reg >= Op_VecS && ideal_reg <= Op_VecZ, "not a vector: %d", ideal_reg); + assert(ideal_reg >= Op_VecA && ideal_reg <= Op_VecZ, "not a vector: %d", 
ideal_reg); return NULL; // not supported } Node* fp = ret->in(TypeFunc::FramePtr); @@ -2500,6 +2538,7 @@ case Op_RegD: spill = new LoadDNode(NULL, mem, fp, atp, t, mo); break; case Op_RegL: spill = new LoadLNode(NULL, mem, fp, atp, t->is_long(), mo); break; + case Op_VecA: // fall-through case Op_VecS: // fall-through case Op_VecD: // fall-through case Op_VecX: // fall-through
--- a/src/hotspot/share/opto/matcher.hpp Wed Sep 02 20:33:29 2020 -0700 +++ b/src/hotspot/share/opto/matcher.hpp Tue Sep 08 15:28:06 2020 +0800 @@ -338,6 +338,10 @@ Matcher::min_vector_size(bt) <= size); } + static const bool supports_scalable_vector(); + // Actual max scalable vector register length, in elements of the given basic type. + static const int scalable_vector_reg_size(const BasicType bt); + // Vector ideal reg static const uint vector_ideal_reg(int len);
--- a/src/hotspot/share/opto/opcodes.cpp Wed Sep 02 20:33:29 2020 -0700 +++ b/src/hotspot/share/opto/opcodes.cpp Tue Sep 08 15:28:06 2020 +0800 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1998, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1998, 2020, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -38,12 +38,14 @@ "RegF", "RegD", "RegL", - "RegFlags", + "VecA", "VecS", "VecD", "VecX", "VecY", "VecZ", + "RegVMask", + "RegFlags", "_last_machine_leaf", #include "classes.hpp" "_last_class_name",
--- a/src/hotspot/share/opto/opcodes.hpp Wed Sep 02 20:33:29 2020 -0700 +++ b/src/hotspot/share/opto/opcodes.hpp Tue Sep 08 15:28:06 2020 +0800 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -37,11 +37,13 @@ macro(RegF) // Machine float register macro(RegD) // Machine double register macro(RegL) // Machine long register + macro(VecA) // Machine vectora register macro(VecS) // Machine vectors register macro(VecD) // Machine vectord register macro(VecX) // Machine vectorx register macro(VecY) // Machine vectory register macro(VecZ) // Machine vectorz register + macro(RegVMask) // Vector mask/predicate register macro(RegFlags) // Machine flags register _last_machine_leaf, // Split between regular opcodes and machine #include "classes.hpp"
--- a/src/hotspot/share/opto/postaloc.cpp Wed Sep 02 20:33:29 2020 -0700 +++ b/src/hotspot/share/opto/postaloc.cpp Tue Sep 08 15:28:06 2020 +0800 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1998, 2016, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1998, 2020, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -266,9 +266,9 @@ Node *val = skip_copies(n->in(k)); if (val == x) return blk_adjust; // No progress? - int n_regs = RegMask::num_registers(val->ideal_reg()); uint val_idx = _lrg_map.live_range_id(val); OptoReg::Name val_reg = lrgs(val_idx).reg(); + int n_regs = RegMask::num_registers(val->ideal_reg(), lrgs(val_idx)); // See if it happens to already be in the correct register! // (either Phi's direct register, or the common case of the name @@ -305,8 +305,26 @@ } Node *vv = value[reg]; + // For a scalable register, the number of registers may differ between + // "val_reg" and "reg", for example when "val" resides in a register + // but "reg" is located on the stack. + if (lrgs(val_idx).is_scalable()) { + assert(val->ideal_reg() == Op_VecA, "scalable vector register"); + if (OptoReg::is_stack(reg)) { + n_regs = lrgs(val_idx).scalable_reg_slots(); + } else { + n_regs = RegMask::SlotsPerVecA; + } + } if (n_regs > 1) { // Doubles and vectors check for aligned-adjacent set - uint last = (n_regs-1); // Looking for the last part of a set + uint last; + if (lrgs(val_idx).is_scalable()) { + assert(val->ideal_reg() == Op_VecA, "scalable vector register"); + // For a scalable vector register, the regmask is always aligned to SlotsPerVecA bits + last = RegMask::SlotsPerVecA - 1; + } else { + last = (n_regs-1); // Looking for the last part of a set + } if ((reg&last) != last) continue; // Wrong part of a set if (!register_contains_value(vv, reg, n_regs, value)) continue; // Different value } @@ -591,7 +609,7 @@ uint k; Node *phi = block->get_node(j); uint pidx = _lrg_map.live_range_id(phi); - OptoReg::Name preg = lrgs(_lrg_map.live_range_id(phi)).reg(); + OptoReg::Name preg = lrgs(pidx).reg(); // Remove copies remaining on edges. Check for junk phi. Node *u = NULL; @@ -619,7 +637,7 @@ if( pidx ) { value.map(preg,phi); regnd.map(preg,phi); - int n_regs = RegMask::num_registers(phi->ideal_reg()); + int n_regs = RegMask::num_registers(phi->ideal_reg(), lrgs(pidx)); for (int l = 1; l < n_regs; l++) { OptoReg::Name preg_lo = OptoReg::add(preg,-l); value.map(preg_lo,phi); @@ -663,7 +681,7 @@ regnd.map(ureg, def); // Record other half of doubles uint def_ideal_reg = def->ideal_reg(); - int n_regs = RegMask::num_registers(def_ideal_reg); + int n_regs = RegMask::num_registers(def_ideal_reg, lrgs(_lrg_map.live_range_id(def))); for (int l = 1; l < n_regs; l++) { OptoReg::Name ureg_lo = OptoReg::add(ureg,-l); if (!value[ureg_lo] && @@ -707,7 +725,7 @@ } uint n_ideal_reg = n->ideal_reg(); - int n_regs = RegMask::num_registers(n_ideal_reg); + int n_regs = RegMask::num_registers(n_ideal_reg, lrgs(lidx)); if (n_regs == 1) { // If Node 'n' does not change the value mapped by the register, // then 'n' is a useless copy. Do not update the register->node
--- a/src/hotspot/share/opto/regmask.cpp Wed Sep 02 20:33:29 2020 -0700 +++ b/src/hotspot/share/opto/regmask.cpp Tue Sep 08 15:28:06 2020 +0800 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -24,6 +24,7 @@ #include "precompiled.hpp" #include "opto/ad.hpp" +#include "opto/chaitin.hpp" #include "opto/compile.hpp" #include "opto/matcher.hpp" #include "opto/node.hpp" @@ -59,30 +60,47 @@ //============================================================================= bool RegMask::is_vector(uint ireg) { - return (ireg == Op_VecS || ireg == Op_VecD || + return (ireg == Op_VecA || ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ); } int RegMask::num_registers(uint ireg) { switch(ireg) { case Op_VecZ: - return 16; + return SlotsPerVecZ; case Op_VecY: - return 8; + return SlotsPerVecY; case Op_VecX: - return 4; + return SlotsPerVecX; case Op_VecD: + return SlotsPerVecD; case Op_RegD: case Op_RegL: #ifdef _LP64 case Op_RegP: #endif return 2; + case Op_VecA: + assert(Matcher::supports_scalable_vector(), "does not support scalable vector"); + return SlotsPerVecA; } // Op_VecS and the rest ideal registers. return 1; } +int RegMask::num_registers(uint ireg, LRG &lrg) { + int n_regs = num_registers(ireg); + + // assigned is the OptoReg selected by the register allocator + OptoReg::Name assigned = lrg.reg(); + assert(OptoReg::is_valid(assigned), "should be valid opto register"); + + if (lrg.is_scalable() && OptoReg::is_stack(assigned)) { + n_regs = lrg.scalable_reg_slots(); + } + return n_regs; +} + // Clear out partial bits; leave only bit pairs void RegMask::clear_to_pairs() { assert(valid_watermarks(), "sanity"); @@ -157,6 +175,16 @@ } return false; } +// Check whether a given reg number with size is valid +// for the current regmask, where reg is the highest number. +bool RegMask::is_valid_reg(OptoReg::Name reg, const int size) const { + for (int i = 0; i < size; i++) { + if (!Member(reg - i)) { + return false; + } + } + return true; +} // only indices that are powers of 2 are accessed, so index 3 is only filled in for storage. static int low_bits[5] = { 0x55555555, 0x11111111, 0x01010101, 0x00000000, 0x00010001 }; @@ -164,8 +192,13 @@ // Find the lowest-numbered register set in the mask. Return the // HIGHEST register number in the set, or BAD if no sets. // Works also for size 1. -OptoReg::Name RegMask::find_first_set(const int size) const { - assert(is_aligned_sets(size), "mask is not aligned, adjacent sets"); +OptoReg::Name RegMask::find_first_set(LRG &lrg, const int size) const { + if (lrg.is_scalable()) { + // For a scalable vector register, the regmask is aligned to SlotsPerVecA bits. + assert(is_aligned_sets(SlotsPerVecA), "mask is not aligned, adjacent sets"); + } else { + assert(is_aligned_sets(size), "mask is not aligned, adjacent sets"); + } assert(valid_watermarks(), "sanity"); for (int i = _lwm; i <= _hwm; i++) { if (_A[i]) { // Found some bits @@ -245,12 +278,16 @@ while (bits) { // Check bits for pairing int bit = bits & -bits; // Extract low bit // A low bit that is not odd means it is mis-aligned. - if ((bit & low_bits_mask) == 0) { return false; + if ((bit & low_bits_mask) == 0) { + return false; + } // Do extra work since (bit << size) may overflow. 
int hi_bit = bit << (size-1); // high bit int set = hi_bit + ((hi_bit-1) & ~(bit-1)); // Check for aligned adjacent bits in this set - if ((bits & set) != set) return false; + if ((bits & set) != set) { + return false; + } bits -= set; // Remove this set } }
--- a/src/hotspot/share/opto/regmask.hpp Wed Sep 02 20:33:29 2020 -0700 +++ b/src/hotspot/share/opto/regmask.hpp Tue Sep 08 15:28:06 2020 +0800 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -30,6 +30,8 @@ #include "utilities/count_leading_zeros.hpp" #include "utilities/count_trailing_zeros.hpp" +class LRG; + //-------------Non-zero bit search methods used by RegMask--------------------- // Find lowest 1, undefined if empty/0 static int find_lowest_bit(uint32_t mask) { @@ -91,11 +93,13 @@ // requirement is internal to the allocator, and independent of any // particular platform. enum { SlotsPerLong = 2, + SlotsPerVecA = 8, SlotsPerVecS = 1, SlotsPerVecD = 2, SlotsPerVecX = 4, SlotsPerVecY = 8, - SlotsPerVecZ = 16 }; + SlotsPerVecZ = 16, + }; // A constructor only used by the ADLC output. All mask fields are filled // in directly. Calls to this look something like RM(1,2,3,4); @@ -219,10 +223,14 @@ // Test for a single adjacent set of ideal register's size. bool is_bound(uint ireg) const; + // Check whether a given reg number with size is valid + // for the current regmask, where reg is the highest number. + bool is_valid_reg(OptoReg::Name reg, const int size) const; + // Find the lowest-numbered register set in the mask. Return the // HIGHEST register number in the set, or BAD if no sets. // Assert that the mask contains only bit sets. - OptoReg::Name find_first_set(const int size) const; + OptoReg::Name find_first_set(LRG &lrg, const int size) const; // Clear out partial bits; leave only aligned adjacent bit sets of size. void clear_to_sets(const int size); @@ -236,6 +244,7 @@ static bool is_vector(uint ireg); static int num_registers(uint ireg); + static int num_registers(uint ireg, LRG &lrg); // Fast overlap test. Non-zero if any registers in common. int overlap(const RegMask &rm) const {
--- a/src/hotspot/share/opto/superword.cpp Wed Sep 02 20:33:29 2020 -0700 +++ b/src/hotspot/share/opto/superword.cpp Tue Sep 08 15:28:06 2020 +0800 @@ -94,8 +94,11 @@ //------------------------------transform_loop--------------------------- void SuperWord::transform_loop(IdealLoopTree* lpt, bool do_optimization) { assert(UseSuperWord, "should be"); - // Do vectors exist on this architecture? - if (Matcher::vector_width_in_bytes(T_BYTE) < 2) return; + // SuperWord only works with power of two vector sizes. + int vector_width = Matcher::vector_width_in_bytes(T_BYTE); + if (vector_width < 2 || !is_power_of_2(vector_width)) { + return; + } assert(lpt->_head->is_CountedLoop(), "must be"); CountedLoopNode *cl = lpt->_head->as_CountedLoop();
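The extra guard matters because a scalable SVE implementation may legally expose a non-power-of-two vector length, e.g. 384 bits, so vector_width_in_bytes() could return 48, which breaks SuperWord's power-of-two packing assumptions. A small sketch of the check; the widths tried are illustrative:

#include <cstdio>
#include <initializer_list>

static bool is_power_of_2(int x) { return x > 0 && (x & (x - 1)) == 0; }

int main() {
  for (int width : {16, 32, 48, 64}) {  // bytes; 48 models a 384-bit SVE vector
    printf("width %d -> %s\n", width,
           (width >= 2 && is_power_of_2(width)) ? "vectorize" : "bail out");
  }
  return 0;
}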
--- a/src/hotspot/share/opto/type.cpp Wed Sep 02 20:33:29 2020 -0700 +++ b/src/hotspot/share/opto/type.cpp Tue Sep 08 15:28:06 2020 +0800 @@ -74,6 +74,7 @@ { Bad, T_ILLEGAL, "vectory:", false, 0, relocInfo::none }, // VectorY { Bad, T_ILLEGAL, "vectorz:", false, 0, relocInfo::none }, // VectorZ #else // all other + { Bad, T_ILLEGAL, "vectora:", false, Op_VecA, relocInfo::none }, // VectorA. { Bad, T_ILLEGAL, "vectors:", false, Op_VecS, relocInfo::none }, // VectorS { Bad, T_ILLEGAL, "vectord:", false, Op_VecD, relocInfo::none }, // VectorD { Bad, T_ILLEGAL, "vectorx:", false, Op_VecX, relocInfo::none }, // VectorX @@ -654,6 +655,10 @@ // get_zero_type() should not happen for T_CONFLICT _zero_type[T_CONFLICT]= NULL; + if (Matcher::supports_scalable_vector()) { + TypeVect::VECTA = TypeVect::make(T_BYTE, Matcher::scalable_vector_reg_size(T_BYTE)); + } + // Vector predefined types, it needs initialized _const_basic_type[]. if (Matcher::vector_size_supported(T_BYTE,4)) { TypeVect::VECTS = TypeVect::make(T_BYTE,4); @@ -670,6 +675,8 @@ if (Matcher::vector_size_supported(T_FLOAT,16)) { TypeVect::VECTZ = TypeVect::make(T_FLOAT,16); } + + mreg2type[Op_VecA] = TypeVect::VECTA; mreg2type[Op_VecS] = TypeVect::VECTS; mreg2type[Op_VecD] = TypeVect::VECTD; mreg2type[Op_VecX] = TypeVect::VECTX; @@ -989,6 +996,7 @@ Bad, // Tuple - handled in v-call Bad, // Array - handled in v-call + Bad, // VectorA - handled in v-call Bad, // VectorS - handled in v-call Bad, // VectorD - handled in v-call Bad, // VectorX - handled in v-call @@ -1897,7 +1905,6 @@ const TypeTuple *TypeTuple::INT_CC_PAIR; const TypeTuple *TypeTuple::LONG_CC_PAIR; - //------------------------------make------------------------------------------- // Make a TypeTuple from the range of a method signature const TypeTuple *TypeTuple::make_range(ciSignature* sig) { @@ -2268,6 +2275,7 @@ //==============================TypeVect======================================= // Convenience common pre-built types. +const TypeVect *TypeVect::VECTA = NULL; // vector length agnostic const TypeVect *TypeVect::VECTS = NULL; // 32-bit vectors const TypeVect *TypeVect::VECTD = NULL; // 64-bit vectors const TypeVect *TypeVect::VECTX = NULL; // 128-bit vectors @@ -2278,10 +2286,11 @@ const TypeVect* TypeVect::make(const Type *elem, uint length) { BasicType elem_bt = elem->array_element_basic_type(); assert(is_java_primitive(elem_bt), "only primitive types in vector"); - assert(length > 1 && is_power_of_2(length), "vector length is power of 2"); assert(Matcher::vector_size_supported(elem_bt, length), "length in range"); int size = length * type2aelembytes(elem_bt); switch (Matcher::vector_ideal_reg(size)) { + case Op_VecA: + return (TypeVect*)(new TypeVectA(elem, length))->hashcons(); case Op_VecS: return (TypeVect*)(new TypeVectS(elem, length))->hashcons(); case Op_RegL: @@ -2313,7 +2322,7 @@ default: // All else is a mistake typerr(t); - + case VectorA: case VectorS: case VectorD: case VectorX: @@ -2368,6 +2377,8 @@ #ifndef PRODUCT void TypeVect::dump2(Dict &d, uint depth, outputStream *st) const { switch (base()) { + case VectorA: + st->print("vectora["); break; case VectorS: st->print("vectors["); break; case VectorD:
--- a/src/hotspot/share/opto/type.hpp Wed Sep 02 20:33:29 2020 -0700 +++ b/src/hotspot/share/opto/type.hpp Tue Sep 08 15:28:06 2020 +0800 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -53,6 +53,7 @@ class TypeAry; class TypeTuple; class TypeVect; +class TypeVectA; class TypeVectS; class TypeVectD; class TypeVectX; @@ -87,6 +88,7 @@ Tuple, // Method signature or object layout Array, // Array types + VectorA, // (Scalable) Vector types for vector length agnostic VectorS, // 32bit Vector types VectorD, // 64bit Vector types VectorX, // 128bit Vector types @@ -765,6 +767,7 @@ virtual const Type *xmeet( const Type *t) const; virtual const Type *xdual() const; // Compute dual right now. + static const TypeVect *VECTA; static const TypeVect *VECTS; static const TypeVect *VECTD; static const TypeVect *VECTX; @@ -776,6 +779,11 @@ #endif }; +class TypeVectA : public TypeVect { + friend class TypeVect; + TypeVectA(const Type* elem, uint length) : TypeVect(VectorA, elem, length) {} +}; + class TypeVectS : public TypeVect { friend class TypeVect; TypeVectS(const Type* elem, uint length) : TypeVect(VectorS, elem, length) {} @@ -1630,12 +1638,12 @@ } inline const TypeVect *Type::is_vect() const { - assert( _base >= VectorS && _base <= VectorZ, "Not a Vector" ); + assert( _base >= VectorA && _base <= VectorZ, "Not a Vector" ); return (TypeVect*)this; } inline const TypeVect *Type::isa_vect() const { - return (_base >= VectorS && _base <= VectorZ) ? (TypeVect*)this : NULL; + return (_base >= VectorA && _base <= VectorZ) ? (TypeVect*)this : NULL; } inline const TypePtr *Type::is_ptr() const {
--- a/src/hotspot/share/opto/vectornode.cpp Wed Sep 02 20:33:29 2020 -0700 +++ b/src/hotspot/share/opto/vectornode.cpp Tue Sep 08 15:28:06 2020 +0800 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007, 2017, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2007, 2020, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -1085,7 +1085,7 @@ (vlen > 1) && is_power_of_2(vlen) && Matcher::vector_size_supported(bt, vlen)) { int vopc = ReductionNode::opcode(opc, bt); - return vopc != opc && Matcher::match_rule_supported(vopc); + return vopc != opc && Matcher::match_rule_supported_vector(vopc, vlen, bt); } return false; }
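The one-line vectornode.cpp change routes the "is this reduction implemented" question through Matcher::match_rule_supported_vector(vopc, vlen, bt), so a backend can now accept or reject a reduction per opcode, vector length, and element type instead of per opcode alone. A hedged sketch of the kind of reduction this gates, again in terms of the incubating Vector API as it stands in this branch:

import jdk.incubator.vector.IntVector;
import jdk.incubator.vector.VectorOperators;
import jdk.incubator.vector.VectorSpecies;

// Explicit vector reduction; whether C2 emits a vector reduction node for a
// given (opcode, length, element type) combination is now a matcher decision.
public class SumReduce {
    private static final VectorSpecies<Integer> SPECIES = IntVector.SPECIES_PREFERRED;

    static int sum(int[] a) {
        int s = 0;
        int i = 0;
        for (; i < SPECIES.loopBound(a.length); i += SPECIES.length()) {
            s += IntVector.fromArray(SPECIES, a, i).reduceLanes(VectorOperators.ADD);
        }
        for (; i < a.length; i++) {
            s += a[i];
        }
        return s;
    }
}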
--- a/src/hotspot/share/prims/jvmtiEnv.cpp Wed Sep 02 20:33:29 2020 -0700 +++ b/src/hotspot/share/prims/jvmtiEnv.cpp Tue Sep 08 15:28:06 2020 +0800 @@ -1714,15 +1714,18 @@ // shall be posted for this PopFrame. // It is only safe to perform the direct operation on the current - // thread. All other usage needs to use a vm-safepoint-op for safety. - if (java_thread == JavaThread::current()) { - state->update_for_pop_top_frame(); - } else { - VM_UpdateForPopTopFrame op(state); - VMThread::execute(&op); - jvmtiError err = op.result(); - if (err != JVMTI_ERROR_NONE) { - return err; + // thread. All other usage needs to use a handshake for safety. + { + MutexLocker mu(JvmtiThreadState_lock); + if (java_thread == JavaThread::current()) { + state->update_for_pop_top_frame(); + } else { + UpdateForPopTopFrameClosure op(state); + bool executed = Handshake::execute_direct(&op, java_thread); + jvmtiError err = executed ? op.result() : JVMTI_ERROR_THREAD_NOT_ALIVE; + if (err != JVMTI_ERROR_NONE) { + return err; + } } } @@ -1796,13 +1799,14 @@ // It is only safe to perform the direct operation on the current - // thread. All other usage needs to use a vm-safepoint-op for safety. + // thread. All other usage needs to use a handshake for safety. + MutexLocker mu(JvmtiThreadState_lock); if (java_thread == JavaThread::current()) { int frame_number = state->count_frames() - depth; state->env_thread_state(this)->set_frame_pop(frame_number); } else { - VM_SetFramePop op(this, state, depth); - VMThread::execute(&op); - err = op.result(); + SetFramePopClosure op(this, state, depth); + bool executed = Handshake::execute_direct(&op, java_thread); + err = executed ? op.result() : JVMTI_ERROR_THREAD_NOT_ALIVE; } return err; } /* end NotifyFramePop */
--- a/src/hotspot/share/prims/jvmtiEnvBase.cpp Wed Sep 02 20:33:29 2020 -0700 +++ b/src/hotspot/share/prims/jvmtiEnvBase.cpp Tue Sep 08 15:28:06 2020 +0800 @@ -1504,25 +1504,23 @@ } void -VM_UpdateForPopTopFrame::doit() { +UpdateForPopTopFrameClosure::do_thread(Thread *target) { JavaThread* jt = _state->get_thread(); - ThreadsListHandle tlh; - if (jt != NULL && tlh.includes(jt) && !jt->is_exiting() && jt->threadObj() != NULL) { + assert(jt == target, "just checking"); + if (!jt->is_exiting() && jt->threadObj() != NULL) { _state->update_for_pop_top_frame(); - } else { - _result = JVMTI_ERROR_THREAD_NOT_ALIVE; + _result = JVMTI_ERROR_NONE; } } void -VM_SetFramePop::doit() { +SetFramePopClosure::do_thread(Thread *target) { JavaThread* jt = _state->get_thread(); - ThreadsListHandle tlh; - if (jt != NULL && tlh.includes(jt) && !jt->is_exiting() && jt->threadObj() != NULL) { + assert(jt == target, "just checking"); + if (!jt->is_exiting() && jt->threadObj() != NULL) { int frame_number = _state->count_frames() - _depth; _state->env_thread_state((JvmtiEnvBase*)_env)->set_frame_pop(frame_number); - } else { - _result = JVMTI_ERROR_THREAD_NOT_ALIVE; + _result = JVMTI_ERROR_NONE; } }
--- a/src/hotspot/share/prims/jvmtiEnvBase.hpp Wed Sep 02 20:33:29 2020 -0700 +++ b/src/hotspot/share/prims/jvmtiEnvBase.hpp Tue Sep 08 15:28:06 2020 +0800 @@ -336,24 +336,23 @@ JvmtiEnv* next(JvmtiEnvBase* env) { return env->next_environment(); } }; -// VM operation to update for pop top frame. -class VM_UpdateForPopTopFrame : public VM_Operation { +// HandshakeClosure to update for pop top frame. +class UpdateForPopTopFrameClosure : public HandshakeClosure { private: JvmtiThreadState* _state; jvmtiError _result; public: - VM_UpdateForPopTopFrame(JvmtiThreadState* state) { - _state = state; - _result = JVMTI_ERROR_NONE; - } - VMOp_Type type() const { return VMOp_UpdateForPopTopFrame; } + UpdateForPopTopFrameClosure(JvmtiThreadState* state) + : HandshakeClosure("UpdateForPopTopFrame"), + _state(state), + _result(JVMTI_ERROR_THREAD_NOT_ALIVE) {} jvmtiError result() { return _result; } - void doit(); + void do_thread(Thread *target); }; -// VM operation to set frame pop. -class VM_SetFramePop : public VM_Operation { +// HandshakeClosure to set frame pop. +class SetFramePopClosure : public HandshakeClosure { private: JvmtiEnv *_env; JvmtiThreadState* _state; @@ -361,15 +360,14 @@ jvmtiError _result; public: - VM_SetFramePop(JvmtiEnv *env, JvmtiThreadState* state, jint depth) { - _env = env; - _state = state; - _depth = depth; - _result = JVMTI_ERROR_NONE; - } - VMOp_Type type() const { return VMOp_SetFramePop; } + SetFramePopClosure(JvmtiEnv *env, JvmtiThreadState* state, jint depth) + : HandshakeClosure("SetFramePop"), + _env(env), + _state(state), + _depth(depth), + _result(JVMTI_ERROR_THREAD_NOT_ALIVE) {} jvmtiError result() { return _result; } - void doit(); + void do_thread(Thread *target); };
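Note the inversion in these closures relative to the old VM operations: _result is now initialized to JVMTI_ERROR_THREAD_NOT_ALIVE and do_thread() only sets JVMTI_ERROR_NONE when it actually runs against a live target, so a handshake that never executes reports the right error without extra bookkeeping. A toy model of the pattern in plain Java (not HotSpot code; all names here are invented for illustration):

// Pessimistic default: success is recorded only if the per-thread operation ran.
public class ResultDefaulting {
    enum JvmtiError { NONE, THREAD_NOT_ALIVE }

    static final class SetFramePopModel {
        private JvmtiError result = JvmtiError.THREAD_NOT_ALIVE; // default to failure

        void doThread(Thread target) {
            if (target.isAlive()) {
                // ... the per-thread state update would happen here ...
                result = JvmtiError.NONE;
            }
        }

        JvmtiError result() { return result; }
    }

    public static void main(String[] args) {
        SetFramePopModel op = new SetFramePopModel();
        op.doThread(new Thread(() -> {})); // never started, so not alive
        System.out.println(op.result());   // prints THREAD_NOT_ALIVE
    }
}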
--- a/src/hotspot/share/prims/jvmtiEnvThreadState.cpp Wed Sep 02 20:33:29 2020 -0700 +++ b/src/hotspot/share/prims/jvmtiEnvThreadState.cpp Tue Sep 08 15:28:06 2020 +0800 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003, 2017, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -191,8 +191,11 @@ JvmtiFramePops* JvmtiEnvThreadState::get_frame_pops() { - assert(get_thread() == Thread::current() || SafepointSynchronize::is_at_safepoint(), - "frame pop data only accessible from same thread or at safepoint"); +#ifdef ASSERT + Thread *current = Thread::current(); +#endif + assert(get_thread() == current || current == get_thread()->active_handshaker(), + "frame pop data only accessible from same thread or direct handshake"); if (_frame_pops == NULL) { _frame_pops = new JvmtiFramePops(); assert(_frame_pops != NULL, "_frame_pops != NULL"); @@ -206,32 +209,44 @@ } void JvmtiEnvThreadState::set_frame_pop(int frame_number) { - assert(get_thread() == Thread::current() || SafepointSynchronize::is_at_safepoint(), - "frame pop data only accessible from same thread or at safepoint"); +#ifdef ASSERT + Thread *current = Thread::current(); +#endif + assert(get_thread() == current || current == get_thread()->active_handshaker(), + "frame pop data only accessible from same thread or direct handshake"); JvmtiFramePop fpop(frame_number); JvmtiEventController::set_frame_pop(this, fpop); } void JvmtiEnvThreadState::clear_frame_pop(int frame_number) { - assert(get_thread() == Thread::current() || SafepointSynchronize::is_at_safepoint(), - "frame pop data only accessible from same thread or at safepoint"); +#ifdef ASSERT + Thread *current = Thread::current(); +#endif + assert(get_thread() == current || current == get_thread()->active_handshaker(), + "frame pop data only accessible from same thread or direct handshake"); JvmtiFramePop fpop(frame_number); JvmtiEventController::clear_frame_pop(this, fpop); } void JvmtiEnvThreadState::clear_to_frame_pop(int frame_number) { - assert(get_thread() == Thread::current() || SafepointSynchronize::is_at_safepoint(), - "frame pop data only accessible from same thread or at safepoint"); +#ifdef ASSERT + Thread *current = Thread::current(); +#endif + assert(get_thread() == current || current == get_thread()->active_handshaker(), + "frame pop data only accessible from same thread or direct handshake"); JvmtiFramePop fpop(frame_number); JvmtiEventController::clear_to_frame_pop(this, fpop); } bool JvmtiEnvThreadState::is_frame_pop(int cur_frame_number) { - assert(get_thread() == Thread::current() || SafepointSynchronize::is_at_safepoint(), - "frame pop data only accessible from same thread or at safepoint"); +#ifdef ASSERT + Thread *current = Thread::current(); +#endif + assert(get_thread() == current || current == get_thread()->active_handshaker(), + "frame pop data only accessible from same thread or direct handshake"); if (!get_thread()->is_interp_only_mode() || _frame_pops == NULL) { return false; } @@ -240,25 +255,25 @@ } -class VM_GetCurrentLocation : public VM_Operation { +class GetCurrentLocationClosure : public HandshakeClosure { private: - JavaThread *_thread; jmethodID _method_id; int _bci; public: - VM_GetCurrentLocation(JavaThread *thread) { - _thread = thread; - } - VMOp_Type type() const { return VMOp_GetCurrentLocation; } - void doit() { - ResourceMark rmark; 
// _thread != Thread::current() - RegisterMap rm(_thread, false); + GetCurrentLocationClosure() + : HandshakeClosure("GetCurrentLocation"), + _method_id(NULL), + _bci(0) {} + void do_thread(Thread *target) { + JavaThread *jt = (JavaThread *)target; + ResourceMark rmark; // jt != Thread::current() + RegisterMap rm(jt, false); // There can be a race condition between a VM_Operation reaching a safepoint // and the target thread exiting from Java execution. // We must recheck the last Java frame still exists. - if (!_thread->is_exiting() && _thread->has_last_Java_frame()) { - javaVFrame* vf = _thread->last_java_vframe(&rm); + if (!jt->is_exiting() && jt->has_last_Java_frame()) { + javaVFrame* vf = jt->last_java_vframe(&rm); assert(vf != NULL, "must have last java frame"); Method* method = vf->method(); _method_id = method->jmethod_id(); @@ -307,9 +322,15 @@ jmethodID method_id; int bci; // The java thread stack may not be walkable for a running thread - // so get current location at safepoint. - VM_GetCurrentLocation op(_thread); - VMThread::execute(&op); + // so get current location with direct handshake. + GetCurrentLocationClosure op; + Thread *current = Thread::current(); + if (current == _thread || _thread->active_handshaker() == current) { + op.do_thread(_thread); + } else { + bool executed = Handshake::execute_direct(&op, _thread); + guarantee(executed, "Direct handshake failed. Target thread is not alive?"); + } op.get_current_location(&method_id, &bci); set_current_location(method_id, bci); }
--- a/src/hotspot/share/prims/jvmtiEventController.cpp Wed Sep 02 20:33:29 2020 -0700 +++ b/src/hotspot/share/prims/jvmtiEventController.cpp Tue Sep 08 15:28:06 2020 +0800 @@ -331,10 +331,14 @@ EC_TRACE(("[%s] # Entering interpreter only mode", JvmtiTrace::safe_get_thread_name(state->get_thread()))); EnterInterpOnlyModeClosure hs; - if (SafepointSynchronize::is_at_safepoint()) { - hs.do_thread(state->get_thread()); + assert(state->get_thread()->is_Java_thread(), "just checking"); + JavaThread *target = (JavaThread *)state->get_thread(); + Thread *current = Thread::current(); + if (target == current || target->active_handshaker() == current) { + hs.do_thread(target); } else { - Handshake::execute_direct(&hs, state->get_thread()); + bool executed = Handshake::execute_direct(&hs, target); + guarantee(executed, "Direct handshake failed. Target thread is not alive?"); } } @@ -980,21 +984,21 @@ void JvmtiEventController::set_frame_pop(JvmtiEnvThreadState *ets, JvmtiFramePop fpop) { - MutexLocker mu(SafepointSynchronize::is_at_safepoint() ? NULL : JvmtiThreadState_lock); + assert_lock_strong(JvmtiThreadState_lock); JvmtiEventControllerPrivate::set_frame_pop(ets, fpop); } void JvmtiEventController::clear_frame_pop(JvmtiEnvThreadState *ets, JvmtiFramePop fpop) { - MutexLocker mu(SafepointSynchronize::is_at_safepoint() ? NULL : JvmtiThreadState_lock); + assert_lock_strong(JvmtiThreadState_lock); JvmtiEventControllerPrivate::clear_frame_pop(ets, fpop); } void JvmtiEventController::clear_to_frame_pop(JvmtiEnvThreadState *ets, JvmtiFramePop fpop) { - MutexLocker mu(SafepointSynchronize::is_at_safepoint() ? NULL : JvmtiThreadState_lock); + assert_lock_strong(JvmtiThreadState_lock); JvmtiEventControllerPrivate::clear_to_frame_pop(ets, fpop); }
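enter_interp_only_mode() above shows the recurring guard for direct handshakes: run the closure in place when the current thread is the target or is already the target's active handshaker, otherwise hand it to Handshake::execute_direct() and treat a refusal as the target being gone. A plain-Java toy of that control flow; handshake() below is a hypothetical stand-in, not a real JDK API:

public class DirectOrHandshake {
    // Hypothetical stand-in for Handshake::execute_direct(): refuse instead
    // of running the operation when the target thread is not alive.
    static boolean handshake(Thread target, Runnable op) {
        if (!target.isAlive()) {
            return false;
        }
        op.run(); // the real mechanism runs this at a point safe for the target
        return true;
    }

    static void runOnTarget(Thread target, Thread activeHandshaker, Runnable op) {
        Thread current = Thread.currentThread();
        if (current == target || current == activeHandshaker) {
            op.run(); // already in a safe context, execute directly
        } else if (!handshake(target, op)) {
            throw new IllegalStateException("Direct handshake failed. Target thread is not alive?");
        }
    }
}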
--- a/src/hotspot/share/prims/jvmtiExport.cpp Wed Sep 02 20:33:29 2020 -0700 +++ b/src/hotspot/share/prims/jvmtiExport.cpp Tue Sep 08 15:28:06 2020 +0800 @@ -1645,7 +1645,10 @@ } } // remove the frame's entry - ets->clear_frame_pop(cur_frame_number); + { + MutexLocker mu(JvmtiThreadState_lock); + ets->clear_frame_pop(cur_frame_number); + } } } }
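This jvmtiExport.cpp hunk is the caller-side half of the locking change above: JvmtiEventController::set_frame_pop() and friends now assert_lock_strong(JvmtiThreadState_lock) instead of conditionally self-locking, so every caller, including this event-posting path, must take the lock itself. A small Java analogue of that contract (illustrative names only):

import java.util.concurrent.locks.ReentrantLock;

// The callee states a precondition instead of acquiring the lock; the caller
// owns the locking decision, mirroring MutexLocker + assert_lock_strong.
class FramePopRegistry {
    private final ReentrantLock stateLock = new ReentrantLock();

    private void clearFramePop(int frameNumber) {
        assert stateLock.isHeldByCurrentThread() : "caller must hold stateLock";
        // ... mutate the frame-pop data here ...
    }

    void onFramePopped(int frameNumber) {
        stateLock.lock();
        try {
            clearFramePop(frameNumber);
        } finally {
            stateLock.unlock();
        }
    }
}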
--- a/src/hotspot/share/prims/jvmtiThreadState.cpp Wed Sep 02 20:33:29 2020 -0700 +++ b/src/hotspot/share/prims/jvmtiThreadState.cpp Tue Sep 08 15:28:06 2020 +0800 @@ -272,9 +272,9 @@ } int JvmtiThreadState::cur_stack_depth() { - guarantee(SafepointSynchronize::is_at_safepoint() || - (JavaThread *)Thread::current() == get_thread(), - "must be current thread or at safepoint"); + Thread *current = Thread::current(); + guarantee(current == get_thread() || current == get_thread()->active_handshaker(), + "must be current thread or direct handshake"); if (!is_interp_only_mode() || _cur_stack_depth == UNKNOWN_STACK_DEPTH) { _cur_stack_depth = count_frames();
--- a/src/hotspot/share/prims/methodHandles.cpp Wed Sep 02 20:33:29 2020 -0700 +++ b/src/hotspot/share/prims/methodHandles.cpp Tue Sep 08 15:28:06 2020 +0800 @@ -132,6 +132,9 @@ REFERENCE_KIND_MASK = java_lang_invoke_MemberName::MN_REFERENCE_KIND_MASK, SEARCH_SUPERCLASSES = java_lang_invoke_MemberName::MN_SEARCH_SUPERCLASSES, SEARCH_INTERFACES = java_lang_invoke_MemberName::MN_SEARCH_INTERFACES, + LM_UNCONDITIONAL = java_lang_invoke_MemberName::MN_UNCONDITIONAL_MODE, + LM_MODULE = java_lang_invoke_MemberName::MN_MODULE_MODE, + LM_TRUSTED = java_lang_invoke_MemberName::MN_TRUSTED_MODE, ALL_KINDS = IS_METHOD | IS_CONSTRUCTOR | IS_FIELD | IS_TYPE }; @@ -672,11 +675,10 @@ return NULL; } - // An unresolved member name is a mere symbolic reference. // Resolving it plants a vmtarget/vmindex in it, // which refers directly to JVM internals. -Handle MethodHandles::resolve_MemberName(Handle mname, Klass* caller, +Handle MethodHandles::resolve_MemberName(Handle mname, Klass* caller, int lookup_mode, bool speculative_resolve, TRAPS) { Handle empty; assert(java_lang_invoke_MemberName::is_instance(mname()), ""); @@ -745,16 +747,21 @@ TempNewSymbol type = lookup_signature(type_str(), (mh_invoke_id != vmIntrinsics::_none), CHECK_(empty)); if (type == NULL) return empty; // no such signature exists in the VM + // skip access check if it's trusted lookup LinkInfo::AccessCheck access_check = caller != NULL ? LinkInfo::AccessCheck::required : LinkInfo::AccessCheck::skip; + // skip loader constraints if it's trusted lookup or a public lookup + LinkInfo::LoaderConstraintCheck loader_constraint_check = (caller != NULL && (lookup_mode & LM_UNCONDITIONAL) == 0) ? + LinkInfo::LoaderConstraintCheck::required : + LinkInfo::LoaderConstraintCheck::skip; // Time to do the lookup. 
switch (flags & ALL_KINDS) { case IS_METHOD: { CallInfo result; - LinkInfo link_info(defc, name, type, caller, access_check); + LinkInfo link_info(defc, name, type, caller, access_check, loader_constraint_check); { assert(!HAS_PENDING_EXCEPTION, ""); if (ref_kind == JVM_REF_invokeStatic) { @@ -795,7 +802,7 @@ case IS_CONSTRUCTOR: { CallInfo result; - LinkInfo link_info(defc, name, type, caller, access_check); + LinkInfo link_info(defc, name, type, caller, access_check, loader_constraint_check); { assert(!HAS_PENDING_EXCEPTION, ""); if (name == vmSymbols::object_initializer_name()) { @@ -820,7 +827,7 @@ fieldDescriptor result; // find_field initializes fd if found { assert(!HAS_PENDING_EXCEPTION, ""); - LinkInfo link_info(defc, name, type, caller, LinkInfo::AccessCheck::skip); + LinkInfo link_info(defc, name, type, caller, LinkInfo::AccessCheck::skip, loader_constraint_check); LinkResolver::resolve_field(result, link_info, Bytecodes::_nop, false, THREAD); if (HAS_PENDING_EXCEPTION) { if (speculative_resolve) { @@ -1117,6 +1124,9 @@ template(java_lang_invoke_MemberName,MN_HIDDEN_CLASS) \ template(java_lang_invoke_MemberName,MN_STRONG_LOADER_LINK) \ template(java_lang_invoke_MemberName,MN_ACCESS_VM_ANNOTATIONS) \ + template(java_lang_invoke_MemberName,MN_MODULE_MODE) \ + template(java_lang_invoke_MemberName,MN_UNCONDITIONAL_MODE) \ + template(java_lang_invoke_MemberName,MN_TRUSTED_MODE) \ /*end*/ #define IGNORE_REQ(req_expr) /* req_expr */ @@ -1190,13 +1200,17 @@ // void resolve(MemberName self, Class<?> caller) JVM_ENTRY(jobject, MHN_resolve_Mem(JNIEnv *env, jobject igcls, jobject mname_jh, jclass caller_jh, - jboolean speculative_resolve)) { + jint lookup_mode, jboolean speculative_resolve)) { if (mname_jh == NULL) { THROW_MSG_NULL(vmSymbols::java_lang_InternalError(), "mname is null"); } Handle mname(THREAD, JNIHandles::resolve_non_null(mname_jh)); // The trusted Java code that calls this method should already have performed // access checks on behalf of the given caller. But, we can verify this. - if (VerifyMethodHandles && caller_jh != NULL && + // This only verifies from the context of the lookup class. It does not + // verify the lookup context for a Lookup object teleported from one module + // to another. Such a Lookup object can only access the intersection of the sets + // of accessible classes from both the lookup class and the previous lookup class. + if (VerifyMethodHandles && (lookup_mode & LM_TRUSTED) == LM_TRUSTED && caller_jh != NULL && java_lang_invoke_MemberName::clazz(mname()) != NULL) { Klass* reference_klass = java_lang_Class::as_Klass(java_lang_invoke_MemberName::clazz(mname())); if (reference_klass != NULL && reference_klass->is_objArray_klass()) { @@ -1207,18 +1221,25 @@ if (reference_klass != NULL && reference_klass->is_instance_klass()) { // Emulate LinkResolver::check_klass_accessability. Klass* caller = java_lang_Class::as_Klass(JNIHandles::resolve_non_null(caller_jh)); - if (caller != SystemDictionary::Object_klass() + // access check on behalf of the caller if this is not a public lookup + // i.e. 
lookup mode is not UNCONDITIONAL + if ((lookup_mode & LM_UNCONDITIONAL) == 0 && Reflection::verify_class_access(caller, InstanceKlass::cast(reference_klass), true) != Reflection::ACCESS_OK) { - THROW_MSG_NULL(vmSymbols::java_lang_InternalError(), reference_klass->external_name()); + ResourceMark rm(THREAD); + stringStream ss; + ss.print("caller %s tried to access %s", caller->class_in_module_of_loader(), + reference_klass->class_in_module_of_loader()); + THROW_MSG_NULL(vmSymbols::java_lang_InternalError(), ss.as_string()); } } } Klass* caller = caller_jh == NULL ? NULL : java_lang_Class::as_Klass(JNIHandles::resolve_non_null(caller_jh)); - Handle resolved = MethodHandles::resolve_MemberName(mname, caller, speculative_resolve == JNI_TRUE, + Handle resolved = MethodHandles::resolve_MemberName(mname, caller, lookup_mode, + speculative_resolve == JNI_TRUE, CHECK_NULL); if (resolved.is_null()) { @@ -1518,7 +1539,7 @@ static JNINativeMethod MHN_methods[] = { {CC "init", CC "(" MEM "" OBJ ")V", FN_PTR(MHN_init_Mem)}, {CC "expand", CC "(" MEM ")V", FN_PTR(MHN_expand_Mem)}, - {CC "resolve", CC "(" MEM "" CLS "Z)" MEM, FN_PTR(MHN_resolve_Mem)}, + {CC "resolve", CC "(" MEM "" CLS "IZ)" MEM, FN_PTR(MHN_resolve_Mem)}, // static native int getNamedCon(int which, Object[] name) {CC "getNamedCon", CC "(I[" OBJ ")I", FN_PTR(MHN_getNamedCon)}, // static native int getMembers(Class<?> defc, String matchName, String matchSig,
--- a/src/hotspot/share/prims/methodHandles.hpp Wed Sep 02 20:33:29 2020 -0700 +++ b/src/hotspot/share/prims/methodHandles.hpp Tue Sep 08 15:28:06 2020 +0800 @@ -60,7 +60,7 @@ public: // working with member names - static Handle resolve_MemberName(Handle mname, Klass* caller, + static Handle resolve_MemberName(Handle mname, Klass* caller, int lookup_mode, bool speculative_resolve, TRAPS); // compute vmtarget/vmindex from name/type static void expand_MemberName(Handle mname, int suppress, TRAPS); // expand defc/name/type if missing static oop init_MemberName(Handle mname_h, Handle target_h, TRAPS); // compute vmtarget/vmindex from target
--- a/src/hotspot/share/runtime/globals.hpp Wed Sep 02 20:33:29 2020 -0700 +++ b/src/hotspot/share/runtime/globals.hpp Tue Sep 08 15:28:06 2020 +0800 @@ -347,6 +347,12 @@ diagnostic(bool, UseVectorizedMismatchIntrinsic, false, \ "Enables intrinsification of ArraysSupport.vectorizedMismatch()") \ \ + diagnostic(bool, UseCopySignIntrinsic, false, \ + "Enables intrinsification of Math.copySign") \ + \ + diagnostic(bool, UseSignumIntrinsic, false, \ + "Enables intrinsification of Math.signum") \ + \ diagnostic(ccstrlist, DisableIntrinsic, "", \ "do not expand intrinsics whose (internal) names appear here") \ \
--- a/src/hotspot/share/runtime/handshake.cpp Wed Sep 02 20:33:29 2020 -0700 +++ b/src/hotspot/share/runtime/handshake.cpp Tue Sep 08 15:28:06 2020 +0800 @@ -383,9 +383,9 @@ _operation_direct(NULL), _handshake_turn_sem(1), _processing_sem(1), - _thread_in_process_handshake(false) + _thread_in_process_handshake(false), + _active_handshaker(NULL) { - DEBUG_ONLY(_active_handshaker = NULL;) } void HandshakeState::set_operation(HandshakeOperation* op) { @@ -510,9 +510,9 @@ if (can_process_handshake()) { guarantee(!_processing_sem.trywait(), "we should already own the semaphore"); log_trace(handshake)("Processing handshake by %s", Thread::current()->is_VM_thread() ? "VMThread" : "Handshaker"); - DEBUG_ONLY(_active_handshaker = Thread::current();) + _active_handshaker = Thread::current(); op->do_handshake(_handshakee); - DEBUG_ONLY(_active_handshaker = NULL;) + _active_handshaker = NULL; // Disarm after we have executed the operation. clear_handshake(is_direct); pr = _success;
--- a/src/hotspot/share/runtime/handshake.hpp Wed Sep 02 20:33:29 2020 -0700 +++ b/src/hotspot/share/runtime/handshake.hpp Tue Sep 08 15:28:06 2020 +0800 @@ -106,11 +106,8 @@ }; ProcessResult try_process(HandshakeOperation* op); -#ifdef ASSERT Thread* _active_handshaker; Thread* active_handshaker() const { return _active_handshaker; } -#endif - }; #endif // SHARE_RUNTIME_HANDSHAKE_HPP
--- a/src/hotspot/share/runtime/thread.cpp Wed Sep 02 20:33:29 2020 -0700 +++ b/src/hotspot/share/runtime/thread.cpp Tue Sep 08 15:28:06 2020 +0800 @@ -2613,9 +2613,17 @@ void JavaThread::java_suspend_self_with_safepoint_check() { assert(this == Thread::current(), "invariant"); JavaThreadState state = thread_state(); - set_thread_state(_thread_blocked); - java_suspend_self(); - set_thread_state_fence(state); + + do { + set_thread_state(_thread_blocked); + java_suspend_self(); + // The current thread could have been suspended again. We have to check for + // suspend after restoring the saved state. Without this the current thread + // might return to _thread_in_Java and execute bytecodes for an arbitrarily + // long time. + set_thread_state_fence(state); + } while (is_external_suspend()); + // Since we are not using a regular thread-state transition helper here, // we must manually emit the instruction barrier after leaving a safe state. OrderAccess::cross_modify_fence();
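The new loop closes a race: another suspend request can arrive between returning from java_suspend_self() and restoring the saved thread state, so the suspend flag is re-checked after the restore instead of trusting the first wakeup. A monitor-based toy of that recheck-after-restore shape in plain Java (purely illustrative, not VM code):

public class SuspendSelfModel {
    private boolean externallySuspended;

    synchronized void suspend() { externallySuspended = true; }

    synchronized void resume() {
        externallySuspended = false;
        notifyAll();
    }

    private synchronized boolean isExternallySuspended() {
        return externallySuspended;
    }

    void suspendSelfWithRecheck() throws InterruptedException {
        do {
            synchronized (this) {
                while (externallySuspended) {
                    wait(); // stands in for _thread_blocked + java_suspend_self()
                }
            }
            // "saved state restored" here; a racing suspend() may have fired
            // again, so loop instead of running on as if resumed.
        } while (isExternallySuspended());
    }
}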
--- a/src/hotspot/share/runtime/thread.hpp Wed Sep 02 20:33:29 2020 -0700 +++ b/src/hotspot/share/runtime/thread.hpp Tue Sep 08 15:28:06 2020 +0800 @@ -1365,11 +1365,9 @@ return _handshake.try_process(op); } -#ifdef ASSERT Thread* active_handshaker() const { return _handshake.active_handshaker(); } -#endif // Suspend/resume support for JavaThread private:
--- a/src/hotspot/share/runtime/vmOperations.hpp Wed Sep 02 20:33:29 2020 -0700 +++ b/src/hotspot/share/runtime/vmOperations.hpp Tue Sep 08 15:28:06 2020 +0800 @@ -76,14 +76,11 @@ template(PopulateDumpSharedSpace) \ template(JNIFunctionTableCopier) \ template(RedefineClasses) \ - template(UpdateForPopTopFrame) \ - template(SetFramePop) \ template(GetObjectMonitorUsage) \ template(GetAllStackTraces) \ template(GetThreadListStackTraces) \ template(ChangeBreakpoints) \ template(GetOrSetLocal) \ - template(GetCurrentLocation) \ template(ChangeSingleStep) \ template(HeapWalkOperation) \ template(HeapIterateOperation) \
--- a/src/hotspot/share/runtime/vmStructs.cpp Wed Sep 02 20:33:29 2020 -0700 +++ b/src/hotspot/share/runtime/vmStructs.cpp Tue Sep 08 15:28:06 2020 +0800 @@ -1849,6 +1849,10 @@ declare_c2_type(OverflowMulLNode, OverflowLNode) \ declare_c2_type(FmaDNode, Node) \ declare_c2_type(FmaFNode, Node) \ + declare_c2_type(CopySignDNode, Node) \ + declare_c2_type(CopySignFNode, Node) \ + declare_c2_type(SignumDNode, Node) \ + declare_c2_type(SignumFNode, Node) \ declare_c2_type(LoadVectorGatherNode, LoadVectorNode) \ declare_c2_type(StoreVectorScatterNode, StoreVectorNode) \ declare_c2_type(VectorLoadMaskNode, VectorNode) \
--- a/src/java.base/share/classes/java/lang/Math.java Wed Sep 02 20:33:29 2020 -0700 +++ b/src/java.base/share/classes/java/lang/Math.java Tue Sep 08 15:28:06 2020 +0800 @@ -1977,6 +1977,7 @@ * @author Joseph D. Darcy * @since 1.5 */ + @HotSpotIntrinsicCandidate public static double signum(double d) { return (d == 0.0 || Double.isNaN(d))?d:copySign(1.0, d); } @@ -1998,6 +1999,7 @@ * @author Joseph D. Darcy * @since 1.5 */ + @HotSpotIntrinsicCandidate public static float signum(float f) { return (f == 0.0f || Float.isNaN(f))?f:copySign(1.0f, f); } @@ -2218,6 +2220,7 @@ * and the sign of {@code sign}. * @since 1.6 */ + @HotSpotIntrinsicCandidate public static double copySign(double magnitude, double sign) { return Double.longBitsToDouble((Double.doubleToRawLongBits(sign) & (DoubleConsts.SIGN_BIT_MASK)) | @@ -2241,6 +2244,7 @@ * and the sign of {@code sign}. * @since 1.6 */ + @HotSpotIntrinsicCandidate public static float copySign(float magnitude, float sign) { return Float.intBitsToFloat((Float.floatToRawIntBits(sign) & (FloatConsts.SIGN_BIT_MASK)) |
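Together with the UseCopySignIntrinsic and UseSignumIntrinsic diagnostic flags added to globals.hpp above (both default false, so enabling them takes -XX:+UnlockDiagnosticVMOptions -XX:+UseCopySignIntrinsic -XX:+UseSignumIntrinsic), these annotations let C2 replace the library code with the new CopySign/Signum nodes registered in vmStructs.cpp. The semantics being intrinsified, for reference:

public class SignOps {
    public static void main(String[] args) {
        System.out.println(Math.copySign(3.0, -0.0)); // -3.0: -0.0 carries a negative sign bit
        System.out.println(Math.signum(-42.5f));      // -1.0
        System.out.println(Math.signum(0.0));         // 0.0: zero (and NaN) pass through
    }
}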
--- a/src/java.base/share/classes/java/lang/invoke/ClassSpecializer.java Wed Sep 02 20:33:29 2020 -0700 +++ b/src/java.base/share/classes/java/lang/invoke/ClassSpecializer.java Tue Sep 08 15:28:06 2020 +0800 @@ -46,6 +46,7 @@ import java.util.concurrent.ConcurrentHashMap; import java.util.function.Function; +import static java.lang.invoke.GenerateJLIClassesHelper.traceSpeciesType; import static java.lang.invoke.LambdaForm.*; import static java.lang.invoke.MethodHandleNatives.Constants.REF_getStatic; import static java.lang.invoke.MethodHandleNatives.Constants.REF_putStatic; @@ -475,15 +476,8 @@ Class<?> salvage = null; try { salvage = BootLoader.loadClassOrNull(className); - if (TRACE_RESOLVE && salvage != null) { - // Used by jlink species pregeneration plugin, see - // jdk.tools.jlink.internal.plugins.GenerateJLIClassesPlugin - System.out.println("[SPECIES_RESOLVE] " + className + " (salvaged)"); - } + traceSpeciesType(className, salvage); } catch (Error ex) { - if (TRACE_RESOLVE) { - System.out.println("[SPECIES_FRESOLVE] " + className + " (Error) " + ex.getMessage()); - } } final Class<? extends T> speciesCode; if (salvage != null) { @@ -494,19 +488,12 @@ // Not pregenerated, generate the class try { speciesCode = generateConcreteSpeciesCode(className, speciesData); - if (TRACE_RESOLVE) { - // Used by jlink species pregeneration plugin, see - // jdk.tools.jlink.internal.plugins.GenerateJLIClassesPlugin - System.out.println("[SPECIES_RESOLVE] " + className + " (generated)"); - } + traceSpeciesType(className, salvage); // This operation causes a lot of churn: linkSpeciesDataToCode(speciesData, speciesCode); // This operation commits the relation, but causes little churn: linkCodeToSpeciesData(speciesCode, speciesData, false); } catch (Error ex) { - if (TRACE_RESOLVE) { - System.out.println("[SPECIES_RESOLVE] " + className + " (Error #2)" ); - } // We can get here if there is a race condition loading a class. // Or maybe we are out of resources. Back out of the CHM.get and retry. throw ex;
--- a/src/java.base/share/classes/java/lang/invoke/DelegatingMethodHandle.java Wed Sep 02 20:33:29 2020 -0700 +++ b/src/java.base/share/classes/java/lang/invoke/DelegatingMethodHandle.java Tue Sep 08 15:28:06 2020 +0800 @@ -28,7 +28,7 @@ import java.util.Arrays; import static java.lang.invoke.LambdaForm.*; import static java.lang.invoke.LambdaForm.Kind.*; -import static java.lang.invoke.MethodHandleNatives.Constants.REF_invokeVirtual; +import static java.lang.invoke.MethodHandleNatives.Constants.*; import static java.lang.invoke.MethodHandleStatics.*; /** @@ -177,7 +177,7 @@ MethodType.methodType(MethodHandle.class), REF_invokeVirtual); NF_getTarget = new NamedFunction( MemberName.getFactory() - .resolveOrFail(REF_invokeVirtual, member, DelegatingMethodHandle.class, NoSuchMethodException.class)); + .resolveOrFail(REF_invokeVirtual, member, DelegatingMethodHandle.class, LM_TRUSTED, NoSuchMethodException.class)); } catch (ReflectiveOperationException ex) { throw newInternalError(ex); }
--- a/src/java.base/share/classes/java/lang/invoke/DirectMethodHandle.java Wed Sep 02 20:33:29 2020 -0700 +++ b/src/java.base/share/classes/java/lang/invoke/DirectMethodHandle.java Tue Sep 08 15:28:06 2020 +0800 @@ -63,7 +63,7 @@ member.isMethod() && !member.isAbstract()) { // Check for corner case: invokeinterface of Object method MemberName m = new MemberName(Object.class, member.getName(), member.getMethodType(), member.getReferenceKind()); - m = MemberName.getFactory().resolveOrNull(m.getReferenceKind(), m, null); + m = MemberName.getFactory().resolveOrNull(m.getReferenceKind(), m, null, LM_TRUSTED); if (m != null && m.isPublic()) { assert(member.getReferenceKind() == m.getReferenceKind()); // else this.form is wrong member = m; @@ -260,7 +260,8 @@ .changeReturnType(void.class); // <init> returns void MemberName linker = new MemberName(MethodHandle.class, linkerName, mtypeWithArg, REF_invokeStatic); try { - linker = IMPL_NAMES.resolveOrFail(REF_invokeStatic, linker, null, NoSuchMethodException.class); + linker = IMPL_NAMES.resolveOrFail(REF_invokeStatic, linker, null, LM_TRUSTED, + NoSuchMethodException.class); } catch (ReflectiveOperationException ex) { throw newInternalError(ex); } @@ -771,7 +772,8 @@ linkerType = MethodType.methodType(void.class, Object.class, long.class, ft); MemberName linker = new MemberName(Unsafe.class, kind.methodName, linkerType, REF_invokeVirtual); try { - linker = IMPL_NAMES.resolveOrFail(REF_invokeVirtual, linker, null, NoSuchMethodException.class); + linker = IMPL_NAMES.resolveOrFail(REF_invokeVirtual, linker, null, LM_TRUSTED, + NoSuchMethodException.class); } catch (ReflectiveOperationException ex) { throw newInternalError(ex); } @@ -914,13 +916,15 @@ case NF_UNSAFE: MemberName member = new MemberName(MethodHandleStatics.class, "UNSAFE", Unsafe.class, REF_getField); return new NamedFunction( - MemberName.getFactory() - .resolveOrFail(REF_getField, member, DirectMethodHandle.class, NoSuchMethodException.class)); + MemberName.getFactory().resolveOrFail(REF_getField, member, + DirectMethodHandle.class, LM_TRUSTED, + NoSuchMethodException.class)); case NF_checkReceiver: member = new MemberName(DirectMethodHandle.class, "checkReceiver", OBJ_OBJ_TYPE, REF_invokeVirtual); return new NamedFunction( - MemberName.getFactory() - .resolveOrFail(REF_invokeVirtual, member, DirectMethodHandle.class, NoSuchMethodException.class)); + MemberName.getFactory().resolveOrFail(REF_invokeVirtual, member, + DirectMethodHandle.class, LM_TRUSTED, + NoSuchMethodException.class)); default: throw newInternalError("Unknown function: " + func); } @@ -934,8 +938,9 @@ { MemberName member = new MemberName(DirectMethodHandle.class, name, type, REF_invokeStatic); return new NamedFunction( - MemberName.getFactory() - .resolveOrFail(REF_invokeStatic, member, DirectMethodHandle.class, NoSuchMethodException.class)); + MemberName.getFactory().resolveOrFail(REF_invokeStatic, member, + DirectMethodHandle.class, LM_TRUSTED, + NoSuchMethodException.class)); } static {
--- a/src/java.base/share/classes/java/lang/invoke/GenerateJLIClassesHelper.java Wed Sep 02 20:33:29 2020 -0700 +++ b/src/java.base/share/classes/java/lang/invoke/GenerateJLIClassesHelper.java Tue Sep 08 15:28:06 2020 +0800 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2016, 2020, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -32,16 +32,328 @@ import java.util.ArrayList; import java.util.HashSet; import java.util.Map; +import java.util.Set; +import java.util.TreeMap; +import java.util.TreeSet; +import java.util.stream.Stream; -import static java.lang.invoke.MethodTypeForm.LF_INVINTERFACE; -import static java.lang.invoke.MethodTypeForm.LF_INVVIRTUAL; +import static java.lang.invoke.LambdaForm.basicTypeSignature; +import static java.lang.invoke.LambdaForm.shortenSignature; +import static java.lang.invoke.LambdaForm.BasicType.*; +import static java.lang.invoke.MethodHandleStatics.TRACE_RESOLVE; +import static java.lang.invoke.MethodTypeForm.*; +import static java.lang.invoke.LambdaForm.Kind.*; /** * Helper class to assist the GenerateJLIClassesPlugin to get access to * generate classes ahead of time. */ class GenerateJLIClassesHelper { + private static final String LF_RESOLVE = "[LF_RESOLVE]"; + private static final String SPECIES_RESOLVE = "[SPECIES_RESOLVE]"; + static void traceLambdaForm(String name, MethodType type, Class<?> holder, MemberName resolvedMember) { + if (TRACE_RESOLVE) { + System.out.println(LF_RESOLVE + " " + holder.getName() + " " + name + " " + + shortenSignature(basicTypeSignature(type)) + + (resolvedMember != null ? " (success)" : " (fail)")); + } + } + + static void traceSpeciesType(String cn, Class<?> salvage) { + if (TRACE_RESOLVE) { + System.out.println(SPECIES_RESOLVE + " " + cn + (salvage != null ? 
" (salvaged)" : " (generated)")); + } + } + + // Map from DirectMethodHandle method type name to index to LambdForms + static final Map<String, Integer> DMH_METHOD_TYPE_MAP = + Map.of( + DIRECT_INVOKE_VIRTUAL.methodName, LF_INVVIRTUAL, + DIRECT_INVOKE_STATIC.methodName, LF_INVSTATIC, + DIRECT_INVOKE_SPECIAL.methodName, LF_INVSPECIAL, + DIRECT_NEW_INVOKE_SPECIAL.methodName, LF_NEWINVSPECIAL, + DIRECT_INVOKE_INTERFACE.methodName, LF_INVINTERFACE, + DIRECT_INVOKE_STATIC_INIT.methodName, LF_INVSTATIC_INIT, + DIRECT_INVOKE_SPECIAL_IFC.methodName, LF_INVSPECIAL_IFC + ); + + static final String DIRECT_HOLDER = "java/lang/invoke/DirectMethodHandle$Holder"; + static final String DELEGATING_HOLDER = "java/lang/invoke/DelegatingMethodHandle$Holder"; + static final String BASIC_FORMS_HOLDER = "java/lang/invoke/LambdaForm$Holder"; + static final String INVOKERS_HOLDER = "java/lang/invoke/Invokers$Holder"; + static final String INVOKERS_HOLDER_CLASS_NAME = INVOKERS_HOLDER.replace('/', '.'); + static final String BMH_SPECIES_PREFIX = "java.lang.invoke.BoundMethodHandle$Species_"; + + static class HolderClassBuilder { + + + private final TreeSet<String> speciesTypes = new TreeSet<>(); + private final TreeSet<String> invokerTypes = new TreeSet<>(); + private final TreeSet<String> callSiteTypes = new TreeSet<>(); + private final Map<String, Set<String>> dmhMethods = new TreeMap<>(); + + HolderClassBuilder addSpeciesType(String type) { + speciesTypes.add(expandSignature(type)); + return this; + } + + HolderClassBuilder addInvokerType(String methodType) { + validateMethodType(methodType); + invokerTypes.add(methodType); + return this; + } + + HolderClassBuilder addCallSiteType(String csType) { + validateMethodType(csType); + callSiteTypes.add(csType); + return this; + } + + Map<String, byte[]> build() { + int count = 0; + for (Set<String> entry : dmhMethods.values()) { + count += entry.size(); + } + MethodType[] directMethodTypes = new MethodType[count]; + int[] dmhTypes = new int[count]; + int index = 0; + for (Map.Entry<String, Set<String>> entry : dmhMethods.entrySet()) { + String dmhType = entry.getKey(); + for (String type : entry.getValue()) { + // The DMH type to actually ask for is retrieved by removing + // the first argument, which needs to be of Object.class + MethodType mt = asMethodType(type); + if (mt.parameterCount() < 1 || + mt.parameterType(0) != Object.class) { + throw new RuntimeException( + "DMH type parameter must start with L: " + dmhType + " " + type); + } + + // Adapt the method type of the LF to retrieve + directMethodTypes[index] = mt.dropParameterTypes(0, 1); + + // invokeVirtual and invokeInterface must have a leading Object + // parameter, i.e., the receiver + dmhTypes[index] = DMH_METHOD_TYPE_MAP.get(dmhType); + if (dmhTypes[index] == LF_INVINTERFACE || dmhTypes[index] == LF_INVVIRTUAL) { + if (mt.parameterCount() < 2 || + mt.parameterType(1) != Object.class) { + throw new RuntimeException( + "DMH type parameter must start with LL: " + dmhType + " " + type); + } + } + index++; + } + } + + // The invoker type to ask for is retrieved by removing the first + // and the last argument, which needs to be of Object.class + MethodType[] invokerMethodTypes = new MethodType[invokerTypes.size()]; + index = 0; + for (String invokerType : invokerTypes) { + MethodType mt = asMethodType(invokerType); + final int lastParam = mt.parameterCount() - 1; + if (mt.parameterCount() < 2 || + mt.parameterType(0) != Object.class || + mt.parameterType(lastParam) != Object.class) { + throw new 
RuntimeException( + "Invoker type parameter must start and end with Object: " + invokerType); + } + mt = mt.dropParameterTypes(lastParam, lastParam + 1); + invokerMethodTypes[index] = mt.dropParameterTypes(0, 1); + index++; + } + + // The callSite type to ask for is retrieved by removing the last + // argument, which needs to be of Object.class + MethodType[] callSiteMethodTypes = new MethodType[callSiteTypes.size()]; + index = 0; + for (String callSiteType : callSiteTypes) { + MethodType mt = asMethodType(callSiteType); + final int lastParam = mt.parameterCount() - 1; + if (mt.parameterCount() < 1 || + mt.parameterType(lastParam) != Object.class) { + throw new RuntimeException( + "CallSite type parameter must end with Object: " + callSiteType); + } + callSiteMethodTypes[index] = mt.dropParameterTypes(lastParam, lastParam + 1); + index++; + } + + Map<String, byte[]> result = new TreeMap<>(); + result.put(DIRECT_HOLDER, + generateDirectMethodHandleHolderClassBytes( + DIRECT_HOLDER, directMethodTypes, dmhTypes)); + result.put(DELEGATING_HOLDER, + generateDelegatingMethodHandleHolderClassBytes( + DELEGATING_HOLDER, directMethodTypes)); + result.put(INVOKERS_HOLDER, + generateInvokersHolderClassBytes(INVOKERS_HOLDER, + invokerMethodTypes, callSiteMethodTypes)); + result.put(BASIC_FORMS_HOLDER, + generateBasicFormsClassBytes(BASIC_FORMS_HOLDER)); + + speciesTypes.forEach(types -> { + Map.Entry<String, byte[]> entry = generateConcreteBMHClassBytes(types); + result.put(entry.getKey(), entry.getValue()); + }); + + // clear builder + speciesTypes.clear(); + invokerTypes.clear(); + callSiteTypes.clear(); + dmhMethods.clear(); + + return result; + } + + private static MethodType asMethodType(String basicSignatureString) { + String[] parts = basicSignatureString.split("_"); + assert (parts.length == 2); + assert (parts[1].length() == 1); + String parameters = expandSignature(parts[0]); + Class<?> rtype = simpleType(parts[1].charAt(0)); + if (parameters.isEmpty()) { + return MethodType.methodType(rtype); + } else { + Class<?>[] ptypes = new Class<?>[parameters.length()]; + for (int i = 0; i < ptypes.length; i++) { + ptypes[i] = simpleType(parameters.charAt(i)); + } + return MethodType.methodType(rtype, ptypes); + } + } + + private void addDMHMethodType(String dmh, String methodType) { + validateMethodType(methodType); + Set<String> methodTypes = dmhMethods.get(dmh); + if (methodTypes == null) { + methodTypes = new TreeSet<>(); + dmhMethods.put(dmh, methodTypes); + } + methodTypes.add(methodType); + } + + private static void validateMethodType(String type) { + String[] typeParts = type.split("_"); + // check return type (second part) + if (typeParts.length != 2 || typeParts[1].length() != 1 + || !isBasicTypeChar(typeParts[1].charAt(0))) { + throw new RuntimeException( + "Method type signature must be of form [LJIFD]*_[LJIFDV]"); + } + // expand and check arguments (first part) + expandSignature(typeParts[0]); + } + + // Convert LL -> LL, L3 -> LLL + private static String expandSignature(String signature) { + StringBuilder sb = new StringBuilder(); + char last = 'X'; + int count = 0; + for (int i = 0; i < signature.length(); i++) { + char c = signature.charAt(i); + if (c >= '0' && c <= '9') { + count *= 10; + count += (c - '0'); + } else { + requireBasicType(c); + for (int j = 1; j < count; j++) { + sb.append(last); + } + sb.append(c); + last = c; + count = 0; + } + } + + // ended with a number, e.g., "L2": append last char count - 1 times + if (count > 1) { + requireBasicType(last); + for (int j = 1; j 
< count; j++) { + sb.append(last); + } + } + return sb.toString(); + } + + private static void requireBasicType(char c) { + if (!isArgBasicTypeChar(c)) { + throw new RuntimeException( + "Character " + c + " must correspond to a basic field type: LIJFD"); + } + } + + private static Class<?> simpleType(char c) { + if (isBasicTypeChar(c)) { + return LambdaForm.BasicType.basicType(c).basicTypeClass(); + } + switch (c) { + case 'Z': + case 'B': + case 'S': + case 'C': + throw new IllegalArgumentException("Not a valid primitive: " + c + + " (use I instead)"); + default: + throw new IllegalArgumentException("Not a primitive: " + c); + } + } + } + + /* + * Returns a map of class name in internal form to the corresponding class bytes + * per the given stream of SPECIES_RESOLVE and LF_RESOLVE trace logs. + * + * Used by GenerateJLIClassesPlugin to pre-generate holder classes during + * jlink phase. + */ + static Map<String, byte[]> generateHolderClasses(Stream<String> traces) { + HolderClassBuilder builder = new HolderClassBuilder(); + traces.map(line -> line.split(" ")) + .forEach(parts -> { + switch (parts[0]) { + case SPECIES_RESOLVE: + // Allow for new types of species data classes being resolved here + assert parts.length == 3; + if (parts[1].startsWith(BMH_SPECIES_PREFIX)) { + String species = parts[1].substring(BMH_SPECIES_PREFIX.length()); + if (!"L".equals(species)) { + builder.addSpeciesType(species); + } + } + break; + case LF_RESOLVE: + assert parts.length > 3; + String methodType = parts[3]; + if (parts[1].equals(INVOKERS_HOLDER_CLASS_NAME)) { + if ("linkToTargetMethod".equals(parts[2]) || + "linkToCallSite".equals(parts[2])) { + builder.addCallSiteType(methodType); + } else { + builder.addInvokerType(methodType); + } + } else if (parts[1].contains("DirectMethodHandle")) { + String dmh = parts[2]; + // ignore getObject etc for now (generated by default) + if (DMH_METHOD_TYPE_MAP.containsKey(dmh)) { + builder.addDMHMethodType(dmh, methodType); + } + } + break; + default: + break; // ignore + } + }); + + return builder.build(); + } + + /** + * Returns a {@code byte[]} representation of a class implementing + * the zero and identity forms of all {@code LambdaForm.BasicType}s. + */ static byte[] generateBasicFormsClassBytes(String className) { ArrayList<LambdaForm> forms = new ArrayList<>(); ArrayList<String> names = new ArrayList<>(); @@ -68,6 +380,11 @@ forms.toArray(new LambdaForm[0])); } + /** + * Returns a {@code byte[]} representation of a class implementing + * DirectMethodHandle of each pairwise combination of {@code MethodType} and + * an {@code int} representing method type. + */ static byte[] generateDirectMethodHandleHolderClassBytes(String className, MethodType[] methodTypes, int[] types) { ArrayList<LambdaForm> forms = new ArrayList<>(); @@ -115,6 +432,11 @@ forms.toArray(new LambdaForm[0])); } + /** + * Returns a {@code byte[]} representation of a class implementing + * DelegatingMethodHandles of each {@code MethodType} kind in the + * {@code methodTypes} argument. + */ static byte[] generateDelegatingMethodHandleHolderClassBytes(String className, MethodType[] methodTypes) { @@ -145,6 +467,11 @@ forms.toArray(new LambdaForm[0])); } + /** + * Returns a {@code byte[]} representation of a class implementing + * the invoker forms for the set of supplied {@code invokerMethodTypes} + * and {@code callSiteMethodTypes}. 
+ */ static byte[] generateInvokersHolderClassBytes(String className, MethodType[] invokerMethodTypes, MethodType[] callSiteMethodTypes) { @@ -193,10 +520,7 @@ * Generate customized code for a set of LambdaForms of specified types into * a class with a specified name. */ - private static byte[] generateCodeBytesForLFs(String className, - String[] names, LambdaForm[] forms) { - - + private static byte[] generateCodeBytesForLFs(String className, String[] names, LambdaForm[] forms) { ClassWriter cw = new ClassWriter(ClassWriter.COMPUTE_MAXS + ClassWriter.COMPUTE_FRAMES); cw.visit(Opcodes.V1_8, Opcodes.ACC_PRIVATE + Opcodes.ACC_FINAL + Opcodes.ACC_SUPER, className, null, InvokerBytecodeGenerator.INVOKER_SUPER_NAME, null); @@ -229,10 +553,14 @@ DelegatingMethodHandle.NF_getTarget); } + /** + * Returns a {@code byte[]} representation of {@code BoundMethodHandle} + * species class implementing the signature defined by {@code types}. + */ @SuppressWarnings({"rawtypes", "unchecked"}) static Map.Entry<String, byte[]> generateConcreteBMHClassBytes(final String types) { for (char c : types.toCharArray()) { - if ("LIJFD".indexOf(c) < 0) { + if (!isArgBasicTypeChar(c)) { throw new IllegalArgumentException("All characters must " + "correspond to a basic field type: LIJFD"); }
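generateHolderClasses(Stream<String>) consumes exactly the [SPECIES_RESOLVE] and [LF_RESOLVE] lines that traceSpeciesType() and traceLambdaForm() print when TRACE_RESOLVE is enabled. Assuming the mainline workflow still applies on this branch, a trace captured from any method-handle-heavy run can be replayed at link time through jlink's --generate-jli-classes=@trace.txt option:

import java.lang.invoke.MethodHandle;
import java.lang.invoke.MethodHandles;
import java.lang.invoke.MethodType;

// Run with -Djava.lang.invoke.MethodHandle.TRACE_RESOLVE=true and capture
// stdout; lines of the shape
//   [LF_RESOLVE] java.lang.invoke.DirectMethodHandle$Holder invokeStatic LII_I (fail)
// are what the builder above parses back into holder classes.
public class TraceDemo {
    public static void main(String[] args) throws Throwable {
        MethodHandle max = MethodHandles.lookup().findStatic(
                Math.class, "max", MethodType.methodType(int.class, int.class, int.class));
        System.out.println((int) max.invokeExact(1, 2));
    }
}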
--- a/src/java.base/share/classes/java/lang/invoke/InvokerBytecodeGenerator.java Wed Sep 02 20:33:29 2020 -0700 +++ b/src/java.base/share/classes/java/lang/invoke/InvokerBytecodeGenerator.java Tue Sep 08 15:28:06 2020 +0800 @@ -46,6 +46,7 @@ import java.util.List; import java.util.stream.Stream; +import static java.lang.invoke.GenerateJLIClassesHelper.traceLambdaForm; import static java.lang.invoke.LambdaForm.BasicType; import static java.lang.invoke.LambdaForm.BasicType.*; import static java.lang.invoke.LambdaForm.*; @@ -322,7 +323,9 @@ private static MemberName resolveInvokerMember(Class<?> invokerClass, String name, MethodType type) { MemberName member = new MemberName(invokerClass, name, type, REF_invokeStatic); try { - member = MEMBERNAME_FACTORY.resolveOrFail(REF_invokeStatic, member, HOST_CLASS, ReflectiveOperationException.class); + member = MEMBERNAME_FACTORY.resolveOrFail(REF_invokeStatic, member, + HOST_CLASS, LM_TRUSTED, + ReflectiveOperationException.class); } catch (ReflectiveOperationException e) { throw newInternalError(e); } @@ -693,11 +696,8 @@ private static MemberName resolveFrom(String name, MethodType type, Class<?> holder) { MemberName member = new MemberName(holder, name, type, REF_invokeStatic); - MemberName resolvedMember = MemberName.getFactory().resolveOrNull(REF_invokeStatic, member, holder); - if (TRACE_RESOLVE) { - System.out.println("[LF_RESOLVE] " + holder.getName() + " " + name + " " + - shortenSignature(basicTypeSignature(type)) + (resolvedMember != null ? " (success)" : " (fail)") ); - } + MemberName resolvedMember = MemberName.getFactory().resolveOrNull(REF_invokeStatic, member, holder, LM_TRUSTED); + traceLambdaForm(name, type, holder, resolvedMember); return resolvedMember; }
--- a/src/java.base/share/classes/java/lang/invoke/Invokers.java Wed Sep 02 20:33:29 2020 -0700 +++ b/src/java.base/share/classes/java/lang/invoke/Invokers.java Tue Sep 08 15:28:06 2020 +0800 @@ -661,7 +661,7 @@ MemberName member = new MemberName(Invokers.class, name, type, REF_invokeStatic); return new NamedFunction( MemberName.getFactory() - .resolveOrFail(REF_invokeStatic, member, Invokers.class, NoSuchMethodException.class)); + .resolveOrFail(REF_invokeStatic, member, Invokers.class, LM_TRUSTED, NoSuchMethodException.class)); } private static class Lazy {
--- a/src/java.base/share/classes/java/lang/invoke/LambdaForm.java Wed Sep 02 20:33:29 2020 -0700 +++ b/src/java.base/share/classes/java/lang/invoke/LambdaForm.java Tue Sep 08 15:28:06 2020 +0800 @@ -40,7 +40,7 @@ import java.util.HashMap; import static java.lang.invoke.LambdaForm.BasicType.*; -import static java.lang.invoke.MethodHandleNatives.Constants.REF_invokeStatic; +import static java.lang.invoke.MethodHandleNatives.Constants.*; import static java.lang.invoke.MethodHandleStatics.*; /** @@ -1758,10 +1758,10 @@ MemberName idMem = new MemberName(LambdaForm.class, "identity_"+btChar, idType, REF_invokeStatic); MemberName zeMem = null; try { - idMem = IMPL_NAMES.resolveOrFail(REF_invokeStatic, idMem, null, NoSuchMethodException.class); + idMem = IMPL_NAMES.resolveOrFail(REF_invokeStatic, idMem, null, LM_TRUSTED, NoSuchMethodException.class); if (!isVoid) { zeMem = new MemberName(LambdaForm.class, "zero_"+btChar, zeType, REF_invokeStatic); - zeMem = IMPL_NAMES.resolveOrFail(REF_invokeStatic, zeMem, null, NoSuchMethodException.class); + zeMem = IMPL_NAMES.resolveOrFail(REF_invokeStatic, zeMem, null, LM_TRUSTED, NoSuchMethodException.class); } } catch (IllegalAccessException|NoSuchMethodException ex) { throw newInternalError(ex);
--- a/src/java.base/share/classes/java/lang/invoke/MemberName.java Wed Sep 02 20:33:29 2020 -0700 +++ b/src/java.base/share/classes/java/lang/invoke/MemberName.java Tue Sep 08 15:28:06 2020 +0800 @@ -1064,7 +1064,7 @@ * If lookup fails or access is not permitted, null is returned. * Otherwise a fresh copy of the given member is returned, with modifier bits filled in. */ - private MemberName resolve(byte refKind, MemberName ref, Class<?> lookupClass, + private MemberName resolve(byte refKind, MemberName ref, Class<?> lookupClass, int allowedModes, boolean speculativeResolve) { MemberName m = ref.clone(); // JVM will side-effect the ref assert(refKind == m.getReferenceKind()); @@ -1084,7 +1084,7 @@ // // REFC view on PTYPES doesn't matter, since it is used only as a starting point for resolution and doesn't // participate in method selection. - m = MethodHandleNatives.resolve(m, lookupClass, speculativeResolve); + m = MethodHandleNatives.resolve(m, lookupClass, allowedModes, speculativeResolve); if (m == null && speculativeResolve) { return null; } @@ -1108,10 +1108,12 @@ * Otherwise a fresh copy of the given member is returned, with modifier bits filled in. */ public <NoSuchMemberException extends ReflectiveOperationException> - MemberName resolveOrFail(byte refKind, MemberName m, Class<?> lookupClass, - Class<NoSuchMemberException> nsmClass) + MemberName resolveOrFail(byte refKind, MemberName m, + Class<?> lookupClass, int allowedModes, + Class<NoSuchMemberException> nsmClass) throws IllegalAccessException, NoSuchMemberException { - MemberName result = resolve(refKind, m, lookupClass, false); + assert lookupClass != null || allowedModes == LM_TRUSTED; + MemberName result = resolve(refKind, m, lookupClass, allowedModes, false); if (result.isResolved()) return result; ReflectiveOperationException ex = result.makeAccessException(); @@ -1124,8 +1126,9 @@ * If lookup fails or access is not permitted, return null. * Otherwise a fresh copy of the given member is returned, with modifier bits filled in. */ - public MemberName resolveOrNull(byte refKind, MemberName m, Class<?> lookupClass) { - MemberName result = resolve(refKind, m, lookupClass, true); + public MemberName resolveOrNull(byte refKind, MemberName m, Class<?> lookupClass, int allowedModes) { + assert lookupClass != null || allowedModes == LM_TRUSTED; + MemberName result = resolve(refKind, m, lookupClass, allowedModes, true); if (result != null && result.isResolved()) return result; return null;
--- a/src/java.base/share/classes/java/lang/invoke/MethodHandleImpl.java Wed Sep 02 20:33:29 2020 -0700 +++ b/src/java.base/share/classes/java/lang/invoke/MethodHandleImpl.java Tue Sep 08 15:28:06 2020 +0800 @@ -1764,41 +1764,8 @@ } @Override - public byte[] generateDirectMethodHandleHolderClassBytes( - String className, MethodType[] methodTypes, int[] types) { - return GenerateJLIClassesHelper - .generateDirectMethodHandleHolderClassBytes( - className, methodTypes, types); - } - - @Override - public byte[] generateDelegatingMethodHandleHolderClassBytes( - String className, MethodType[] methodTypes) { - return GenerateJLIClassesHelper - .generateDelegatingMethodHandleHolderClassBytes( - className, methodTypes); - } - - @Override - public Map.Entry<String, byte[]> generateConcreteBMHClassBytes( - final String types) { - return GenerateJLIClassesHelper - .generateConcreteBMHClassBytes(types); - } - - @Override - public byte[] generateBasicFormsClassBytes(final String className) { - return GenerateJLIClassesHelper - .generateBasicFormsClassBytes(className); - } - - @Override - public byte[] generateInvokersHolderClassBytes(final String className, - MethodType[] invokerMethodTypes, - MethodType[] callSiteMethodTypes) { - return GenerateJLIClassesHelper - .generateInvokersHolderClassBytes(className, - invokerMethodTypes, callSiteMethodTypes); + public Map<String, byte[]> generateHolderClasses(Stream<String> traces) { + return GenerateJLIClassesHelper.generateHolderClasses(traces); } @Override
--- a/src/java.base/share/classes/java/lang/invoke/MethodHandleNatives.java Wed Sep 02 20:33:29 2020 -0700 +++ b/src/java.base/share/classes/java/lang/invoke/MethodHandleNatives.java Tue Sep 08 15:28:06 2020 +0800 @@ -51,7 +51,7 @@ static native void init(MemberName self, Object ref); static native void expand(MemberName self); - static native MemberName resolve(MemberName self, Class<?> caller, + static native MemberName resolve(MemberName self, Class<?> caller, int lookupMode, boolean speculativeResolve) throws LinkageError, ClassNotFoundException; static native int getMembers(Class<?> defc, String matchName, String matchSig, int matchFlags, Class<?> caller, int skip, MemberName[] results); @@ -149,6 +149,15 @@ HIDDEN_CLASS = 0x00000002, STRONG_LOADER_LINK = 0x00000004, ACCESS_VM_ANNOTATIONS = 0x00000008; + + /** + * Lookup modes + */ + static final int + LM_MODULE = Lookup.MODULE, + LM_UNCONDITIONAL = Lookup.UNCONDITIONAL, + LM_TRUSTED = -1; + } static boolean refKindIsValid(int refKind) { @@ -561,7 +570,7 @@ guardType, REF_invokeStatic); linker = MemberName.getFactory().resolveOrNull(REF_invokeStatic, linker, - VarHandleGuards.class); + VarHandleGuards.class, LM_TRUSTED); if (linker != null) { return linker; }
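The LM_* constants deliberately mirror the public Lookup mode bits (LM_MODULE = Lookup.MODULE, LM_UNCONDITIONAL = Lookup.UNCONDITIONAL), with LM_TRUSTED = -1 used by the JDK-internal call sites seen earlier (DirectMethodHandle, Invokers, LambdaForm); whichever applies is what the native resolve() above now receives as lookupMode. From the public API the two interesting modes can be observed directly:

import java.lang.invoke.MethodHandles;
import java.lang.invoke.MethodHandles.Lookup;

public class LookupModes {
    public static void main(String[] args) {
        Lookup caller = MethodHandles.lookup();    // full privileges in this class
        Lookup pub = MethodHandles.publicLookup(); // UNCONDITIONAL only
        System.out.printf("caller lookupModes = 0x%x%n", caller.lookupModes());
        System.out.printf("publicLookup lookupModes = 0x%x (UNCONDITIONAL = 0x%x)%n",
                pub.lookupModes(), Lookup.UNCONDITIONAL);
    }
}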
--- a/src/java.base/share/classes/java/lang/invoke/MethodHandles.java Wed Sep 02 20:33:29 2020 -0700 +++ b/src/java.base/share/classes/java/lang/invoke/MethodHandles.java Tue Sep 08 15:28:06 2020 +0800 @@ -1409,14 +1409,7 @@ // This is just for calling out to MethodHandleImpl. private Class<?> lookupClassOrNull() { - if (allowedModes == TRUSTED) { - return null; - } - if (allowedModes == UNCONDITIONAL) { - // use Object as the caller to pass to VM doing resolution - return Object.class; - } - return lookupClass; + return (allowedModes == TRUSTED) ? null : lookupClass; } /** Tells which access-protection classes of members this lookup object can produce. @@ -3442,7 +3435,7 @@ checkSymbolicClass(refc); // do this before attempting to resolve Objects.requireNonNull(name);