changeset 62810:1ee425a632ff vector-unstable tip

manual merge with default
author njian
date Tue, 08 Sep 2020 15:28:06 +0800
parents 0474716a2fa5 bdc20ee1a68d
children
files make/hotspot/gensrc/GensrcAdlc.gmk src/hotspot/cpu/aarch64/aarch64-asmtest.py src/hotspot/cpu/aarch64/aarch64.ad src/hotspot/cpu/aarch64/assembler_aarch64.cpp src/hotspot/cpu/aarch64/assembler_aarch64.hpp src/hotspot/cpu/aarch64/globals_aarch64.hpp src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp src/hotspot/cpu/aarch64/vm_version_aarch64.cpp src/hotspot/cpu/arm/arm.ad src/hotspot/cpu/ppc/ppc.ad src/hotspot/cpu/s390/s390.ad src/hotspot/cpu/x86/x86.ad src/hotspot/cpu/x86/x86_64.ad src/hotspot/share/adlc/archDesc.cpp src/hotspot/share/adlc/formssel.cpp src/hotspot/share/ci/ciMethod.cpp src/hotspot/share/classfile/javaClasses.hpp src/hotspot/share/classfile/vmSymbols.hpp src/hotspot/share/opto/c2compiler.cpp src/hotspot/share/opto/classes.hpp src/hotspot/share/opto/library_call.cpp src/hotspot/share/opto/matcher.cpp src/hotspot/share/opto/matcher.hpp src/hotspot/share/opto/opcodes.cpp src/hotspot/share/opto/opcodes.hpp src/hotspot/share/opto/superword.cpp src/hotspot/share/opto/type.cpp src/hotspot/share/opto/type.hpp src/hotspot/share/opto/vectornode.cpp src/hotspot/share/runtime/globals.hpp src/hotspot/share/runtime/vmStructs.cpp src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot.test/src/org/graalvm/compiler/hotspot/test/CheckGraalIntrinsics.java test/hotspot/jtreg/vmTestbase/nsk/sysdict/TEST.properties test/hotspot/jtreg/vmTestbase/vm/mlvm/TEST.properties
diffstat 158 files changed, 6602 insertions(+), 1921 deletions(-)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/.gitattributes	Tue Sep 08 15:28:06 2020 +0800
@@ -0,0 +1,1 @@
+*	-text
--- a/.hgtags	Wed Sep 02 20:33:29 2020 -0700
+++ b/.hgtags	Tue Sep 08 15:28:06 2020 +0800
@@ -661,3 +661,4 @@
 5c18d696c7ce724ca36df13933aa53f50e12b9e0 jdk-16+11
 fc8e62b399bd93d06e8d13dc3b384c450e853dcd jdk-16+12
 fd07cdb26fc70243ef23d688b545514f4ddf1c2b jdk-16+13
+36b29df125dc88f11657ce93b4998aa9ff5f5d41 jdk-16+14
--- a/make/hotspot/gensrc/GensrcAdlc.gmk	Wed Sep 02 20:33:29 2020 -0700
+++ b/make/hotspot/gensrc/GensrcAdlc.gmk	Tue Sep 08 15:28:06 2020 +0800
@@ -132,6 +132,7 @@
   ifeq ($(HOTSPOT_TARGET_CPU_ARCH), aarch64)
     AD_SRC_FILES += $(call uniq, $(wildcard $(foreach d, $(AD_SRC_ROOTS), \
         $d/cpu/$(HOTSPOT_TARGET_CPU_ARCH)/$(HOTSPOT_TARGET_CPU_ARCH)_neon.ad \
+        $d/cpu/$(HOTSPOT_TARGET_CPU_ARCH)/$(HOTSPOT_TARGET_CPU_ARCH)_sve.ad \
     )))
   endif
 
--- a/src/hotspot/cpu/aarch64/aarch64-asmtest.py	Wed Sep 02 20:33:29 2020 -0700
+++ b/src/hotspot/cpu/aarch64/aarch64-asmtest.py	Tue Sep 08 15:28:06 2020 +0800
@@ -71,6 +71,49 @@
         else:
             return self.astr("r")
 
+class SVEVectorRegister(FloatRegister):
+    def __str__(self):
+        return self.astr("z")
+
+class SVEPRegister(Register):
+    def __str__(self):
+        return self.astr("p")
+
+    def generate(self):
+        self.number = random.randint(0, 15)
+        return self
+
+class SVEGoverningPRegister(Register):
+    def __str__(self):
+        return self.astr("p")
+    def generate(self):
+        self.number = random.randint(0, 7)
+        return self
+
+class RegVariant(object):
+    def __init__(self, low, high):
+        self.number = random.randint(low, high)
+
+    def astr(self):
+        nameMap = {
+             0: ".b",
+             1: ".h",
+             2: ".s",
+             3: ".d",
+             4: ".q"
+        }
+        return nameMap.get(self.number)
+
+    def cstr(self):
+        nameMap = {
+             0: "__ B",
+             1: "__ H",
+             2: "__ S",
+             3: "__ D",
+             4: "__ Q"
+        }
+        return nameMap.get(self.number)
+
 class FloatZero(Operand):
 
     def __str__(self):
@@ -87,7 +130,10 @@
               'h' : FloatRegister,
               's' : FloatRegister,
               'd' : FloatRegister,
-              'z' : FloatZero}
+              'z' : FloatZero,
+              'p' : SVEPRegister,
+              'P' : SVEGoverningPRegister,
+              'Z' : SVEVectorRegister}
 
     @classmethod
     def create(cls, mode):
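The three new entries route each signature character to an operand class with its own register range: 'p' is any predicate register, 'P' a governing predicate, and 'Z' an SVE vector register (its 0-31 range is inherited from FloatRegister rather than stated here). Illustrative only:

    # Signature characters -> operand class and register-number range,
    # per the generate() methods above ('Z' range assumed from FloatRegister).
    mode_ranges = {'p': ('SVEPRegister', 0, 15),
                   'P': ('SVEGoverningPRegister', 0, 7),
                   'Z': ('SVEVectorRegister', 0, 31)}
    for c in "ZPZ":                          # e.g. a predicated vector op
        cls, lo, hi = mode_ranges[c]
        print("%s: %s%d..%s%d" % (cls, c.lower(), lo, c.lower(), hi))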
@@ -845,6 +891,100 @@
                 % tuple([Instruction.astr(self)] +
                         [(self.reg[i].astr(self.modes[i])) for i in range(self.numRegs)]))
 
+class SVEVectorOp(Instruction):
+    def __init__(self, args):
+        name = args[0]
+        regTypes = args[1]
+        regs = []
+        for c in regTypes:
+            regs.append(OperandFactory.create(c).generate())
+        self.reg = regs
+        self.numRegs = len(regs)
+        if regTypes[0] != "p" and regTypes[1] == 'P':
+           self._isPredicated = True
+           self._merge = "/m"
+        else:
+           self._isPredicated = False
+           self._merge =""
+
+        self._bitwiseop = False
+        if name[0] == 'f':
+            self._width = RegVariant(2, 3)
+        elif not self._isPredicated and (name == "and" or name == "eor" or name == "orr"):
+            self._width = RegVariant(3, 3)
+            self._bitwiseop = True
+        else:
+            self._width = RegVariant(0, 3)
+        if len(args) > 2:
+            self._dnm = args[2]
+        else:
+            self._dnm = None
+        Instruction.__init__(self, name)
+
+    def cstr(self):
+        formatStr = "%s%s" + ''.join([", %s" for i in range(0, self.numRegs)] + [");"])
+        if self._bitwiseop:
+            width = []
+            formatStr = "%s%s" + ''.join([", %s" for i in range(1, self.numRegs)] + [");"])
+        else:
+            width = [self._width.cstr()]
+        return (formatStr
+                % tuple(["__ sve_" + self._name + "("] +
+                        [str(self.reg[0])] +
+                        width +
+                        [str(self.reg[i]) for i in range(1, self.numRegs)]))
+    def astr(self):
+        formatStr = "%s%s" + ''.join([", %s" for i in range(1, self.numRegs)])
+        if self._dnm == 'dn':
+            formatStr += ", %s"
+            dnReg = [str(self.reg[0]) + self._width.astr()]
+        else:
+            dnReg = []
+
+        if self._isPredicated:
+            restRegs = [str(self.reg[1]) + self._merge] + dnReg + [str(self.reg[i]) + self._width.astr() for i in range(2, self.numRegs)]
+        else:
+            restRegs = dnReg + [str(self.reg[i]) + self._width.astr() for i in range(1, self.numRegs)]
+        return (formatStr
+                % tuple([Instruction.astr(self)] +
+                        [str(self.reg[0]) + self._width.astr()] +
+                        restRegs))
+    def generate(self):
+        return self
+
+class SVEReductionOp(Instruction):
+    def __init__(self, args):
+        name = args[0]
+        lowRegType = args[1]
+        self.reg = []
+        Instruction.__init__(self, name)
+        self.reg.append(OperandFactory.create('s').generate())
+        self.reg.append(OperandFactory.create('P').generate())
+        self.reg.append(OperandFactory.create('Z').generate())
+        self._width = RegVariant(lowRegType, 3)
+    def cstr(self):
+        return "__ sve_%s(%s, %s, %s, %s);" % (self.name(),
+                                              str(self.reg[0]),
+                                              self._width.cstr(),
+                                              str(self.reg[1]),
+                                              str(self.reg[2]))
+    def astr(self):
+        if self.name() == "uaddv":
+            dstRegName = "d" + str(self.reg[0].number)
+        else:
+            dstRegName = self._width.astr()[1] + str(self.reg[0].number)
+        formatStr = "%s %s, %s, %s"
+        if self.name() == "fadda":
+            formatStr += ", %s"
+            moreReg = [dstRegName]
+        else:
+            moreReg = []
+        return formatStr % tuple([self.name()] +
+                                 [dstRegName] +
+                                 [str(self.reg[1])] +
+                                 moreReg +
+                                 [str(self.reg[2]) + self._width.astr()])
+
 class LdStNEONOp(Instruction):
     def __init__(self, args):
         self._name, self.regnum, self.arrangement, self.addresskind = args
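To make the SVEVectorOp plumbing above concrete: for a predicated destructive op such as SVEVectorOp(["fadd", "ZPZ", "dn"]), cstr() names the destination register once, while astr() repeats it as the first source and tags the governing predicate with /m (merging). A sketch of the emitted pair, outside the patch, with register numbers fixed for illustration:

    # What the generator pairs up for ["fadd", "ZPZ", "dn"]; z5, p3, z7
    # are arbitrary, and width S comes from RegVariant(2, 3) for 'f' ops.
    zd, pg, zm, w = 5, 3, 7, "s"
    cstr = "__ sve_fadd(z%d, __ %s, p%d, z%d);" % (zd, w.upper(), pg, zm)
    astr = "fadd\tz%d.%s, p%d/m, z%d.%s, z%d.%s" % (zd, w, pg, zd, w, zm, w)
    print(cstr)   # __ sve_fadd(z5, __ S, p3, z7);
    print(astr)   # fadd    z5.s, p3/m, z5.s, z7.s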
@@ -1311,7 +1451,42 @@
                         ["mov",    "__ mov(v1, __ T2S, 1, zr);",                         "mov\tv1.s[1], wzr"],
                         ["mov",    "__ mov(v1, __ T4H, 2, zr);",                         "mov\tv1.h[2], wzr"],
                         ["mov",    "__ mov(v1, __ T8B, 3, zr);",                         "mov\tv1.b[3], wzr"],
-                        ["ld1",    "__ ld1(v31, v0, __ T2D, Address(__ post(r1, r0)));", "ld1\t{v31.2d, v0.2d}, [x1], x0"]])
+                        ["ld1",    "__ ld1(v31, v0, __ T2D, Address(__ post(r1, r0)));", "ld1\t{v31.2d, v0.2d}, [x1], x0"],
+                        # SVE instructions
+                        ["cpy",    "__ sve_cpy(z0, __ S, p0, v1);",                      "mov\tz0.s, p0/m, s1"],
+                        ["inc",    "__ sve_inc(r0, __ S);",                              "incw\tx0"],
+                        ["dec",    "__ sve_dec(r1, __ H);",                              "dech\tx1"],
+                        ["lsl",    "__ sve_lsl(z0, __ B, z1, 7);",                       "lsl\tz0.b, z1.b, #7"],
+                        ["lsl",    "__ sve_lsl(z21, __ H, z1, 15);",                     "lsl\tz21.h, z1.h, #15"],
+                        ["lsl",    "__ sve_lsl(z0, __ S, z1, 31);",                      "lsl\tz0.s, z1.s, #31"],
+                        ["lsl",    "__ sve_lsl(z0, __ D, z1, 63);",                      "lsl\tz0.d, z1.d, #63"],
+                        ["lsr",    "__ sve_lsr(z0, __ B, z1, 7);",                       "lsr\tz0.b, z1.b, #7"],
+                        ["asr",    "__ sve_asr(z0, __ H, z11, 15);",                     "asr\tz0.h, z11.h, #15"],
+                        ["lsr",    "__ sve_lsr(z30, __ S, z1, 31);",                     "lsr\tz30.s, z1.s, #31"],
+                        ["asr",    "__ sve_asr(z0, __ D, z1, 63);",                      "asr\tz0.d, z1.d, #63"],
+                        ["addvl",  "__ sve_addvl(sp, r0, 31);",                          "addvl\tsp, x0, #31"],
+                        ["addpl",  "__ sve_addpl(r1, sp, -32);",                         "addpl\tx1, sp, -32"],
+                        ["cntp",   "__ sve_cntp(r8, __ B, p0, p1);",                     "cntp\tx8, p0, p1.b"],
+                        ["dup",    "__ sve_dup(z0, __ B, 127);",                         "dup\tz0.b, 127"],
+                        ["dup",    "__ sve_dup(z1, __ H, -128);",                        "dup\tz1.h, -128"],
+                        ["dup",    "__ sve_dup(z2, __ S, 32512);",                       "dup\tz2.s, 32512"],
+                        ["dup",    "__ sve_dup(z7, __ D, -32768);",                      "dup\tz7.d, -32768"],
+                        ["ld1b",   "__ sve_ld1b(z0, __ B, p0, Address(sp));",            "ld1b\t{z0.b}, p0/z, [sp]"],
+                        ["ld1h",   "__ sve_ld1h(z10, __ H, p1, Address(sp, -8));",       "ld1h\t{z10.h}, p1/z, [sp, #-8, MUL VL]"],
+                        ["ld1w",   "__ sve_ld1w(z20, __ S, p2, Address(r0, 7));",        "ld1w\t{z20.s}, p2/z, [x0, #7, MUL VL]"],
+                        ["ld1b",   "__ sve_ld1b(z30, __ B, p3, Address(sp, r8));",       "ld1b\t{z30.b}, p3/z, [sp, x8]"],
+                        ["ld1w",   "__ sve_ld1w(z0, __ S, p4, Address(sp, r28));",       "ld1w\t{z0.s}, p4/z, [sp, x28, LSL #2]"],
+                        ["ld1d",   "__ sve_ld1d(z11, __ D, p5, Address(r0, r1));",       "ld1d\t{z11.d}, p5/z, [x0, x1, LSL #3]"],
+                        ["st1b",   "__ sve_st1b(z22, __ B, p6, Address(sp));",           "st1b\t{z22.b}, p6, [sp]"],
+                        ["st1b",   "__ sve_st1b(z31, __ B, p7, Address(sp, -8));",       "st1b\t{z31.b}, p7, [sp, #-8, MUL VL]"],
+                        ["st1w",   "__ sve_st1w(z0, __ S, p1, Address(r0, 7));",         "st1w\t{z0.s}, p1, [x0, #7, MUL VL]"],
+                        ["st1b",   "__ sve_st1b(z0, __ B, p2, Address(sp, r1));",        "st1b\t{z0.b}, p2, [sp, x1]"],
+                        ["st1h",   "__ sve_st1h(z0, __ H, p3, Address(sp, r8));",        "st1h\t{z0.h}, p3, [sp, x8, LSL #1]"],
+                        ["st1d",   "__ sve_st1d(z0, __ D, p4, Address(r0, r18));",       "st1d\t{z0.d}, p4, [x0, x18, LSL #3]"],
+                        ["ldr",    "__ sve_ldr(z0, Address(sp));",                       "ldr\tz0, [sp]"],
+                        ["ldr",    "__ sve_ldr(z31, Address(sp, -256));",                "ldr\tz31, [sp, #-256, MUL VL]"],
+                        ["str",    "__ sve_str(z8, Address(r8, 255));",                  "str\tz8, [x8, #255, MUL VL]"],
+])
 
 print "\n// FloatImmediateOp"
 for float in ("2.0", "2.125", "4.0", "4.25", "8.0", "8.5", "16.0", "17.0", "0.125",
@@ -1336,18 +1511,59 @@
                          ["ldumin", "ldumin", size, suffix],
                          ["ldumax", "ldumax", size, suffix]]);
 
+generate(SVEVectorOp, [["add", "ZZZ"],
+                       ["sub", "ZZZ"],
+                       ["fadd", "ZZZ"],
+                       ["fmul", "ZZZ"],
+                       ["fsub", "ZZZ"],
+                       ["abs", "ZPZ"],
+                       ["add", "ZPZ", "dn"],
+                       ["asr", "ZPZ", "dn"],
+                       ["cnt", "ZPZ"],
+                       ["lsl", "ZPZ", "dn"],
+                       ["lsr", "ZPZ", "dn"],
+                       ["mul", "ZPZ", "dn"],
+                       ["neg", "ZPZ"],
+                       ["not", "ZPZ"],
+                       ["smax", "ZPZ", "dn"],
+                       ["smin", "ZPZ", "dn"],
+                       ["sub", "ZPZ", "dn"],
+                       ["fabs", "ZPZ"],
+                       ["fadd", "ZPZ", "dn"],
+                       ["fdiv", "ZPZ", "dn"],
+                       ["fmax", "ZPZ", "dn"],
+                       ["fmin", "ZPZ", "dn"],
+                       ["fmul", "ZPZ", "dn"],
+                       ["fneg", "ZPZ"],
+                       ["frintm", "ZPZ"],
+                       ["frintn", "ZPZ"],
+                       ["frintp", "ZPZ"],
+                       ["fsqrt", "ZPZ"],
+                       ["fsub", "ZPZ", "dn"],
+                       ["fmla", "ZPZZ"],
+                       ["fmls", "ZPZZ"],
+                       ["fnmla", "ZPZZ"],
+                       ["fnmls", "ZPZZ"],
+                       ["mla", "ZPZZ"],
+                       ["mls", "ZPZZ"],
+                       ["and", "ZZZ"],
+                       ["eor", "ZZZ"],
+                       ["orr", "ZZZ"],
+                      ])
+
+generate(SVEReductionOp, [["andv", 0], ["orv", 0], ["eorv", 0], ["smaxv", 0], ["sminv", 0],
+                          ["fminv", 2], ["fmaxv", 2], ["fadda", 2], ["uaddv", 0]])
+
 print "\n    __ bind(forth);"
 outfile.write("forth:\n")
 
 outfile.close()
 
-# compile for 8.1 and sha2 because of lse atomics and sha512 crypto extension.
-subprocess.check_call([AARCH64_AS, "-march=armv8.1-a+sha2", "aarch64ops.s", "-o", "aarch64ops.o"])
-output = subprocess.check_output([AARCH64_OBJDUMP, "-d", "aarch64ops.o"])
+# compile for sve with 8.1 and sha2 because of lse atomics and sha512 crypto extension.
+subprocess.check_call([AARCH64_AS, "-march=armv8.1-a+sha2+sve", "aarch64ops.s", "-o", "aarch64ops.o"])
 
 print
 print "/*"
-print output
 print "*/"
 
 subprocess.check_call([AARCH64_OBJCOPY, "-O", "binary", "-j", ".text", "aarch64ops.o", "aarch64ops.bin"])
--- a/src/hotspot/cpu/aarch64/aarch64.ad	Wed Sep 02 20:33:29 2020 -0700
+++ b/src/hotspot/cpu/aarch64/aarch64.ad	Tue Sep 08 15:28:06 2020 +0800
@@ -69,7 +69,7 @@
 //
 //   r0-r7,r10-r26 volatile (caller save)
 //   r27-r32 system (no save, no allocate)
-//   r8-r9 invisible to the allocator (so we can use them as scratch regs)
+//   r8-r9 non-allocatable (so we can use them as scratch regs)
 //
 // as regards Java usage. we don't use any callee save registers
 // because this makes it difficult to de-optimise a frame (see comment
@@ -94,6 +94,10 @@
 reg_def R6_H    ( SOC, SOC, Op_RegI,  6, r6->as_VMReg()->next() );
 reg_def R7      ( SOC, SOC, Op_RegI,  7, r7->as_VMReg()         );
 reg_def R7_H    ( SOC, SOC, Op_RegI,  7, r7->as_VMReg()->next() );
+reg_def R8      ( NS,  SOC, Op_RegI,  8, r8->as_VMReg()         ); // rscratch1, non-allocatable
+reg_def R8_H    ( NS,  SOC, Op_RegI,  8, r8->as_VMReg()->next() );
+reg_def R9      ( NS,  SOC, Op_RegI,  9, r9->as_VMReg()         ); // rscratch2, non-allocatable
+reg_def R9_H    ( NS,  SOC, Op_RegI,  9, r9->as_VMReg()->next() );
 reg_def R10     ( SOC, SOC, Op_RegI, 10, r10->as_VMReg()        );
 reg_def R10_H   ( SOC, SOC, Op_RegI, 10, r10->as_VMReg()->next());
 reg_def R11     ( SOC, SOC, Op_RegI, 11, r11->as_VMReg()        );
@@ -140,7 +144,7 @@
 reg_def R31_H   (  NS,  NS, Op_RegI, 31, r31_sp->as_VMReg()->next());
 
 // ----------------------------
-// Float/Double Registers
+// Float/Double/Vector Registers
 // ----------------------------
 
 // Double Registers
@@ -161,165 +165,324 @@
 // the platform ABI treats v8-v15 as callee save). float registers
 // v16-v31 are SOC as per the platform spec
 
-  reg_def V0   ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()          );
-  reg_def V0_H ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()->next()  );
-  reg_def V0_J ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()->next(2) );
-  reg_def V0_K ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()->next(3) );
-
-  reg_def V1   ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()          );
-  reg_def V1_H ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()->next()  );
-  reg_def V1_J ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()->next(2) );
-  reg_def V1_K ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()->next(3) );
-
-  reg_def V2   ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()          );
-  reg_def V2_H ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()->next()  );
-  reg_def V2_J ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()->next(2) );
-  reg_def V2_K ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()->next(3) );
-
-  reg_def V3   ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()          );
-  reg_def V3_H ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()->next()  );
-  reg_def V3_J ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()->next(2) );
-  reg_def V3_K ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()->next(3) );
-
-  reg_def V4   ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()          );
-  reg_def V4_H ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()->next()  );
-  reg_def V4_J ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()->next(2) );
-  reg_def V4_K ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()->next(3) );
-
-  reg_def V5   ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()          );
-  reg_def V5_H ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()->next()  );
-  reg_def V5_J ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()->next(2) );
-  reg_def V5_K ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()->next(3) );
-
-  reg_def V6   ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()          );
-  reg_def V6_H ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()->next()  );
-  reg_def V6_J ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()->next(2) );
-  reg_def V6_K ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()->next(3) );
-
-  reg_def V7   ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()          );
-  reg_def V7_H ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()->next()  );
-  reg_def V7_J ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()->next(2) );
-  reg_def V7_K ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()->next(3) );
-
-  reg_def V8   ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()          );
-  reg_def V8_H ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()->next()  );
-  reg_def V8_J ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()->next(2) );
-  reg_def V8_K ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()->next(3) );
-
-  reg_def V9   ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()          );
-  reg_def V9_H ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()->next()  );
-  reg_def V9_J ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()->next(2) );
-  reg_def V9_K ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()->next(3) );
-
-  reg_def V10  ( SOC, SOC, Op_RegF, 10, v10->as_VMReg()         );
-  reg_def V10_H( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next() );
-  reg_def V10_J( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(2));
-  reg_def V10_K( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(3));
-
-  reg_def V11  ( SOC, SOC, Op_RegF, 11, v11->as_VMReg()         );
-  reg_def V11_H( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next() );
-  reg_def V11_J( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(2));
-  reg_def V11_K( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(3));
-
-  reg_def V12  ( SOC, SOC, Op_RegF, 12, v12->as_VMReg()         );
-  reg_def V12_H( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next() );
-  reg_def V12_J( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(2));
-  reg_def V12_K( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(3));
-
-  reg_def V13  ( SOC, SOC, Op_RegF, 13, v13->as_VMReg()         );
-  reg_def V13_H( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next() );
-  reg_def V13_J( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(2));
-  reg_def V13_K( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(3));
-
-  reg_def V14  ( SOC, SOC, Op_RegF, 14, v14->as_VMReg()         );
-  reg_def V14_H( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next() );
-  reg_def V14_J( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(2));
-  reg_def V14_K( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(3));
-
-  reg_def V15  ( SOC, SOC, Op_RegF, 15, v15->as_VMReg()         );
-  reg_def V15_H( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next() );
-  reg_def V15_J( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(2));
-  reg_def V15_K( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(3));
-
-  reg_def V16  ( SOC, SOC, Op_RegF, 16, v16->as_VMReg()         );
-  reg_def V16_H( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next() );
-  reg_def V16_J( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(2));
-  reg_def V16_K( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(3));
-
-  reg_def V17  ( SOC, SOC, Op_RegF, 17, v17->as_VMReg()         );
-  reg_def V17_H( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next() );
-  reg_def V17_J( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(2));
-  reg_def V17_K( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(3));
-
-  reg_def V18  ( SOC, SOC, Op_RegF, 18, v18->as_VMReg()         );
-  reg_def V18_H( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next() );
-  reg_def V18_J( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(2));
-  reg_def V18_K( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(3));
-
-  reg_def V19  ( SOC, SOC, Op_RegF, 19, v19->as_VMReg()         );
-  reg_def V19_H( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next() );
-  reg_def V19_J( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(2));
-  reg_def V19_K( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(3));
-
-  reg_def V20  ( SOC, SOC, Op_RegF, 20, v20->as_VMReg()         );
-  reg_def V20_H( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next() );
-  reg_def V20_J( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(2));
-  reg_def V20_K( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(3));
-
-  reg_def V21  ( SOC, SOC, Op_RegF, 21, v21->as_VMReg()         );
-  reg_def V21_H( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next() );
-  reg_def V21_J( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(2));
-  reg_def V21_K( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(3));
-
-  reg_def V22  ( SOC, SOC, Op_RegF, 22, v22->as_VMReg()         );
-  reg_def V22_H( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next() );
-  reg_def V22_J( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(2));
-  reg_def V22_K( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(3));
-
-  reg_def V23  ( SOC, SOC, Op_RegF, 23, v23->as_VMReg()         );
-  reg_def V23_H( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next() );
-  reg_def V23_J( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(2));
-  reg_def V23_K( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(3));
-
-  reg_def V24  ( SOC, SOC, Op_RegF, 24, v24->as_VMReg()         );
-  reg_def V24_H( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next() );
-  reg_def V24_J( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(2));
-  reg_def V24_K( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(3));
-
-  reg_def V25  ( SOC, SOC, Op_RegF, 25, v25->as_VMReg()         );
-  reg_def V25_H( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next() );
-  reg_def V25_J( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(2));
-  reg_def V25_K( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(3));
-
-  reg_def V26  ( SOC, SOC, Op_RegF, 26, v26->as_VMReg()         );
-  reg_def V26_H( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next() );
-  reg_def V26_J( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(2));
-  reg_def V26_K( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(3));
-
-  reg_def V27  ( SOC, SOC, Op_RegF, 27, v27->as_VMReg()         );
-  reg_def V27_H( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next() );
-  reg_def V27_J( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(2));
-  reg_def V27_K( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(3));
-
-  reg_def V28  ( SOC, SOC, Op_RegF, 28, v28->as_VMReg()         );
-  reg_def V28_H( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next() );
-  reg_def V28_J( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(2));
-  reg_def V28_K( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(3));
-
-  reg_def V29  ( SOC, SOC, Op_RegF, 29, v29->as_VMReg()         );
-  reg_def V29_H( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next() );
-  reg_def V29_J( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(2));
-  reg_def V29_K( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(3));
-
-  reg_def V30  ( SOC, SOC, Op_RegF, 30, v30->as_VMReg()         );
-  reg_def V30_H( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next() );
-  reg_def V30_J( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(2));
-  reg_def V30_K( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(3));
-
-  reg_def V31  ( SOC, SOC, Op_RegF, 31, v31->as_VMReg()         );
-  reg_def V31_H( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next() );
-  reg_def V31_J( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(2));
-  reg_def V31_K( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(3));
+// For SVE vector registers, we simply extend vector register size to 8
+// 'logical' slots. This is nominally 256 bits but it actually covers
+// all possible 'physical' SVE vector register lengths from 128 ~ 2048
+// bits. The 'physical' SVE vector register length is detected during
+// startup, so the register allocator is able to identify the correct
+// number of bytes needed for an SVE spill/unspill.
+// Note that a vector register with 4 slots denotes a 128-bit NEON
+// register allowing it to be distinguished from the corresponding SVE
+// vector register when the SVE vector length is 128 bits.
+
+  reg_def V0   ( SOC, SOC, Op_RegF, 0, v0->as_VMReg()          );
+  reg_def V0_H ( SOC, SOC, Op_RegF, 0, v0->as_VMReg()->next()  );
+  reg_def V0_J ( SOC, SOC, Op_RegF, 0, v0->as_VMReg()->next(2) );
+  reg_def V0_K ( SOC, SOC, Op_RegF, 0, v0->as_VMReg()->next(3) );
+  reg_def V0_L ( SOC, SOC, Op_RegF, 0, v0->as_VMReg()->next(4) );
+  reg_def V0_M ( SOC, SOC, Op_RegF, 0, v0->as_VMReg()->next(5) );
+  reg_def V0_N ( SOC, SOC, Op_RegF, 0, v0->as_VMReg()->next(6) );
+  reg_def V0_O ( SOC, SOC, Op_RegF, 0, v0->as_VMReg()->next(7) );
+
+  reg_def V1   ( SOC, SOC, Op_RegF, 1, v1->as_VMReg()          );
+  reg_def V1_H ( SOC, SOC, Op_RegF, 1, v1->as_VMReg()->next()  );
+  reg_def V1_J ( SOC, SOC, Op_RegF, 1, v1->as_VMReg()->next(2) );
+  reg_def V1_K ( SOC, SOC, Op_RegF, 1, v1->as_VMReg()->next(3) );
+  reg_def V1_L ( SOC, SOC, Op_RegF, 1, v1->as_VMReg()->next(4) );
+  reg_def V1_M ( SOC, SOC, Op_RegF, 1, v1->as_VMReg()->next(5) );
+  reg_def V1_N ( SOC, SOC, Op_RegF, 1, v1->as_VMReg()->next(6) );
+  reg_def V1_O ( SOC, SOC, Op_RegF, 1, v1->as_VMReg()->next(7) );
+
+  reg_def V2   ( SOC, SOC, Op_RegF, 2, v2->as_VMReg()          );
+  reg_def V2_H ( SOC, SOC, Op_RegF, 2, v2->as_VMReg()->next()  );
+  reg_def V2_J ( SOC, SOC, Op_RegF, 2, v2->as_VMReg()->next(2) );
+  reg_def V2_K ( SOC, SOC, Op_RegF, 2, v2->as_VMReg()->next(3) );
+  reg_def V2_L ( SOC, SOC, Op_RegF, 2, v2->as_VMReg()->next(4) );
+  reg_def V2_M ( SOC, SOC, Op_RegF, 2, v2->as_VMReg()->next(5) );
+  reg_def V2_N ( SOC, SOC, Op_RegF, 2, v2->as_VMReg()->next(6) );
+  reg_def V2_O ( SOC, SOC, Op_RegF, 2, v2->as_VMReg()->next(7) );
+
+  reg_def V3   ( SOC, SOC, Op_RegF, 3, v3->as_VMReg()          );
+  reg_def V3_H ( SOC, SOC, Op_RegF, 3, v3->as_VMReg()->next()  );
+  reg_def V3_J ( SOC, SOC, Op_RegF, 3, v3->as_VMReg()->next(2) );
+  reg_def V3_K ( SOC, SOC, Op_RegF, 3, v3->as_VMReg()->next(3) );
+  reg_def V3_L ( SOC, SOC, Op_RegF, 3, v3->as_VMReg()->next(4) );
+  reg_def V3_M ( SOC, SOC, Op_RegF, 3, v3->as_VMReg()->next(5) );
+  reg_def V3_N ( SOC, SOC, Op_RegF, 3, v3->as_VMReg()->next(6) );
+  reg_def V3_O ( SOC, SOC, Op_RegF, 3, v3->as_VMReg()->next(7) );
+
+  reg_def V4   ( SOC, SOC, Op_RegF, 4, v4->as_VMReg()          );
+  reg_def V4_H ( SOC, SOC, Op_RegF, 4, v4->as_VMReg()->next()  );
+  reg_def V4_J ( SOC, SOC, Op_RegF, 4, v4->as_VMReg()->next(2) );
+  reg_def V4_K ( SOC, SOC, Op_RegF, 4, v4->as_VMReg()->next(3) );
+  reg_def V4_L ( SOC, SOC, Op_RegF, 4, v4->as_VMReg()->next(4) );
+  reg_def V4_M ( SOC, SOC, Op_RegF, 4, v4->as_VMReg()->next(5) );
+  reg_def V4_N ( SOC, SOC, Op_RegF, 4, v4->as_VMReg()->next(6) );
+  reg_def V4_O ( SOC, SOC, Op_RegF, 4, v4->as_VMReg()->next(7) );
+
+  reg_def V5   ( SOC, SOC, Op_RegF, 5, v5->as_VMReg()          );
+  reg_def V5_H ( SOC, SOC, Op_RegF, 5, v5->as_VMReg()->next()  );
+  reg_def V5_J ( SOC, SOC, Op_RegF, 5, v5->as_VMReg()->next(2) );
+  reg_def V5_K ( SOC, SOC, Op_RegF, 5, v5->as_VMReg()->next(3) );
+  reg_def V5_L ( SOC, SOC, Op_RegF, 5, v5->as_VMReg()->next(4) );
+  reg_def V5_M ( SOC, SOC, Op_RegF, 5, v5->as_VMReg()->next(5) );
+  reg_def V5_N ( SOC, SOC, Op_RegF, 5, v5->as_VMReg()->next(6) );
+  reg_def V5_O ( SOC, SOC, Op_RegF, 5, v5->as_VMReg()->next(7) );
+
+  reg_def V6   ( SOC, SOC, Op_RegF, 6, v6->as_VMReg()          );
+  reg_def V6_H ( SOC, SOC, Op_RegF, 6, v6->as_VMReg()->next()  );
+  reg_def V6_J ( SOC, SOC, Op_RegF, 6, v6->as_VMReg()->next(2) );
+  reg_def V6_K ( SOC, SOC, Op_RegF, 6, v6->as_VMReg()->next(3) );
+  reg_def V6_L ( SOC, SOC, Op_RegF, 6, v6->as_VMReg()->next(4) );
+  reg_def V6_M ( SOC, SOC, Op_RegF, 6, v6->as_VMReg()->next(5) );
+  reg_def V6_N ( SOC, SOC, Op_RegF, 6, v6->as_VMReg()->next(6) );
+  reg_def V6_O ( SOC, SOC, Op_RegF, 6, v6->as_VMReg()->next(7) );
+
+  reg_def V7   ( SOC, SOC, Op_RegF, 7, v7->as_VMReg()          );
+  reg_def V7_H ( SOC, SOC, Op_RegF, 7, v7->as_VMReg()->next()  );
+  reg_def V7_J ( SOC, SOC, Op_RegF, 7, v7->as_VMReg()->next(2) );
+  reg_def V7_K ( SOC, SOC, Op_RegF, 7, v7->as_VMReg()->next(3) );
+  reg_def V7_L ( SOC, SOC, Op_RegF, 7, v7->as_VMReg()->next(4) );
+  reg_def V7_M ( SOC, SOC, Op_RegF, 7, v7->as_VMReg()->next(5) );
+  reg_def V7_N ( SOC, SOC, Op_RegF, 7, v7->as_VMReg()->next(6) );
+  reg_def V7_O ( SOC, SOC, Op_RegF, 7, v7->as_VMReg()->next(7) );
+
+  reg_def V8   ( SOC, SOC, Op_RegF, 8, v8->as_VMReg()          );
+  reg_def V8_H ( SOC, SOC, Op_RegF, 8, v8->as_VMReg()->next()  );
+  reg_def V8_J ( SOC, SOC, Op_RegF, 8, v8->as_VMReg()->next(2) );
+  reg_def V8_K ( SOC, SOC, Op_RegF, 8, v8->as_VMReg()->next(3) );
+  reg_def V8_L ( SOC, SOC, Op_RegF, 8, v8->as_VMReg()->next(4) );
+  reg_def V8_M ( SOC, SOC, Op_RegF, 8, v8->as_VMReg()->next(5) );
+  reg_def V8_N ( SOC, SOC, Op_RegF, 8, v8->as_VMReg()->next(6) );
+  reg_def V8_O ( SOC, SOC, Op_RegF, 8, v8->as_VMReg()->next(7) );
+
+  reg_def V9   ( SOC, SOC, Op_RegF, 9, v9->as_VMReg()          );
+  reg_def V9_H ( SOC, SOC, Op_RegF, 9, v9->as_VMReg()->next()  );
+  reg_def V9_J ( SOC, SOC, Op_RegF, 9, v9->as_VMReg()->next(2) );
+  reg_def V9_K ( SOC, SOC, Op_RegF, 9, v9->as_VMReg()->next(3) );
+  reg_def V9_L ( SOC, SOC, Op_RegF, 9, v9->as_VMReg()->next(4) );
+  reg_def V9_M ( SOC, SOC, Op_RegF, 9, v9->as_VMReg()->next(5) );
+  reg_def V9_N ( SOC, SOC, Op_RegF, 9, v9->as_VMReg()->next(6) );
+  reg_def V9_O ( SOC, SOC, Op_RegF, 9, v9->as_VMReg()->next(7) );
+
+  reg_def V10   ( SOC, SOC, Op_RegF, 10, v10->as_VMReg()          );
+  reg_def V10_H ( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next()  );
+  reg_def V10_J ( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(2) );
+  reg_def V10_K ( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(3) );
+  reg_def V10_L ( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(4) );
+  reg_def V10_M ( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(5) );
+  reg_def V10_N ( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(6) );
+  reg_def V10_O ( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(7) );
+
+  reg_def V11   ( SOC, SOC, Op_RegF, 11, v11->as_VMReg()          );
+  reg_def V11_H ( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next()  );
+  reg_def V11_J ( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(2) );
+  reg_def V11_K ( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(3) );
+  reg_def V11_L ( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(4) );
+  reg_def V11_M ( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(5) );
+  reg_def V11_N ( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(6) );
+  reg_def V11_O ( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(7) );
+
+  reg_def V12   ( SOC, SOC, Op_RegF, 12, v12->as_VMReg()          );
+  reg_def V12_H ( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next()  );
+  reg_def V12_J ( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(2) );
+  reg_def V12_K ( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(3) );
+  reg_def V12_L ( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(4) );
+  reg_def V12_M ( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(5) );
+  reg_def V12_N ( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(6) );
+  reg_def V12_O ( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(7) );
+
+  reg_def V13   ( SOC, SOC, Op_RegF, 13, v13->as_VMReg()          );
+  reg_def V13_H ( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next()  );
+  reg_def V13_J ( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(2) );
+  reg_def V13_K ( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(3) );
+  reg_def V13_L ( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(4) );
+  reg_def V13_M ( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(5) );
+  reg_def V13_N ( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(6) );
+  reg_def V13_O ( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(7) );
+
+  reg_def V14   ( SOC, SOC, Op_RegF, 14, v14->as_VMReg()          );
+  reg_def V14_H ( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next()  );
+  reg_def V14_J ( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(2) );
+  reg_def V14_K ( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(3) );
+  reg_def V14_L ( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(4) );
+  reg_def V14_M ( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(5) );
+  reg_def V14_N ( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(6) );
+  reg_def V14_O ( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(7) );
+
+  reg_def V15   ( SOC, SOC, Op_RegF, 15, v15->as_VMReg()          );
+  reg_def V15_H ( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next()  );
+  reg_def V15_J ( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(2) );
+  reg_def V15_K ( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(3) );
+  reg_def V15_L ( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(4) );
+  reg_def V15_M ( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(5) );
+  reg_def V15_N ( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(6) );
+  reg_def V15_O ( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(7) );
+
+  reg_def V16   ( SOC, SOC, Op_RegF, 16, v16->as_VMReg()          );
+  reg_def V16_H ( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next()  );
+  reg_def V16_J ( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(2) );
+  reg_def V16_K ( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(3) );
+  reg_def V16_L ( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(4) );
+  reg_def V16_M ( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(5) );
+  reg_def V16_N ( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(6) );
+  reg_def V16_O ( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(7) );
+
+  reg_def V17   ( SOC, SOC, Op_RegF, 17, v17->as_VMReg()          );
+  reg_def V17_H ( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next()  );
+  reg_def V17_J ( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(2) );
+  reg_def V17_K ( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(3) );
+  reg_def V17_L ( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(4) );
+  reg_def V17_M ( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(5) );
+  reg_def V17_N ( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(6) );
+  reg_def V17_O ( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(7) );
+
+  reg_def V18   ( SOC, SOC, Op_RegF, 18, v18->as_VMReg()          );
+  reg_def V18_H ( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next()  );
+  reg_def V18_J ( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(2) );
+  reg_def V18_K ( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(3) );
+  reg_def V18_L ( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(4) );
+  reg_def V18_M ( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(5) );
+  reg_def V18_N ( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(6) );
+  reg_def V18_O ( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(7) );
+
+  reg_def V19   ( SOC, SOC, Op_RegF, 19, v19->as_VMReg()          );
+  reg_def V19_H ( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next()  );
+  reg_def V19_J ( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(2) );
+  reg_def V19_K ( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(3) );
+  reg_def V19_L ( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(4) );
+  reg_def V19_M ( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(5) );
+  reg_def V19_N ( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(6) );
+  reg_def V19_O ( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(7) );
+
+  reg_def V20   ( SOC, SOC, Op_RegF, 20, v20->as_VMReg()          );
+  reg_def V20_H ( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next()  );
+  reg_def V20_J ( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(2) );
+  reg_def V20_K ( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(3) );
+  reg_def V20_L ( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(4) );
+  reg_def V20_M ( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(5) );
+  reg_def V20_N ( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(6) );
+  reg_def V20_O ( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(7) );
+
+  reg_def V21   ( SOC, SOC, Op_RegF, 21, v21->as_VMReg()          );
+  reg_def V21_H ( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next()  );
+  reg_def V21_J ( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(2) );
+  reg_def V21_K ( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(3) );
+  reg_def V21_L ( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(4) );
+  reg_def V21_M ( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(5) );
+  reg_def V21_N ( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(6) );
+  reg_def V21_O ( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(7) );
+
+  reg_def V22   ( SOC, SOC, Op_RegF, 22, v22->as_VMReg()          );
+  reg_def V22_H ( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next()  );
+  reg_def V22_J ( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(2) );
+  reg_def V22_K ( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(3) );
+  reg_def V22_L ( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(4) );
+  reg_def V22_M ( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(5) );
+  reg_def V22_N ( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(6) );
+  reg_def V22_O ( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(7) );
+
+  reg_def V23   ( SOC, SOC, Op_RegF, 23, v23->as_VMReg()          );
+  reg_def V23_H ( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next()  );
+  reg_def V23_J ( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(2) );
+  reg_def V23_K ( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(3) );
+  reg_def V23_L ( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(4) );
+  reg_def V23_M ( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(5) );
+  reg_def V23_N ( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(6) );
+  reg_def V23_O ( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(7) );
+
+  reg_def V24   ( SOC, SOC, Op_RegF, 24, v24->as_VMReg()          );
+  reg_def V24_H ( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next()  );
+  reg_def V24_J ( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(2) );
+  reg_def V24_K ( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(3) );
+  reg_def V24_L ( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(4) );
+  reg_def V24_M ( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(5) );
+  reg_def V24_N ( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(6) );
+  reg_def V24_O ( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(7) );
+
+  reg_def V25   ( SOC, SOC, Op_RegF, 25, v25->as_VMReg()          );
+  reg_def V25_H ( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next()  );
+  reg_def V25_J ( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(2) );
+  reg_def V25_K ( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(3) );
+  reg_def V25_L ( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(4) );
+  reg_def V25_M ( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(5) );
+  reg_def V25_N ( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(6) );
+  reg_def V25_O ( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(7) );
+
+  reg_def V26   ( SOC, SOC, Op_RegF, 26, v26->as_VMReg()          );
+  reg_def V26_H ( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next()  );
+  reg_def V26_J ( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(2) );
+  reg_def V26_K ( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(3) );
+  reg_def V26_L ( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(4) );
+  reg_def V26_M ( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(5) );
+  reg_def V26_N ( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(6) );
+  reg_def V26_O ( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(7) );
+
+  reg_def V27   ( SOC, SOC, Op_RegF, 27, v27->as_VMReg()          );
+  reg_def V27_H ( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next()  );
+  reg_def V27_J ( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(2) );
+  reg_def V27_K ( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(3) );
+  reg_def V27_L ( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(4) );
+  reg_def V27_M ( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(5) );
+  reg_def V27_N ( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(6) );
+  reg_def V27_O ( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(7) );
+
+  reg_def V28   ( SOC, SOC, Op_RegF, 28, v28->as_VMReg()          );
+  reg_def V28_H ( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next()  );
+  reg_def V28_J ( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(2) );
+  reg_def V28_K ( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(3) );
+  reg_def V28_L ( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(4) );
+  reg_def V28_M ( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(5) );
+  reg_def V28_N ( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(6) );
+  reg_def V28_O ( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(7) );
+
+  reg_def V29   ( SOC, SOC, Op_RegF, 29, v29->as_VMReg()          );
+  reg_def V29_H ( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next()  );
+  reg_def V29_J ( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(2) );
+  reg_def V29_K ( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(3) );
+  reg_def V29_L ( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(4) );
+  reg_def V29_M ( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(5) );
+  reg_def V29_N ( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(6) );
+  reg_def V29_O ( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(7) );
+
+  reg_def V30   ( SOC, SOC, Op_RegF, 30, v30->as_VMReg()          );
+  reg_def V30_H ( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next()  );
+  reg_def V30_J ( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(2) );
+  reg_def V30_K ( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(3) );
+  reg_def V30_L ( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(4) );
+  reg_def V30_M ( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(5) );
+  reg_def V30_N ( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(6) );
+  reg_def V30_O ( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(7) );
+
+  reg_def V31   ( SOC, SOC, Op_RegF, 31, v31->as_VMReg()          );
+  reg_def V31_H ( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next()  );
+  reg_def V31_J ( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(2) );
+  reg_def V31_K ( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(3) );
+  reg_def V31_L ( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(4) );
+  reg_def V31_M ( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(5) );
+  reg_def V31_N ( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(6) );
+  reg_def V31_O ( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(7) );
+
+
+// ----------------------------
+// SVE Predicate Registers
+// ----------------------------
+  reg_def P0 (SOC, SOC, Op_RegVMask, 0, p0->as_VMReg());
+  reg_def P1 (SOC, SOC, Op_RegVMask, 1, p1->as_VMReg());
+  reg_def P2 (SOC, SOC, Op_RegVMask, 2, p2->as_VMReg());
+  reg_def P3 (SOC, SOC, Op_RegVMask, 3, p3->as_VMReg());
+  reg_def P4 (SOC, SOC, Op_RegVMask, 4, p4->as_VMReg());
+  reg_def P5 (SOC, SOC, Op_RegVMask, 5, p5->as_VMReg());
+  reg_def P6 (SOC, SOC, Op_RegVMask, 6, p6->as_VMReg());
+  reg_def P7 (SOC, SOC, Op_RegVMask, 7, p7->as_VMReg());
+  reg_def P8 (SOC, SOC, Op_RegVMask, 8, p8->as_VMReg());
+  reg_def P9 (SOC, SOC, Op_RegVMask, 9, p9->as_VMReg());
+  reg_def P10 (SOC, SOC, Op_RegVMask, 10, p10->as_VMReg());
+  reg_def P11 (SOC, SOC, Op_RegVMask, 11, p11->as_VMReg());
+  reg_def P12 (SOC, SOC, Op_RegVMask, 12, p12->as_VMReg());
+  reg_def P13 (SOC, SOC, Op_RegVMask, 13, p13->as_VMReg());
+  reg_def P14 (SOC, SOC, Op_RegVMask, 14, p14->as_VMReg());
+  reg_def P15 (SOC, SOC, Op_RegVMask, 15, p15->as_VMReg());
 
 // ----------------------------
 // Special Registers
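Stepping back from the reg_def tables: the eight logical slots per V register are an allocator-side convention (8 x 32-bit slots = 256 bits); the spill size actually used comes from the vector length detected at startup, and a 4-slot mask keeps meaning "128-bit NEON" even when the SVE vector length is also 128 bits. The arithmetic, for illustration:

    SLOT_BITS = 32                          # one VMReg slot
    NEON_SLOTS, SVE_SLOTS = 4, 8            # 128 bits vs. 256 'logical' bits
    assert NEON_SLOTS * SLOT_BITS == 128    # why 4 slots still denotes NEON
    for vl_bits in (128, 256, 512, 1024, 2048):  # possible physical lengths
        print("VL=%4d bits -> spill size %3d bytes" % (vl_bits, vl_bits // 8))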
@@ -333,7 +496,6 @@
 
 reg_def RFLAGS(SOC, SOC, 0, 32, VMRegImpl::Bad());
 
-
 // Specify priority of register selection within phases of register
 // allocation.  Highest priority is first.  A useful heuristic is to
 // give registers a low priority when they are required by machine
@@ -381,50 +543,72 @@
     R29, R29_H, // fp
     R30, R30_H, // lr
     R31, R31_H, // sp
+    R8, R8_H,   // rscratch1
+    R9, R9_H,   // rscratch2
 );
 
 alloc_class chunk1(
 
     // no save
-    V16, V16_H, V16_J, V16_K,
-    V17, V17_H, V17_J, V17_K,
-    V18, V18_H, V18_J, V18_K,
-    V19, V19_H, V19_J, V19_K,
-    V20, V20_H, V20_J, V20_K,
-    V21, V21_H, V21_J, V21_K,
-    V22, V22_H, V22_J, V22_K,
-    V23, V23_H, V23_J, V23_K,
-    V24, V24_H, V24_J, V24_K,
-    V25, V25_H, V25_J, V25_K,
-    V26, V26_H, V26_J, V26_K,
-    V27, V27_H, V27_J, V27_K,
-    V28, V28_H, V28_J, V28_K,
-    V29, V29_H, V29_J, V29_K,
-    V30, V30_H, V30_J, V30_K,
-    V31, V31_H, V31_J, V31_K,
+    V16, V16_H, V16_J, V16_K, V16_L, V16_M, V16_N, V16_O,
+    V17, V17_H, V17_J, V17_K, V17_L, V17_M, V17_N, V17_O,
+    V18, V18_H, V18_J, V18_K, V18_L, V18_M, V18_N, V18_O,
+    V19, V19_H, V19_J, V19_K, V19_L, V19_M, V19_N, V19_O,
+    V20, V20_H, V20_J, V20_K, V20_L, V20_M, V20_N, V20_O,
+    V21, V21_H, V21_J, V21_K, V21_L, V21_M, V21_N, V21_O,
+    V22, V22_H, V22_J, V22_K, V22_L, V22_M, V22_N, V22_O,
+    V23, V23_H, V23_J, V23_K, V23_L, V23_M, V23_N, V23_O,
+    V24, V24_H, V24_J, V24_K, V24_L, V24_M, V24_N, V24_O,
+    V25, V25_H, V25_J, V25_K, V25_L, V25_M, V25_N, V25_O,
+    V26, V26_H, V26_J, V26_K, V26_L, V26_M, V26_N, V26_O,
+    V27, V27_H, V27_J, V27_K, V27_L, V27_M, V27_N, V27_O,
+    V28, V28_H, V28_J, V28_K, V28_L, V28_M, V28_N, V28_O,
+    V29, V29_H, V29_J, V29_K, V29_L, V29_M, V29_N, V29_O,
+    V30, V30_H, V30_J, V30_K, V30_L, V30_M, V30_N, V30_O,
+    V31, V31_H, V31_J, V31_K, V31_L, V31_M, V31_N, V31_O,
 
     // arg registers
-    V0, V0_H, V0_J, V0_K,
-    V1, V1_H, V1_J, V1_K,
-    V2, V2_H, V2_J, V2_K,
-    V3, V3_H, V3_J, V3_K,
-    V4, V4_H, V4_J, V4_K,
-    V5, V5_H, V5_J, V5_K,
-    V6, V6_H, V6_J, V6_K,
-    V7, V7_H, V7_J, V7_K,
+    V0, V0_H, V0_J, V0_K, V0_L, V0_M, V0_N, V0_O,
+    V1, V1_H, V1_J, V1_K, V1_L, V1_M, V1_N, V1_O,
+    V2, V2_H, V2_J, V2_K, V2_L, V2_M, V2_N, V2_O,
+    V3, V3_H, V3_J, V3_K, V3_L, V3_M, V3_N, V3_O,
+    V4, V4_H, V4_J, V4_K, V4_L, V4_M, V4_N, V4_O,
+    V5, V5_H, V5_J, V5_K, V5_L, V5_M, V5_N, V5_O,
+    V6, V6_H, V6_J, V6_K, V6_L, V6_M, V6_N, V6_O,
+    V7, V7_H, V7_J, V7_K, V7_L, V7_M, V7_N, V7_O,
 
     // non-volatiles
-    V8, V8_H, V8_J, V8_K,
-    V9, V9_H, V9_J, V9_K,
-    V10, V10_H, V10_J, V10_K,
-    V11, V11_H, V11_J, V11_K,
-    V12, V12_H, V12_J, V12_K,
-    V13, V13_H, V13_J, V13_K,
-    V14, V14_H, V14_J, V14_K,
-    V15, V15_H, V15_J, V15_K,
-);
-
-alloc_class chunk2(RFLAGS);
+    V8, V8_H, V8_J, V8_K, V8_L, V8_M, V8_N, V8_O,
+    V9, V9_H, V9_J, V9_K, V9_L, V9_M, V9_N, V9_O,
+    V10, V10_H, V10_J, V10_K, V10_L, V10_M, V10_N, V10_O,
+    V11, V11_H, V11_J, V11_K, V11_L, V11_M, V11_N, V11_O,
+    V12, V12_H, V12_J, V12_K, V12_L, V12_M, V12_N, V12_O,
+    V13, V13_H, V13_J, V13_K, V13_L, V13_M, V13_N, V13_O,
+    V14, V14_H, V14_J, V14_K, V14_L, V14_M, V14_N, V14_O,
+    V15, V15_H, V15_J, V15_K, V15_L, V15_M, V15_N, V15_O,
+);
+
+alloc_class chunk2 (
+    P0,
+    P1,
+    P2,
+    P3,
+    P4,
+    P5,
+    P6,
+    P7,
+
+    P8,
+    P9,
+    P10,
+    P11,
+    P12,
+    P13,
+    P14,
+    P15,
+);
+
+alloc_class chunk3(RFLAGS);
 
 //----------Architecture Description Register Classes--------------------------
 // Several register classes are automatically defined based upon information in
@@ -708,6 +892,42 @@
     V31, V31_H
 );
 
+// Class for all SVE vector registers.
+reg_class vectora_reg (
+    V0, V0_H, V0_J, V0_K, V0_L, V0_M, V0_N, V0_O,
+    V1, V1_H, V1_J, V1_K, V1_L, V1_M, V1_N, V1_O,
+    V2, V2_H, V2_J, V2_K, V2_L, V2_M, V2_N, V2_O,
+    V3, V3_H, V3_J, V3_K, V3_L, V3_M, V3_N, V3_O,
+    V4, V4_H, V4_J, V4_K, V4_L, V4_M, V4_N, V4_O,
+    V5, V5_H, V5_J, V5_K, V5_L, V5_M, V5_N, V5_O,
+    V6, V6_H, V6_J, V6_K, V6_L, V6_M, V6_N, V6_O,
+    V7, V7_H, V7_J, V7_K, V7_L, V7_M, V7_N, V7_O,
+    V8, V8_H, V8_J, V8_K, V8_L, V8_M, V8_N, V8_O,
+    V9, V9_H, V9_J, V9_K, V9_L, V9_M, V9_N, V9_O,
+    V10, V10_H, V10_J, V10_K, V10_L, V10_M, V10_N, V10_O,
+    V11, V11_H, V11_J, V11_K, V11_L, V11_M, V11_N, V11_O,
+    V12, V12_H, V12_J, V12_K, V12_L, V12_M, V12_N, V12_O,
+    V13, V13_H, V13_J, V13_K, V13_L, V13_M, V13_N, V13_O,
+    V14, V14_H, V14_J, V14_K, V14_L, V14_M, V14_N, V14_O,
+    V15, V15_H, V15_J, V15_K, V15_L, V15_M, V15_N, V15_O,
+    V16, V16_H, V16_J, V16_K, V16_L, V16_M, V16_N, V16_O,
+    V17, V17_H, V17_J, V17_K, V17_L, V17_M, V17_N, V17_O,
+    V18, V18_H, V18_J, V18_K, V18_L, V18_M, V18_N, V18_O,
+    V19, V19_H, V19_J, V19_K, V19_L, V19_M, V19_N, V19_O,
+    V20, V20_H, V20_J, V20_K, V20_L, V20_M, V20_N, V20_O,
+    V21, V21_H, V21_J, V21_K, V21_L, V21_M, V21_N, V21_O,
+    V22, V22_H, V22_J, V22_K, V22_L, V22_M, V22_N, V22_O,
+    V23, V23_H, V23_J, V23_K, V23_L, V23_M, V23_N, V23_O,
+    V24, V24_H, V24_J, V24_K, V24_L, V24_M, V24_N, V24_O,
+    V25, V25_H, V25_J, V25_K, V25_L, V25_M, V25_N, V25_O,
+    V26, V26_H, V26_J, V26_K, V26_L, V26_M, V26_N, V26_O,
+    V27, V27_H, V27_J, V27_K, V27_L, V27_M, V27_N, V27_O,
+    V28, V28_H, V28_J, V28_K, V28_L, V28_M, V28_N, V28_O,
+    V29, V29_H, V29_J, V29_K, V29_L, V29_M, V29_N, V29_O,
+    V30, V30_H, V30_J, V30_K, V30_L, V30_M, V30_N, V30_O,
+    V31, V31_H, V31_J, V31_K, V31_L, V31_M, V31_N, V31_O,
+);
+
 // Class for all 64bit vector registers
 reg_class vectord_reg(
     V0, V0_H,
@@ -940,6 +1160,39 @@
     V31, V31_H
 );
 
+// Class for all SVE predicate registers.
+reg_class pr_reg (
+    P0,
+    P1,
+    P2,
+    P3,
+    P4,
+    P5,
+    P6,
+    // P7, non-allocatable, preserved with all elements preset to TRUE.
+    P8,
+    P9,
+    P10,
+    P11,
+    P12,
+    P13,
+    P14,
+    P15
+);
+
+// Class for SVE governing predicate registers, which are used
+// to determine the active elements of a predicated instruction.
+reg_class gov_pr (
+    P0,
+    P1,
+    P2,
+    P3,
+    P4,
+    P5,
+    P6,
+    // P7, non-allocatable, preserved with all elements preset to TRUE.
+);
+
 // Singleton class for condition codes
 reg_class int_flags(RFLAGS);
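A note on the two predicate classes above: they differ only in their ceiling. Predicated SVE instructions encode the governing predicate in a 3-bit Pg field, which is why gov_pr stops at p7 (that rationale is an editorial gloss, not stated in the patch), and p7 itself is withheld from both classes because the JIT keeps it preset to all-true, restoring it via reinitialize_ptrue() where needed. As a sketch:

    RESERVED_ALL_TRUE = 7                   # p7 kept as an all-true mask
    pr_reg = [p for p in range(16) if p != RESERVED_ALL_TRUE]
    gov_pr = [p for p in pr_reg if p < 8]   # 3-bit Pg field in predicated ops
    print(gov_pr)                           # [0, 1, 2, 3, 4, 5, 6]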
 
@@ -1644,6 +1897,10 @@
     __ bind(L_skip_barrier);
   }
 
+  if (UseSVE > 0 && C->max_vector_size() >= 16) {
+    __ reinitialize_ptrue();
+  }
+
   int bangsize = C->output()->bang_size_in_bytes();
   if (C->output()->need_stack_bang(bangsize) && UseStackBanging)
     __ generate_stack_overflow_check(bangsize);
@@ -1742,7 +1999,7 @@
 
 // Figure out which register class each belongs in: rc_int, rc_float or
 // rc_stack.
-enum RC { rc_bad, rc_int, rc_float, rc_stack };
+enum RC { rc_bad, rc_int, rc_float, rc_predicate, rc_stack };
 
 static enum RC rc_class(OptoReg::Name reg) {
 
@@ -1750,20 +2007,25 @@
     return rc_bad;
   }
 
-  // we have 30 int registers * 2 halves
-  // (rscratch1 and rscratch2 are omitted)
-  int slots_of_int_registers = RegisterImpl::max_slots_per_register * (RegisterImpl::number_of_registers - 2);
+  // we have 32 int registers * 2 halves
+  int slots_of_int_registers = RegisterImpl::max_slots_per_register * RegisterImpl::number_of_registers;
 
   if (reg < slots_of_int_registers) {
     return rc_int;
   }
 
-  // we have 32 float register * 4 halves
-  if (reg < slots_of_int_registers + FloatRegisterImpl::max_slots_per_register * FloatRegisterImpl::number_of_registers) {
+  // we have 32 float register * 8 halves
+  int slots_of_float_registers = FloatRegisterImpl::max_slots_per_register * FloatRegisterImpl::number_of_registers;
+  if (reg < slots_of_int_registers + slots_of_float_registers) {
     return rc_float;
   }
 
-  // Between float regs & stack is the flags regs.
+  int slots_of_predicate_registers = PRegisterImpl::max_slots_per_register * PRegisterImpl::number_of_registers;
+  if (reg < slots_of_int_registers + slots_of_float_registers + slots_of_predicate_registers) {
+    return rc_predicate;
+  }
+
+  // Between predicate regs & stack is the flags.
   assert(OptoReg::is_stack(reg), "blow up if spilling flags");
 
   return rc_stack;
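rc_class() now has to walk wider register files: r8/r9 are back in the allocatable set (so all 32 GPRs count), each V register occupies eight logical slots, and the predicate file sits between the vectors and the flags. A sketch of the slot arithmetic (the predicate slot count is assumed to be 1; the real values come from the Impl constants named above):

    INT_SLOTS  = 32 * 2   # RegisterImpl: 32 GPRs x 2 halves, r8/r9 included
    VEC_SLOTS  = 32 * 8   # FloatRegisterImpl: 8 logical slots per V register
    PRED_SLOTS = 16 * 1   # PRegisterImpl: assumed one slot per predicate
    def rc_class(reg):
        if reg < INT_SLOTS:                          return "rc_int"
        if reg < INT_SLOTS + VEC_SLOTS:              return "rc_float"
        if reg < INT_SLOTS + VEC_SLOTS + PRED_SLOTS: return "rc_predicate"
        return "rc_stack"   # RFLAGS lives between predicates and the stack
    print("%s %s %s" % (rc_class(63), rc_class(64), rc_class(335)))
    # rc_int rc_float rc_predicate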
@@ -1802,8 +2064,28 @@
 
   if (bottom_type()->isa_vect() != NULL) {
     uint ireg = ideal_reg();
-    assert(ireg == Op_VecD || ireg == Op_VecX, "must be 64 bit or 128 bit vector");
-    if (cbuf) {
+    if (ireg == Op_VecA && cbuf) {
+      C2_MacroAssembler _masm(cbuf);
+      int sve_vector_reg_size_in_bytes = Matcher::scalable_vector_reg_size(T_BYTE);
+      if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) {
+        // stack->stack
+        __ spill_copy_sve_vector_stack_to_stack(src_offset, dst_offset,
+                                                sve_vector_reg_size_in_bytes);
+      } else if (src_lo_rc == rc_float && dst_lo_rc == rc_stack) {
+        __ spill_sve_vector(as_FloatRegister(Matcher::_regEncode[src_lo]), ra_->reg2offset(dst_lo),
+                            sve_vector_reg_size_in_bytes);
+      } else if (src_lo_rc == rc_stack && dst_lo_rc == rc_float) {
+        __ unspill_sve_vector(as_FloatRegister(Matcher::_regEncode[dst_lo]), ra_->reg2offset(src_lo),
+                              sve_vector_reg_size_in_bytes);
+      } else if (src_lo_rc == rc_float && dst_lo_rc == rc_float) {
+        __ sve_orr(as_FloatRegister(Matcher::_regEncode[dst_lo]),
+                   as_FloatRegister(Matcher::_regEncode[src_lo]),
+                   as_FloatRegister(Matcher::_regEncode[src_lo]));
+      } else {
+        ShouldNotReachHere();
+      }
+    } else if (cbuf) {
+      assert(ireg == Op_VecD || ireg == Op_VecX, "must be 64 bit or 128 bit vector");
       C2_MacroAssembler _masm(cbuf);
       assert((src_lo_rc != rc_int && dst_lo_rc != rc_int), "sanity");
       if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) {
@@ -1821,12 +2103,12 @@
                as_FloatRegister(Matcher::_regEncode[src_lo]));
       } else if (src_lo_rc == rc_float && dst_lo_rc == rc_stack) {
         __ spill(as_FloatRegister(Matcher::_regEncode[src_lo]),
-                       ireg == Op_VecD ? __ D : __ Q,
-                       ra_->reg2offset(dst_lo));
+                 ireg == Op_VecD ? __ D : __ Q,
+                 ra_->reg2offset(dst_lo));
       } else if (src_lo_rc == rc_stack && dst_lo_rc == rc_float) {
         __ unspill(as_FloatRegister(Matcher::_regEncode[dst_lo]),
-                       ireg == Op_VecD ? __ D : __ Q,
-                       ra_->reg2offset(src_lo));
+                   ireg == Op_VecD ? __ D : __ Q,
+                   ra_->reg2offset(src_lo));
       } else {
         ShouldNotReachHere();
       }
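The new Op_VecA branch is a four-way dispatch on the (source, destination) register classes; the register-to-register case reuses sve_orr with the source named twice, the standard SVE idiom for a full-width vector copy. Summarized as a table (illustrative, mirroring the branch above):

    # (src class, dst class) -> macro-assembler helper chosen above.
    sve_spill_moves = {
        ("stack", "stack"): "spill_copy_sve_vector_stack_to_stack",
        ("float", "stack"): "spill_sve_vector",    # register -> memory
        ("stack", "float"): "unspill_sve_vector",  # memory -> register
        ("float", "float"): "sve_orr",             # orr zd, zn, zn == mov
    }
    print(sve_spill_moves[("stack", "float")])     # unspill_sve_vector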
@@ -1911,9 +2193,24 @@
       st->print("%s", Matcher::regName[dst_lo]);
     }
     if (bottom_type()->isa_vect() != NULL) {
-      st->print("\t# vector spill size = %d", ideal_reg()==Op_VecD ? 64:128);
+      int vsize = 0;
+      switch (ideal_reg()) {
+      case Op_VecD:
+        vsize = 64;
+        break;
+      case Op_VecX:
+        vsize = 128;
+        break;
+      case Op_VecA:
+        vsize = Matcher::scalable_vector_reg_size(T_BYTE) * 8;
+        break;
+      default:
+        assert(false, "bad register type for spill");
+        ShouldNotReachHere();
+      }
+      st->print("\t# vector spill size = %d", vsize);
     } else {
-      st->print("\t# spill size = %d", is64 ? 64:32);
+      st->print("\t# spill size = %d", is64 ? 64 : 32);
     }
   }
 
@@ -2082,28 +2379,37 @@
   if (!match_rule_supported(opcode) || !vector_size_supported(bt, vlen)) {
     return false;
   }
-
-  // Special cases which require vector length
-  switch (opcode) {
-    case Op_MulAddVS2VI: {
-      if (vlen != 4) {
+  int bit_size = vlen * type2aelembytes(bt) * 8;
+  if (UseSVE == 0 && bit_size > 128) {
+    return false;
+  }
+  if (UseSVE > 0) {
+    return op_sve_supported(opcode);
+  } else { // NEON
+    // Special cases
+    switch (opcode) {
+    case Op_MulAddVS2VI:
+      if (bit_size < 128) {
         return false;
       }
       break;
-    }
+    case Op_MulVL:
+      return false;
     case Op_VectorLoadShuffle:
     case Op_VectorRearrange:
       if (vlen < 4) {
         return false;
       }
       break;
-  }
-
+    default:
+      break;
+    }
+  }
   return true; // Per default match rules are supported.
 }
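+
+// For example, an 8-element int vector is 8 * 4 * 8 = 256 bits: it is
+// rejected outright when UseSVE == 0 (NEON tops out at 128 bits) and is
+// otherwise gated only by op_sve_supported().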
 
 const bool Matcher::has_predicated_vectors(void) {
-  return false;
+  return UseSVE > 0;
 }
 
 bool Matcher::supports_vector_variable_shifts(void) {
@@ -2143,7 +2449,8 @@
 
 // Vector width in bytes.
 const int Matcher::vector_width_in_bytes(BasicType bt) {
-  int size = MIN2(16,(int)MaxVectorSize);
+  // MaxVectorSize should already have been set by detecting the SVE
+  // maximum vector register size.
+  int size = MIN2((UseSVE > 0) ? 256 : 16, (int)MaxVectorSize);
   // Minimum 2 values in vector
   if (size < 2*type2aelembytes(bt)) size = 0;
   // But never < 4
@@ -2157,21 +2464,38 @@
 }
 const int Matcher::min_vector_size(const BasicType bt) {
   int max_size = max_vector_size(bt);
-  // Limit the vector size to 8 bytes
-  int size = 8 / type2aelembytes(bt);
-  if (bt == T_BYTE) {
-    // To support vector api shuffle/rearrange.
-    size = 4;
-  } else if (bt == T_BOOLEAN) {
-    // To support vector api load/store mask.
-    size = 2;
-  }
-  if (size < 2) size = 2;
-  return MIN2(size,max_size);
+  if ((UseSVE > 0) && (MaxVectorSize >= 16)) {
+    // Vector lengths smaller than the SVE vector register size are not currently supported.
+    return max_size;
+  } else { // NEON
+    // Limit the vector size to 8 bytes
+    int size = 8 / type2aelembytes(bt);
+    if (bt == T_BYTE) {
+      // To support vector api shuffle/rearrange.
+      size = 4;
+    } else if (bt == T_BOOLEAN) {
+      // To support vector api load/store mask.
+      size = 2;
+    }
+    if (size < 2) size = 2;
+    return MIN2(size,max_size);
+  }
+}
+
+const bool Matcher::supports_scalable_vector() {
+  return UseSVE > 0;
+}
+
+// Actual max scalable vector register length.
+const int Matcher::scalable_vector_reg_size(const BasicType bt) {
+  return Matcher::max_vector_size(bt);
 }
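+
+// For example (an assumed configuration, not taken from this change): on a
+// 512-bit SVE machine MaxVectorSize is 64, so scalable_vector_reg_size(T_BYTE)
+// is 64 and scalable_vector_reg_size(T_INT) is 16; min_vector_size() above
+// likewise returns max_vector_size(), as smaller SVE vector lengths are not
+// yet supported.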
 
 // Vector ideal reg.
 const uint Matcher::vector_ideal_reg(int len) {
+  if (UseSVE > 0 && 16 <= len && len <= 256) {
+    return Op_VecA;
+  }
   switch(len) {
     // For 16-bit/32-bit mask vector, reuse VecD.
     case  2:
@@ -3455,6 +3779,11 @@
     if (call == NULL) {
       ciEnv::current()->record_failure("CodeCache is full");
       return;
+    } else if (UseSVE > 0 && Compile::current()->max_vector_size() >= 16) {
+      // Only non-uncommon-trap calls need to reinitialize ptrue; an
+      // uncommon trap deoptimizes and never returns to this code.
+      if (uncommon_trap_request() == 0) {
+        __ reinitialize_ptrue();
+      }
     }
   %}
 
@@ -3465,6 +3794,8 @@
     if (call == NULL) {
       ciEnv::current()->record_failure("CodeCache is full");
       return;
+    } else if (UseSVE > 0 && Compile::current()->max_vector_size() >= 16) {
+      __ reinitialize_ptrue();
     }
   %}
 
@@ -3501,6 +3832,9 @@
       __ bind(retaddr);
       __ add(sp, sp, 2 * wordSize);
     }
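+    // The runtime call may clobber the predicate register holding the
+    // all-true value that SVE vector code relies on, so restore it here.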
+    if (UseSVE > 0 && Compile::current()->max_vector_size() >= 16) {
+      __ reinitialize_ptrue();
+    }
   %}
 
   enc_class aarch64_enc_rethrow() %{
@@ -3510,6 +3844,11 @@
 
   enc_class aarch64_enc_ret() %{
     C2_MacroAssembler _masm(&cbuf);
+#ifdef ASSERT
+    if (UseSVE > 0 && Compile::current()->max_vector_size() >= 16) {
+      __ verify_ptrue();
+    }
+#endif
     __ ret(lr);
   %}
 
@@ -4300,6 +4639,41 @@
   interface(CONST_INTER);
 %}
 
+// 8 bit signed value.
+operand immI8()
+%{
+  predicate(n->get_int() <= 127 && n->get_int() >= -128);
+  match(ConI);
+
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// 8 bit signed value (simm8), or #simm8 LSL 8.
+operand immI8_shift8()
+%{
+  predicate((n->get_int() <= 127 && n->get_int() >= -128) ||
+            (n->get_int() <= 32512 && n->get_int() >= -32768 && (n->get_int() & 0xff) == 0));
+  match(ConI);
+
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// 8 bit signed value (simm8), or #simm8 LSL 8.
+operand immL8_shift8()
+%{
+  predicate((n->get_long() <= 127 && n->get_long() >= -128) ||
+            (n->get_long() <= 32512 && n->get_long() >= -32768 && (n->get_long() & 0xff) == 0));
+  match(ConL);
+
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
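+
+// For example, 127 and -128 (plain simm8), and 32512 = 0x7F00 or
+// -32768 = -0x8000 (simm8 LSL 8), are encodable by the operands above,
+// while a value such as 129 fits neither form.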
+
 // 32 bit integer valid for add sub immediate
 operand immIAddSub()
 %{
@@ -4918,6 +5292,18 @@
   interface(REG_INTER);
 %}
 
+// Generic vector operand class. Intended to cover all vector
+// operands, both NEON and SVE, but currently only used for
+// SVE VecA.
+operand vReg()
+%{
+  constraint(ALLOC_IN_RC(vectora_reg));
+  match(VecA);
+  op_cost(0);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
 operand vecD()
 %{
   constraint(ALLOC_IN_RC(vectord_reg));
@@ -5226,6 +5612,15 @@
   interface(REG_INTER);
 %}
 
+operand pRegGov()
+%{
+  constraint(ALLOC_IN_RC(gov_pr));
+  match(RegVMask);
+  op_cost(0);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
 // Flags register, used as output of signed compare instructions
 
 // note that on AArch64 we also use this register as the output for
@@ -13639,6 +14034,77 @@
   ins_pipe(fp_uop_d);
 %}
 
+instruct copySignD_reg(vRegD dst, vRegD src1, vRegD src2, vRegD zero) %{
+  match(Set dst (CopySignD src1 (Binary src2 zero)));
+  effect(TEMP_DEF dst, USE src1, USE src2, USE zero);
+  format %{ "CopySignD  $dst $src1 $src2" %}
+  ins_encode %{
+    FloatRegister dst = as_FloatRegister($dst$$reg),
+                  src1 = as_FloatRegister($src1$$reg),
+                  src2 = as_FloatRegister($src2$$reg),
+                  zero = as_FloatRegister($zero$$reg);
+    __ fnegd(dst, zero);
+    __ bsl(dst, __ T8B, src2, src1);
+  %}
+  ins_pipe(fp_uop_d);
+%}
+
+instruct copySignF_reg(vRegF dst, vRegF src1, vRegF src2) %{
+  match(Set dst (CopySignF src1 src2));
+  effect(TEMP_DEF dst, USE src1, USE src2);
+  format %{ "CopySignF  $dst $src1 $src2" %}
+  ins_encode %{
+    FloatRegister dst = as_FloatRegister($dst$$reg),
+                  src1 = as_FloatRegister($src1$$reg),
+                  src2 = as_FloatRegister($src2$$reg);
+    __ movi(dst, __ T2S, 0x80, 24);
+    __ bsl(dst, __ T8B, src2, src1);
+  %}
+  ins_pipe(fp_uop_d);
+%}
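+
+// Note on the two CopySign rules above: the mask register ends up holding
+// only the sign bit per lane (fneg of the 0.0 in $zero yields -0.0 for
+// doubles; movi of 0x80 LSL 24 yields 0x80000000 for floats), so bsl copies
+// just the sign bit from $src2 and all remaining bits from $src1.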
+
+instruct signumD_reg(vRegD dst, vRegD src, vRegD zero, vRegD one) %{
+  match(Set dst (SignumD src (Binary zero one)));
+  effect(TEMP_DEF dst, USE src, USE zero, USE one);
+  format %{ "signumD  $dst, $src" %}
+  ins_encode %{
+    FloatRegister src = as_FloatRegister($src$$reg),
+                  dst = as_FloatRegister($dst$$reg),
+                  zero = as_FloatRegister($zero$$reg),
+                  one = as_FloatRegister($one$$reg);
+    __ facgtd(dst, src, zero); // dst=0 for +-0.0 and NaN. 0xFFF..F otherwise
+    __ ushrd(dst, dst, 1);     // dst=0 for +-0.0 and NaN. 0x7FF..F otherwise
+    // Bit selection instruction gets bit from "one" for each enabled bit in
+    // "dst", otherwise gets a bit from "src". For "src" that contains +-0.0 or
+    // NaN the whole "src" will be copied because "dst" is zero. For all other
+    // "src" values dst is 0x7FF..F, which means only the sign bit is copied
+    // from "src", and all other bits are copied from 1.0.
+    __ bsl(dst, __ T8B, one, src);
+  %}
+  ins_pipe(fp_uop_d);
+%}
+
+instruct signumF_reg(vRegF dst, vRegF src, vRegF zero, vRegF one) %{
+  match(Set dst (SignumF src (Binary zero one)));
+  effect(TEMP_DEF dst, USE src, USE zero, USE one);
+  format %{ "signumF  $dst, $src" %}
+  ins_encode %{
+    FloatRegister src = as_FloatRegister($src$$reg),
+                  dst = as_FloatRegister($dst$$reg),
+                  zero = as_FloatRegister($zero$$reg),
+                  one = as_FloatRegister($one$$reg);
+    __ facgts(dst, src, zero);    // dst=0 for +-0.0 and NaN. 0xFFF..F otherwise
+    __ ushr(dst, __ T2S, dst, 1); // dst=0 for +-0.0 and NaN. 0x7FF..F otherwise
+    // Bit selection instruction gets bit from "one" for each enabled bit in
+    // "dst", otherwise gets a bit from "src". For "src" that contains +-0.0 or
+    // NaN the whole "src" will be copied because "dst" is zero. For all other
+    // "src" values dst is 0x7FF..F, which means only the sign bit is copied
+    // from "src", and all other bits are copied from 1.0.
+    __ bsl(dst, __ T8B, one, src);
+  %}
+  ins_pipe(fp_uop_d);
+%}
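+
+// Example walk-through for signumF: for src = -3.5f, facgts sets dst to all
+// ones (|src| > 0.0), ushr clears the sign bit leaving 0x7FFFFFFF, and bsl
+// combines the sign bit of src with the magnitude of 1.0f, giving -1.0f.
+// For src = NaN or +-0.0f, dst is zero and src is passed through unchanged.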
+
 // ============================================================================
 // Logical Instructions
 
@@ -16151,7 +16617,7 @@
 // Load Vector (128 bits)
 instruct loadV16(vecX dst, vmem16 mem)
 %{
-  predicate(n->as_LoadVector()->memory_size() == 16);
+  predicate(UseSVE == 0 && n->as_LoadVector()->memory_size() == 16);
   match(Set dst (LoadVector mem));
   ins_cost(4 * INSN_COST);
   format %{ "ldrq   $dst,$mem\t# vector (128 bits)" %}
@@ -16207,7 +16673,7 @@
 
 instruct replicate16B(vecX dst, iRegIorL2I src)
 %{
-  predicate(n->as_Vector()->length() == 16);
+  predicate(UseSVE == 0 && n->as_Vector()->length() == 16);
   match(Set dst (ReplicateB src));
   ins_cost(INSN_COST);
   format %{ "dup  $dst, $src\t# vector (16B)" %}
@@ -16232,7 +16698,7 @@
 
 instruct replicate16B_imm(vecX dst, immI con)
 %{
-  predicate(n->as_Vector()->length() == 16);
+  predicate(UseSVE == 0 && n->as_Vector()->length() == 16);
   match(Set dst (ReplicateB con));
   ins_cost(INSN_COST);
   format %{ "movi  $dst, $con\t# vector(16B)" %}
@@ -16257,7 +16723,7 @@
 
 instruct replicate8S(vecX dst, iRegIorL2I src)
 %{
-  predicate(n->as_Vector()->length() == 8);
+  predicate(UseSVE == 0 && n->as_Vector()->length() == 8);
   match(Set dst (ReplicateS src));
   ins_cost(INSN_COST);
   format %{ "dup  $dst, $src\t# vector (8S)" %}
@@ -16282,7 +16748,7 @@
 
 instruct replicate8S_imm(vecX dst, immI con)
 %{
-  predicate(n->as_Vector()->length() == 8);
+  predicate(UseSVE == 0 && n->as_Vector()->length() == 8);
   match(Set dst (ReplicateS con));
   ins_cost(INSN_COST);
   format %{ "movi  $dst, $con\t# vector(8H)" %}
@@ -16306,7 +16772,7 @@
 
 instruct replicate4I(vecX dst, iRegIorL2I src)
 %{
-  predicate(n->as_Vector()->length() == 4);
+  predicate(UseSVE == 0 && n->as_Vector()->length() == 4);
   match(Set dst (ReplicateI src));
   ins_cost(INSN_COST);
   format %{ "dup  $dst, $src\t# vector (4I)" %}
@@ -16330,7 +16796,7 @@
 
 instruct replicate4I_imm(vecX dst, immI con)
 %{
-  predicate(n->as_Vector()->length() == 4);
+  predicate(UseSVE == 0 && n->as_Vector()->length() == 4);
   match(Set dst (ReplicateI con));
   ins_cost(INSN_COST);
   format %{ "movi  $dst, $con\t# vector(4I)" %}
@@ -16342,7 +16808,7 @@
 
 instruct replicate2L(vecX dst, iRegL src)
 %{
-  predicate(n->as_Vector()->length() == 2);
+  predicate(UseSVE == 0 && n->as_Vector()->length() == 2);
   match(Set dst (ReplicateL src));
   ins_cost(INSN_COST);
   format %{ "dup  $dst, $src\t# vector (2L)" %}
@@ -16354,7 +16820,7 @@
 
 instruct replicate2L_zero(vecX dst, immI0 zero)
 %{
-  predicate(n->as_Vector()->length() == 2);
+  predicate(UseSVE == 0 && n->as_Vector()->length() == 2);
   match(Set dst (ReplicateI zero));
   ins_cost(INSN_COST);
   format %{ "movi  $dst, $zero\t# vector(4I)" %}
@@ -16381,7 +16847,7 @@
 
 instruct replicate4F(vecX dst, vRegF src)
 %{
-  predicate(n->as_Vector()->length() == 4);
+  predicate(UseSVE == 0 && n->as_Vector()->length() == 4);
   match(Set dst (ReplicateF src));
   ins_cost(INSN_COST);
   format %{ "dup  $dst, $src\t# vector (4F)" %}
@@ -16394,7 +16860,7 @@
 
 instruct replicate2D(vecX dst, vRegD src)
 %{
-  predicate(n->as_Vector()->length() == 2);
+  predicate(UseSVE == 0 && n->as_Vector()->length() == 2);
   match(Set dst (ReplicateD src));
   ins_cost(INSN_COST);
   format %{ "dup  $dst, $src\t# vector (2D)" %}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/hotspot/cpu/aarch64/aarch64_sve.ad	Tue Sep 08 15:28:06 2020 +0800
@@ -0,0 +1,1637 @@
+//
+// Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
+// Copyright (c) 2020, Arm Limited. All rights reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+//
+// This code is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License version 2 only, as
+// published by the Free Software Foundation.
+//
+// This code is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+// version 2 for more details (a copy is included in the LICENSE file that
+// accompanied this code).
+//
+// You should have received a copy of the GNU General Public License version
+// 2 along with this work; if not, write to the Free Software Foundation,
+// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+//
+// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+// or visit www.oracle.com if you need additional information or have any
+// questions.
+//
+//
+
+// This file is automatically generated by running "m4 aarch64_sve_ad.m4". Do not edit ----
+
+// AArch64 SVE Architecture Description File
+
+
+// 4 bit signed offset -- for predicated load/store
+
+operand vmemA_immIOffset4()
+%{
+  predicate(Address::offset_ok_for_sve_immed(n->get_int(), 4,
+            Matcher::scalable_vector_reg_size(T_BYTE)));
+  match(ConI);
+
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+operand vmemA_immLOffset4()
+%{
+  predicate(Address::offset_ok_for_sve_immed(n->get_long(), 4,
+            Matcher::scalable_vector_reg_size(T_BYTE)));
+  match(ConL);
+
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+
+operand vmemA_indOffI4(iRegP reg, vmemA_immIOffset4 off)
+%{
+  constraint(ALLOC_IN_RC(ptr_reg));
+  match(AddP reg off);
+  op_cost(0);
+  format %{ "[$reg, $off, MUL VL]" %}
+  interface(MEMORY_INTER) %{
+    base($reg);
+    index(0xffffffff);
+    scale(0x0);
+    disp($off);
+  %}
+%}
+
+operand vmemA_indOffL4(iRegP reg, vmemA_immLOffset4 off)
+%{
+  constraint(ALLOC_IN_RC(ptr_reg));
+  match(AddP reg off);
+  op_cost(0);
+  format %{ "[$reg, $off, MUL VL]" %}
+  interface(MEMORY_INTER) %{
+    base($reg);
+    index(0xffffffff);
+    scale(0x0);
+    disp($off);
+  %}
+%}
+
+opclass vmemA(indirect, vmemA_indOffI4, vmemA_indOffL4);
+
+source_hpp %{
+  bool op_sve_supported(int opcode);
+%}
+
+source %{
+
+  static inline BasicType vector_element_basic_type(const MachNode* n) {
+    const TypeVect* vt = n->bottom_type()->is_vect();
+    return vt->element_basic_type();
+  }
+
+  static inline BasicType vector_element_basic_type(const MachNode* use, const MachOper* opnd) {
+    int def_idx = use->operand_index(opnd);
+    Node* def = use->in(def_idx);
+    const TypeVect* vt = def->bottom_type()->is_vect();
+    return vt->element_basic_type();
+  }
+
+  typedef void (C2_MacroAssembler::* sve_mem_insn_predicate)(FloatRegister Rt, Assembler::SIMD_RegVariant T,
+                                                             PRegister Pg, const Address &adr);
+
+  // Predicated load/store; the governing predicate register pg is typically ptrue, enabling all elements.
+  static void loadStoreA_predicate(C2_MacroAssembler masm, bool is_store,
+                                   FloatRegister reg, PRegister pg, BasicType bt,
+                                   int opcode, Register base, int index, int size, int disp) {
+    sve_mem_insn_predicate insn;
+    Assembler::SIMD_RegVariant type;
+    int esize = type2aelembytes(bt);
+    if (index == -1) {
+      assert(size == 0, "unsupported address mode: scale size = %d", size);
+      switch(esize) {
+      case 1:
+        insn = is_store ? &C2_MacroAssembler::sve_st1b : &C2_MacroAssembler::sve_ld1b;
+        type = Assembler::B;
+        break;
+      case 2:
+        insn = is_store ? &C2_MacroAssembler::sve_st1h : &C2_MacroAssembler::sve_ld1h;
+        type = Assembler::H;
+        break;
+      case 4:
+        insn = is_store ? &C2_MacroAssembler::sve_st1w : &C2_MacroAssembler::sve_ld1w;
+        type = Assembler::S;
+        break;
+      case 8:
+        insn = is_store ? &C2_MacroAssembler::sve_st1d : &C2_MacroAssembler::sve_ld1d;
+        type = Assembler::D;
+        break;
+      default:
+        assert(false, "unsupported");
+        ShouldNotReachHere();
+      }
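+      // SVE contiguous load/store immediates are expressed in multiples of
+      // the vector register length (MUL VL), hence the scaling of the byte
+      // displacement below.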
+      (masm.*insn)(reg, type, pg, Address(base, disp / Matcher::scalable_vector_reg_size(T_BYTE)));
+    } else {
+      assert(false, "unimplemented");
+      ShouldNotReachHere();
+    }
+  }
+
+  bool op_sve_supported(int opcode) {
+    switch (opcode) {
+      case Op_MulAddVS2VI:
+        // No multiply reduction instructions
+      case Op_MulReductionVD:
+      case Op_MulReductionVF:
+      case Op_MulReductionVI:
+      case Op_MulReductionVL:
+        // Others
+      case Op_Extract:
+      case Op_ExtractB:
+      case Op_ExtractC:
+      case Op_ExtractD:
+      case Op_ExtractF:
+      case Op_ExtractI:
+      case Op_ExtractL:
+      case Op_ExtractS:
+      case Op_ExtractUB:
+        return false;
+      default:
+        return true;
+    }
+  }
+
+%}
+
+definitions %{
+  int_def SVE_COST             (200, 200);
+%}
+
+
+
+
+// All SVE instructions
+
+// vector load/store
+
+// Use predicated vector load/store
+instruct loadV(vReg dst, vmemA mem) %{
+  predicate(UseSVE > 0 && n->as_LoadVector()->memory_size() >= 16);
+  match(Set dst (LoadVector mem));
+  ins_cost(SVE_COST);
+  format %{ "sve_ldr $dst, $mem\t # vector (sve)" %}
+  ins_encode %{
+    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
+    loadStoreA_predicate(C2_MacroAssembler(&cbuf), false, dst_reg, ptrue,
+                         vector_element_basic_type(this), $mem->opcode(),
+                         as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct storeV(vReg src, vmemA mem) %{
+  predicate(UseSVE > 0 && n->as_StoreVector()->memory_size() >= 16);
+  match(Set mem (StoreVector mem src));
+  ins_cost(SVE_COST);
+  format %{ "sve_str $mem, $src\t # vector (sve)" %}
+  ins_encode %{
+    FloatRegister src_reg = as_FloatRegister($src$$reg);
+    loadStoreA_predicate(C2_MacroAssembler(&cbuf), true, src_reg, ptrue,
+                         vector_element_basic_type(this, $src), $mem->opcode(),
+                         as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+
+// vector abs
+
+instruct vabsB(vReg dst, vReg src) %{
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= 16 &&
+            n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
+  match(Set dst (AbsVB src));
+  ins_cost(SVE_COST);
+  format %{ "sve_abs $dst, $src\t# vector (sve) (B)" %}
+  ins_encode %{
+    __ sve_abs(as_FloatRegister($dst$$reg), __ B,
+         ptrue, as_FloatRegister($src$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vabsS(vReg dst, vReg src) %{
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= 8 &&
+            n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
+  match(Set dst (AbsVS src));
+  ins_cost(SVE_COST);
+  format %{ "sve_abs $dst, $src\t# vector (sve) (H)" %}
+  ins_encode %{
+    __ sve_abs(as_FloatRegister($dst$$reg), __ H,
+         ptrue, as_FloatRegister($src$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vabsI(vReg dst, vReg src) %{
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= 4 &&
+            n->bottom_type()->is_vect()->element_basic_type() == T_INT);
+  match(Set dst (AbsVI src));
+  ins_cost(SVE_COST);
+  format %{ "sve_abs $dst, $src\t# vector (sve) (S)" %}
+  ins_encode %{
+    __ sve_abs(as_FloatRegister($dst$$reg), __ S,
+         ptrue, as_FloatRegister($src$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vabsL(vReg dst, vReg src) %{
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= 2 &&
+            n->bottom_type()->is_vect()->element_basic_type() == T_LONG);
+  match(Set dst (AbsVL src));
+  ins_cost(SVE_COST);
+  format %{ "sve_abs $dst, $src\t# vector (sve) (D)" %}
+  ins_encode %{
+    __ sve_abs(as_FloatRegister($dst$$reg), __ D,
+         ptrue, as_FloatRegister($src$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vabsF(vReg dst, vReg src) %{
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= 4 &&
+            n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
+  match(Set dst (AbsVF src));
+  ins_cost(SVE_COST);
+  format %{ "sve_fabs $dst, $src\t# vector (sve) (S)" %}
+  ins_encode %{
+    __ sve_fabs(as_FloatRegister($dst$$reg), __ S,
+         ptrue, as_FloatRegister($src$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vabsD(vReg dst, vReg src) %{
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= 2 &&
+            n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
+  match(Set dst (AbsVD src));
+  ins_cost(SVE_COST);
+  format %{ "sve_fabs $dst, $src\t# vector (sve) (D)" %}
+  ins_encode %{
+    __ sve_fabs(as_FloatRegister($dst$$reg), __ D,
+         ptrue, as_FloatRegister($src$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+// vector add
+
+instruct vaddB(vReg dst, vReg src1, vReg src2) %{
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= 16);
+  match(Set dst (AddVB src1 src2));
+  ins_cost(SVE_COST);
+  format %{ "sve_add $dst, $src1, $src2\t # vector (sve) (B)" %}
+  ins_encode %{
+    __ sve_add(as_FloatRegister($dst$$reg), __ B,
+         as_FloatRegister($src1$$reg),
+         as_FloatRegister($src2$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vaddS(vReg dst, vReg src1, vReg src2) %{
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= 8);
+  match(Set dst (AddVS src1 src2));
+  ins_cost(SVE_COST);
+  format %{ "sve_add $dst, $src1, $src2\t # vector (sve) (H)" %}
+  ins_encode %{
+    __ sve_add(as_FloatRegister($dst$$reg), __ H,
+         as_FloatRegister($src1$$reg),
+         as_FloatRegister($src2$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vaddI(vReg dst, vReg src1, vReg src2) %{
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= 4);
+  match(Set dst (AddVI src1 src2));
+  ins_cost(SVE_COST);
+  format %{ "sve_add $dst, $src1, $src2\t # vector (sve) (S)" %}
+  ins_encode %{
+    __ sve_add(as_FloatRegister($dst$$reg), __ S,
+         as_FloatRegister($src1$$reg),
+         as_FloatRegister($src2$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vaddL(vReg dst, vReg src1, vReg src2) %{
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= 2);
+  match(Set dst (AddVL src1 src2));
+  ins_cost(SVE_COST);
+  format %{ "sve_add $dst, $src1, $src2\t # vector (sve) (D)" %}
+  ins_encode %{
+    __ sve_add(as_FloatRegister($dst$$reg), __ D,
+         as_FloatRegister($src1$$reg),
+         as_FloatRegister($src2$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vaddF(vReg dst, vReg src1, vReg src2) %{
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= 4);
+  match(Set dst (AddVF src1 src2));
+  ins_cost(SVE_COST);
+  format %{ "sve_fadd $dst, $src1, $src2\t # vector (sve) (S)" %}
+  ins_encode %{
+    __ sve_fadd(as_FloatRegister($dst$$reg), __ S,
+         as_FloatRegister($src1$$reg),
+         as_FloatRegister($src2$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vaddD(vReg dst, vReg src1, vReg src2) %{
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= 2);
+  match(Set dst (AddVD src1 src2));
+  ins_cost(SVE_COST);
+  format %{ "sve_fadd $dst, $src1, $src2\t # vector (sve) (D)" %}
+  ins_encode %{
+    __ sve_fadd(as_FloatRegister($dst$$reg), __ D,
+         as_FloatRegister($src1$$reg),
+         as_FloatRegister($src2$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+// vector and
+
+instruct vand(vReg dst, vReg src1, vReg src2) %{
+  predicate(UseSVE > 0 && n->as_Vector()->length_in_bytes() >= 16);
+  match(Set dst (AndV src1 src2));
+  ins_cost(SVE_COST);
+  format %{ "sve_and  $dst, $src1, $src2\t# vector (sve)" %}
+  ins_encode %{
+    __ sve_and(as_FloatRegister($dst$$reg),
+         as_FloatRegister($src1$$reg),
+         as_FloatRegister($src2$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+// vector or
+
+instruct vor(vReg dst, vReg src1, vReg src2) %{
+  predicate(UseSVE > 0 && n->as_Vector()->length_in_bytes() >= 16);
+  match(Set dst (OrV src1 src2));
+  ins_cost(SVE_COST);
+  format %{ "sve_orr  $dst, $src1, $src2\t# vector (sve)" %}
+  ins_encode %{
+    __ sve_orr(as_FloatRegister($dst$$reg),
+         as_FloatRegister($src1$$reg),
+         as_FloatRegister($src2$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+// vector xor
+
+instruct vxor(vReg dst, vReg src1, vReg src2) %{
+  predicate(UseSVE > 0 && n->as_Vector()->length_in_bytes() >= 16);
+  match(Set dst (XorV src1 src2));
+  ins_cost(SVE_COST);
+  format %{ "sve_eor  $dst, $src1, $src2\t# vector (sve)" %}
+  ins_encode %{
+    __ sve_eor(as_FloatRegister($dst$$reg),
+         as_FloatRegister($src1$$reg),
+         as_FloatRegister($src2$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+// vector float div
+
+instruct vdivF(vReg dst_src1, vReg src2) %{
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= 4);
+  match(Set dst_src1 (DivVF dst_src1 src2));
+  ins_cost(SVE_COST);
+  format %{ "sve_fdiv  $dst_src1, $dst_src1, $src2\t# vector (sve) (S)" %}
+  ins_encode %{
+    __ sve_fdiv(as_FloatRegister($dst_src1$$reg), __ S,
+         ptrue, as_FloatRegister($src2$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vdivD(vReg dst_src1, vReg src2) %{
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= 2);
+  match(Set dst_src1 (DivVD dst_src1 src2));
+  ins_cost(SVE_COST);
+  format %{ "sve_fdiv  $dst_src1, $dst_src1, $src2\t# vector (sve) (D)" %}
+  ins_encode %{
+    __ sve_fdiv(as_FloatRegister($dst_src1$$reg), __ D,
+         ptrue, as_FloatRegister($src2$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+// vector max
+
+instruct vmaxF(vReg dst_src1, vReg src2) %{
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= 4 &&
+            n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
+  match(Set dst_src1 (MaxV dst_src1 src2));
+  ins_cost(SVE_COST);
+  format %{ "sve_fmax $dst_src1, $dst_src1, $src2\t # vector (sve) (S)" %}
+  ins_encode %{
+    __ sve_fmax(as_FloatRegister($dst_src1$$reg), __ S,
+         ptrue, as_FloatRegister($src2$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vmaxD(vReg dst_src1, vReg src2) %{
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= 2 &&
+            n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
+  match(Set dst_src1 (MaxV dst_src1 src2));
+  ins_cost(SVE_COST);
+  format %{ "sve_fmax $dst_src1, $dst_src1, $src2\t # vector (sve) (D)" %}
+  ins_encode %{
+    __ sve_fmax(as_FloatRegister($dst_src1$$reg), __ D,
+         ptrue, as_FloatRegister($src2$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vminF(vReg dst_src1, vReg src2) %{
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= 4 &&
+            n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
+  match(Set dst_src1 (MinV dst_src1 src2));
+  ins_cost(SVE_COST);
+  format %{ "sve_fmin $dst_src1, $dst_src1, $src2\t # vector (sve) (S)" %}
+  ins_encode %{
+    __ sve_fmin(as_FloatRegister($dst_src1$$reg), __ S,
+         ptrue, as_FloatRegister($src2$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vminD(vReg dst_src1, vReg src2) %{
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= 2 &&
+            n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
+  match(Set dst_src1 (MinV dst_src1 src2));
+  ins_cost(SVE_COST);
+  format %{ "sve_fmin $dst_src1, $dst_src1, $src2\t # vector (sve) (D)" %}
+  ins_encode %{
+    __ sve_fmin(as_FloatRegister($dst_src1$$reg), __ D,
+         ptrue, as_FloatRegister($src2$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+// vector fmla
+
+// dst_src1 = dst_src1 + src2 * src3
+instruct vfmlaF(vReg dst_src1, vReg src2, vReg src3) %{
+  predicate(UseFMA && UseSVE > 0 && n->as_Vector()->length() >= 4);
+  match(Set dst_src1 (FmaVF dst_src1 (Binary src2 src3)));
+  ins_cost(SVE_COST);
+  format %{ "sve_fmla $dst_src1, $src2, $src3\t # vector (sve) (S)" %}
+  ins_encode %{
+    __ sve_fmla(as_FloatRegister($dst_src1$$reg), __ S,
+         ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+// dst_src1 = dst_src1 + src2 * src3
+instruct vfmlaD(vReg dst_src1, vReg src2, vReg src3) %{
+  predicate(UseFMA && UseSVE > 0 && n->as_Vector()->length() >= 2);
+  match(Set dst_src1 (FmaVD dst_src1 (Binary src2 src3)));
+  ins_cost(SVE_COST);
+  format %{ "sve_fmla $dst_src1, $src2, $src3\t # vector (sve) (D)" %}
+  ins_encode %{
+    __ sve_fmla(as_FloatRegister($dst_src1$$reg), __ D,
+         ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+// vector fmls
+
+// dst_src1 = dst_src1 + -src2 * src3
+// dst_src1 = dst_src1 + src2 * -src3
+instruct vfmlsF(vReg dst_src1, vReg src2, vReg src3) %{
+  predicate(UseFMA && UseSVE > 0 && n->as_Vector()->length() >= 4);
+  match(Set dst_src1 (FmaVF dst_src1 (Binary (NegVF src2) src3)));
+  match(Set dst_src1 (FmaVF dst_src1 (Binary src2 (NegVF src3))));
+  ins_cost(SVE_COST);
+  format %{ "sve_fmls $dst_src1, $src2, $src3\t # vector (sve) (S)" %}
+  ins_encode %{
+    __ sve_fmls(as_FloatRegister($dst_src1$$reg), __ S,
+         ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+// dst_src1 = dst_src1 + -src2 * src3
+// dst_src1 = dst_src1 + src2 * -src3
+instruct vfmlsD(vReg dst_src1, vReg src2, vReg src3) %{
+  predicate(UseFMA && UseSVE > 0 && n->as_Vector()->length() >= 2);
+  match(Set dst_src1 (FmaVD dst_src1 (Binary (NegVD src2) src3)));
+  match(Set dst_src1 (FmaVD dst_src1 (Binary src2 (NegVD src3))));
+  ins_cost(SVE_COST);
+  format %{ "sve_fmls $dst_src1, $src2, $src3\t # vector (sve) (D)" %}
+  ins_encode %{
+    __ sve_fmls(as_FloatRegister($dst_src1$$reg), __ D,
+         ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+// vector fnmla
+
+// dst_src1 = -dst_src1 + -src2 * src3
+// dst_src1 = -dst_src1 + src2 * -src3
+instruct vfnmlaF(vReg dst_src1, vReg src2, vReg src3) %{
+  predicate(UseFMA && UseSVE > 0 && n->as_Vector()->length() >= 4);
+  match(Set dst_src1 (FmaVF (NegVF dst_src1) (Binary (NegVF src2) src3)));
+  match(Set dst_src1 (FmaVF (NegVF dst_src1) (Binary src2 (NegVF src3))));
+  ins_cost(SVE_COST);
+  format %{ "sve_fnmla $dst_src1, $src2, $src3\t # vector (sve) (S)" %}
+  ins_encode %{
+    __ sve_fnmla(as_FloatRegister($dst_src1$$reg), __ S,
+         ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+// dst_src1 = -dst_src1 + -src2 * src3
+// dst_src1 = -dst_src1 + src2 * -src3
+instruct vfnmlaD(vReg dst_src1, vReg src2, vReg src3) %{
+  predicate(UseFMA && UseSVE > 0 && n->as_Vector()->length() >= 2);
+  match(Set dst_src1 (FmaVD (NegVD dst_src1) (Binary (NegVD src2) src3)));
+  match(Set dst_src1 (FmaVD (NegVD dst_src1) (Binary src2 (NegVD src3))));
+  ins_cost(SVE_COST);
+  format %{ "sve_fnmla $dst_src1, $src2, $src3\t # vector (sve) (D)" %}
+  ins_encode %{
+    __ sve_fnmla(as_FloatRegister($dst_src1$$reg), __ D,
+         ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+// vector fnmls
+
+// dst_src1 = -dst_src1 + src2 * src3
+instruct vfnmlsF(vReg dst_src1, vReg src2, vReg src3) %{
+  predicate(UseFMA && UseSVE > 0 && n->as_Vector()->length() >= 4);
+  match(Set dst_src1 (FmaVF (NegVF dst_src1) (Binary src2 src3)));
+  ins_cost(SVE_COST);
+  format %{ "sve_fnmls $dst_src1, $src2, $src3\t # vector (sve) (S)" %}
+  ins_encode %{
+    __ sve_fnmls(as_FloatRegister($dst_src1$$reg), __ S,
+         ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+// dst_src1 = -dst_src1 + src2 * src3
+instruct vfnmlsD(vReg dst_src1, vReg src2, vReg src3) %{
+  predicate(UseFMA && UseSVE > 0 && n->as_Vector()->length() >= 2);
+  match(Set dst_src1 (FmaVD (NegVD dst_src1) (Binary src2 src3)));
+  ins_cost(SVE_COST);
+  format %{ "sve_fnmls $dst_src1, $src2, $src3\t # vector (sve) (D)" %}
+  ins_encode %{
+    __ sve_fnmls(as_FloatRegister($dst_src1$$reg), __ D,
+         ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+// vector mla
+
+// dst_src1 = dst_src1 + src2 * src3
+instruct vmlaB(vReg dst_src1, vReg src2, vReg src3)
+%{
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= 16);
+  match(Set dst_src1 (AddVB dst_src1 (MulVB src2 src3)));
+  ins_cost(SVE_COST);
+  format %{ "sve_mla $dst_src1, src2, src3\t # vector (sve) (B)" %}
+  ins_encode %{
+    __ sve_mla(as_FloatRegister($dst_src1$$reg), __ B,
+      ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+// dst_src1 = dst_src1 + src2 * src3
+instruct vmlaS(vReg dst_src1, vReg src2, vReg src3)
+%{
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= 8);
+  match(Set dst_src1 (AddVS dst_src1 (MulVS src2 src3)));
+  ins_cost(SVE_COST);
+  format %{ "sve_mla $dst_src1, src2, src3\t # vector (sve) (H)" %}
+  ins_encode %{
+    __ sve_mla(as_FloatRegister($dst_src1$$reg), __ H,
+      ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+// dst_src1 = dst_src1 + src2 * src3
+instruct vmlaI(vReg dst_src1, vReg src2, vReg src3)
+%{
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= 4);
+  match(Set dst_src1 (AddVI dst_src1 (MulVI src2 src3)));
+  ins_cost(SVE_COST);
+  format %{ "sve_mla $dst_src1, src2, src3\t # vector (sve) (S)" %}
+  ins_encode %{
+    __ sve_mla(as_FloatRegister($dst_src1$$reg), __ S,
+      ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+// dst_src1 = dst_src1 + src2 * src3
+instruct vmlaL(vReg dst_src1, vReg src2, vReg src3)
+%{
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= 2);
+  match(Set dst_src1 (AddVL dst_src1 (MulVL src2 src3)));
+  ins_cost(SVE_COST);
+  format %{ "sve_mla $dst_src1, src2, src3\t # vector (sve) (D)" %}
+  ins_encode %{
+    __ sve_mla(as_FloatRegister($dst_src1$$reg), __ D,
+      ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+// vector mls
+
+// dst_src1 = dst_src1 - src2 * src3
+instruct vmlsB(vReg dst_src1, vReg src2, vReg src3)
+%{
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= 16);
+  match(Set dst_src1 (SubVB dst_src1 (MulVB src2 src3)));
+  ins_cost(SVE_COST);
+  format %{ "sve_mls $dst_src1, src2, src3\t # vector (sve) (B)" %}
+  ins_encode %{
+    __ sve_mls(as_FloatRegister($dst_src1$$reg), __ B,
+      ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+// dst_src1 = dst_src1 - src2 * src3
+instruct vmlsS(vReg dst_src1, vReg src2, vReg src3)
+%{
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= 8);
+  match(Set dst_src1 (SubVS dst_src1 (MulVS src2 src3)));
+  ins_cost(SVE_COST);
+  format %{ "sve_mls $dst_src1, src2, src3\t # vector (sve) (H)" %}
+  ins_encode %{
+    __ sve_mls(as_FloatRegister($dst_src1$$reg), __ H,
+      ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+// dst_src1 = dst_src1 - src2 * src3
+instruct vmlsI(vReg dst_src1, vReg src2, vReg src3)
+%{
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= 4);
+  match(Set dst_src1 (SubVI dst_src1 (MulVI src2 src3)));
+  ins_cost(SVE_COST);
+  format %{ "sve_mls $dst_src1, src2, src3\t # vector (sve) (S)" %}
+  ins_encode %{
+    __ sve_mls(as_FloatRegister($dst_src1$$reg), __ S,
+      ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+// dst_src1 = dst_src1 - src2 * src3
+instruct vmlsL(vReg dst_src1, vReg src2, vReg src3)
+%{
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= 2);
+  match(Set dst_src1 (SubVL dst_src1 (MulVL src2 src3)));
+  ins_cost(SVE_COST);
+  format %{ "sve_mls $dst_src1, src2, src3\t # vector (sve) (D)" %}
+  ins_encode %{
+    __ sve_mls(as_FloatRegister($dst_src1$$reg), __ D,
+      ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+
+// vector mul
+
+instruct vmulB(vReg dst_src1, vReg src2) %{
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= 16);
+  match(Set dst_src1 (MulVB dst_src1 src2));
+  ins_cost(SVE_COST);
+  format %{ "sve_mul $dst_src1, $dst_src1, $src2\t # vector (sve) (B)" %}
+  ins_encode %{
+    __ sve_mul(as_FloatRegister($dst_src1$$reg), __ B,
+         ptrue, as_FloatRegister($src2$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vmulS(vReg dst_src1, vReg src2) %{
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= 8);
+  match(Set dst_src1 (MulVS dst_src1 src2));
+  ins_cost(SVE_COST);
+  format %{ "sve_mul $dst_src1, $dst_src1, $src2\t # vector (sve) (H)" %}
+  ins_encode %{
+    __ sve_mul(as_FloatRegister($dst_src1$$reg), __ H,
+         ptrue, as_FloatRegister($src2$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vmulI(vReg dst_src1, vReg src2) %{
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= 4);
+  match(Set dst_src1 (MulVI dst_src1 src2));
+  ins_cost(SVE_COST);
+  format %{ "sve_mul $dst_src1, $dst_src1, $src2\t # vector (sve) (S)" %}
+  ins_encode %{
+    __ sve_mul(as_FloatRegister($dst_src1$$reg), __ S,
+         ptrue, as_FloatRegister($src2$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vmulL(vReg dst_src1, vReg src2) %{
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= 2);
+  match(Set dst_src1 (MulVL dst_src1 src2));
+  ins_cost(SVE_COST);
+  format %{ "sve_mul $dst_src1, $dst_src1, $src2\t # vector (sve) (D)" %}
+  ins_encode %{
+    __ sve_mul(as_FloatRegister($dst_src1$$reg), __ D,
+         ptrue, as_FloatRegister($src2$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vmulF(vReg dst, vReg src1, vReg src2) %{
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= 4);
+  match(Set dst (MulVF src1 src2));
+  ins_cost(SVE_COST);
+  format %{ "sve_fmul $dst, $src1, $src2\t # vector (sve) (S)" %}
+  ins_encode %{
+    __ sve_fmul(as_FloatRegister($dst$$reg), __ S,
+         as_FloatRegister($src1$$reg),
+         as_FloatRegister($src2$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vmulD(vReg dst, vReg src1, vReg src2) %{
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= 2);
+  match(Set dst (MulVD src1 src2));
+  ins_cost(SVE_COST);
+  format %{ "sve_fmul $dst, $src1, $src2\t # vector (sve) (D)" %}
+  ins_encode %{
+    __ sve_fmul(as_FloatRegister($dst$$reg), __ D,
+         as_FloatRegister($src1$$reg),
+         as_FloatRegister($src2$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+// vector fneg
+
+instruct vnegF(vReg dst, vReg src) %{
+  predicate(UseSVE > 0 && n->as_Vector()->length_in_bytes() >= 16);
+  match(Set dst (NegVF src));
+  ins_cost(SVE_COST);
+  format %{ "sve_fneg $dst, $src\t# vector (sve) (S)" %}
+  ins_encode %{
+    __ sve_fneg(as_FloatRegister($dst$$reg), __ S,
+         ptrue, as_FloatRegister($src$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vnegD(vReg dst, vReg src) %{
+  predicate(UseSVE > 0 && n->as_Vector()->length_in_bytes() >= 16);
+  match(Set dst (NegVD src));
+  ins_cost(SVE_COST);
+  format %{ "sve_fneg $dst, $src\t# vector (sve) (D)" %}
+  ins_encode %{
+    __ sve_fneg(as_FloatRegister($dst$$reg), __ D,
+         ptrue, as_FloatRegister($src$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+// popcount vector
+
+instruct vpopcountI(vReg dst, vReg src) %{
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= 4);
+  match(Set dst (PopCountVI src));
+  format %{ "sve_cnt $dst, $src\t# vector (sve) (S)\n\t"  %}
+  ins_encode %{
+     __ sve_cnt(as_FloatRegister($dst$$reg), __ S, ptrue, as_FloatRegister($src$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+// vector add reduction
+
+instruct reduce_addI(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD tmp) %{
+  predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() >= 16 &&
+            (n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT));
+  match(Set dst (AddReductionVI src1 src2));
+  effect(TEMP_DEF dst, TEMP tmp);
+  ins_cost(SVE_COST);
+  format %{ "sve_uaddv $tmp, $src2\t# vector (sve) (S)\n\t"
+            "umov  $dst, $tmp, S, 0\n\t"
+            "addw  $dst, $dst, $src1\t # add reduction S" %}
+  ins_encode %{
+    __ sve_uaddv(as_FloatRegister($tmp$$reg), __ S,
+         ptrue, as_FloatRegister($src2$$reg));
+    __ umov($dst$$Register, as_FloatRegister($tmp$$reg), __ S, 0);
+    __ addw($dst$$Register, $dst$$Register, $src1$$Register);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct reduce_addL(iRegLNoSp dst, iRegL src1, vReg src2, vRegD tmp) %{
+  predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() >= 16 &&
+            (n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG));
+  match(Set dst (AddReductionVL src1 src2));
+  effect(TEMP_DEF dst, TEMP tmp);
+  ins_cost(SVE_COST);
+  format %{ "sve_uaddv $tmp, $src2\t# vector (sve) (D)\n\t"
+            "umov  $dst, $tmp, D, 0\n\t"
+            "add  $dst, $dst, $src1\t # add reduction D" %}
+  ins_encode %{
+    __ sve_uaddv(as_FloatRegister($tmp$$reg), __ D,
+         ptrue, as_FloatRegister($src2$$reg));
+    __ umov($dst$$Register, as_FloatRegister($tmp$$reg), __ D, 0);
+    __ add($dst$$Register, $dst$$Register, $src1$$Register);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct reduce_addF(vRegF src1_dst, vReg src2) %{
+  predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() >= 16);
+  match(Set src1_dst (AddReductionVF src1_dst src2));
+  ins_cost(SVE_COST);
+  format %{ "sve_fadda $src1_dst, $src1_dst, $src2\t# vector (sve) (S)" %}
+  ins_encode %{
+    __ sve_fadda(as_FloatRegister($src1_dst$$reg), __ S,
+         ptrue, as_FloatRegister($src2$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct reduce_addD(vRegD src1_dst, vReg src2) %{
+  predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() >= 16);
+  match(Set src1_dst (AddReductionVD src1_dst src2));
+  ins_cost(SVE_COST);
+  format %{ "sve_fadda $src1_dst, $src1_dst, $src2\t# vector (sve) (D)" %}
+  ins_encode %{
+    __ sve_fadda(as_FloatRegister($src1_dst$$reg), __ D,
+         ptrue, as_FloatRegister($src2$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
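+
+// Note: sve_fadda performs a strictly-ordered floating-point accumulation,
+// which preserves the sequential rounding behavior required of FP add
+// reductions (a pairwise/tree reduction could round differently).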
+
+// vector max reduction
+
+instruct reduce_maxF(vRegF dst, vRegF src1, vReg src2) %{
+  predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT &&
+            n->in(2)->bottom_type()->is_vect()->length_in_bytes() >= 16);
+  match(Set dst (MaxReductionV src1 src2));
+  ins_cost(INSN_COST);
+  effect(TEMP_DEF dst);
+  format %{ "sve_fmaxv $dst, $src2 # vector (sve) (S)\n\t"
+            "fmaxs $dst, $dst, $src1\t # max reduction F" %}
+  ins_encode %{
+    __ sve_fmaxv(as_FloatRegister($dst$$reg), __ S,
+         ptrue, as_FloatRegister($src2$$reg));
+    __ fmaxs(as_FloatRegister($dst$$reg), as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct reduce_maxD(vRegD dst, vRegD src1, vReg src2) %{
+  predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE &&
+            n->in(2)->bottom_type()->is_vect()->length_in_bytes() >= 16);
+  match(Set dst (MaxReductionV src1 src2));
+  ins_cost(INSN_COST);
+  effect(TEMP_DEF dst);
+  format %{ "sve_fmaxv $dst, $src2 # vector (sve) (S)\n\t"
+            "fmaxs $dst, $dst, $src1\t # max reduction D" %}
+  ins_encode %{
+    __ sve_fmaxv(as_FloatRegister($dst$$reg), __ D,
+         ptrue, as_FloatRegister($src2$$reg));
+    __ fmaxd(as_FloatRegister($dst$$reg), as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+// vector min reduction
+
+instruct reduce_minF(vRegF dst, vRegF src1, vReg src2) %{
+  predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT &&
+            n->in(2)->bottom_type()->is_vect()->length_in_bytes() >= 16);
+  match(Set dst (MinReductionV src1 src2));
+  ins_cost(INSN_COST);
+  effect(TEMP_DEF dst);
+  format %{ "sve_fminv $dst, $src2 # vector (sve) (S)\n\t"
+            "fmins $dst, $dst, $src1\t # min reduction F" %}
+  ins_encode %{
+    __ sve_fminv(as_FloatRegister($dst$$reg), __ S,
+         ptrue, as_FloatRegister($src2$$reg));
+    __ fmins(as_FloatRegister($dst$$reg), as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct reduce_minD(vRegD dst, vRegD src1, vReg src2) %{
+  predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE &&
+            n->in(2)->bottom_type()->is_vect()->length_in_bytes() >= 16);
+  match(Set dst (MinReductionV src1 src2));
+  ins_cost(INSN_COST);
+  effect(TEMP_DEF dst);
+  format %{ "sve_fminv $dst, $src2 # vector (sve) (S)\n\t"
+            "fmins $dst, $dst, $src1\t # min reduction D" %}
+  ins_encode %{
+    __ sve_fminv(as_FloatRegister($dst$$reg), __ D,
+         ptrue, as_FloatRegister($src2$$reg));
+    __ fmind(as_FloatRegister($dst$$reg), as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+// vector Math.rint, floor, ceil
+
+instruct vroundD(vReg dst, vReg src, immI rmode) %{
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= 2 &&
+            n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
+  match(Set dst (RoundDoubleModeV src rmode));
+  format %{ "sve_frint $dst, $src, $rmode\t# vector (sve) (D)" %}
+  ins_encode %{
+    switch ($rmode$$constant) {
+      case RoundDoubleModeNode::rmode_rint:
+        __ sve_frintn(as_FloatRegister($dst$$reg), __ D,
+             ptrue, as_FloatRegister($src$$reg));
+        break;
+      case RoundDoubleModeNode::rmode_floor:
+        __ sve_frintm(as_FloatRegister($dst$$reg), __ D,
+             ptrue, as_FloatRegister($src$$reg));
+        break;
+      case RoundDoubleModeNode::rmode_ceil:
+        __ sve_frintp(as_FloatRegister($dst$$reg), __ D,
+             ptrue, as_FloatRegister($src$$reg));
+        break;
+    }
+  %}
+  ins_pipe(pipe_slow);
+%}
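+
+// The rounding modes above map to sve_frintn (round to nearest, ties to
+// even, for rint), sve_frintm (toward minus infinity, for floor) and
+// sve_frintp (toward plus infinity, for ceil).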
+
+// vector replicate
+
+instruct replicateB(vReg dst, iRegIorL2I src) %{
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= 16);
+  match(Set dst (ReplicateB src));
+  ins_cost(SVE_COST);
+  format %{ "sve_dup  $dst, $src\t# vector (sve) (B)" %}
+  ins_encode %{
+    __ sve_dup(as_FloatRegister($dst$$reg), __ B, as_Register($src$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct replicateS(vReg dst, iRegIorL2I src) %{
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= 8);
+  match(Set dst (ReplicateS src));
+  ins_cost(SVE_COST);
+  format %{ "sve_dup  $dst, $src\t# vector (sve) (H)" %}
+  ins_encode %{
+    __ sve_dup(as_FloatRegister($dst$$reg), __ H, as_Register($src$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct replicateI(vReg dst, iRegIorL2I src) %{
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= 4);
+  match(Set dst (ReplicateI src));
+  ins_cost(SVE_COST);
+  format %{ "sve_dup  $dst, $src\t# vector (sve) (S)" %}
+  ins_encode %{
+    __ sve_dup(as_FloatRegister($dst$$reg), __ S, as_Register($src$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct replicateL(vReg dst, iRegL src) %{
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= 2);
+  match(Set dst (ReplicateL src));
+  ins_cost(SVE_COST);
+  format %{ "sve_dup  $dst, $src\t# vector (sve) (D)" %}
+  ins_encode %{
+    __ sve_dup(as_FloatRegister($dst$$reg), __ D, as_Register($src$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+
+instruct replicateB_imm8(vReg dst, immI8 con) %{
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= 16);
+  match(Set dst (ReplicateB con));
+  ins_cost(SVE_COST);
+  format %{ "sve_dup  $dst, $con\t# vector (sve) (B)" %}
+  ins_encode %{
+    __ sve_dup(as_FloatRegister($dst$$reg), __ B, $con$$constant);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct replicateS_imm8(vReg dst, immI8_shift8 con) %{
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= 8);
+  match(Set dst (ReplicateS con));
+  ins_cost(SVE_COST);
+  format %{ "sve_dup  $dst, $con\t# vector (sve) (H)" %}
+  ins_encode %{
+    __ sve_dup(as_FloatRegister($dst$$reg), __ H, $con$$constant);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct replicateI_imm8(vReg dst, immI8_shift8 con) %{
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= 4);
+  match(Set dst (ReplicateI con));
+  ins_cost(SVE_COST);
+  format %{ "sve_dup  $dst, $con\t# vector (sve) (S)" %}
+  ins_encode %{
+    __ sve_dup(as_FloatRegister($dst$$reg), __ S, $con$$constant);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct replicateL_imm8(vReg dst, immL8_shift8 con) %{
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= 2);
+  match(Set dst (ReplicateL con));
+  ins_cost(SVE_COST);
+  format %{ "sve_dup  $dst, $con\t# vector (sve) (D)" %}
+  ins_encode %{
+    __ sve_dup(as_FloatRegister($dst$$reg), __ D, $con$$constant);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+
+instruct replicateF(vReg dst, vRegF src) %{
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= 4);
+  match(Set dst (ReplicateF src));
+  ins_cost(SVE_COST);
+  format %{ "sve_cpy  $dst, $src\t# vector (sve) (S)" %}
+  ins_encode %{
+    __ sve_cpy(as_FloatRegister($dst$$reg), __ S,
+         ptrue, as_FloatRegister($src$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct replicateD(vReg dst, vRegD src) %{
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= 2);
+  match(Set dst (ReplicateD src));
+  ins_cost(SVE_COST);
+  format %{ "sve_cpy  $dst, $src\t# vector (sve) (D)" %}
+  ins_encode %{
+    __ sve_cpy(as_FloatRegister($dst$$reg), __ D,
+         ptrue, as_FloatRegister($src$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+// vector shift
+
+instruct vasrB(vReg dst, vReg shift) %{
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= 16);
+  match(Set dst (RShiftVB dst shift));
+  ins_cost(SVE_COST);
+  format %{ "sve_asr $dst, $dst, $shift\t# vector (sve) (B)" %}
+  ins_encode %{
+    __ sve_asr(as_FloatRegister($dst$$reg), __ B,
+         ptrue, as_FloatRegister($shift$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vasrS(vReg dst, vReg shift) %{
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= 8);
+  match(Set dst (RShiftVS dst shift));
+  ins_cost(SVE_COST);
+  format %{ "sve_asr $dst, $dst, $shift\t# vector (sve) (H)" %}
+  ins_encode %{
+    __ sve_asr(as_FloatRegister($dst$$reg), __ H,
+         ptrue, as_FloatRegister($shift$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vasrI(vReg dst, vReg shift) %{
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= 4);
+  match(Set dst (RShiftVI dst shift));
+  ins_cost(SVE_COST);
+  format %{ "sve_asr $dst, $dst, $shift\t# vector (sve) (S)" %}
+  ins_encode %{
+    __ sve_asr(as_FloatRegister($dst$$reg), __ S,
+         ptrue, as_FloatRegister($shift$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vasrL(vReg dst, vReg shift) %{
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= 2);
+  match(Set dst (RShiftVL dst shift));
+  ins_cost(SVE_COST);
+  format %{ "sve_asr $dst, $dst, $shift\t# vector (sve) (D)" %}
+  ins_encode %{
+    __ sve_asr(as_FloatRegister($dst$$reg), __ D,
+         ptrue, as_FloatRegister($shift$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vlslB(vReg dst, vReg shift) %{
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= 16);
+  match(Set dst (LShiftVB dst shift));
+  ins_cost(SVE_COST);
+  format %{ "sve_lsl $dst, $dst, $shift\t# vector (sve) (B)" %}
+  ins_encode %{
+    __ sve_lsl(as_FloatRegister($dst$$reg), __ B,
+         ptrue, as_FloatRegister($shift$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vlslS(vReg dst, vReg shift) %{
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= 8);
+  match(Set dst (LShiftVS dst shift));
+  ins_cost(SVE_COST);
+  format %{ "sve_lsl $dst, $dst, $shift\t# vector (sve) (H)" %}
+  ins_encode %{
+    __ sve_lsl(as_FloatRegister($dst$$reg), __ H,
+         ptrue, as_FloatRegister($shift$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vlslI(vReg dst, vReg shift) %{
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= 4);
+  match(Set dst (LShiftVI dst shift));
+  ins_cost(SVE_COST);
+  format %{ "sve_lsl $dst, $dst, $shift\t# vector (sve) (S)" %}
+  ins_encode %{
+    __ sve_lsl(as_FloatRegister($dst$$reg), __ S,
+         ptrue, as_FloatRegister($shift$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vlslL(vReg dst, vReg shift) %{
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= 2);
+  match(Set dst (LShiftVL dst shift));
+  ins_cost(SVE_COST);
+  format %{ "sve_lsl $dst, $dst, $shift\t# vector (sve) (D)" %}
+  ins_encode %{
+    __ sve_lsl(as_FloatRegister($dst$$reg), __ D,
+         ptrue, as_FloatRegister($shift$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vlsrB(vReg dst, vReg shift) %{
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= 16);
+  match(Set dst (URShiftVB dst shift));
+  ins_cost(SVE_COST);
+  format %{ "sve_lsr $dst, $dst, $shift\t# vector (sve) (B)" %}
+  ins_encode %{
+    __ sve_lsr(as_FloatRegister($dst$$reg), __ B,
+         ptrue, as_FloatRegister($shift$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vlsrS(vReg dst, vReg shift) %{
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= 8);
+  match(Set dst (URShiftVS dst shift));
+  ins_cost(SVE_COST);
+  format %{ "sve_lsr $dst, $dst, $shift\t# vector (sve) (H)" %}
+  ins_encode %{
+    __ sve_lsr(as_FloatRegister($dst$$reg), __ H,
+         ptrue, as_FloatRegister($shift$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vlsrI(vReg dst, vReg shift) %{
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= 4);
+  match(Set dst (URShiftVI dst shift));
+  ins_cost(SVE_COST);
+  format %{ "sve_lsr $dst, $dst, $shift\t# vector (sve) (S)" %}
+  ins_encode %{
+    __ sve_lsr(as_FloatRegister($dst$$reg), __ S,
+         ptrue, as_FloatRegister($shift$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vlsrL(vReg dst, vReg shift) %{
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= 2);
+  match(Set dst (URShiftVL dst shift));
+  ins_cost(SVE_COST);
+  format %{ "sve_lsr $dst, $dst, $shift\t# vector (sve) (D)" %}
+  ins_encode %{
+    __ sve_lsr(as_FloatRegister($dst$$reg), __ D,
+         ptrue, as_FloatRegister($shift$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
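+// For the immediate-shift rules below: a shift amount of zero degenerates
+// to a register copy (sve_orr); an arithmetic right shift by at least the
+// element width is clamped to (element width - 1), which still replicates
+// the sign bit; and a logical right shift by at least the element width
+// yields zero, emitted as sve_eor of the source with itself.
+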
+instruct vasrB_imm(vReg dst, vReg src, immI shift) %{
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= 16);
+  match(Set dst (RShiftVB src shift));
+  ins_cost(SVE_COST);
+  format %{ "sve_asr $dst, $src, $shift\t# vector (sve) (B)" %}
+  ins_encode %{
+    int con = (int)$shift$$constant;
+    if (con == 0) {
+      __ sve_orr(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
+           as_FloatRegister($src$$reg));
+      return;
+    }
+    if (con >= 8) con = 7;
+    __ sve_asr(as_FloatRegister($dst$$reg), __ B,
+         as_FloatRegister($src$$reg), con);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vasrS_imm(vReg dst, vReg src, immI shift) %{
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= 8);
+  match(Set dst (RShiftVS src shift));
+  ins_cost(SVE_COST);
+  format %{ "sve_asr $dst, $src, $shift\t# vector (sve) (H)" %}
+  ins_encode %{
+    int con = (int)$shift$$constant;
+    if (con == 0) {
+      __ sve_orr(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
+           as_FloatRegister($src$$reg));
+      return;
+    }
+    if (con >= 16) con = 15;
+    __ sve_asr(as_FloatRegister($dst$$reg), __ H,
+         as_FloatRegister($src$$reg), con);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vasrI_imm(vReg dst, vReg src, immI shift) %{
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= 4);
+  match(Set dst (RShiftVI src shift));
+  ins_cost(SVE_COST);
+  format %{ "sve_asr $dst, $src, $shift\t# vector (sve) (S)" %}
+  ins_encode %{
+    int con = (int)$shift$$constant;
+    if (con == 0) {
+      __ sve_orr(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
+           as_FloatRegister($src$$reg));
+      return;
+    }
+    __ sve_asr(as_FloatRegister($dst$$reg), __ S,
+         as_FloatRegister($src$$reg), con);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vasrL_imm(vReg dst, vReg src, immI shift) %{
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= 2);
+  match(Set dst (RShiftVL src shift));
+  ins_cost(SVE_COST);
+  format %{ "sve_asr $dst, $src, $shift\t# vector (sve) (D)" %}
+  ins_encode %{
+    int con = (int)$shift$$constant;
+    if (con == 0) {
+      __ sve_orr(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
+           as_FloatRegister($src$$reg));
+      return;
+    }
+    __ sve_asr(as_FloatRegister($dst$$reg), __ D,
+         as_FloatRegister($src$$reg), con);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vlsrB_imm(vReg dst, vReg src, immI shift) %{
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= 16);
+  match(Set dst (URShiftVB src shift));
+  ins_cost(SVE_COST);
+  format %{ "sve_lsr $dst, $src, $shift\t# vector (sve) (B)" %}
+  ins_encode %{
+    int con = (int)$shift$$constant;
+    if (con == 0) {
+      __ sve_orr(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
+           as_FloatRegister($src$$reg));
+      return;
+    }
+    if (con >= 8) {
+      __ sve_eor(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
+           as_FloatRegister($src$$reg));
+      return;
+    }
+    __ sve_lsr(as_FloatRegister($dst$$reg), __ B,
+         as_FloatRegister($src$$reg), con);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vlsrS_imm(vReg dst, vReg src, immI shift) %{
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= 8);
+  match(Set dst (URShiftVS src shift));
+  ins_cost(SVE_COST);
+  format %{ "sve_lsr $dst, $src, $shift\t# vector (sve) (H)" %}
+  ins_encode %{
+    int con = (int)$shift$$constant;
+    if (con == 0) {
+      __ sve_orr(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
+           as_FloatRegister($src$$reg));
+      return;
+    }
+    if (con >= 16) {
+      __ sve_eor(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
+           as_FloatRegister($src$$reg));
+      return;
+    }
+    __ sve_lsr(as_FloatRegister($dst$$reg), __ H,
+         as_FloatRegister($src$$reg), con);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vlsrI_imm(vReg dst, vReg src, immI shift) %{
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= 4);
+  match(Set dst (URShiftVI src shift));
+  ins_cost(SVE_COST);
+  format %{ "sve_lsr $dst, $src, $shift\t# vector (sve) (S)" %}
+  ins_encode %{
+    int con = (int)$shift$$constant;
+    if (con == 0) {
+      __ sve_orr(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
+           as_FloatRegister($src$$reg));
+      return;
+    }
+    __ sve_lsr(as_FloatRegister($dst$$reg), __ S,
+         as_FloatRegister($src$$reg), con);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vlsrL_imm(vReg dst, vReg src, immI shift) %{
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= 2);
+  match(Set dst (URShiftVL src shift));
+  ins_cost(SVE_COST);
+  format %{ "sve_lsr $dst, $src, $shift\t# vector (sve) (D)" %}
+  ins_encode %{
+    int con = (int)$shift$$constant;
+    if (con == 0) {
+      __ sve_orr(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
+           as_FloatRegister($src$$reg));
+      return;
+    }
+    __ sve_lsr(as_FloatRegister($dst$$reg), __ D,
+         as_FloatRegister($src$$reg), con);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vlslB_imm(vReg dst, vReg src, immI shift) %{
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= 16);
+  match(Set dst (LShiftVB src shift));
+  ins_cost(SVE_COST);
+  format %{ "sve_lsl $dst, $src, $shift\t# vector (sve) (B)" %}
+  ins_encode %{
+    int con = (int)$shift$$constant;
+    if (con >= 8) {
+      __ sve_eor(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
+           as_FloatRegister($src$$reg));
+      return;
+    }
+    __ sve_lsl(as_FloatRegister($dst$$reg), __ B,
+         as_FloatRegister($src$$reg), con);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vlslS_imm(vReg dst, vReg src, immI shift) %{
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= 8);
+  match(Set dst (LShiftVS src shift));
+  ins_cost(SVE_COST);
+  format %{ "sve_lsl $dst, $src, $shift\t# vector (sve) (H)" %}
+  ins_encode %{
+    int con = (int)$shift$$constant;
+    if (con >= 16) {
+      __ sve_eor(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
+           as_FloatRegister($src$$reg));
+      return;
+    }
+    __ sve_lsl(as_FloatRegister($dst$$reg), __ H,
+         as_FloatRegister($src$$reg), con);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vlslI_imm(vReg dst, vReg src, immI shift) %{
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= 4);
+  match(Set dst (LShiftVI src shift));
+  ins_cost(SVE_COST);
+  format %{ "sve_lsl $dst, $src, $shift\t# vector (sve) (S)" %}
+  ins_encode %{
+    int con = (int)$shift$$constant;
+    __ sve_lsl(as_FloatRegister($dst$$reg), __ S,
+         as_FloatRegister($src$$reg), con);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vlslL_imm(vReg dst, vReg src, immI shift) %{
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= 2);
+  match(Set dst (LShiftVL src shift));
+  ins_cost(SVE_COST);
+  format %{ "sve_lsl $dst, $src, $shift\t# vector (sve) (D)" %}
+  ins_encode %{
+    int con = (int)$shift$$constant;
+    __ sve_lsl(as_FloatRegister($dst$$reg), __ D,
+         as_FloatRegister($src$$reg), con);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
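+// Vector shift count: replicate the scalar count into every lane so that the
+// variable-shift rules above can consume it as a vector operand.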
+instruct vshiftcntB(vReg dst, iRegIorL2I cnt) %{
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= 16 &&
+            (n->bottom_type()->is_vect()->element_basic_type() == T_BYTE));
+  match(Set dst (LShiftCntV cnt));
+  match(Set dst (RShiftCntV cnt));
+  format %{ "sve_dup $dst, $cnt\t# vector shift count (sve) (B)" %}
+  ins_encode %{
+    __ sve_dup(as_FloatRegister($dst$$reg), __ B, as_Register($cnt$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vshiftcntS(vReg dst, iRegIorL2I cnt) %{
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= 8 &&
+            (n->bottom_type()->is_vect()->element_basic_type() == T_SHORT ||
+            (n->bottom_type()->is_vect()->element_basic_type() == T_CHAR)));
+  match(Set dst (LShiftCntV cnt));
+  match(Set dst (RShiftCntV cnt));
+  format %{ "sve_dup $dst, $cnt\t# vector shift count (sve) (H)" %}
+  ins_encode %{
+    __ sve_dup(as_FloatRegister($dst$$reg), __ H, as_Register($cnt$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vshiftcntI(vReg dst, iRegIorL2I cnt) %{
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= 4 &&
+            (n->bottom_type()->is_vect()->element_basic_type() == T_INT));
+  match(Set dst (LShiftCntV cnt));
+  match(Set dst (RShiftCntV cnt));
+  format %{ "sve_dup $dst, $cnt\t# vector shift count (sve) (S)" %}
+  ins_encode %{
+    __ sve_dup(as_FloatRegister($dst$$reg), __ S, as_Register($cnt$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vshiftcntL(vReg dst, iRegIorL2I cnt) %{
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= 2 &&
+            (n->bottom_type()->is_vect()->element_basic_type() == T_LONG));
+  match(Set dst (LShiftCntV cnt));
+  match(Set dst (RShiftCntV cnt));
+  format %{ "sve_dup $dst, $cnt\t# vector shift count (sve) (D)" %}
+  ins_encode %{
+    __ sve_dup(as_FloatRegister($dst$$reg), __ D, as_Register($cnt$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+// vector sqrt
+
+instruct vsqrtF(vReg dst, vReg src) %{
+  predicate(UseSVE > 0 && n->as_Vector()->length_in_bytes() >= 16);
+  match(Set dst (SqrtVF src));
+  ins_cost(SVE_COST);
+  format %{ "sve_fsqrt $dst, $src\t# vector (sve) (S)" %}
+  ins_encode %{
+    __ sve_fsqrt(as_FloatRegister($dst$$reg), __ S,
+         ptrue, as_FloatRegister($src$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vsqrtD(vReg dst, vReg src) %{
+  predicate(UseSVE > 0 && n->as_Vector()->length_in_bytes() >= 16);
+  match(Set dst (SqrtVD src));
+  ins_cost(SVE_COST);
+  format %{ "sve_fsqrt $dst, $src\t# vector (sve) (D)" %}
+  ins_encode %{
+    __ sve_fsqrt(as_FloatRegister($dst$$reg), __ D,
+         ptrue, as_FloatRegister($src$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+// vector sub
+
+instruct vsubB(vReg dst, vReg src1, vReg src2) %{
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= 16);
+  match(Set dst (SubVB src1 src2));
+  ins_cost(SVE_COST);
+  format %{ "sve_sub $dst, $src1, $src2\t # vector (sve) (B)" %}
+  ins_encode %{
+    __ sve_sub(as_FloatRegister($dst$$reg), __ B,
+         as_FloatRegister($src1$$reg),
+         as_FloatRegister($src2$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vsubS(vReg dst, vReg src1, vReg src2) %{
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= 8);
+  match(Set dst (SubVS src1 src2));
+  ins_cost(SVE_COST);
+  format %{ "sve_sub $dst, $src1, $src2\t # vector (sve) (H)" %}
+  ins_encode %{
+    __ sve_sub(as_FloatRegister($dst$$reg), __ H,
+         as_FloatRegister($src1$$reg),
+         as_FloatRegister($src2$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vsubI(vReg dst, vReg src1, vReg src2) %{
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= 4);
+  match(Set dst (SubVI src1 src2));
+  ins_cost(SVE_COST);
+  format %{ "sve_sub $dst, $src1, $src2\t # vector (sve) (S)" %}
+  ins_encode %{
+    __ sve_sub(as_FloatRegister($dst$$reg), __ S,
+         as_FloatRegister($src1$$reg),
+         as_FloatRegister($src2$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vsubL(vReg dst, vReg src1, vReg src2) %{
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= 2);
+  match(Set dst (SubVL src1 src2));
+  ins_cost(SVE_COST);
+  format %{ "sve_sub $dst, $src1, $src2\t # vector (sve) (D)" %}
+  ins_encode %{
+    __ sve_sub(as_FloatRegister($dst$$reg), __ D,
+         as_FloatRegister($src1$$reg),
+         as_FloatRegister($src2$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vsubF(vReg dst, vReg src1, vReg src2) %{
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= 4);
+  match(Set dst (SubVF src1 src2));
+  ins_cost(SVE_COST);
+  format %{ "sve_fsub $dst, $src1, $src2\t # vector (sve) (S)" %}
+  ins_encode %{
+    __ sve_fsub(as_FloatRegister($dst$$reg), __ S,
+         as_FloatRegister($src1$$reg),
+         as_FloatRegister($src2$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vsubD(vReg dst, vReg src1, vReg src2) %{
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= 2);
+  match(Set dst (SubVD src1 src2));
+  ins_cost(SVE_COST);
+  format %{ "sve_fsub $dst, $src1, $src2\t # vector (sve) (D)" %}
+  ins_encode %{
+    __ sve_fsub(as_FloatRegister($dst$$reg), __ D,
+         as_FloatRegister($src1$$reg),
+         as_FloatRegister($src2$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/hotspot/cpu/aarch64/aarch64_sve_ad.m4	Tue Sep 08 15:28:06 2020 +0800
@@ -0,0 +1,767 @@
+//
+// Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
+// Copyright (c) 2020, Arm Limited. All rights reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+//
+// This code is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License version 2 only, as
+// published by the Free Software Foundation.
+//
+// This code is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+// version 2 for more details (a copy is included in the LICENSE file that
+// accompanied this code).
+//
+// You should have received a copy of the GNU General Public License version
+// 2 along with this work; if not, write to the Free Software Foundation,
+// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+//
+// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+// or visit www.oracle.com if you need additional information or have any
+// questions.
+//
+//
+
+dnl Generate the warning
+// This file is automatically generated by running "m4 aarch64_sve_ad.m4". Do not edit ----
+dnl
+
+// AArch64 SVE Architecture Description File
+
+dnl
+dnl OPERAND_VMEMORYA_IMMEDIATE_OFFSET($1,            $2,       $3     )
+dnl OPERAND_VMEMORYA_IMMEDIATE_OFFSET(imm_type_abbr, imm_type, imm_len)
+define(`OPERAND_VMEMORYA_IMMEDIATE_OFFSET', `
+operand vmemA_imm$1Offset$3()
+%{
+  predicate(Address::offset_ok_for_sve_immed(n->get_$2(), $3,
+            Matcher::scalable_vector_reg_size(T_BYTE)));
+  match(Con$1);
+
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}')
+dnl
+// 4 bit signed offset -- for predicated load/store
+OPERAND_VMEMORYA_IMMEDIATE_OFFSET(I, int,  4)
+OPERAND_VMEMORYA_IMMEDIATE_OFFSET(L, long, 4)
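+dnl SVE predicated ld1/st1 accept a signed 4-bit immediate offset measured in
+dnl multiples of the vector length (MUL VL), hence the immediate length of 4.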
+dnl
+dnl OPERAND_VMEMORYA_INDIRECT_OFFSET($1,            $2     )
+dnl OPERAND_VMEMORYA_INDIRECT_OFFSET(imm_type_abbr, imm_len)
+define(`OPERAND_VMEMORYA_INDIRECT_OFFSET', `
+operand vmemA_indOff$1$2(iRegP reg, vmemA_imm$1Offset$2 off)
+%{
+  constraint(ALLOC_IN_RC(ptr_reg));
+  match(AddP reg off);
+  op_cost(0);
+  format %{ "[$reg, $off, MUL VL]" %}
+  interface(MEMORY_INTER) %{
+    base($reg);
+    `index'(0xffffffff);
+    scale(0x0);
+    disp($off);
+  %}
+%}')
+dnl
+OPERAND_VMEMORYA_INDIRECT_OFFSET(I, 4)
+OPERAND_VMEMORYA_INDIRECT_OFFSET(L, 4)
+
+opclass vmemA(indirect, vmemA_indOffI4, vmemA_indOffL4);
+
+source_hpp %{
+  bool op_sve_supported(int opcode);
+%}
+
+source %{
+
+  static inline BasicType vector_element_basic_type(const MachNode* n) {
+    const TypeVect* vt = n->bottom_type()->is_vect();
+    return vt->element_basic_type();
+  }
+
+  static inline BasicType vector_element_basic_type(const MachNode* use, const MachOper* opnd) {
+    int def_idx = use->operand_index(opnd);
+    Node* def = use->in(def_idx);
+    const TypeVect* vt = def->bottom_type()->is_vect();
+    return vt->element_basic_type();
+  }
+
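+  // Member-function pointer type used to select the sve_ld1*/sve_st1* emitter
+  // that matches the element size.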
+  typedef void (C2_MacroAssembler::* sve_mem_insn_predicate)(FloatRegister Rt, Assembler::SIMD_RegVariant T,
+                                                             PRegister Pg, const Address &adr);
+
+  // Predicated load/store, with the governing predicate register typically
+  // set to ptrue so that all elements are active.
+  static void loadStoreA_predicate(C2_MacroAssembler masm, bool is_store,
+                                   FloatRegister reg, PRegister pg, BasicType bt,
+                                   int opcode, Register base, int index, int size, int disp) {
+    sve_mem_insn_predicate insn;
+    Assembler::SIMD_RegVariant type;
+    int esize = type2aelembytes(bt);
+    if (index == -1) {
+      assert(size == 0, "unsupported address mode: scale size = %d", size);
+      switch(esize) {
+      case 1:
+        insn = is_store ? &C2_MacroAssembler::sve_st1b : &C2_MacroAssembler::sve_ld1b;
+        type = Assembler::B;
+        break;
+      case 2:
+        insn = is_store ? &C2_MacroAssembler::sve_st1h : &C2_MacroAssembler::sve_ld1h;
+        type = Assembler::H;
+        break;
+      case 4:
+        insn = is_store ? &C2_MacroAssembler::sve_st1w : &C2_MacroAssembler::sve_ld1w;
+        type = Assembler::S;
+        break;
+      case 8:
+        insn = is_store ? &C2_MacroAssembler::sve_st1d : &C2_MacroAssembler::sve_ld1d;
+        type = Assembler::D;
+        break;
+      default:
+        assert(false, "unsupported");
+        ShouldNotReachHere();
+      }
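+      // disp is in bytes; the MUL VL addressing mode takes an immediate in
+      // whole vector lengths, hence the division.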
+      (masm.*insn)(reg, type, pg, Address(base, disp / Matcher::scalable_vector_reg_size(T_BYTE)));
+    } else {
+      assert(false, "unimplemented");
+      ShouldNotReachHere();
+    }
+  }
+
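+  // Vector opcodes for which no SVE rules exist yet; C2 treats them as
+  // unsupported at scalable vector lengths.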
+  bool op_sve_supported(int opcode) {
+    switch (opcode) {
+      case Op_MulAddVS2VI:
+        // No multiply reduction instructions
+      case Op_MulReductionVD:
+      case Op_MulReductionVF:
+      case Op_MulReductionVI:
+      case Op_MulReductionVL:
+        // Others
+      case Op_Extract:
+      case Op_ExtractB:
+      case Op_ExtractC:
+      case Op_ExtractD:
+      case Op_ExtractF:
+      case Op_ExtractI:
+      case Op_ExtractL:
+      case Op_ExtractS:
+      case Op_ExtractUB:
+        return false;
+      default:
+        return true;
+    }
+  }
+
+%}
+
+definitions %{
+  int_def SVE_COST             (200, 200);
+%}
+
+
+dnl
+dnl ELEMENT_SHORT_CHAR($1, $2)
+dnl ELEMENT_SHORT_CHAR(etype, node)
+define(`ELEMENT_SHORT_CHAR',`ifelse(`$1', `T_SHORT',
+  `($2->bottom_type()->is_vect()->element_basic_type() == T_SHORT ||
+            ($2->bottom_type()->is_vect()->element_basic_type() == T_CHAR))',
+   `($2->bottom_type()->is_vect()->element_basic_type() == $1)')')
+dnl
+
+// All SVE instructions
+
+// vector load/store
+
+// Use predicated vector load/store
+instruct loadV(vReg dst, vmemA mem) %{
+  predicate(UseSVE > 0 && n->as_LoadVector()->memory_size() >= 16);
+  match(Set dst (LoadVector mem));
+  ins_cost(SVE_COST);
+  format %{ "sve_ldr $dst, $mem\t # vector (sve)" %}
+  ins_encode %{
+    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
+    loadStoreA_predicate(C2_MacroAssembler(&cbuf), false, dst_reg, ptrue,
+                         vector_element_basic_type(this), $mem->opcode(),
+                         as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct storeV(vReg src, vmemA mem) %{
+  predicate(UseSVE > 0 && n->as_StoreVector()->memory_size() >= 16);
+  match(Set mem (StoreVector mem src));
+  ins_cost(SVE_COST);
+  format %{ "sve_str $mem, $src\t # vector (sve)" %}
+  ins_encode %{
+    FloatRegister src_reg = as_FloatRegister($src$$reg);
+    loadStoreA_predicate(C2_MacroAssembler(&cbuf), true, src_reg, ptrue,
+                         vector_element_basic_type(this, $src), $mem->opcode(),
+                         as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+dnl
+dnl UNARY_OP_TRUE_PREDICATE_ETYPE($1,        $2,      $3,           $4,   $5,          $6  )
+dnl UNARY_OP_TRUE_PREDICATE_ETYPE(insn_name, op_name, element_type, size, min_vec_len, insn)
+define(`UNARY_OP_TRUE_PREDICATE_ETYPE', `
+instruct $1(vReg dst, vReg src) %{
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= $5 &&
+            n->bottom_type()->is_vect()->element_basic_type() == $3);
+  match(Set dst ($2 src));
+  ins_cost(SVE_COST);
+  format %{ "$6 $dst, $src\t# vector (sve) ($4)" %}
+  ins_encode %{
+    __ $6(as_FloatRegister($dst$$reg), __ $4,
+         ptrue, as_FloatRegister($src$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}')dnl
+
+// vector abs
+UNARY_OP_TRUE_PREDICATE_ETYPE(vabsB, AbsVB, T_BYTE,   B, 16, sve_abs)
+UNARY_OP_TRUE_PREDICATE_ETYPE(vabsS, AbsVS, T_SHORT,  H, 8,  sve_abs)
+UNARY_OP_TRUE_PREDICATE_ETYPE(vabsI, AbsVI, T_INT,    S, 4,  sve_abs)
+UNARY_OP_TRUE_PREDICATE_ETYPE(vabsL, AbsVL, T_LONG,   D, 2,  sve_abs)
+UNARY_OP_TRUE_PREDICATE_ETYPE(vabsF, AbsVF, T_FLOAT,  S, 4,  sve_fabs)
+UNARY_OP_TRUE_PREDICATE_ETYPE(vabsD, AbsVD, T_DOUBLE, D, 2,  sve_fabs)
+dnl
+dnl BINARY_OP_UNPREDICATED($1,        $2,      $3,   $4,          $5  )
+dnl BINARY_OP_UNPREDICATED(insn_name, op_name, size, min_vec_len, insn)
+define(`BINARY_OP_UNPREDICATED', `
+instruct $1(vReg dst, vReg src1, vReg src2) %{
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= $4);
+  match(Set dst ($2 src1 src2));
+  ins_cost(SVE_COST);
+  format %{ "$5 $dst, $src1, $src2\t # vector (sve) ($3)" %}
+  ins_encode %{
+    __ $5(as_FloatRegister($dst$$reg), __ $3,
+         as_FloatRegister($src1$$reg),
+         as_FloatRegister($src2$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}')dnl
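+dnl For example, BINARY_OP_UNPREDICATED(vaddB, AddVB, B, 16, sve_add) expands
+dnl to the vaddB rule emitting the unpredicated three-operand form
+dnl "add z_dst.b, z_src1.b, z_src2.b".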
+
+// vector add
+BINARY_OP_UNPREDICATED(vaddB, AddVB, B, 16, sve_add)
+BINARY_OP_UNPREDICATED(vaddS, AddVS, H, 8,  sve_add)
+BINARY_OP_UNPREDICATED(vaddI, AddVI, S, 4,  sve_add)
+BINARY_OP_UNPREDICATED(vaddL, AddVL, D, 2,  sve_add)
+BINARY_OP_UNPREDICATED(vaddF, AddVF, S, 4,  sve_fadd)
+BINARY_OP_UNPREDICATED(vaddD, AddVD, D, 2,  sve_fadd)
+dnl
+dnl BINARY_OP_UNSIZED($1,        $2,      $3,          $4  )
+dnl BINARY_OP_UNSIZED(insn_name, op_name, min_vec_len, insn)
+define(`BINARY_OP_UNSIZED', `
+instruct $1(vReg dst, vReg src1, vReg src2) %{
+  predicate(UseSVE > 0 && n->as_Vector()->length_in_bytes() >= $3);
+  match(Set dst ($2 src1 src2));
+  ins_cost(SVE_COST);
+  format %{ "$4  $dst, $src1, $src2\t# vector (sve)" %}
+  ins_encode %{
+    __ $4(as_FloatRegister($dst$$reg),
+         as_FloatRegister($src1$$reg),
+         as_FloatRegister($src2$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}')dnl
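+dnl Bitwise operations are element-size agnostic, so these rules key on the
+dnl vector length in bytes rather than on an element count.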
+
+// vector and
+BINARY_OP_UNSIZED(vand, AndV, 16, sve_and)
+
+// vector or
+BINARY_OP_UNSIZED(vor, OrV, 16, sve_orr)
+
+// vector xor
+BINARY_OP_UNSIZED(vxor, XorV, 16, sve_eor)
+dnl
+dnl VDIVF($1,          $2,   $3         )
+dnl VDIVF(name_suffix, size, min_vec_len)
+define(`VDIVF', `
+instruct vdiv$1(vReg dst_src1, vReg src2) %{
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= $3);
+  match(Set dst_src1 (DivV$1 dst_src1 src2));
+  ins_cost(SVE_COST);
+  format %{ "sve_fdiv  $dst_src1, $dst_src1, $src2\t# vector (sve) ($2)" %}
+  ins_encode %{
+    __ sve_fdiv(as_FloatRegister($dst_src1$$reg), __ $2,
+         ptrue, as_FloatRegister($src2$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}')dnl
+
+// vector float div
+VDIVF(F, S, 4)
+VDIVF(D, D, 2)
+
+dnl
+dnl BINARY_OP_TRUE_PREDICATE_ETYPE($1,        $2,      $3,           $4,   $5,          $6  )
+dnl BINARY_OP_TRUE_PREDICATE_ETYPE(insn_name, op_name, element_type, size, min_vec_len, insn)
+define(`BINARY_OP_TRUE_PREDICATE_ETYPE', `
+instruct $1(vReg dst_src1, vReg src2) %{
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= $5 &&
+            n->bottom_type()->is_vect()->element_basic_type() == $3);
+  match(Set dst_src1 ($2 dst_src1 src2));
+  ins_cost(SVE_COST);
+  format %{ "$6 $dst_src1, $dst_src1, $src2\t # vector (sve) ($4)" %}
+  ins_encode %{
+    __ $6(as_FloatRegister($dst_src1$$reg), __ $4,
+         ptrue, as_FloatRegister($src2$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}')dnl
+dnl
+// vector max
+BINARY_OP_TRUE_PREDICATE_ETYPE(vmaxF, MaxV, T_FLOAT,  S, 4,  sve_fmax)
+BINARY_OP_TRUE_PREDICATE_ETYPE(vmaxD, MaxV, T_DOUBLE, D, 2,  sve_fmax)
+
+// vector min
+BINARY_OP_TRUE_PREDICATE_ETYPE(vminF, MinV, T_FLOAT,  S, 4,  sve_fmin)
+BINARY_OP_TRUE_PREDICATE_ETYPE(vminD, MinV, T_DOUBLE, D, 2,  sve_fmin)
+
+dnl
+dnl VFMLA($1           $2    $3         )
+dnl VFMLA(name_suffix, size, min_vec_len)
+define(`VFMLA', `
+// dst_src1 = dst_src1 + src2 * src3
+instruct vfmla$1(vReg dst_src1, vReg src2, vReg src3) %{
+  predicate(UseFMA && UseSVE > 0 && n->as_Vector()->length() >= $3);
+  match(Set dst_src1 (FmaV$1 dst_src1 (Binary src2 src3)));
+  ins_cost(SVE_COST);
+  format %{ "sve_fmla $dst_src1, $src2, $src3\t # vector (sve) ($2)" %}
+  ins_encode %{
+    __ sve_fmla(as_FloatRegister($dst_src1$$reg), __ $2,
+         ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}')dnl
+dnl
+// vector fmla
+VFMLA(F, S, 4)
+VFMLA(D, D, 2)
+
+dnl
+dnl VFMLS($1           $2    $3         )
+dnl VFMLS(name_suffix, size, min_vec_len)
+define(`VFMLS', `
+// dst_src1 = dst_src1 + -src2 * src3
+// dst_src1 = dst_src1 + src2 * -src3
+instruct vfmls$1(vReg dst_src1, vReg src2, vReg src3) %{
+  predicate(UseFMA && UseSVE > 0 && n->as_Vector()->length() >= $3);
+  match(Set dst_src1 (FmaV$1 dst_src1 (Binary (NegV$1 src2) src3)));
+  match(Set dst_src1 (FmaV$1 dst_src1 (Binary src2 (NegV$1 src3))));
+  ins_cost(SVE_COST);
+  format %{ "sve_fmls $dst_src1, $src2, $src3\t # vector (sve) ($2)" %}
+  ins_encode %{
+    __ sve_fmls(as_FloatRegister($dst_src1$$reg), __ $2,
+         ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}')dnl
+dnl
+// vector fmls
+VFMLS(F, S, 4)
+VFMLS(D, D, 2)
+
+dnl
+dnl VFNMLA($1           $2    $3         )
+dnl VFNMLA(name_suffix, size, min_vec_len)
+define(`VFNMLA', `
+// dst_src1 = -dst_src1 + -src2 * src3
+// dst_src1 = -dst_src1 + src2 * -src3
+instruct vfnmla$1(vReg dst_src1, vReg src2, vReg src3) %{
+  predicate(UseFMA && UseSVE > 0 && n->as_Vector()->length() >= $3);
+  match(Set dst_src1 (FmaV$1 (NegV$1 dst_src1) (Binary (NegV$1 src2) src3)));
+  match(Set dst_src1 (FmaV$1 (NegV$1 dst_src1) (Binary src2 (NegV$1 src3))));
+  ins_cost(SVE_COST);
+  format %{ "sve_fnmla $dst_src1, $src2, $src3\t # vector (sve) ($2)" %}
+  ins_encode %{
+    __ sve_fnmla(as_FloatRegister($dst_src1$$reg), __ $2,
+         ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}')dnl
+dnl
+// vector fnmla
+VFNMLA(F, S, 4)
+VFNMLA(D, D, 2)
+
+dnl
+dnl VFNMLS($1           $2    $3         )
+dnl VFNMLS(name_suffix, size, min_vec_len)
+define(`VFNMLS', `
+// dst_src1 = -dst_src1 + src2 * src3
+instruct vfnmls$1(vReg dst_src1, vReg src2, vReg src3) %{
+  predicate(UseFMA && UseSVE > 0 && n->as_Vector()->length() >= $3);
+  match(Set dst_src1 (FmaV$1 (NegV$1 dst_src1) (Binary src2 src3)));
+  ins_cost(SVE_COST);
+  format %{ "sve_fnmls $dst_src1, $src2, $src3\t # vector (sve) ($2)" %}
+  ins_encode %{
+    __ sve_fnmls(as_FloatRegister($dst_src1$$reg), __ $2,
+         ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}')dnl
+dnl
+// vector fnmls
+VFNMLS(F, S, 4)
+VFNMLS(D, D, 2)
+
+dnl
+dnl VMLA($1           $2    $3         )
+dnl VMLA(name_suffix, size, min_vec_len)
+define(`VMLA', `
+// dst_src1 = dst_src1 + src2 * src3
+instruct vmla$1(vReg dst_src1, vReg src2, vReg src3)
+%{
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= $3);
+  match(Set dst_src1 (AddV$1 dst_src1 (MulV$1 src2 src3)));
+  ins_cost(SVE_COST);
+  format %{ "sve_mla $dst_src1, src2, src3\t # vector (sve) ($2)" %}
+  ins_encode %{
+    __ sve_mla(as_FloatRegister($dst_src1$$reg), __ $2,
+      ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}')dnl
+dnl
+// vector mla
+VMLA(B, B, 16)
+VMLA(S, H, 8)
+VMLA(I, S, 4)
+VMLA(L, D, 2)
+
+dnl
+dnl VMLS($1           $2    $3         )
+dnl VMLS(name_suffix, size, min_vec_len)
+define(`VMLS', `
+// dst_src1 = dst_src1 - src2 * src3
+instruct vmls$1(vReg dst_src1, vReg src2, vReg src3)
+%{
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= $3);
+  match(Set dst_src1 (SubV$1 dst_src1 (MulV$1 src2 src3)));
+  ins_cost(SVE_COST);
+  format %{ "sve_mls $dst_src1, src2, src3\t # vector (sve) ($2)" %}
+  ins_encode %{
+    __ sve_mls(as_FloatRegister($dst_src1$$reg), __ $2,
+      ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}')dnl
+dnl
+// vector mls
+VMLS(B, B, 16)
+VMLS(S, H, 8)
+VMLS(I, S, 4)
+VMLS(L, D, 2)
+
+dnl
+dnl BINARY_OP_TRUE_PREDICATE($1,        $2,      $3,   $4,          $5  )
+dnl BINARY_OP_TRUE_PREDICATE(insn_name, op_name, size, min_vec_len, insn)
+define(`BINARY_OP_TRUE_PREDICATE', `
+instruct $1(vReg dst_src1, vReg src2) %{
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= $4);
+  match(Set dst_src1 ($2 dst_src1 src2));
+  ins_cost(SVE_COST);
+  format %{ "$5 $dst_src1, $dst_src1, $src2\t # vector (sve) ($3)" %}
+  ins_encode %{
+    __ $5(as_FloatRegister($dst_src1$$reg), __ $3,
+         ptrue, as_FloatRegister($src2$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}')dnl
+
+// vector mul
+BINARY_OP_TRUE_PREDICATE(vmulB, MulVB, B, 16, sve_mul)
+BINARY_OP_TRUE_PREDICATE(vmulS, MulVS, H, 8,  sve_mul)
+BINARY_OP_TRUE_PREDICATE(vmulI, MulVI, S, 4,  sve_mul)
+BINARY_OP_TRUE_PREDICATE(vmulL, MulVL, D, 2,  sve_mul)
+BINARY_OP_UNPREDICATED(vmulF, MulVF, S, 4, sve_fmul)
+BINARY_OP_UNPREDICATED(vmulD, MulVD, D, 2, sve_fmul)
+
+dnl
+dnl UNARY_OP_TRUE_PREDICATE($1,        $2,      $3,   $4,            $5  )
+dnl UNARY_OP_TRUE_PREDICATE(insn_name, op_name, size, min_vec_bytes, insn)
+define(`UNARY_OP_TRUE_PREDICATE', `
+instruct $1(vReg dst, vReg src) %{
+  predicate(UseSVE > 0 && n->as_Vector()->length_in_bytes() >= $4);
+  match(Set dst ($2 src));
+  ins_cost(SVE_COST);
+  format %{ "$5 $dst, $src\t# vector (sve) ($3)" %}
+  ins_encode %{
+    __ $5(as_FloatRegister($dst$$reg), __ $3,
+         ptrue, as_FloatRegister($src$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}')dnl
+dnl
+// vector fneg
+UNARY_OP_TRUE_PREDICATE(vnegF, NegVF, S, 16, sve_fneg)
+UNARY_OP_TRUE_PREDICATE(vnegD, NegVD, D, 16, sve_fneg)
+
+// popcount vector
+
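+// C2 only creates PopCountVI for int elements, hence the single S variant.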
+instruct vpopcountI(vReg dst, vReg src) %{
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= 4);
+  match(Set dst (PopCountVI src));
+  format %{ "sve_cnt $dst, $src\t# vector (sve) (S)\n\t"  %}
+  ins_encode %{
+     __ sve_cnt(as_FloatRegister($dst$$reg), __ S, ptrue, as_FloatRegister($src$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+dnl
+dnl REDUCE_ADD($1,        $2,      $3,      $4,      $5,   $6,        $7   )
+dnl REDUCE_ADD(insn_name, op_name, reg_dst, reg_src, size, elem_type, insn1)
+define(`REDUCE_ADD', `
+instruct $1($3 dst, $4 src1, vReg src2, vRegD tmp) %{
+  predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() >= 16 &&
+            ELEMENT_SHORT_CHAR($6, n->in(2)));
+  match(Set dst ($2 src1 src2));
+  effect(TEMP_DEF dst, TEMP tmp);
+  ins_cost(SVE_COST);
+  format %{ "sve_uaddv $tmp, $src2\t# vector (sve) ($5)\n\t"
+            "umov  $dst, $tmp, $5, 0\n\t"
+            "$7  $dst, $dst, $src1\t # add reduction $5" %}
+  ins_encode %{
+    __ sve_uaddv(as_FloatRegister($tmp$$reg), __ $5,
+         ptrue, as_FloatRegister($src2$$reg));
+    __ umov($dst$$Register, as_FloatRegister($tmp$$reg), __ $5, 0);
+    __ $7($dst$$Register, $dst$$Register, $src1$$Register);
+  %}
+  ins_pipe(pipe_slow);
+%}')dnl
+dnl
+dnl REDUCE_ADDF($1,        $2,      $3,      $4  )
+dnl REDUCE_ADDF(insn_name, op_name, reg_dst, size)
+define(`REDUCE_ADDF', `
+instruct $1($3 src1_dst, vReg src2) %{
+  predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() >= 16);
+  match(Set src1_dst ($2 src1_dst src2));
+  ins_cost(SVE_COST);
+  format %{ "sve_fadda $src1_dst, $src1_dst, $src2\t# vector (sve) ($4)" %}
+  ins_encode %{
+    __ sve_fadda(as_FloatRegister($src1_dst$$reg), __ $4,
+         ptrue, as_FloatRegister($src2$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}')dnl
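+dnl fadda accumulates the lanes in element order, preserving the strict
+dnl left-to-right evaluation that Java requires for floating-point add
+dnl reductions.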
+dnl
+// vector add reduction
+REDUCE_ADD(reduce_addI, AddReductionVI, iRegINoSp, iRegIorL2I, S, T_INT, addw)
+REDUCE_ADD(reduce_addL, AddReductionVL, iRegLNoSp, iRegL, D, T_LONG, add)
+REDUCE_ADDF(reduce_addF, AddReductionVF, vRegF, S)
+REDUCE_ADDF(reduce_addD, AddReductionVD, vRegD, D)
+
+dnl
+dnl REDUCE_FMINMAX($1,      $2,          $3,           $4,   $5         )
+dnl REDUCE_FMINMAX(min_max, name_suffix, element_type, size, reg_src_dst)
+define(`REDUCE_FMINMAX', `
+instruct reduce_$1$2($5 dst, $5 src1, vReg src2) %{
+  predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == $3 &&
+            n->in(2)->bottom_type()->is_vect()->length_in_bytes() >= 16);
+  match(Set dst (translit($1, `m', `M')ReductionV src1 src2));
+  ins_cost(INSN_COST);
+  effect(TEMP_DEF dst);
+  format %{ "sve_f$1v $dst, $src2 # vector (sve) (S)\n\t"
+            "f$1s $dst, $dst, $src1\t # $1 reduction $2" %}
+  ins_encode %{
+    __ sve_f$1v(as_FloatRegister($dst$$reg), __ $4,
+         ptrue, as_FloatRegister($src2$$reg));
+    __ f`$1'translit($4, `SD', `sd')(as_FloatRegister($dst$$reg), as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}')dnl
+// vector max reduction
+REDUCE_FMINMAX(max, F, T_FLOAT,  S, vRegF)
+REDUCE_FMINMAX(max, D, T_DOUBLE, D, vRegD)
+
+// vector min reduction
+REDUCE_FMINMAX(min, F, T_FLOAT,  S, vRegF)
+REDUCE_FMINMAX(min, D, T_DOUBLE, D, vRegD)
+
+// vector Math.rint, floor, ceil
+
+instruct vroundD(vReg dst, vReg src, immI rmode) %{
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= 2 &&
+            n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
+  match(Set dst (RoundDoubleModeV src rmode));
+  format %{ "sve_frint $dst, $src, $rmode\t# vector (sve) (D)" %}
+  ins_encode %{
+    switch ($rmode$$constant) {
+      case RoundDoubleModeNode::rmode_rint:
+        __ sve_frintn(as_FloatRegister($dst$$reg), __ D,
+             ptrue, as_FloatRegister($src$$reg));
+        break;
+      case RoundDoubleModeNode::rmode_floor:
+        __ sve_frintm(as_FloatRegister($dst$$reg), __ D,
+             ptrue, as_FloatRegister($src$$reg));
+        break;
+      case RoundDoubleModeNode::rmode_ceil:
+        __ sve_frintp(as_FloatRegister($dst$$reg), __ D,
+             ptrue, as_FloatRegister($src$$reg));
+        break;
+    }
+  %}
+  ins_pipe(pipe_slow);
+%}
+dnl
+dnl REPLICATE($1,        $2,      $3,      $4,   $5         )
+dnl REPLICATE(insn_name, op_name, reg_src, size, min_vec_len)
+define(`REPLICATE', `
+instruct $1(vReg dst, $3 src) %{
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= $5);
+  match(Set dst ($2 src));
+  ins_cost(SVE_COST);
+  format %{ "sve_dup  $dst, $src\t# vector (sve) ($4)" %}
+  ins_encode %{
+    __ sve_dup(as_FloatRegister($dst$$reg), __ $4, as_Register($src$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}')dnl
+dnl
+dnl REPLICATE_IMM8($1,        $2,      $3,       $4,   $5         )
+dnl REPLICATE_IMM8(insn_name, op_name, imm_type, size, min_vec_len)
+define(`REPLICATE_IMM8', `
+instruct $1(vReg dst, $3 con) %{
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= $5);
+  match(Set dst ($2 con));
+  ins_cost(SVE_COST);
+  format %{ "sve_dup  $dst, $con\t# vector (sve) ($4)" %}
+  ins_encode %{
+    __ sve_dup(as_FloatRegister($dst$$reg), __ $4, $con$$constant);
+  %}
+  ins_pipe(pipe_slow);
+%}')dnl
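+dnl The immediate form of sve_dup takes a signed 8-bit value, optionally
+dnl shifted left by 8; the immI8 and imm*8_shift8 operand types guarantee a
+dnl constant of that shape.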
+dnl
+dnl FREPLICATE($1,        $2,      $3,      $4,   $5         )
+dnl FREPLICATE(insn_name, op_name, reg_src, size, min_vec_len)
+define(`FREPLICATE', `
+instruct $1(vReg dst, $3 src) %{
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= $5);
+  match(Set dst ($2 src));
+  ins_cost(SVE_COST);
+  format %{ "sve_cpy  $dst, $src\t# vector (sve) ($4)" %}
+  ins_encode %{
+    __ sve_cpy(as_FloatRegister($dst$$reg), __ $4,
+         ptrue, as_FloatRegister($src$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}')dnl
+
+// vector replicate
+REPLICATE(replicateB, ReplicateB, iRegIorL2I, B, 16)
+REPLICATE(replicateS, ReplicateS, iRegIorL2I, H, 8)
+REPLICATE(replicateI, ReplicateI, iRegIorL2I, S, 4)
+REPLICATE(replicateL, ReplicateL, iRegL,      D, 2)
+
+REPLICATE_IMM8(replicateB_imm8, ReplicateB, immI8,        B, 16)
+REPLICATE_IMM8(replicateS_imm8, ReplicateS, immI8_shift8, H, 8)
+REPLICATE_IMM8(replicateI_imm8, ReplicateI, immI8_shift8, S, 4)
+REPLICATE_IMM8(replicateL_imm8, ReplicateL, immL8_shift8, D, 2)
+
+FREPLICATE(replicateF, ReplicateF, vRegF, S, 4)
+FREPLICATE(replicateD, ReplicateD, vRegD, D, 2)
+dnl
+dnl VSHIFT_TRUE_PREDICATE($1,        $2,      $3,   $4,          $5  )
+dnl VSHIFT_TRUE_PREDICATE(insn_name, op_name, size, min_vec_len, insn)
+define(`VSHIFT_TRUE_PREDICATE', `
+instruct $1(vReg dst, vReg shift) %{
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= $4);
+  match(Set dst ($2 dst shift));
+  ins_cost(SVE_COST);
+  format %{ "$5 $dst, $dst, $shift\t# vector (sve) ($3)" %}
+  ins_encode %{
+    __ $5(as_FloatRegister($dst$$reg), __ $3,
+         ptrue, as_FloatRegister($shift$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}')dnl
+dnl
+dnl VSHIFT_IMM_UNPREDICATE($1,        $2,      $3,   $4,          $5  )
+dnl VSHIFT_IMM_UNPREDICATE(insn_name, op_name, size, min_vec_len, insn)
+define(`VSHIFT_IMM_UNPREDICATE', `
+instruct $1(vReg dst, vReg src, immI shift) %{
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= $4);
+  match(Set dst ($2 src shift));
+  ins_cost(SVE_COST);
+  format %{ "$5 $dst, $src, $shift\t# vector (sve) ($3)" %}
+  ins_encode %{
+    int con = (int)$shift$$constant;dnl
+ifelse(eval(index(`$1', `vasr') == 0 || index(`$1', `vlsr') == 0), 1, `
+    if (con == 0) {
+      __ sve_orr(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
+           as_FloatRegister($src$$reg));
+      return;
+    }')dnl
+ifelse(eval(index(`$1', `vasr') == 0), 1, `ifelse(eval(index(`$3', `B') == 0), 1, `
+    if (con >= 8) con = 7;')ifelse(eval(index(`$3', `H') == 0), 1, `
+    if (con >= 16) con = 15;')')dnl
+ifelse(eval(index(`$1', `vlsl') == 0 || index(`$1', `vlsr') == 0), 1, `ifelse(eval(index(`$3', `B') == 0), 1, `
+    if (con >= 8) {
+      __ sve_eor(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
+           as_FloatRegister($src$$reg));
+      return;
+    }')ifelse(eval(index(`$3', `H') == 0), 1, `
+    if (con >= 16) {
+      __ sve_eor(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
+           as_FloatRegister($src$$reg));
+      return;
+    }')')
+    __ $5(as_FloatRegister($dst$$reg), __ $3,
+         as_FloatRegister($src$$reg), con);
+  %}
+  ins_pipe(pipe_slow);
+%}')dnl
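+dnl The ifelse branches reproduce the special cases spelled out in the
+dnl generated file: shift-by-zero becomes a move for asr/lsr (whose immediate
+dnl encoding starts at 1), over-wide arithmetic shifts clamp to esize-1, and
+dnl over-wide logical shifts of B/H lanes zero the destination.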
+dnl
+dnl VSHIFT_COUNT($1,        $2,   $3,          $4  )
+dnl VSHIFT_COUNT(insn_name, size, min_vec_len, type)
+define(`VSHIFT_COUNT', `
+instruct $1(vReg dst, iRegIorL2I cnt) %{
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= $3 &&
+            ELEMENT_SHORT_CHAR($4, n));
+  match(Set dst (LShiftCntV cnt));
+  match(Set dst (RShiftCntV cnt));
+  format %{ "sve_dup $dst, $cnt\t# vector shift count (sve) ($2)" %}
+  ins_encode %{
+    __ sve_dup(as_FloatRegister($dst$$reg), __ $2, as_Register($cnt$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}')dnl
+
+// vector shift
+VSHIFT_TRUE_PREDICATE(vasrB, RShiftVB,  B, 16, sve_asr)
+VSHIFT_TRUE_PREDICATE(vasrS, RShiftVS,  H,  8, sve_asr)
+VSHIFT_TRUE_PREDICATE(vasrI, RShiftVI,  S,  4, sve_asr)
+VSHIFT_TRUE_PREDICATE(vasrL, RShiftVL,  D,  2, sve_asr)
+VSHIFT_TRUE_PREDICATE(vlslB, LShiftVB,  B, 16, sve_lsl)
+VSHIFT_TRUE_PREDICATE(vlslS, LShiftVS,  H,  8, sve_lsl)
+VSHIFT_TRUE_PREDICATE(vlslI, LShiftVI,  S,  4, sve_lsl)
+VSHIFT_TRUE_PREDICATE(vlslL, LShiftVL,  D,  2, sve_lsl)
+VSHIFT_TRUE_PREDICATE(vlsrB, URShiftVB, B, 16, sve_lsr)
+VSHIFT_TRUE_PREDICATE(vlsrS, URShiftVS, H,  8, sve_lsr)
+VSHIFT_TRUE_PREDICATE(vlsrI, URShiftVI, S,  4, sve_lsr)
+VSHIFT_TRUE_PREDICATE(vlsrL, URShiftVL, D,  2, sve_lsr)
+VSHIFT_IMM_UNPREDICATE(vasrB_imm, RShiftVB,  B, 16, sve_asr)
+VSHIFT_IMM_UNPREDICATE(vasrS_imm, RShiftVS,  H,  8, sve_asr)
+VSHIFT_IMM_UNPREDICATE(vasrI_imm, RShiftVI,  S,  4, sve_asr)
+VSHIFT_IMM_UNPREDICATE(vasrL_imm, RShiftVL,  D,  2, sve_asr)
+VSHIFT_IMM_UNPREDICATE(vlsrB_imm, URShiftVB, B, 16, sve_lsr)
+VSHIFT_IMM_UNPREDICATE(vlsrS_imm, URShiftVS, H,  8, sve_lsr)
+VSHIFT_IMM_UNPREDICATE(vlsrI_imm, URShiftVI, S,  4, sve_lsr)
+VSHIFT_IMM_UNPREDICATE(vlsrL_imm, URShiftVL, D,  2, sve_lsr)
+VSHIFT_IMM_UNPREDICATE(vlslB_imm, LShiftVB,  B, 16, sve_lsl)
+VSHIFT_IMM_UNPREDICATE(vlslS_imm, LShiftVS,  H,  8, sve_lsl)
+VSHIFT_IMM_UNPREDICATE(vlslI_imm, LShiftVI,  S,  4, sve_lsl)
+VSHIFT_IMM_UNPREDICATE(vlslL_imm, LShiftVL,  D,  2, sve_lsl)
+VSHIFT_COUNT(vshiftcntB, B, 16, T_BYTE)
+VSHIFT_COUNT(vshiftcntS, H,  8, T_SHORT)
+VSHIFT_COUNT(vshiftcntI, S,  4, T_INT)
+VSHIFT_COUNT(vshiftcntL, D,  2, T_LONG)
+
+// vector sqrt
+UNARY_OP_TRUE_PREDICATE(vsqrtF, SqrtVF, S, 16, sve_fsqrt)
+UNARY_OP_TRUE_PREDICATE(vsqrtD, SqrtVD, D, 16, sve_fsqrt)
+
+// vector sub
+BINARY_OP_UNPREDICATED(vsubB, SubVB, B, 16, sve_sub)
+BINARY_OP_UNPREDICATED(vsubS, SubVS, H, 8, sve_sub)
+BINARY_OP_UNPREDICATED(vsubI, SubVI, S, 4, sve_sub)
+BINARY_OP_UNPREDICATED(vsubL, SubVL, D, 2, sve_sub)
+BINARY_OP_UNPREDICATED(vsubF, SubVF, S, 4, sve_fsub)
+BINARY_OP_UNPREDICATED(vsubD, SubVD, D, 2, sve_fsub)
+
--- a/src/hotspot/cpu/aarch64/assembler_aarch64.cpp	Wed Sep 02 20:33:29 2020 -0700
+++ b/src/hotspot/cpu/aarch64/assembler_aarch64.cpp	Tue Sep 08 15:28:06 2020 +0800
@@ -756,7 +756,7 @@
     __ fcmge(v18, __ T2D, v19, v20);                   //       fcmge   v18.2D, v19.2D, v20.2D
 
 // SHA512SIMDOp
-    __ sha512h(v13, __ T2D, v4, v28);                  //       sha512h                 q13, q4, v28.2D
+    __ sha512h(v13, __ T2D, v4, v28);                  //       sha512h         q13, q4, v28.2D
     __ sha512h2(v23, __ T2D, v21, v25);                //       sha512h2                q23, q21, v25.2D
     __ sha512su0(v24, __ T2D, v3);                     //       sha512su0               v24.2D, v3.2D
     __ sha512su1(v23, __ T2D, v26, v23);               //       sha512su1               v23.2D, v26.2D, v23.2D
@@ -777,6 +777,39 @@
     __ mov(v1, __ T4H, 2, zr);                         //       mov     v1.h[2], wzr
     __ mov(v1, __ T8B, 3, zr);                         //       mov     v1.b[3], wzr
     __ ld1(v31, v0, __ T2D, Address(__ post(r1, r0))); //       ld1     {v31.2d, v0.2d}, [x1], x0
+    __ sve_cpy(z0, __ S, p0, v1);                      //       mov     z0.s, p0/m, s1
+    __ sve_inc(r0, __ S);                              //       incw    x0
+    __ sve_dec(r1, __ H);                              //       dech    x1
+    __ sve_lsl(z0, __ B, z1, 7);                       //       lsl     z0.b, z1.b, #7
+    __ sve_lsl(z21, __ H, z1, 15);                     //       lsl     z21.h, z1.h, #15
+    __ sve_lsl(z0, __ S, z1, 31);                      //       lsl     z0.s, z1.s, #31
+    __ sve_lsl(z0, __ D, z1, 63);                      //       lsl     z0.d, z1.d, #63
+    __ sve_lsr(z0, __ B, z1, 7);                       //       lsr     z0.b, z1.b, #7
+    __ sve_asr(z0, __ H, z11, 15);                     //       asr     z0.h, z11.h, #15
+    __ sve_lsr(z30, __ S, z1, 31);                     //       lsr     z30.s, z1.s, #31
+    __ sve_asr(z0, __ D, z1, 63);                      //       asr     z0.d, z1.d, #63
+    __ sve_addvl(sp, r0, 31);                          //       addvl   sp, x0, #31
+    __ sve_addpl(r1, sp, -32);                         //       addpl   x1, sp, -32
+    __ sve_cntp(r8, __ B, p0, p1);                     //       cntp    x8, p0, p1.b
+    __ sve_dup(z0, __ B, 127);                         //       dup     z0.b, 127
+    __ sve_dup(z1, __ H, -128);                        //       dup     z1.h, -128
+    __ sve_dup(z2, __ S, 32512);                       //       dup     z2.s, 32512
+    __ sve_dup(z7, __ D, -32768);                      //       dup     z7.d, -32768
+    __ sve_ld1b(z0, __ B, p0, Address(sp));            //       ld1b    {z0.b}, p0/z, [sp]
+    __ sve_ld1h(z10, __ H, p1, Address(sp, -8));       //       ld1h    {z10.h}, p1/z, [sp, #-8, MUL VL]
+    __ sve_ld1w(z20, __ S, p2, Address(r0, 7));        //       ld1w    {z20.s}, p2/z, [x0, #7, MUL VL]
+    __ sve_ld1b(z30, __ B, p3, Address(sp, r8));       //       ld1b    {z30.b}, p3/z, [sp, x8]
+    __ sve_ld1w(z0, __ S, p4, Address(sp, r28));       //       ld1w    {z0.s}, p4/z, [sp, x28, LSL #2]
+    __ sve_ld1d(z11, __ D, p5, Address(r0, r1));       //       ld1d    {z11.d}, p5/z, [x0, x1, LSL #3]
+    __ sve_st1b(z22, __ B, p6, Address(sp));           //       st1b    {z22.b}, p6, [sp]
+    __ sve_st1b(z31, __ B, p7, Address(sp, -8));       //       st1b    {z31.b}, p7, [sp, #-8, MUL VL]
+    __ sve_st1w(z0, __ S, p1, Address(r0, 7));         //       st1w    {z0.s}, p1, [x0, #7, MUL VL]
+    __ sve_st1b(z0, __ B, p2, Address(sp, r1));        //       st1b    {z0.b}, p2, [sp, x1]
+    __ sve_st1h(z0, __ H, p3, Address(sp, r8));        //       st1h    {z0.h}, p3, [sp, x8, LSL #1]
+    __ sve_st1d(z0, __ D, p4, Address(r0, r18));       //       st1d    {z0.d}, p4, [x0, x18, LSL #3]
+    __ sve_ldr(z0, Address(sp));                       //       ldr     z0, [sp]
+    __ sve_ldr(z31, Address(sp, -256));                //       ldr     z31, [sp, #-256, MUL VL]
+    __ sve_str(z8, Address(r8, 255));                  //       str     z8, [x8, #255, MUL VL]
 
 // FloatImmediateOp
     __ fmovd(v0, 2.0);                                 //       fmov d0, #2.0
@@ -829,17 +862,17 @@
     __ ldbica(Assembler::xword, r28, r30, sp);         //       ldclra  x28, x30, [sp]
     __ ldeora(Assembler::xword, r1, r26, r28);         //       ldeora  x1, x26, [x28]
     __ ldorra(Assembler::xword, r4, r30, r4);          //       ldseta  x4, x30, [x4]
-    __ ldsmina(Assembler::xword, r6, r30, r26);        //       ldsmina         x6, x30, [x26]
-    __ ldsmaxa(Assembler::xword, r18, r9, r8);         //       ldsmaxa         x18, x9, [x8]
-    __ ldumina(Assembler::xword, r12, r0, r20);        //       ldumina         x12, x0, [x20]
-    __ ldumaxa(Assembler::xword, r1, r24, r2);         //       ldumaxa         x1, x24, [x2]
+    __ ldsmina(Assembler::xword, r6, r30, r26);        //       ldsmina x6, x30, [x26]
+    __ ldsmaxa(Assembler::xword, r18, r9, r8);         //       ldsmaxa x18, x9, [x8]
+    __ ldumina(Assembler::xword, r12, r0, r20);        //       ldumina x12, x0, [x20]
+    __ ldumaxa(Assembler::xword, r1, r24, r2);         //       ldumaxa x1, x24, [x2]
 
 // LSEOp
     __ swpal(Assembler::xword, r0, r9, r24);           //       swpal   x0, x9, [x24]
-    __ ldaddal(Assembler::xword, r26, r16, r30);       //       ldaddal         x26, x16, [x30]
-    __ ldbical(Assembler::xword, r3, r10, r23);        //       ldclral         x3, x10, [x23]
-    __ ldeoral(Assembler::xword, r10, r4, r18);        //       ldeoral         x10, x4, [x18]
-    __ ldorral(Assembler::xword, r2, r11, r8);         //       ldsetal         x2, x11, [x8]
+    __ ldaddal(Assembler::xword, r26, r16, r30);       //       ldaddal x26, x16, [x30]
+    __ ldbical(Assembler::xword, r3, r10, r23);        //       ldclral x3, x10, [x23]
+    __ ldeoral(Assembler::xword, r10, r4, r18);        //       ldeoral x10, x4, [x18]
+    __ ldorral(Assembler::xword, r2, r11, r8);         //       ldsetal x2, x11, [x8]
     __ ldsminal(Assembler::xword, r10, r15, r17);      //       ldsminal        x10, x15, [x17]
     __ ldsmaxal(Assembler::xword, r2, r10, r12);       //       ldsmaxal        x2, x10, [x12]
     __ lduminal(Assembler::xword, r12, r15, r13);      //       lduminal        x12, x15, [x13]
@@ -851,10 +884,10 @@
     __ ldbicl(Assembler::xword, r18, r21, r16);        //       ldclrl  x18, x21, [x16]
     __ ldeorl(Assembler::xword, r18, r11, r21);        //       ldeorl  x18, x11, [x21]
     __ ldorrl(Assembler::xword, r23, r12, r26);        //       ldsetl  x23, x12, [x26]
-    __ ldsminl(Assembler::xword, r23, r28, r14);       //       ldsminl         x23, x28, [x14]
-    __ ldsmaxl(Assembler::xword, r11, r24, r1);        //       ldsmaxl         x11, x24, [x1]
-    __ lduminl(Assembler::xword, r12, zr, r10);        //       lduminl         x12, xzr, [x10]
-    __ ldumaxl(Assembler::xword, r16, r7, r2);         //       ldumaxl         x16, x7, [x2]
+    __ ldsminl(Assembler::xword, r23, r28, r14);       //       ldsminl x23, x28, [x14]
+    __ ldsmaxl(Assembler::xword, r11, r24, r1);        //       ldsmaxl x11, x24, [x1]
+    __ lduminl(Assembler::xword, r12, zr, r10);        //       lduminl x12, xzr, [x10]
+    __ ldumaxl(Assembler::xword, r16, r7, r2);         //       ldumaxl x16, x7, [x2]
 
 // LSEOp
     __ swp(Assembler::word, r3, r13, r19);             //       swp     w3, w13, [x19]
@@ -873,17 +906,17 @@
     __ ldbica(Assembler::word, r29, r22, sp);          //       ldclra  w29, w22, [sp]
     __ ldeora(Assembler::word, r19, zr, r5);           //       ldeora  w19, wzr, [x5]
     __ ldorra(Assembler::word, r14, r18, sp);          //       ldseta  w14, w18, [sp]
-    __ ldsmina(Assembler::word, r18, r27, r20);        //       ldsmina         w18, w27, [x20]
-    __ ldsmaxa(Assembler::word, r16, r12, r11);        //       ldsmaxa         w16, w12, [x11]
-    __ ldumina(Assembler::word, r9, r6, r30);          //       ldumina         w9, w6, [x30]
-    __ ldumaxa(Assembler::word, r17, r27, r28);        //       ldumaxa         w17, w27, [x28]
+    __ ldsmina(Assembler::word, r18, r27, r20);        //       ldsmina w18, w27, [x20]
+    __ ldsmaxa(Assembler::word, r16, r12, r11);        //       ldsmaxa w16, w12, [x11]
+    __ ldumina(Assembler::word, r9, r6, r30);          //       ldumina w9, w6, [x30]
+    __ ldumaxa(Assembler::word, r17, r27, r28);        //       ldumaxa w17, w27, [x28]
 
 // LSEOp
     __ swpal(Assembler::word, r30, r7, r10);           //       swpal   w30, w7, [x10]
-    __ ldaddal(Assembler::word, r20, r10, r4);         //       ldaddal         w20, w10, [x4]
-    __ ldbical(Assembler::word, r24, r17, r17);        //       ldclral         w24, w17, [x17]
-    __ ldeoral(Assembler::word, r22, r3, r29);         //       ldeoral         w22, w3, [x29]
-    __ ldorral(Assembler::word, r15, r22, r19);        //       ldsetal         w15, w22, [x19]
+    __ ldaddal(Assembler::word, r20, r10, r4);         //       ldaddal w20, w10, [x4]
+    __ ldbical(Assembler::word, r24, r17, r17);        //       ldclral w24, w17, [x17]
+    __ ldeoral(Assembler::word, r22, r3, r29);         //       ldeoral w22, w3, [x29]
+    __ ldorral(Assembler::word, r15, r22, r19);        //       ldsetal w15, w22, [x19]
     __ ldsminal(Assembler::word, r19, r22, r2);        //       ldsminal        w19, w22, [x2]
     __ ldsmaxal(Assembler::word, r15, r6, r12);        //       ldsmaxal        w15, w6, [x12]
     __ lduminal(Assembler::word, r16, r11, r13);       //       lduminal        w16, w11, [x13]
@@ -895,697 +928,65 @@
     __ ldbicl(Assembler::word, r13, r10, r21);         //       ldclrl  w13, w10, [x21]
     __ ldeorl(Assembler::word, r29, r27, r12);         //       ldeorl  w29, w27, [x12]
     __ ldorrl(Assembler::word, r27, r3, r1);           //       ldsetl  w27, w3, [x1]
-    __ ldsminl(Assembler::word, zr, r24, r19);         //       ldsminl         wzr, w24, [x19]
-    __ ldsmaxl(Assembler::word, r17, r9, r28);         //       ldsmaxl         w17, w9, [x28]
-    __ lduminl(Assembler::word, r27, r15, r7);         //       lduminl         w27, w15, [x7]
-    __ ldumaxl(Assembler::word, r21, r23, sp);         //       ldumaxl         w21, w23, [sp]
+    __ ldsminl(Assembler::word, zr, r24, r19);         //       ldsminl wzr, w24, [x19]
+    __ ldsmaxl(Assembler::word, r17, r9, r28);         //       ldsmaxl w17, w9, [x28]
+    __ lduminl(Assembler::word, r27, r15, r7);         //       lduminl w27, w15, [x7]
+    __ ldumaxl(Assembler::word, r21, r23, sp);         //       ldumaxl w21, w23, [sp]
+
+// SVEVectorOp
+    __ sve_add(z24, __ D, z2, z30);                    //       add     z24.d, z2.d, z30.d
+    __ sve_sub(z18, __ S, z10, z22);                   //       sub     z18.s, z10.s, z22.s
+    __ sve_fadd(z2, __ D, z17, z0);                    //       fadd    z2.d, z17.d, z0.d
+    __ sve_fmul(z25, __ D, z22, z2);                   //       fmul    z25.d, z22.d, z2.d
+    __ sve_fsub(z12, __ D, z3, z27);                   //       fsub    z12.d, z3.d, z27.d
+    __ sve_abs(z28, __ B, p4, z26);                    //       abs     z28.b, p4/m, z26.b
+    __ sve_add(z9, __ B, p7, z18);                     //       add     z9.b, p7/m, z9.b, z18.b
+    __ sve_asr(z4, __ H, p1, z15);                     //       asr     z4.h, p1/m, z4.h, z15.h
+    __ sve_cnt(z22, __ D, p2, z2);                     //       cnt     z22.d, p2/m, z2.d
+    __ sve_lsl(z20, __ D, p7, z5);                     //       lsl     z20.d, p7/m, z20.d, z5.d
+    __ sve_lsr(z0, __ B, p4, z14);                     //       lsr     z0.b, p4/m, z0.b, z14.b
+    __ sve_mul(z25, __ S, p2, z27);                    //       mul     z25.s, p2/m, z25.s, z27.s
+    __ sve_neg(z26, __ S, p6, z24);                    //       neg     z26.s, p6/m, z24.s
+    __ sve_not(z0, __ S, p1, z6);                      //       not     z0.s, p1/m, z6.s
+    __ sve_smax(z0, __ B, p1, z15);                    //       smax    z0.b, p1/m, z0.b, z15.b
+    __ sve_smin(z9, __ H, p1, z5);                     //       smin    z9.h, p1/m, z9.h, z5.h
+    __ sve_sub(z27, __ S, p1, z20);                    //       sub     z27.s, p1/m, z27.s, z20.s
+    __ sve_fabs(z20, __ S, p1, z10);                   //       fabs    z20.s, p1/m, z10.s
+    __ sve_fadd(z16, __ D, p7, z6);                    //       fadd    z16.d, p7/m, z16.d, z6.d
+    __ sve_fdiv(z2, __ D, p3, z29);                    //       fdiv    z2.d, p3/m, z2.d, z29.d
+    __ sve_fmax(z2, __ D, p6, z22);                    //       fmax    z2.d, p6/m, z2.d, z22.d
+    __ sve_fmin(z14, __ D, p3, z27);                   //       fmin    z14.d, p3/m, z14.d, z27.d
+    __ sve_fmul(z23, __ S, p1, z2);                    //       fmul    z23.s, p1/m, z23.s, z2.s
+    __ sve_fneg(z10, __ D, p4, z10);                   //       fneg    z10.d, p4/m, z10.d
+    __ sve_frintm(z22, __ D, p3, z3);                  //       frintm  z22.d, p3/m, z3.d
+    __ sve_frintn(z16, __ D, p1, z1);                  //       frintn  z16.d, p1/m, z1.d
+    __ sve_frintp(z16, __ S, p4, z12);                 //       frintp  z16.s, p4/m, z12.s
+    __ sve_fsqrt(z12, __ S, p0, z16);                  //       fsqrt   z12.s, p0/m, z16.s
+    __ sve_fsub(z20, __ S, p5, z5);                    //       fsub    z20.s, p5/m, z20.s, z5.s
+    __ sve_fmla(z7, __ D, p4, z12, z27);               //       fmla    z7.d, p4/m, z12.d, z27.d
+    __ sve_fmls(z16, __ S, p1, z2, z28);               //       fmls    z16.s, p1/m, z2.s, z28.s
+    __ sve_fnmla(z4, __ S, p1, z17, z19);              //       fnmla   z4.s, p1/m, z17.s, z19.s
+    __ sve_fnmls(z12, __ D, p5, z8, z24);              //       fnmls   z12.d, p5/m, z8.d, z24.d
+    __ sve_mla(z18, __ B, p0, z10, z23);               //       mla     z18.b, p0/m, z10.b, z23.b
+    __ sve_mls(z19, __ B, p7, z13, z16);               //       mls     z19.b, p7/m, z13.b, z16.b
+    __ sve_and(z0, z7, z14);                           //       and     z0.d, z7.d, z14.d
+    __ sve_eor(z25, z8, z10);                          //       eor     z25.d, z8.d, z10.d
+    __ sve_orr(z20, z22, z27);                         //       orr     z20.d, z22.d, z27.d
+
+// SVEReductionOp
+    __ sve_andv(v3, __ S, p3, z18);                    //       andv s3, p3, z18.s
+    __ sve_orv(v7, __ B, p1, z28);                     //       orv b7, p1, z28.b
+    __ sve_eorv(v0, __ S, p2, z16);                    //       eorv s0, p2, z16.s
+    __ sve_smaxv(v22, __ H, p1, z15);                  //       smaxv h22, p1, z15.h
+    __ sve_sminv(v22, __ B, p2, z25);                  //       sminv b22, p2, z25.b
+    __ sve_fminv(v30, __ D, p4, z13);                  //       fminv d30, p4, z13.d
+    __ sve_fmaxv(v11, __ S, p0, z13);                  //       fmaxv s11, p0, z13.s
+    __ sve_fadda(v20, __ S, p4, z25);                  //       fadda s20, p4, s20, z25.s
+    __ sve_uaddv(v4, __ H, p1, z17);                   //       uaddv d4, p1, z17.h
 
     __ bind(forth);
 
 /*
-
-aarch64ops.o:     file format elf64-littleaarch64
-
-
-Disassembly of section .text:
-
-0000000000000000 <back>:
-   0:   8b0d82fa        add     x26, x23, x13, lsl #32
-   4:   cb49970c        sub     x12, x24, x9, lsr #37
-   8:   ab889dfc        adds    x28, x15, x8, asr #39
-   c:   eb9ee787        subs    x7, x28, x30, asr #57
-  10:   0b9b3ec9        add     w9, w22, w27, asr #15
-  14:   4b9279a3        sub     w3, w13, w18, asr #30
-  18:   2b88474e        adds    w14, w26, w8, asr #17
-  1c:   6b8c56c0        subs    w0, w22, w12, asr #21
-  20:   8a1a51e0        and     x0, x15, x26, lsl #20
-  24:   aa11f4ba        orr     x26, x5, x17, lsl #61
-  28:   ca0281b8        eor     x24, x13, x2, lsl #32
-  2c:   ea918c7c        ands    x28, x3, x17, asr #35
-  30:   0a5d4a19        and     w25, w16, w29, lsr #18
-  34:   2a4b264d        orr     w13, w18, w11, lsr #9
-  38:   4a523ca5        eor     w5, w5, w18, lsr #15
-  3c:   6a9b6ae2        ands    w2, w23, w27, asr #26
-  40:   8a70b79b        bic     x27, x28, x16, lsr #45
-  44:   aaba9728        orn     x8, x25, x26, asr #37
-  48:   ca6dfe3d        eon     x29, x17, x13, lsr #63
-  4c:   ea627f1c        bics    x28, x24, x2, lsr #31
-  50:   0aa70f53        bic     w19, w26, w7, asr #3
-  54:   2aaa0f06        orn     w6, w24, w10, asr #3
-  58:   4a6176a4        eon     w4, w21, w1, lsr #29
-  5c:   6a604eb0        bics    w16, w21, w0, lsr #19
-  60:   1105ed91        add     w17, w12, #0x17b
-  64:   3100583e        adds    w30, w1, #0x16
-  68:   5101f8bd        sub     w29, w5, #0x7e
-  6c:   710f0306        subs    w6, w24, #0x3c0
-  70:   9101a1a0        add     x0, x13, #0x68
-  74:   b10a5cc8        adds    x8, x6, #0x297
-  78:   d10810aa        sub     x10, x5, #0x204
-  7c:   f10fd061        subs    x1, x3, #0x3f4
-  80:   120cb166        and     w6, w11, #0xfff1fff1
-  84:   321764bc        orr     w28, w5, #0xfffffe07
-  88:   52174681        eor     w1, w20, #0x7fffe00
-  8c:   720c0247        ands    w7, w18, #0x100000
-  90:   9241018e        and     x14, x12, #0x8000000000000000
-  94:   b25a2969        orr     x9, x11, #0x1ffc000000000
-  98:   d278b411        eor     x17, x0, #0x3fffffffffff00
-  9c:   f26aad01        ands    x1, x8, #0xffffffffffc00003
-  a0:   14000000        b       a0 <back+0xa0>
-  a4:   17ffffd7        b       0 <back>
-  a8:   14000279        b       a8c <forth>
-  ac:   94000000        bl      ac <back+0xac>
-  b0:   97ffffd4        bl      0 <back>
-  b4:   94000276        bl      a8c <forth>
-  b8:   3400000a        cbz     w10, b8 <back+0xb8>
-  bc:   34fffa2a        cbz     w10, 0 <back>
-  c0:   34004e6a        cbz     w10, a8c <forth>
-  c4:   35000008        cbnz    w8, c4 <back+0xc4>
-  c8:   35fff9c8        cbnz    w8, 0 <back>
-  cc:   35004e08        cbnz    w8, a8c <forth>
-  d0:   b400000b        cbz     x11, d0 <back+0xd0>
-  d4:   b4fff96b        cbz     x11, 0 <back>
-  d8:   b4004dab        cbz     x11, a8c <forth>
-  dc:   b500001d        cbnz    x29, dc <back+0xdc>
-  e0:   b5fff91d        cbnz    x29, 0 <back>
-  e4:   b5004d5d        cbnz    x29, a8c <forth>
-  e8:   10000013        adr     x19, e8 <back+0xe8>
-  ec:   10fff8b3        adr     x19, 0 <back>
-  f0:   10004cf3        adr     x19, a8c <forth>
-  f4:   90000013        adrp    x19, 0 <back>
-  f8:   36300016        tbz     w22, #6, f8 <back+0xf8>
-  fc:   3637f836        tbz     w22, #6, 0 <back>
- 100:   36304c76        tbz     w22, #6, a8c <forth>
- 104:   3758000c        tbnz    w12, #11, 104 <back+0x104>
- 108:   375ff7cc        tbnz    w12, #11, 0 <back>
- 10c:   37584c0c        tbnz    w12, #11, a8c <forth>
- 110:   128313a0        mov     w0, #0xffffe762                 // #-6302
- 114:   528a32c7        mov     w7, #0x5196                     // #20886
- 118:   7289173b        movk    w27, #0x48b9
- 11c:   92ab3acc        mov     x12, #0xffffffffa629ffff        // #-1507196929
- 120:   d2a0bf94        mov     x20, #0x5fc0000                 // #100401152
- 124:   f2c285e8        movk    x8, #0x142f, lsl #32
- 128:   9358722f        sbfx    x15, x17, #24, #5
- 12c:   330e652f        bfxil   w15, w9, #14, #12
- 130:   53067f3b        lsr     w27, w25, #6
- 134:   93577c53        sbfx    x19, x2, #23, #9
- 138:   b34a1aac        bfi     x12, x21, #54, #7
- 13c:   d35a4016        ubfiz   x22, x0, #38, #17
- 140:   13946c63        extr    w3, w3, w20, #27
- 144:   93c3dbc8        extr    x8, x30, x3, #54
- 148:   54000000        b.eq    148 <back+0x148>  // b.none
- 14c:   54fff5a0        b.eq    0 <back>  // b.none
- 150:   540049e0        b.eq    a8c <forth>  // b.none
- 154:   54000001        b.ne    154 <back+0x154>  // b.any
- 158:   54fff541        b.ne    0 <back>  // b.any
- 15c:   54004981        b.ne    a8c <forth>  // b.any
- 160:   54000002        b.cs    160 <back+0x160>  // b.hs, b.nlast
- 164:   54fff4e2        b.cs    0 <back>  // b.hs, b.nlast
- 168:   54004922        b.cs    a8c <forth>  // b.hs, b.nlast
- 16c:   54000002        b.cs    16c <back+0x16c>  // b.hs, b.nlast
- 170:   54fff482        b.cs    0 <back>  // b.hs, b.nlast
- 174:   540048c2        b.cs    a8c <forth>  // b.hs, b.nlast
- 178:   54000003        b.cc    178 <back+0x178>  // b.lo, b.ul, b.last
- 17c:   54fff423        b.cc    0 <back>  // b.lo, b.ul, b.last
- 180:   54004863        b.cc    a8c <forth>  // b.lo, b.ul, b.last
- 184:   54000003        b.cc    184 <back+0x184>  // b.lo, b.ul, b.last
- 188:   54fff3c3        b.cc    0 <back>  // b.lo, b.ul, b.last
- 18c:   54004803        b.cc    a8c <forth>  // b.lo, b.ul, b.last
- 190:   54000004        b.mi    190 <back+0x190>  // b.first
- 194:   54fff364        b.mi    0 <back>  // b.first
- 198:   540047a4        b.mi    a8c <forth>  // b.first
- 19c:   54000005        b.pl    19c <back+0x19c>  // b.nfrst
- 1a0:   54fff305        b.pl    0 <back>  // b.nfrst
- 1a4:   54004745        b.pl    a8c <forth>  // b.nfrst
- 1a8:   54000006        b.vs    1a8 <back+0x1a8>
- 1ac:   54fff2a6        b.vs    0 <back>
- 1b0:   540046e6        b.vs    a8c <forth>
- 1b4:   54000007        b.vc    1b4 <back+0x1b4>
- 1b8:   54fff247        b.vc    0 <back>
- 1bc:   54004687        b.vc    a8c <forth>
- 1c0:   54000008        b.hi    1c0 <back+0x1c0>  // b.pmore
- 1c4:   54fff1e8        b.hi    0 <back>  // b.pmore
- 1c8:   54004628        b.hi    a8c <forth>  // b.pmore
- 1cc:   54000009        b.ls    1cc <back+0x1cc>  // b.plast
- 1d0:   54fff189        b.ls    0 <back>  // b.plast
- 1d4:   540045c9        b.ls    a8c <forth>  // b.plast
- 1d8:   5400000a        b.ge    1d8 <back+0x1d8>  // b.tcont
- 1dc:   54fff12a        b.ge    0 <back>  // b.tcont
- 1e0:   5400456a        b.ge    a8c <forth>  // b.tcont
- 1e4:   5400000b        b.lt    1e4 <back+0x1e4>  // b.tstop
- 1e8:   54fff0cb        b.lt    0 <back>  // b.tstop
- 1ec:   5400450b        b.lt    a8c <forth>  // b.tstop
- 1f0:   5400000c        b.gt    1f0 <back+0x1f0>
- 1f4:   54fff06c        b.gt    0 <back>
- 1f8:   540044ac        b.gt    a8c <forth>
- 1fc:   5400000d        b.le    1fc <back+0x1fc>
- 200:   54fff00d        b.le    0 <back>
- 204:   5400444d        b.le    a8c <forth>
- 208:   5400000e        b.al    208 <back+0x208>
- 20c:   54ffefae        b.al    0 <back>
- 210:   540043ee        b.al    a8c <forth>
- 214:   5400000f        b.nv    214 <back+0x214>
- 218:   54ffef4f        b.nv    0 <back>
- 21c:   5400438f        b.nv    a8c <forth>
- 220:   d40658e1        svc     #0x32c7
- 224:   d4014d22        hvc     #0xa69
- 228:   d4046543        smc     #0x232a
- 22c:   d4273f60        brk     #0x39fb
- 230:   d44cad80        hlt     #0x656c
- 234:   d503201f        nop
- 238:   d69f03e0        eret
- 23c:   d6bf03e0        drps
- 240:   d5033fdf        isb
- 244:   d5033e9f        dsb     st
- 248:   d50332bf        dmb     oshst
- 24c:   d61f0200        br      x16
- 250:   d63f0280        blr     x20
- 254:   c80a7d1b        stxr    w10, x27, [x8]
- 258:   c800fea1        stlxr   w0, x1, [x21]
- 25c:   c85f7fb1        ldxr    x17, [x29]
- 260:   c85fff9d        ldaxr   x29, [x28]
- 264:   c89ffee1        stlr    x1, [x23]
- 268:   c8dffe95        ldar    x21, [x20]
- 26c:   88167e7b        stxr    w22, w27, [x19]
- 270:   880bfcd0        stlxr   w11, w16, [x6]
- 274:   885f7c12        ldxr    w18, [x0]
- 278:   885ffd44        ldaxr   w4, [x10]
- 27c:   889ffed8        stlr    w24, [x22]
- 280:   88dffe6a        ldar    w10, [x19]
- 284:   48017fc5        stxrh   w1, w5, [x30]
- 288:   4808fe2c        stlxrh  w8, w12, [x17]
- 28c:   485f7dc9        ldxrh   w9, [x14]
- 290:   485ffc27        ldaxrh  w7, [x1]
- 294:   489ffe05        stlrh   w5, [x16]
- 298:   48dffd82        ldarh   w2, [x12]
- 29c:   080a7c6c        stxrb   w10, w12, [x3]
- 2a0:   081cff4e        stlxrb  w28, w14, [x26]
- 2a4:   085f7d5e        ldxrb   w30, [x10]
- 2a8:   085ffeae        ldaxrb  w14, [x21]
- 2ac:   089ffd2d        stlrb   w13, [x9]
- 2b0:   08dfff76        ldarb   w22, [x27]
- 2b4:   c87f4d7c        ldxp    x28, x19, [x11]
- 2b8:   c87fcc5e        ldaxp   x30, x19, [x2]
- 2bc:   c8220417        stxp    w2, x23, x1, [x0]
- 2c0:   c82cb5f0        stlxp   w12, x16, x13, [x15]
- 2c4:   887f55b2        ldxp    w18, w21, [x13]
- 2c8:   887ff90b        ldaxp   w11, w30, [x8]
- 2cc:   88382c2d        stxp    w24, w13, w11, [x1]
- 2d0:   883aedb5        stlxp   w26, w21, w27, [x13]
- 2d4:   f819928b        stur    x11, [x20, #-103]
- 2d8:   b803e21c        stur    w28, [x16, #62]
- 2dc:   381f713b        sturb   w27, [x9, #-9]
- 2e0:   781ce322        sturh   w2, [x25, #-50]
- 2e4:   f850f044        ldur    x4, [x2, #-241]
- 2e8:   b85e129e        ldur    w30, [x20, #-31]
- 2ec:   385e92f2        ldurb   w18, [x23, #-23]
- 2f0:   785ff35d        ldurh   w29, [x26, #-1]
- 2f4:   39801921        ldrsb   x1, [x9, #6]
- 2f8:   7881318b        ldursh  x11, [x12, #19]
- 2fc:   78dce02b        ldursh  w11, [x1, #-50]
- 300:   b8829313        ldursw  x19, [x24, #41]
- 304:   fc45f318        ldur    d24, [x24, #95]
- 308:   bc5d50af        ldur    s15, [x5, #-43]
- 30c:   fc001375        stur    d21, [x27, #1]
- 310:   bc1951b7        stur    s23, [x13, #-107]
- 314:   f8008c0b        str     x11, [x0, #8]!
- 318:   b801dc03        str     w3, [x0, #29]!
- 31c:   38009dcb        strb    w11, [x14, #9]!
- 320:   781fdf1d        strh    w29, [x24, #-3]!
- 324:   f8570e2d        ldr     x13, [x17, #-144]!
- 328:   b85faecc        ldr     w12, [x22, #-6]!
- 32c:   385f6d8d        ldrb    w13, [x12, #-10]!
- 330:   785ebea0        ldrh    w0, [x21, #-21]!
- 334:   38804cf7        ldrsb   x23, [x7, #4]!
- 338:   789cbce3        ldrsh   x3, [x7, #-53]!
- 33c:   78df9cbc        ldrsh   w28, [x5, #-7]!
- 340:   b89eed38        ldrsw   x24, [x9, #-18]!
- 344:   fc40cd6e        ldr     d14, [x11, #12]!
- 348:   bc5bdd93        ldr     s19, [x12, #-67]!
- 34c:   fc103c14        str     d20, [x0, #-253]!
- 350:   bc040c08        str     s8, [x0, #64]!
- 354:   f81a2784        str     x4, [x28], #-94
- 358:   b81ca4ec        str     w12, [x7], #-54
- 35c:   381e855b        strb    w27, [x10], #-24
- 360:   7801b506        strh    w6, [x8], #27
- 364:   f853654e        ldr     x14, [x10], #-202
- 368:   b85d74b0        ldr     w16, [x5], #-41
- 36c:   384095c2        ldrb    w2, [x14], #9
- 370:   785ec5bc        ldrh    w28, [x13], #-20
- 374:   389e15a9        ldrsb   x9, [x13], #-31
- 378:   789dc703        ldrsh   x3, [x24], #-36
- 37c:   78c06474        ldrsh   w20, [x3], #6
- 380:   b89ff667        ldrsw   x7, [x19], #-1
- 384:   fc57e51e        ldr     d30, [x8], #-130
- 388:   bc4155f9        ldr     s25, [x15], #21
- 38c:   fc05a6ee        str     d14, [x23], #90
- 390:   bc1df408        str     s8, [x0], #-33
- 394:   f835da4a        str     x10, [x18, w21, sxtw #3]
- 398:   b836d9a4        str     w4, [x13, w22, sxtw #2]
- 39c:   3833580d        strb    w13, [x0, w19, uxtw #0]
- 3a0:   7826cb6c        strh    w12, [x27, w6, sxtw]
- 3a4:   f8706900        ldr     x0, [x8, x16]
- 3a8:   b87ae880        ldr     w0, [x4, x26, sxtx]
- 3ac:   3865db2e        ldrb    w14, [x25, w5, sxtw #0]
- 3b0:   78724889        ldrh    w9, [x4, w18, uxtw]
- 3b4:   38a7789b        ldrsb   x27, [x4, x7, lsl #0]
- 3b8:   78beca2f        ldrsh   x15, [x17, w30, sxtw]
- 3bc:   78f6c810        ldrsh   w16, [x0, w22, sxtw]
- 3c0:   b8bef956        ldrsw   x22, [x10, x30, sxtx #2]
- 3c4:   fc6afabd        ldr     d29, [x21, x10, sxtx #3]
- 3c8:   bc734963        ldr     s3, [x11, w19, uxtw]
- 3cc:   fc3d5b8d        str     d13, [x28, w29, uxtw #3]
- 3d0:   bc25fbb7        str     s23, [x29, x5, sxtx #2]
- 3d4:   f9189d05        str     x5, [x8, #12600]
- 3d8:   b91ecb1d        str     w29, [x24, #7880]
- 3dc:   39187a33        strb    w19, [x17, #1566]
- 3e0:   791f226d        strh    w13, [x19, #3984]
- 3e4:   f95aa2f3        ldr     x19, [x23, #13632]
- 3e8:   b9587bb7        ldr     w23, [x29, #6264]
- 3ec:   395f7176        ldrb    w22, [x11, #2012]
- 3f0:   795d9143        ldrh    w3, [x10, #3784]
- 3f4:   399e7e08        ldrsb   x8, [x16, #1951]
- 3f8:   799a2697        ldrsh   x23, [x20, #3346]
- 3fc:   79df3422        ldrsh   w2, [x1, #3994]
- 400:   b99c2624        ldrsw   x4, [x17, #7204]
- 404:   fd5c2374        ldr     d20, [x27, #14400]
- 408:   bd5fa1d9        ldr     s25, [x14, #8096]
- 40c:   fd1d595a        str     d26, [x10, #15024]
- 410:   bd1b1869        str     s9, [x3, #6936]
- 414:   580033db        ldr     x27, a8c <forth>
- 418:   1800000b        ldr     w11, 418 <back+0x418>
- 41c:   f8945060        prfum   pldl1keep, [x3, #-187]
- 420:   d8000000        prfm    pldl1keep, 420 <back+0x420>
- 424:   f8ae6ba0        prfm    pldl1keep, [x29, x14]
- 428:   f99a0080        prfm    pldl1keep, [x4, #13312]
- 42c:   1a070035        adc     w21, w1, w7
- 430:   3a0700a8        adcs    w8, w5, w7
- 434:   5a0e0367        sbc     w7, w27, w14
- 438:   7a11009b        sbcs    w27, w4, w17
- 43c:   9a000380        adc     x0, x28, x0
- 440:   ba1e030c        adcs    x12, x24, x30
- 444:   da0f0320        sbc     x0, x25, x15
- 448:   fa030301        sbcs    x1, x24, x3
- 44c:   0b340b12        add     w18, w24, w20, uxtb #2
- 450:   2b2a278d        adds    w13, w28, w10, uxth #1
- 454:   cb22aa0f        sub     x15, x16, w2, sxth #2
- 458:   6b2d29bd        subs    w29, w13, w13, uxth #2
- 45c:   8b2cce8c        add     x12, x20, w12, sxtw #3
- 460:   ab2b877e        adds    x30, x27, w11, sxtb #1
- 464:   cb21c8ee        sub     x14, x7, w1, sxtw #2
- 468:   eb3ba47d        subs    x29, x3, w27, sxth #1
- 46c:   3a4d400e        ccmn    w0, w13, #0xe, mi  // mi = first
- 470:   7a5232c6        ccmp    w22, w18, #0x6, cc  // cc = lo, ul, last
- 474:   ba5e624e        ccmn    x18, x30, #0xe, vs
- 478:   fa53814c        ccmp    x10, x19, #0xc, hi  // hi = pmore
- 47c:   3a52d8c2        ccmn    w6, #0x12, #0x2, le
- 480:   7a4d8924        ccmp    w9, #0xd, #0x4, hi  // hi = pmore
- 484:   ba4b3aab        ccmn    x21, #0xb, #0xb, cc  // cc = lo, ul, last
- 488:   fa4d7882        ccmp    x4, #0xd, #0x2, vc
- 48c:   1a96804c        csel    w12, w2, w22, hi  // hi = pmore
- 490:   1a912618        csinc   w24, w16, w17, cs  // cs = hs, nlast
- 494:   5a90b0e6        csinv   w6, w7, w16, lt  // lt = tstop
- 498:   5a96976b        csneg   w11, w27, w22, ls  // ls = plast
- 49c:   9a9db06a        csel    x10, x3, x29, lt  // lt = tstop
- 4a0:   9a9b374c        csinc   x12, x26, x27, cc  // cc = lo, ul, last
- 4a4:   da95c14f        csinv   x15, x10, x21, gt
- 4a8:   da89c6fe        csneg   x30, x23, x9, gt
- 4ac:   5ac0015e        rbit    w30, w10
- 4b0:   5ac005fd        rev16   w29, w15
- 4b4:   5ac00bdd        rev     w29, w30
- 4b8:   5ac012b9        clz     w25, w21
- 4bc:   5ac01404        cls     w4, w0
- 4c0:   dac002b2        rbit    x18, x21
- 4c4:   dac0061d        rev16   x29, x16
- 4c8:   dac00a95        rev32   x21, x20
- 4cc:   dac00e66        rev     x6, x19
- 4d0:   dac0107e        clz     x30, x3
- 4d4:   dac01675        cls     x21, x19
- 4d8:   1ac00b0b        udiv    w11, w24, w0
- 4dc:   1ace0f3b        sdiv    w27, w25, w14
- 4e0:   1ad221c3        lsl     w3, w14, w18
- 4e4:   1ad825e7        lsr     w7, w15, w24
- 4e8:   1ad92a3c        asr     w28, w17, w25
- 4ec:   1adc2f42        ror     w2, w26, w28
- 4f0:   9ada0b25        udiv    x5, x25, x26
- 4f4:   9ad20e1b        sdiv    x27, x16, x18
- 4f8:   9acc22a6        lsl     x6, x21, x12
- 4fc:   9acc2480        lsr     x0, x4, x12
- 500:   9adc2a3b        asr     x27, x17, x28
- 504:   9ad22c5c        ror     x28, x2, x18
- 508:   9bce7dea        umulh   x10, x15, x14
- 50c:   9b597c6e        smulh   x14, x3, x25
- 510:   1b0e166f        madd    w15, w19, w14, w5
- 514:   1b1ae490        msub    w16, w4, w26, w25
- 518:   9b023044        madd    x4, x2, x2, x12
- 51c:   9b089e3d        msub    x29, x17, x8, x7
- 520:   9b391083        smaddl  x3, w4, w25, x4
- 524:   9b24c73a        smsubl  x26, w25, w4, x17
- 528:   9bb15f40        umaddl  x0, w26, w17, x23
- 52c:   9bbcc6af        umsubl  x15, w21, w28, x17
- 530:   1e23095b        fmul    s27, s10, s3
- 534:   1e3918e0        fdiv    s0, s7, s25
- 538:   1e2f28c9        fadd    s9, s6, s15
- 53c:   1e2a39fd        fsub    s29, s15, s10
- 540:   1e270a22        fmul    s2, s17, s7
- 544:   1e77096b        fmul    d11, d11, d23
- 548:   1e771ba7        fdiv    d7, d29, d23
- 54c:   1e6b2b6e        fadd    d14, d27, d11
- 550:   1e78388b        fsub    d11, d4, d24
- 554:   1e6e09ec        fmul    d12, d15, d14
- 558:   1f1c3574        fmadd   s20, s11, s28, s13
- 55c:   1f17f98b        fmsub   s11, s12, s23, s30
- 560:   1f2935da        fnmadd  s26, s14, s9, s13
- 564:   1f2574ea        fnmadd  s10, s7, s5, s29
- 568:   1f4b306f        fmadd   d15, d3, d11, d12
- 56c:   1f5ec7cf        fmsub   d15, d30, d30, d17
- 570:   1f6f3e93        fnmadd  d19, d20, d15, d15
- 574:   1f6226a9        fnmadd  d9, d21, d2, d9
- 578:   1e2040fb        fmov    s27, s7
- 57c:   1e20c3dd        fabs    s29, s30
- 580:   1e214031        fneg    s17, s1
- 584:   1e21c0c2        fsqrt   s2, s6
- 588:   1e22c06a        fcvt    d10, s3
- 58c:   1e604178        fmov    d24, d11
- 590:   1e60c027        fabs    d7, d1
- 594:   1e61400b        fneg    d11, d0
- 598:   1e61c243        fsqrt   d3, d18
- 59c:   1e6240dc        fcvt    s28, d6
- 5a0:   1e3800d6        fcvtzs  w22, s6
- 5a4:   9e380360        fcvtzs  x0, s27
- 5a8:   1e78005a        fcvtzs  w26, d2
- 5ac:   9e7800e5        fcvtzs  x5, d7
- 5b0:   1e22017c        scvtf   s28, w11
- 5b4:   9e2201b9        scvtf   s25, x13
- 5b8:   1e6202eb        scvtf   d11, w23
- 5bc:   9e620113        scvtf   d19, x8
- 5c0:   1e2602b2        fmov    w18, s21
- 5c4:   9e660299        fmov    x25, d20
- 5c8:   1e270253        fmov    s19, w18
- 5cc:   9e6703a2        fmov    d2, x29
- 5d0:   1e2822c0        fcmp    s22, s8
- 5d4:   1e7322a0        fcmp    d21, d19
- 5d8:   1e202288        fcmp    s20, #0.0
- 5dc:   1e602168        fcmp    d11, #0.0
- 5e0:   293c19f4        stp     w20, w6, [x15, #-32]
- 5e4:   2966387b        ldp     w27, w14, [x3, #-208]
- 5e8:   69762971        ldpsw   x17, x10, [x11, #-80]
- 5ec:   a9041dc7        stp     x7, x7, [x14, #64]
- 5f0:   a9475c0c        ldp     x12, x23, [x0, #112]
- 5f4:   29b61ccd        stp     w13, w7, [x6, #-80]!
- 5f8:   29ee405e        ldp     w30, w16, [x2, #-144]!
- 5fc:   69ee0744        ldpsw   x4, x1, [x26, #-144]!
- 600:   a9843977        stp     x23, x14, [x11, #64]!
- 604:   a9f46ebd        ldp     x29, x27, [x21, #-192]!
- 608:   28ba16b6        stp     w22, w5, [x21], #-48
- 60c:   28fc44db        ldp     w27, w17, [x6], #-32
- 610:   68f61831        ldpsw   x17, x6, [x1], #-80
- 614:   a8b352ad        stp     x13, x20, [x21], #-208
- 618:   a8c56d5e        ldp     x30, x27, [x10], #80
- 61c:   28024565        stnp    w5, w17, [x11, #16]
- 620:   2874134e        ldnp    w14, w4, [x26, #-96]
- 624:   a8027597        stnp    x23, x29, [x12, #32]
- 628:   a87b1aa0        ldnp    x0, x6, [x21, #-80]
- 62c:   0c40734f        ld1     {v15.8b}, [x26]
- 630:   4cdfa177        ld1     {v23.16b, v24.16b}, [x11], #32
- 634:   0cc76ee8        ld1     {v8.1d-v10.1d}, [x23], x7
- 638:   4cdf2733        ld1     {v19.8h-v22.8h}, [x25], #64
- 63c:   0d40c23d        ld1r    {v29.8b}, [x17]
- 640:   4ddfcaf8        ld1r    {v24.4s}, [x23], #4
- 644:   0dd9ccaa        ld1r    {v10.1d}, [x5], x25
- 648:   4c408d52        ld2     {v18.2d, v19.2d}, [x10]
- 64c:   0cdf85ec        ld2     {v12.4h, v13.4h}, [x15], #16
- 650:   4d60c259        ld2r    {v25.16b, v26.16b}, [x18]
- 654:   0dffcbc1        ld2r    {v1.2s, v2.2s}, [x30], #8
- 658:   4de9ce50        ld2r    {v16.2d, v17.2d}, [x18], x9
- 65c:   4cc24999        ld3     {v25.4s-v27.4s}, [x12], x2
- 660:   0c404a7a        ld3     {v26.2s-v28.2s}, [x19]
- 664:   4d40e6af        ld3r    {v15.8h-v17.8h}, [x21]
- 668:   4ddfe9b9        ld3r    {v25.4s-v27.4s}, [x13], #12
- 66c:   0dddef8e        ld3r    {v14.1d-v16.1d}, [x28], x29
- 670:   4cdf07b1        ld4     {v17.8h-v20.8h}, [x29], #64
- 674:   0cc000fb        ld4     {v27.8b-v30.8b}, [x7], x0
- 678:   0d60e258        ld4r    {v24.8b-v27.8b}, [x18]
- 67c:   0dffe740        ld4r    {v0.4h-v3.4h}, [x26], #8
- 680:   0de2eb2c        ld4r    {v12.2s-v15.2s}, [x25], x2
- 684:   0e31baf6        addv    b22, v23.8b
- 688:   4e31bb9b        addv    b27, v28.16b
- 68c:   0e71b8a4        addv    h4, v5.4h
- 690:   4e71b907        addv    h7, v8.8h
- 694:   4eb1b8e6        addv    s6, v7.4s
- 698:   0e30a841        smaxv   b1, v2.8b
- 69c:   4e30ab7a        smaxv   b26, v27.16b
- 6a0:   0e70aa0f        smaxv   h15, v16.4h
- 6a4:   4e70a862        smaxv   h2, v3.8h
- 6a8:   4eb0a9cd        smaxv   s13, v14.4s
- 6ac:   6e30f9cd        fmaxv   s13, v14.4s
- 6b0:   0e31ab38        sminv   b24, v25.8b
- 6b4:   4e31ab17        sminv   b23, v24.16b
- 6b8:   0e71a8a4        sminv   h4, v5.4h
- 6bc:   4e71aa93        sminv   h19, v20.8h
- 6c0:   4eb1aa0f        sminv   s15, v16.4s
- 6c4:   6eb0f820        fminv   s0, v1.4s
- 6c8:   0e20b8a4        abs     v4.8b, v5.8b
- 6cc:   4e20bab4        abs     v20.16b, v21.16b
- 6d0:   0e60b98b        abs     v11.4h, v12.4h
- 6d4:   4e60bbdd        abs     v29.8h, v30.8h
- 6d8:   0ea0ba0f        abs     v15.2s, v16.2s
- 6dc:   4ea0bad5        abs     v21.4s, v22.4s
- 6e0:   4ee0b8a4        abs     v4.2d, v5.2d
- 6e4:   0ea0f9ee        fabs    v14.2s, v15.2s
- 6e8:   4ea0faf6        fabs    v22.4s, v23.4s
- 6ec:   4ee0fb59        fabs    v25.2d, v26.2d
- 6f0:   2ea0f8e6        fneg    v6.2s, v7.2s
- 6f4:   6ea0f9ac        fneg    v12.4s, v13.4s
- 6f8:   6ee0f9ee        fneg    v14.2d, v15.2d
- 6fc:   2ea1f9cd        fsqrt   v13.2s, v14.2s
- 700:   6ea1f9ee        fsqrt   v14.4s, v15.4s
- 704:   6ee1f949        fsqrt   v9.2d, v10.2d
- 708:   2e205b59        mvn     v25.8b, v26.8b
- 70c:   6e205bbc        mvn     v28.16b, v29.16b
- 710:   0e2c1d6a        and     v10.8b, v11.8b, v12.8b
- 714:   4e351e93        and     v19.16b, v20.16b, v21.16b
- 718:   0ead1d8b        orr     v11.8b, v12.8b, v13.8b
- 71c:   4eb31e51        orr     v17.16b, v18.16b, v19.16b
- 720:   2e371ed5        eor     v21.8b, v22.8b, v23.8b
- 724:   6e311e0f        eor     v15.16b, v16.16b, v17.16b
- 728:   0e3686b4        add     v20.8b, v21.8b, v22.8b
- 72c:   4e398717        add     v23.16b, v24.16b, v25.16b
- 730:   0e7c877a        add     v26.4h, v27.4h, v28.4h
- 734:   4e6784c5        add     v5.8h, v6.8h, v7.8h
- 738:   0ea884e6        add     v6.2s, v7.2s, v8.2s
- 73c:   4eb1860f        add     v15.4s, v16.4s, v17.4s
- 740:   4ef1860f        add     v15.2d, v16.2d, v17.2d
- 744:   0e3bd759        fadd    v25.2s, v26.2s, v27.2s
- 748:   4e32d630        fadd    v16.4s, v17.4s, v18.4s
- 74c:   4e7dd79b        fadd    v27.2d, v28.2d, v29.2d
- 750:   2e3a8738        sub     v24.8b, v25.8b, v26.8b
- 754:   6e31860f        sub     v15.16b, v16.16b, v17.16b
- 758:   2e7b8759        sub     v25.4h, v26.4h, v27.4h
- 75c:   6e7085ee        sub     v14.8h, v15.8h, v16.8h
- 760:   2eac856a        sub     v10.2s, v11.2s, v12.2s
- 764:   6eaf85cd        sub     v13.4s, v14.4s, v15.4s
- 768:   6ef085ee        sub     v14.2d, v15.2d, v16.2d
- 76c:   0eb6d6b4        fsub    v20.2s, v21.2s, v22.2s
- 770:   4ea3d441        fsub    v1.4s, v2.4s, v3.4s
- 774:   4ef8d6f6        fsub    v22.2d, v23.2d, v24.2d
- 778:   0e209ffe        mul     v30.8b, v31.8b, v0.8b
- 77c:   4e309dee        mul     v14.16b, v15.16b, v16.16b
- 780:   0e649c62        mul     v2.4h, v3.4h, v4.4h
- 784:   4e689ce6        mul     v6.8h, v7.8h, v8.8h
- 788:   0ea59c83        mul     v3.2s, v4.2s, v5.2s
- 78c:   4ea99d07        mul     v7.4s, v8.4s, v9.4s
- 790:   2e3adf38        fmul    v24.2s, v25.2s, v26.2s
- 794:   6e22dc20        fmul    v0.4s, v1.4s, v2.4s
- 798:   6e7ddf9b        fmul    v27.2d, v28.2d, v29.2d
- 79c:   0e7f97dd        mla     v29.4h, v30.4h, v31.4h
- 7a0:   4e6794c5        mla     v5.8h, v6.8h, v7.8h
- 7a4:   0ea794c5        mla     v5.2s, v6.2s, v7.2s
- 7a8:   4ebf97dd        mla     v29.4s, v30.4s, v31.4s
- 7ac:   0e2dcd8b        fmla    v11.2s, v12.2s, v13.2s
- 7b0:   4e3bcf59        fmla    v25.4s, v26.4s, v27.4s
- 7b4:   4e62cc20        fmla    v0.2d, v1.2d, v2.2d
- 7b8:   2e6097fe        mls     v30.4h, v31.4h, v0.4h
- 7bc:   6e629420        mls     v0.8h, v1.8h, v2.8h
- 7c0:   2eb49672        mls     v18.2s, v19.2s, v20.2s
- 7c4:   6ebe97bc        mls     v28.4s, v29.4s, v30.4s
- 7c8:   0ebbcf59        fmls    v25.2s, v26.2s, v27.2s
- 7cc:   4eabcd49        fmls    v9.4s, v10.4s, v11.4s
- 7d0:   4efbcf59        fmls    v25.2d, v26.2d, v27.2d
- 7d4:   2e2efdac        fdiv    v12.2s, v13.2s, v14.2s
- 7d8:   6e31fe0f        fdiv    v15.4s, v16.4s, v17.4s
- 7dc:   6e6dfd8b        fdiv    v11.2d, v12.2d, v13.2d
- 7e0:   0e2c656a        smax    v10.8b, v11.8b, v12.8b
- 7e4:   4e346672        smax    v18.16b, v19.16b, v20.16b
- 7e8:   0e7a6738        smax    v24.4h, v25.4h, v26.4h
- 7ec:   4e7766d5        smax    v21.8h, v22.8h, v23.8h
- 7f0:   0eb96717        smax    v23.2s, v24.2s, v25.2s
- 7f4:   4ea26420        smax    v0.4s, v1.4s, v2.4s
- 7f8:   0e32f630        fmax    v16.2s, v17.2s, v18.2s
- 7fc:   4e2cf56a        fmax    v10.4s, v11.4s, v12.4s
- 800:   4e68f4e6        fmax    v6.2d, v7.2d, v8.2d
- 804:   0e3e6fbc        smin    v28.8b, v29.8b, v30.8b
- 808:   4e286ce6        smin    v6.16b, v7.16b, v8.16b
- 80c:   0e676cc5        smin    v5.4h, v6.4h, v7.4h
- 810:   4e676cc5        smin    v5.8h, v6.8h, v7.8h
- 814:   0eb66eb4        smin    v20.2s, v21.2s, v22.2s
- 818:   4eb46e72        smin    v18.4s, v19.4s, v20.4s
- 81c:   0eb1f60f        fmin    v15.2s, v16.2s, v17.2s
- 820:   4eb4f672        fmin    v18.4s, v19.4s, v20.4s
- 824:   4efff7dd        fmin    v29.2d, v30.2d, v31.2d
- 828:   2e3c8f7a        cmeq    v26.8b, v27.8b, v28.8b
- 82c:   6e3e8fbc        cmeq    v28.16b, v29.16b, v30.16b
- 830:   2e638c41        cmeq    v1.4h, v2.4h, v3.4h
- 834:   6e7d8f9b        cmeq    v27.8h, v28.8h, v29.8h
- 838:   2ea28c20        cmeq    v0.2s, v1.2s, v2.2s
- 83c:   6eb68eb4        cmeq    v20.4s, v21.4s, v22.4s
- 840:   6efe8fbc        cmeq    v28.2d, v29.2d, v30.2d
- 844:   0e31e60f        fcmeq   v15.2s, v16.2s, v17.2s
- 848:   4e2ee5ac        fcmeq   v12.4s, v13.4s, v14.4s
- 84c:   4e6ce56a        fcmeq   v10.2d, v11.2d, v12.2d
- 850:   0e3e37bc        cmgt    v28.8b, v29.8b, v30.8b
- 854:   4e3e37bc        cmgt    v28.16b, v29.16b, v30.16b
- 858:   0e753693        cmgt    v19.4h, v20.4h, v21.4h
- 85c:   4e7836f6        cmgt    v22.8h, v23.8h, v24.8h
- 860:   0eac356a        cmgt    v10.2s, v11.2s, v12.2s
- 864:   4ea634a4        cmgt    v4.4s, v5.4s, v6.4s
- 868:   4ee037fe        cmgt    v30.2d, v31.2d, v0.2d
- 86c:   2eb6e6b4        fcmgt   v20.2s, v21.2s, v22.2s
- 870:   6eaae528        fcmgt   v8.4s, v9.4s, v10.4s
- 874:   6ee0e7fe        fcmgt   v30.2d, v31.2d, v0.2d
- 878:   0e343e72        cmge    v18.8b, v19.8b, v20.8b
- 87c:   4e2c3d6a        cmge    v10.16b, v11.16b, v12.16b
- 880:   0e7d3f9b        cmge    v27.4h, v28.4h, v29.4h
- 884:   4e643c62        cmge    v2.8h, v3.8h, v4.8h
- 888:   0eba3f38        cmge    v24.2s, v25.2s, v26.2s
- 88c:   4ea63ca4        cmge    v4.4s, v5.4s, v6.4s
- 890:   4ee53c83        cmge    v3.2d, v4.2d, v5.2d
- 894:   2e2ae528        fcmge   v8.2s, v9.2s, v10.2s
- 898:   6e38e6f6        fcmge   v22.4s, v23.4s, v24.4s
- 89c:   6e74e672        fcmge   v18.2d, v19.2d, v20.2d
- 8a0:   ce7c808d        sha512h         q13, q4, v28.2d
- 8a4:   ce7986b7        sha512h2        q23, q21, v25.2d
- 8a8:   cec08078        sha512su0       v24.2d, v3.2d
- 8ac:   ce778b57        sha512su1       v23.2d, v26.2d, v23.2d
- 8b0:   ba5fd3e3        ccmn    xzr, xzr, #0x3, le
- 8b4:   3a5f03e5        ccmn    wzr, wzr, #0x5, eq  // eq = none
- 8b8:   fa411be4        ccmp    xzr, #0x1, #0x4, ne  // ne = any
- 8bc:   7a42cbe2        ccmp    wzr, #0x2, #0x2, gt
- 8c0:   93df03ff        ror     xzr, xzr, #0
- 8c4:   c820ffff        stlxp   w0, xzr, xzr, [sp]
- 8c8:   8822fc7f        stlxp   w2, wzr, wzr, [x3]
- 8cc:   c8247cbf        stxp    w4, xzr, xzr, [x5]
- 8d0:   88267fff        stxp    w6, wzr, wzr, [sp]
- 8d4:   4e010fe0        dup     v0.16b, wzr
- 8d8:   4e081fe1        mov     v1.d[0], xzr
- 8dc:   4e0c1fe1        mov     v1.s[1], wzr
- 8e0:   4e0a1fe1        mov     v1.h[2], wzr
- 8e4:   4e071fe1        mov     v1.b[3], wzr
- 8e8:   4cc0ac3f        ld1     {v31.2d, v0.2d}, [x1], x0
- 8ec:   1e601000        fmov    d0, #2.000000000000000000e+00
- 8f0:   1e603000        fmov    d0, #2.125000000000000000e+00
- 8f4:   1e621000        fmov    d0, #4.000000000000000000e+00
- 8f8:   1e623000        fmov    d0, #4.250000000000000000e+00
- 8fc:   1e641000        fmov    d0, #8.000000000000000000e+00
- 900:   1e643000        fmov    d0, #8.500000000000000000e+00
- 904:   1e661000        fmov    d0, #1.600000000000000000e+01
- 908:   1e663000        fmov    d0, #1.700000000000000000e+01
- 90c:   1e681000        fmov    d0, #1.250000000000000000e-01
- 910:   1e683000        fmov    d0, #1.328125000000000000e-01
- 914:   1e6a1000        fmov    d0, #2.500000000000000000e-01
- 918:   1e6a3000        fmov    d0, #2.656250000000000000e-01
- 91c:   1e6c1000        fmov    d0, #5.000000000000000000e-01
- 920:   1e6c3000        fmov    d0, #5.312500000000000000e-01
- 924:   1e6e1000        fmov    d0, #1.000000000000000000e+00
- 928:   1e6e3000        fmov    d0, #1.062500000000000000e+00
- 92c:   1e701000        fmov    d0, #-2.000000000000000000e+00
- 930:   1e703000        fmov    d0, #-2.125000000000000000e+00
- 934:   1e721000        fmov    d0, #-4.000000000000000000e+00
- 938:   1e723000        fmov    d0, #-4.250000000000000000e+00
- 93c:   1e741000        fmov    d0, #-8.000000000000000000e+00
- 940:   1e743000        fmov    d0, #-8.500000000000000000e+00
- 944:   1e761000        fmov    d0, #-1.600000000000000000e+01
- 948:   1e763000        fmov    d0, #-1.700000000000000000e+01
- 94c:   1e781000        fmov    d0, #-1.250000000000000000e-01
- 950:   1e783000        fmov    d0, #-1.328125000000000000e-01
- 954:   1e7a1000        fmov    d0, #-2.500000000000000000e-01
- 958:   1e7a3000        fmov    d0, #-2.656250000000000000e-01
- 95c:   1e7c1000        fmov    d0, #-5.000000000000000000e-01
- 960:   1e7c3000        fmov    d0, #-5.312500000000000000e-01
- 964:   1e7e1000        fmov    d0, #-1.000000000000000000e+00
- 968:   1e7e3000        fmov    d0, #-1.062500000000000000e+00
- 96c:   f82f8075        swp     x15, x21, [x3]
- 970:   f8380328        ldadd   x24, x8, [x25]
- 974:   f8341230        ldclr   x20, x16, [x17]
- 978:   f8222001        ldeor   x2, x1, [x0]
- 97c:   f8383064        ldset   x24, x4, [x3]
- 980:   f82c539f        stsmin  x12, [x28]
- 984:   f82a405a        ldsmax  x10, x26, [x2]
- 988:   f82c73f2        ldumin  x12, x18, [sp]
- 98c:   f82163ad        ldumax  x1, x13, [x29]
- 990:   f8a08193        swpa    x0, x19, [x12]
- 994:   f8b101b6        ldadda  x17, x22, [x13]
- 998:   f8bc13fe        ldclra  x28, x30, [sp]
- 99c:   f8a1239a        ldeora  x1, x26, [x28]
- 9a0:   f8a4309e        ldseta  x4, x30, [x4]
- 9a4:   f8a6535e        ldsmina         x6, x30, [x26]
- 9a8:   f8b24109        ldsmaxa         x18, x9, [x8]
- 9ac:   f8ac7280        ldumina         x12, x0, [x20]
- 9b0:   f8a16058        ldumaxa         x1, x24, [x2]
- 9b4:   f8e08309        swpal   x0, x9, [x24]
- 9b8:   f8fa03d0        ldaddal         x26, x16, [x30]
- 9bc:   f8e312ea        ldclral         x3, x10, [x23]
- 9c0:   f8ea2244        ldeoral         x10, x4, [x18]
- 9c4:   f8e2310b        ldsetal         x2, x11, [x8]
- 9c8:   f8ea522f        ldsminal        x10, x15, [x17]
- 9cc:   f8e2418a        ldsmaxal        x2, x10, [x12]
- 9d0:   f8ec71af        lduminal        x12, x15, [x13]
- 9d4:   f8e26287        ldumaxal        x2, x7, [x20]
- 9d8:   f87a8090        swpl    x26, x16, [x4]
- 9dc:   f8620184        ldaddl  x2, x4, [x12]
- 9e0:   f8721215        ldclrl  x18, x21, [x16]
- 9e4:   f87222ab        ldeorl  x18, x11, [x21]
- 9e8:   f877334c        ldsetl  x23, x12, [x26]
- 9ec:   f87751dc        ldsminl         x23, x28, [x14]
- 9f0:   f86b4038        ldsmaxl         x11, x24, [x1]
- 9f4:   f86c715f        stuminl         x12, [x10]
- 9f8:   f8706047        ldumaxl         x16, x7, [x2]
- 9fc:   b823826d        swp     w3, w13, [x19]
- a00:   b8310070        ldadd   w17, w16, [x3]
- a04:   b82113cb        ldclr   w1, w11, [x30]
- a08:   b82521e8        ldeor   w5, w8, [x15]
- a0c:   b83d301e        ldset   w29, w30, [x0]
- a10:   b8345287        ldsmin  w20, w7, [x20]
- a14:   b83742bc        ldsmax  w23, w28, [x21]
- a18:   b83b70b9        ldumin  w27, w25, [x5]
- a1c:   b8216217        ldumax  w1, w23, [x16]
- a20:   b8bf8185        swpa    wzr, w5, [x12]
- a24:   b8a901fc        ldadda  w9, w28, [x15]
- a28:   b8bd13f6        ldclra  w29, w22, [sp]
- a2c:   b8b320bf        ldeora  w19, wzr, [x5]
- a30:   b8ae33f2        ldseta  w14, w18, [sp]
- a34:   b8b2529b        ldsmina         w18, w27, [x20]
- a38:   b8b0416c        ldsmaxa         w16, w12, [x11]
- a3c:   b8a973c6        ldumina         w9, w6, [x30]
- a40:   b8b1639b        ldumaxa         w17, w27, [x28]
- a44:   b8fe8147        swpal   w30, w7, [x10]
- a48:   b8f4008a        ldaddal         w20, w10, [x4]
- a4c:   b8f81231        ldclral         w24, w17, [x17]
- a50:   b8f623a3        ldeoral         w22, w3, [x29]
- a54:   b8ef3276        ldsetal         w15, w22, [x19]
- a58:   b8f35056        ldsminal        w19, w22, [x2]
- a5c:   b8ef4186        ldsmaxal        w15, w6, [x12]
- a60:   b8f071ab        lduminal        w16, w11, [x13]
- a64:   b8f763c1        ldumaxal        w23, w1, [x30]
- a68:   b8738225        swpl    w19, w5, [x17]
- a6c:   b86202d0        ldaddl  w2, w16, [x22]
- a70:   b86d12aa        ldclrl  w13, w10, [x21]
- a74:   b87d219b        ldeorl  w29, w27, [x12]
- a78:   b87b3023        ldsetl  w27, w3, [x1]
- a7c:   b87f5278        ldsminl         wzr, w24, [x19]
- a80:   b8714389        ldsmaxl         w17, w9, [x28]
- a84:   b87b70ef        lduminl         w27, w15, [x7]
- a88:   b87563f7        ldumaxl         w21, w23, [sp]
-
 */
 
   static const unsigned int insns[] =
@@ -1600,30 +1001,30 @@
     0x9101a1a0,     0xb10a5cc8,     0xd10810aa,     0xf10fd061,
     0x120cb166,     0x321764bc,     0x52174681,     0x720c0247,
     0x9241018e,     0xb25a2969,     0xd278b411,     0xf26aad01,
-    0x14000000,     0x17ffffd7,     0x14000279,     0x94000000,
-    0x97ffffd4,     0x94000276,     0x3400000a,     0x34fffa2a,
-    0x34004e6a,     0x35000008,     0x35fff9c8,     0x35004e08,
-    0xb400000b,     0xb4fff96b,     0xb4004dab,     0xb500001d,
-    0xb5fff91d,     0xb5004d5d,     0x10000013,     0x10fff8b3,
-    0x10004cf3,     0x90000013,     0x36300016,     0x3637f836,
-    0x36304c76,     0x3758000c,     0x375ff7cc,     0x37584c0c,
+    0x14000000,     0x17ffffd7,     0x140002c9,     0x94000000,
+    0x97ffffd4,     0x940002c6,     0x3400000a,     0x34fffa2a,
+    0x3400586a,     0x35000008,     0x35fff9c8,     0x35005808,
+    0xb400000b,     0xb4fff96b,     0xb40057ab,     0xb500001d,
+    0xb5fff91d,     0xb500575d,     0x10000013,     0x10fff8b3,
+    0x100056f3,     0x90000013,     0x36300016,     0x3637f836,
+    0x36305676,     0x3758000c,     0x375ff7cc,     0x3758560c,
     0x128313a0,     0x528a32c7,     0x7289173b,     0x92ab3acc,
     0xd2a0bf94,     0xf2c285e8,     0x9358722f,     0x330e652f,
     0x53067f3b,     0x93577c53,     0xb34a1aac,     0xd35a4016,
     0x13946c63,     0x93c3dbc8,     0x54000000,     0x54fff5a0,
-    0x540049e0,     0x54000001,     0x54fff541,     0x54004981,
-    0x54000002,     0x54fff4e2,     0x54004922,     0x54000002,
-    0x54fff482,     0x540048c2,     0x54000003,     0x54fff423,
-    0x54004863,     0x54000003,     0x54fff3c3,     0x54004803,
-    0x54000004,     0x54fff364,     0x540047a4,     0x54000005,
-    0x54fff305,     0x54004745,     0x54000006,     0x54fff2a6,
-    0x540046e6,     0x54000007,     0x54fff247,     0x54004687,
-    0x54000008,     0x54fff1e8,     0x54004628,     0x54000009,
-    0x54fff189,     0x540045c9,     0x5400000a,     0x54fff12a,
-    0x5400456a,     0x5400000b,     0x54fff0cb,     0x5400450b,
-    0x5400000c,     0x54fff06c,     0x540044ac,     0x5400000d,
-    0x54fff00d,     0x5400444d,     0x5400000e,     0x54ffefae,
-    0x540043ee,     0x5400000f,     0x54ffef4f,     0x5400438f,
+    0x540053e0,     0x54000001,     0x54fff541,     0x54005381,
+    0x54000002,     0x54fff4e2,     0x54005322,     0x54000002,
+    0x54fff482,     0x540052c2,     0x54000003,     0x54fff423,
+    0x54005263,     0x54000003,     0x54fff3c3,     0x54005203,
+    0x54000004,     0x54fff364,     0x540051a4,     0x54000005,
+    0x54fff305,     0x54005145,     0x54000006,     0x54fff2a6,
+    0x540050e6,     0x54000007,     0x54fff247,     0x54005087,
+    0x54000008,     0x54fff1e8,     0x54005028,     0x54000009,
+    0x54fff189,     0x54004fc9,     0x5400000a,     0x54fff12a,
+    0x54004f6a,     0x5400000b,     0x54fff0cb,     0x54004f0b,
+    0x5400000c,     0x54fff06c,     0x54004eac,     0x5400000d,
+    0x54fff00d,     0x54004e4d,     0x5400000e,     0x54ffefae,
+    0x54004dee,     0x5400000f,     0x54ffef4f,     0x54004d8f,
     0xd40658e1,     0xd4014d22,     0xd4046543,     0xd4273f60,
     0xd44cad80,     0xd503201f,     0xd69f03e0,     0xd6bf03e0,
     0xd5033fdf,     0xd5033e9f,     0xd50332bf,     0xd61f0200,
@@ -1655,7 +1056,7 @@
     0x791f226d,     0xf95aa2f3,     0xb9587bb7,     0x395f7176,
     0x795d9143,     0x399e7e08,     0x799a2697,     0x79df3422,
     0xb99c2624,     0xfd5c2374,     0xbd5fa1d9,     0xfd1d595a,
-    0xbd1b1869,     0x580033db,     0x1800000b,     0xf8945060,
+    0xbd1b1869,     0x58003ddb,     0x1800000b,     0xf8945060,
     0xd8000000,     0xf8ae6ba0,     0xf99a0080,     0x1a070035,
     0x3a0700a8,     0x5a0e0367,     0x7a11009b,     0x9a000380,
     0xba1e030c,     0xda0f0320,     0xfa030301,     0x0b340b12,
@@ -1732,33 +1133,53 @@
     0xba5fd3e3,     0x3a5f03e5,     0xfa411be4,     0x7a42cbe2,
     0x93df03ff,     0xc820ffff,     0x8822fc7f,     0xc8247cbf,
     0x88267fff,     0x4e010fe0,     0x4e081fe1,     0x4e0c1fe1,
-    0x4e0a1fe1,     0x4e071fe1,     0x4cc0ac3f,     0x1e601000,
-    0x1e603000,     0x1e621000,     0x1e623000,     0x1e641000,
-    0x1e643000,     0x1e661000,     0x1e663000,     0x1e681000,
-    0x1e683000,     0x1e6a1000,     0x1e6a3000,     0x1e6c1000,
-    0x1e6c3000,     0x1e6e1000,     0x1e6e3000,     0x1e701000,
-    0x1e703000,     0x1e721000,     0x1e723000,     0x1e741000,
-    0x1e743000,     0x1e761000,     0x1e763000,     0x1e781000,
-    0x1e783000,     0x1e7a1000,     0x1e7a3000,     0x1e7c1000,
-    0x1e7c3000,     0x1e7e1000,     0x1e7e3000,     0xf82f8075,
-    0xf8380328,     0xf8341230,     0xf8222001,     0xf8383064,
-    0xf82c539f,     0xf82a405a,     0xf82c73f2,     0xf82163ad,
-    0xf8a08193,     0xf8b101b6,     0xf8bc13fe,     0xf8a1239a,
-    0xf8a4309e,     0xf8a6535e,     0xf8b24109,     0xf8ac7280,
-    0xf8a16058,     0xf8e08309,     0xf8fa03d0,     0xf8e312ea,
-    0xf8ea2244,     0xf8e2310b,     0xf8ea522f,     0xf8e2418a,
-    0xf8ec71af,     0xf8e26287,     0xf87a8090,     0xf8620184,
-    0xf8721215,     0xf87222ab,     0xf877334c,     0xf87751dc,
-    0xf86b4038,     0xf86c715f,     0xf8706047,     0xb823826d,
-    0xb8310070,     0xb82113cb,     0xb82521e8,     0xb83d301e,
-    0xb8345287,     0xb83742bc,     0xb83b70b9,     0xb8216217,
-    0xb8bf8185,     0xb8a901fc,     0xb8bd13f6,     0xb8b320bf,
-    0xb8ae33f2,     0xb8b2529b,     0xb8b0416c,     0xb8a973c6,
-    0xb8b1639b,     0xb8fe8147,     0xb8f4008a,     0xb8f81231,
-    0xb8f623a3,     0xb8ef3276,     0xb8f35056,     0xb8ef4186,
-    0xb8f071ab,     0xb8f763c1,     0xb8738225,     0xb86202d0,
-    0xb86d12aa,     0xb87d219b,     0xb87b3023,     0xb87f5278,
-    0xb8714389,     0xb87b70ef,     0xb87563f7,
+    0x4e0a1fe1,     0x4e071fe1,     0x4cc0ac3f,     0x05a08020,
+    0x04b0e3e0,     0x0470e7e1,     0x042f9c20,     0x043f9c35,
+    0x047f9c20,     0x04ff9c20,     0x04299420,     0x04319160,
+    0x0461943e,     0x04a19020,     0x042053ff,     0x047f5401,
+    0x25208028,     0x2538cfe0,     0x2578d001,     0x25b8efe2,
+    0x25f8f007,     0xa400a3e0,     0xa4a8a7ea,     0xa547a814,
+    0xa4084ffe,     0xa55c53e0,     0xa5e1540b,     0xe400fbf6,
+    0xe408ffff,     0xe547e400,     0xe4014be0,     0xe4a84fe0,
+    0xe5f25000,     0x858043e0,     0x85a043ff,     0xe59f5d08,
+    0x1e601000,     0x1e603000,     0x1e621000,     0x1e623000,
+    0x1e641000,     0x1e643000,     0x1e661000,     0x1e663000,
+    0x1e681000,     0x1e683000,     0x1e6a1000,     0x1e6a3000,
+    0x1e6c1000,     0x1e6c3000,     0x1e6e1000,     0x1e6e3000,
+    0x1e701000,     0x1e703000,     0x1e721000,     0x1e723000,
+    0x1e741000,     0x1e743000,     0x1e761000,     0x1e763000,
+    0x1e781000,     0x1e783000,     0x1e7a1000,     0x1e7a3000,
+    0x1e7c1000,     0x1e7c3000,     0x1e7e1000,     0x1e7e3000,
+    0xf82f8075,     0xf8380328,     0xf8341230,     0xf8222001,
+    0xf8383064,     0xf82c539f,     0xf82a405a,     0xf82c73f2,
+    0xf82163ad,     0xf8a08193,     0xf8b101b6,     0xf8bc13fe,
+    0xf8a1239a,     0xf8a4309e,     0xf8a6535e,     0xf8b24109,
+    0xf8ac7280,     0xf8a16058,     0xf8e08309,     0xf8fa03d0,
+    0xf8e312ea,     0xf8ea2244,     0xf8e2310b,     0xf8ea522f,
+    0xf8e2418a,     0xf8ec71af,     0xf8e26287,     0xf87a8090,
+    0xf8620184,     0xf8721215,     0xf87222ab,     0xf877334c,
+    0xf87751dc,     0xf86b4038,     0xf86c715f,     0xf8706047,
+    0xb823826d,     0xb8310070,     0xb82113cb,     0xb82521e8,
+    0xb83d301e,     0xb8345287,     0xb83742bc,     0xb83b70b9,
+    0xb8216217,     0xb8bf8185,     0xb8a901fc,     0xb8bd13f6,
+    0xb8b320bf,     0xb8ae33f2,     0xb8b2529b,     0xb8b0416c,
+    0xb8a973c6,     0xb8b1639b,     0xb8fe8147,     0xb8f4008a,
+    0xb8f81231,     0xb8f623a3,     0xb8ef3276,     0xb8f35056,
+    0xb8ef4186,     0xb8f071ab,     0xb8f763c1,     0xb8738225,
+    0xb86202d0,     0xb86d12aa,     0xb87d219b,     0xb87b3023,
+    0xb87f5278,     0xb8714389,     0xb87b70ef,     0xb87563f7,
+    0x04fe0058,     0x04b60552,     0x65c00222,     0x65c20ad9,
+    0x65db046c,     0x0416b35c,     0x04001e49,     0x045085e4,
+    0x04daa856,     0x04d39cb4,     0x041191c0,     0x04900b79,
+    0x0497bb1a,     0x049ea4c0,     0x040805e0,     0x044a04a9,
+    0x0481069b,     0x049ca554,     0x65c09cd0,     0x65cd8fa2,
+    0x65c69ac2,     0x65c78f6e,     0x65828457,     0x04ddb14a,
+    0x65c2ac76,     0x65c0a430,     0x6581b190,     0x658da20c,
+    0x658194b4,     0x65fb1187,     0x65bc2450,     0x65b34624,
+    0x65f8750c,     0x04174152,     0x04107db3,     0x042e30e0,
+    0x04aa3119,     0x047b32d4,     0x049a2e43,     0x04182787,
+    0x04992a00,     0x044825f6,     0x040a2b36,     0x65c731be,
+    0x658621ab,     0x65983334,     0x04412624,
   };
 // END  Generated code -- do not edit
 
--- a/src/hotspot/cpu/aarch64/assembler_aarch64.hpp	Wed Sep 02 20:33:29 2020 -0700
+++ b/src/hotspot/cpu/aarch64/assembler_aarch64.hpp	Tue Sep 08 15:28:06 2020 +0800
@@ -139,6 +139,9 @@
 // Java stack pointer
 REGISTER_DECLARATION(Register, esp,      r20);
 
+// Preserved predicate register with all elements set TRUE.
+REGISTER_DECLARATION(PRegister, ptrue, p7);
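+// A usage sketch (illustrative, not part of this patch): whole-vector SVE
+// operations can pass ptrue as the governing predicate, e.g.
+//   __ sve_add(z0, __ S, ptrue, z1);  // all lanes of z0.s updated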
+
 #define assert_cond(ARG1) assert(ARG1, #ARG1)
 
 namespace asm_util {
@@ -273,6 +276,14 @@
     f(r->encoding_nocheck(), lsb + 4, lsb);
   }
 
+  void prf(PRegister r, int lsb) {
+    f(r->encoding_nocheck(), lsb + 3, lsb);
+  }
+
+  void pgrf(PRegister r, int lsb) {
+    f(r->encoding_nocheck(), lsb + 2, lsb);
+  }
+
   unsigned get(int msb = 31, int lsb = 0) {
     int nbits = msb - lsb + 1;
     unsigned mask = ((1U << nbits) - 1) << lsb;
@@ -561,6 +572,18 @@
   void lea(MacroAssembler *, Register) const;
 
   static bool offset_ok_for_immed(int64_t offset, uint shift);
+
+  static bool offset_ok_for_sve_immed(int64_t offset, int shift, int vl /* sve vector length */) {
+    if (offset % vl == 0) {
+      // Convert address offset into sve imm offset (MUL VL).
+      int sve_offset = offset / vl;
+      if (((-(1 << (shift - 1))) <= sve_offset) && (sve_offset < (1 << (shift - 1)))) {
+        // sve_offset can be encoded
+        return true;
+      }
+    }
+    return false;
+  }
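+
+  // Worked example (illustrative): with a 512-bit vector length (vl = 64
+  // bytes) and a 4-bit signed immediate (shift = 4), a byte offset of 128
+  // gives sve_offset = 128 / 64 = 2, which lies in [-8, 8) and is encodable
+  // as "#2, MUL VL"; an offset of 96 fails the (offset % vl == 0) check.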
 };
 
 // Convenience classes
@@ -684,6 +707,12 @@
   void rf(FloatRegister reg, int lsb) {
     current->rf(reg, lsb);
   }
+  void prf(PRegister reg, int lsb) {
+    current->prf(reg, lsb);
+  }
+  void pgrf(PRegister reg, int lsb) {
+    current->pgrf(reg, lsb);
+  }
   void fixed(unsigned value, unsigned mask) {
     current->fixed(value, mask);
   }
@@ -2104,6 +2133,21 @@
 #undef INSN
 #undef INSN1
 
+// Floating-point compare, 3-register versions (scalar).
+#define INSN(NAME, sz, e)                                             \
+  void NAME(FloatRegister Vd, FloatRegister Vn, FloatRegister Vm) {   \
+    starti;                                                           \
+    f(0b01111110, 31, 24), f(e, 23), f(sz, 22), f(1, 21), rf(Vm, 16); \
+    f(0b111011, 15, 10), rf(Vn, 5), rf(Vd, 0);                        \
+  }                                                                   \
+
+  INSN(facged, 1, 0); // facge-double
+  INSN(facges, 0, 0); // facge-single
+  INSN(facgtd, 1, 1); // facgt-double
+  INSN(facgts, 0, 1); // facgt-single
+
+#undef INSN
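+  // FACGE/FACGT compare absolute values; e.g. (illustrative) facges(v0, v1, v2)
+  // assembles "facge s0, s1, s2", setting s0 to all-ones when |s1| >= |s2|.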
+
   // Floating-point Move (immediate)
 private:
   unsigned pack(double value);
@@ -2554,20 +2598,17 @@
     f(sidx<<(int)T, 14, 11), f(1, 10), rf(Vn, 5), rf(Vd, 0);
   }
 
-  void umov(Register Rd, FloatRegister Vn, SIMD_RegVariant T, int idx) {
-    starti;
-    f(0, 31), f(T==D ? 1:0, 30), f(0b001110000, 29, 21);
-    f(((idx<<1)|1)<<(int)T, 20, 16), f(0b001111, 15, 10);
-    rf(Vn, 5), rf(Rd, 0);
+#define INSN(NAME, op)                                                     \
+  void NAME(Register Rd, FloatRegister Vn, SIMD_RegVariant T, int idx) {   \
+    starti;                                                                \
+    f(0, 31), f(T==D ? 1:0, 30), f(0b001110000, 29, 21);                   \
+    f(((idx<<1)|1)<<(int)T, 20, 16), f(op, 15, 10);                        \
+    rf(Vn, 5), rf(Rd, 0);                                                  \
   }
 
-  void smov(Register Rd, FloatRegister Vn, SIMD_RegVariant T, int idx) {
-    starti;
-    f(0, 31), f(T==D ? 1:0, 30), f(0b001110000, 29, 21);
-    f(((idx<<1)|1)<<(int)T, 20, 16), f(0b001011, 15, 10);
-    rf(Vn, 5), rf(Rd, 0);
-  }
-
+  INSN(umov, 0b001111);
+  INSN(smov, 0b001011);
+#undef INSN
 
 #define INSN(NAME, opc, opc2, isSHR)                                    \
   void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, int shift){ \
@@ -2598,6 +2639,20 @@
 
 #undef INSN
 
+#define INSN(NAME, opc, opc2, isSHR)                                    \
+  void NAME(FloatRegister Vd, FloatRegister Vn, int shift){             \
+    starti;                                                             \
+    int encodedShift = isSHR ? 128 - shift : 64 + shift;                \
+    f(0b01, 31, 30), f(opc, 29), f(0b111110, 28, 23),                   \
+    f(encodedShift, 22, 16); f(opc2, 15, 10), rf(Vn, 5), rf(Vd, 0);     \
+  }
+
+  INSN(shld,  0, 0b010101, /* isSHR = */ false);
+  INSN(sshrd, 0, 0b000001, /* isSHR = */ true);
+  INSN(ushrd, 1, 0b000001, /* isSHR = */ true);
+
+#undef INSN
+
 private:
   void _xshll(sign_kind sign, FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, SIMD_Arrangement Tb, int shift) {
     starti;
@@ -2814,7 +2869,7 @@
 
 #undef INSN
 
-void ext(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, FloatRegister Vm, int index)
+  void ext(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, FloatRegister Vm, int index)
   {
     starti;
     assert(T == T8B || T == T16B, "invalid arrangement");
@@ -2824,6 +2879,292 @@
     f(0, 10), rf(Vn, 5), rf(Vd, 0);
   }
 
+// SVE integer arithmetic - unpredicated
+#define INSN(NAME, opcode)                                                             \
+  void NAME(FloatRegister Zd, SIMD_RegVariant T, FloatRegister Zn, FloatRegister Zm) { \
+    starti;                                                                            \
+    assert(T != Q, "invalid register variant");                                        \
+    f(0b00000100, 31, 24), f(T, 23, 22), f(1, 21),                                     \
+    rf(Zm, 16), f(0, 15, 13), f(opcode, 12, 10), rf(Zn, 5), rf(Zd, 0);                 \
+  }
+  INSN(sve_add, 0b000);
+  INSN(sve_sub, 0b001);
+#undef INSN
+
+// SVE floating-point arithmetic - unpredicated
+#define INSN(NAME, opcode)                                                             \
+  void NAME(FloatRegister Zd, SIMD_RegVariant T, FloatRegister Zn, FloatRegister Zm) { \
+    starti;                                                                            \
+    assert(T == S || T == D, "invalid register variant");                              \
+    f(0b01100101, 31, 24), f(T, 23, 22), f(0, 21),                                     \
+    rf(Zm, 16), f(0, 15, 13), f(opcode, 12, 10), rf(Zn, 5), rf(Zd, 0);                 \
+  }
+
+  INSN(sve_fadd, 0b000);
+  INSN(sve_fmul, 0b010);
+  INSN(sve_fsub, 0b001);
+#undef INSN
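+  // e.g. (illustrative) sve_fadd(z0, S, z1, z2) assembles "fadd z0.s, z1.s, z2.s".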
+
+private:
+  void sve_predicate_reg_insn(unsigned op24, unsigned op13,
+                              FloatRegister Zd_or_Vd, SIMD_RegVariant T,
+                              PRegister Pg, FloatRegister Zn_or_Vn) {
+    starti;
+    f(op24, 31, 24), f(T, 23, 22), f(op13, 21, 13);
+    pgrf(Pg, 10), rf(Zn_or_Vn, 5), rf(Zd_or_Vd, 0);
+  }
+
+public:
+
+// SVE integer arithmetic - predicated
+#define INSN(NAME, op1, op2)                                                                            \
+  void NAME(FloatRegister Zdn_or_Zd_or_Vd, SIMD_RegVariant T, PRegister Pg, FloatRegister Znm_or_Vn) {  \
+    assert(T != Q, "invalid register variant");                                                         \
+    sve_predicate_reg_insn(op1, op2, Zdn_or_Zd_or_Vd, T, Pg, Znm_or_Vn);                                \
+  }
+
+  INSN(sve_abs,  0b00000100, 0b010110101); // vector abs, unary
+  INSN(sve_add,  0b00000100, 0b000000000); // vector add
+  INSN(sve_andv, 0b00000100, 0b011010001); // bitwise and reduction to scalar
+  INSN(sve_asr,  0b00000100, 0b010000100); // vector arithmetic shift right
+  INSN(sve_cnt,  0b00000100, 0b011010101); // count non-zero bits
+  INSN(sve_cpy,  0b00000101, 0b100000100); // copy scalar to each active vector element
+  INSN(sve_eorv, 0b00000100, 0b011001001); // bitwise xor reduction to scalar
+  INSN(sve_lsl,  0b00000100, 0b010011100); // vector logical shift left
+  INSN(sve_lsr,  0b00000100, 0b010001100); // vector logical shift right
+  INSN(sve_mul,  0b00000100, 0b010000000); // vector mul
+  INSN(sve_neg,  0b00000100, 0b010111101); // vector neg, unary
+  INSN(sve_not,  0b00000100, 0b011110101); // bitwise invert vector, unary
+  INSN(sve_orv,  0b00000100, 0b011000001); // bitwise or reduction to scalar
+  INSN(sve_smax, 0b00000100, 0b001000000); // signed maximum vectors
+  INSN(sve_smaxv, 0b00000100, 0b001000001); // signed maximum reduction to scalar
+  INSN(sve_smin,  0b00000100, 0b001010000); // signed minimum vectors
+  INSN(sve_sminv, 0b00000100, 0b001010001); // signed minimum reduction to scalar
+  INSN(sve_sub,   0b00000100, 0b000001000); // vector sub
+  INSN(sve_uaddv, 0b00000100, 0b000001001); // unsigned add reduction to scalar
+#undef INSN
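+
+  // Usage sketch (illustrative): most of these are destructive merging forms,
+  //   sve_add(z0, S, ptrue, z1)  -> "add   z0.s, p7/m, z0.s, z1.s"
+  // while the *v reductions write a scalar SIMD register,
+  //   sve_uaddv(v0, S, p1, z2)   -> "uaddv d0, p1, z2.s" (uaddv widens to D).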
+
+// SVE floating-point arithmetic - predicated
+#define INSN(NAME, op1, op2)                                                                          \
+  void NAME(FloatRegister Zd_or_Zdn_or_Vd, SIMD_RegVariant T, PRegister Pg, FloatRegister Zn_or_Zm) { \
+    assert(T == S || T == D, "invalid register variant");                                             \
+    sve_predicate_reg_insn(op1, op2, Zd_or_Zdn_or_Vd, T, Pg, Zn_or_Zm);                               \
+  }
+
+  INSN(sve_fabs,    0b00000100, 0b011100101);
+  INSN(sve_fadd,    0b01100101, 0b000000100);
+  INSN(sve_fadda,   0b01100101, 0b011000001); // add strictly-ordered reduction to scalar Vd
+  INSN(sve_fdiv,    0b01100101, 0b001101100);
+  INSN(sve_fmax,    0b01100101, 0b000110100); // floating-point maximum
+  INSN(sve_fmaxv,   0b01100101, 0b000110001); // floating-point maximum recursive reduction to scalar
+  INSN(sve_fmin,    0b01100101, 0b000111100); // floating-point minimum
+  INSN(sve_fminv,   0b01100101, 0b000111001); // floating-point minimum recursive reduction to scalar
+  INSN(sve_fmul,    0b01100101, 0b000010100);
+  INSN(sve_fneg,    0b00000100, 0b011101101);
+  INSN(sve_frintm,  0b01100101, 0b000010101); // floating-point round to integral value, toward minus infinity
+  INSN(sve_frintn,  0b01100101, 0b000000101); // floating-point round to integral value, nearest with ties to even
+  INSN(sve_frintp,  0b01100101, 0b000001101); // floating-point round to integral value, toward plus infinity
+  INSN(sve_fsqrt,   0b01100101, 0b001101101);
+  INSN(sve_fsub,    0b01100101, 0b000001100);
+#undef INSN
+
+  // SVE multiply-add/subtract - predicated
+#define INSN(NAME, op0, op1, op2)                                                                     \
+  void NAME(FloatRegister Zda, SIMD_RegVariant T, PRegister Pg, FloatRegister Zn, FloatRegister Zm) { \
+    starti;                                                                                           \
+    assert(T != Q, "invalid size");                                                                   \
+    f(op0, 31, 24), f(T, 23, 22), f(op1, 21), rf(Zm, 16);                                             \
+    f(op2, 15, 13), pgrf(Pg, 10), rf(Zn, 5), rf(Zda, 0);                                              \
+  }
+
+  INSN(sve_fmla,  0b01100101, 1, 0b000); // floating-point fused multiply-add: Zda = Zda + Zn * Zm
+  INSN(sve_fmls,  0b01100101, 1, 0b001); // floating-point fused multiply-subtract: Zda = Zda + -Zn * Zm
+  INSN(sve_fnmla, 0b01100101, 1, 0b010); // floating-point negated fused multiply-add: Zda = -Zda + -Zn * Zm
+  INSN(sve_fnmls, 0b01100101, 1, 0b011); // floating-point negated fused multiply-subtract: Zda = -Zda + Zn * Zm
+  INSN(sve_mla,   0b00000100, 0, 0b010); // multiply-add: Zda = Zda + Zn*Zm
+  INSN(sve_mls,   0b00000100, 0, 0b011); // multiply-subtract: Zda = Zda + -Zn*Zm
+#undef INSN
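+  // e.g. (illustrative) sve_fmla(z0, D, ptrue, z1, z2) accumulates
+  // z0.d += z1.d * z2.d over active lanes: "fmla z0.d, p7/m, z1.d, z2.d".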
+
+// SVE bitwise logical - unpredicated
+#define INSN(NAME, opc)                                              \
+  void NAME(FloatRegister Zd, FloatRegister Zn, FloatRegister Zm) {  \
+    starti;                                                          \
+    f(0b00000100, 31, 24), f(opc, 23, 22), f(1, 21),                 \
+    rf(Zm, 16), f(0b001100, 15, 10), rf(Zn, 5), rf(Zd, 0);           \
+  }
+  INSN(sve_and, 0b00);
+  INSN(sve_eor, 0b10);
+  INSN(sve_orr, 0b01);
+#undef INSN
+
+// SVE shift immediate - unpredicated
+#define INSN(NAME, opc, isSHR)                                                  \
+  void NAME(FloatRegister Zd, SIMD_RegVariant T, FloatRegister Zn, int shift) { \
+    starti;                                                                     \
+    /* The encoding of the tszh:tszl:imm3 fields (bits 23:22, 20:19, 18:16)    \
+     * for shift right is:                                                     \
+     *   0001 xxx       B, shift = 16  - UInt(tszh:tszl:imm3)                  \
+     *   001x xxx       H, shift = 32  - UInt(tszh:tszl:imm3)                  \
+     *   01xx xxx       S, shift = 64  - UInt(tszh:tszl:imm3)                  \
+     *   1xxx xxx       D, shift = 128 - UInt(tszh:tszl:imm3)                  \
+     * and for shift left is:                                                  \
+     *   0001 xxx       B, shift = UInt(tszh:tszl:imm3) - 8                     \
+     *   001x xxx       H, shift = UInt(tszh:tszl:imm3) - 16                    \
+     *   01xx xxx       S, shift = UInt(tszh:tszl:imm3) - 32                    \
+     *   1xxx xxx       D, shift = UInt(tszh:tszl:imm3) - 64                    \
+     */                                                                         \
+    assert(T != Q, "Invalid register variant");                                 \
+    if (isSHR) {                                                                \
+      assert(((1 << (T + 3)) >= shift) && (shift > 0), "Invalid shift value");  \
+    } else {                                                                    \
+      assert(((1 << (T + 3)) > shift) && (shift >= 0), "Invalid shift value");  \
+    }                                                                           \
+    int cVal = (1 << ((T + 3) + (isSHR ? 1 : 0)));                              \
+    int encodedShift = isSHR ? cVal - shift : cVal + shift;                     \
+    int tszh = encodedShift >> 5;                                               \
+    int tszl_imm = encodedShift & 0x1f;                                         \
+    f(0b00000100, 31, 24);                                                      \
+    f(tszh, 23, 22), f(1,21), f(tszl_imm, 20, 16);                              \
+    f(0b100, 15, 13), f(opc, 12, 10), rf(Zn, 5), rf(Zd, 0);                     \
+  }
+
+  INSN(sve_asr, 0b100, /* isSHR = */ true);
+  INSN(sve_lsl, 0b111, /* isSHR = */ false);
+  INSN(sve_lsr, 0b101, /* isSHR = */ true);
+#undef INSN
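+
+  // Worked encoding example (illustrative): sve_lsr(z0, S, z1, 7) computes
+  // cVal = 1 << 6 = 64 and encodedShift = 64 - 7 = 57 = 0b0111001, so
+  // tszh = 0b01 and tszl:imm3 = 0b11001 -- the "01xx xxx" S row above,
+  // from which the decoder recovers shift = 64 - UInt(0b0111001) = 7.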
+
+private:
+
+  // Scalar base + immediate index
+  void sve_ld_st1(FloatRegister Zt, Register Xn, int imm, PRegister Pg,
+              SIMD_RegVariant T, int op1, int type, int op2) {
+    starti;
+    assert_cond(T >= type);
+    f(op1, 31, 25), f(type, 24, 23), f(T, 22, 21);
+    f(0, 20), sf(imm, 19, 16), f(op2, 15, 13);
+    pgrf(Pg, 10), srf(Xn, 5), rf(Zt, 0);
+  }
+
+  // Scalar base + scalar index
+  void sve_ld_st1(FloatRegister Zt, Register Xn, Register Xm, PRegister Pg,
+              SIMD_RegVariant T, int op1, int type, int op2) {
+    starti;
+    assert_cond(T >= type);
+    f(op1, 31, 25), f(type, 24, 23), f(T, 22, 21);
+    rf(Xm, 16), f(op2, 15, 13);
+    pgrf(Pg, 10), srf(Xn, 5), rf(Zt, 0);
+  }
+
+  void sve_ld_st1(FloatRegister Zt, PRegister Pg,
+              SIMD_RegVariant T, const Address &a,
+              int op1, int type, int imm_op2, int scalar_op2) {
+    switch (a.getMode()) {
+    case Address::base_plus_offset:
+      sve_ld_st1(Zt, a.base(), a.offset(), Pg, T, op1, type, imm_op2);
+      break;
+    case Address::base_plus_offset_reg:
+      sve_ld_st1(Zt, a.base(), a.index(), Pg, T, op1, type, scalar_op2);
+      break;
+    default:
+      ShouldNotReachHere();
+    }
+  }
+
+public:
+
+// SVE load/store - predicated
+#define INSN(NAME, op1, type, imm_op2, scalar_op2)                                   \
+  void NAME(FloatRegister Zt, SIMD_RegVariant T, PRegister Pg, const Address &a) {   \
+    assert(T != Q, "invalid register variant");                                      \
+    sve_ld_st1(Zt, Pg, T, a, op1, type, imm_op2, scalar_op2);                        \
+  }
+
+  INSN(sve_ld1b, 0b1010010, 0b00, 0b101, 0b010);
+  INSN(sve_st1b, 0b1110010, 0b00, 0b111, 0b010);
+  INSN(sve_ld1h, 0b1010010, 0b01, 0b101, 0b010);
+  INSN(sve_st1h, 0b1110010, 0b01, 0b111, 0b010);
+  INSN(sve_ld1w, 0b1010010, 0b10, 0b101, 0b010);
+  INSN(sve_st1w, 0b1110010, 0b10, 0b111, 0b010);
+  INSN(sve_ld1d, 0b1010010, 0b11, 0b101, 0b010);
+  INSN(sve_st1d, 0b1110010, 0b11, 0b111, 0b010);
+#undef INSN
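+
+  // Usage sketch for the two address modes accepted above (illustrative only;
+  // the immediate form counts in multiples of the vector length, per the
+  // sve_ld_st1 overloads):
+  //   sve_ld1w(z0, S, p0, Address(r1));       // base only, imm = 0
+  //   sve_ld1w(z0, S, p0, Address(r1, 2));    // scalar base + imm: r1 + 2 * VL
+  //   sve_st1d(z1, D, p1, Address(r1, r2));   // scalar base + scalar index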
+
+// SVE load/store - unpredicated
+#define INSN(NAME, op1)                                                         \
+  void NAME(FloatRegister Zt, const Address &a)  {                              \
+    starti;                                                                     \
+    assert(a.index() == noreg, "invalid address variant");                      \
+    f(op1, 31, 29), f(0b0010110, 28, 22), sf(a.offset() >> 3, 21, 16),          \
+    f(0b010, 15, 13), f(a.offset() & 0x7, 12, 10), srf(a.base(), 5), rf(Zt, 0); \
+  }
+
+  INSN(sve_ldr, 0b100); // LDR (vector)
+  INSN(sve_str, 0b111); // STR (vector)
+#undef INSN
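+
+  // The LDR/STR (vector) offset above is a signed multiple of the vector
+  // length split across two fields; e.g. (illustrative only):
+  //   sve_str(z0, Address(sp, 10));  // STR z0, [sp, #10, MUL VL]
+  //   imm6 = 10 >> 3 = 1 (bits 21:16), imm3 = 10 & 0x7 = 2 (bits 12:10)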
+
+#define INSN(NAME, op) \
+  void NAME(Register Xd, Register Xn, int imm6) {                 \
+    starti;                                                       \
+    f(0b000001000, 31, 23), f(op, 22, 21);                        \
+    srf(Xn, 16), f(0b01010, 15, 11), sf(imm6, 10, 5), srf(Xd, 0); \
+  }
+
+  INSN(sve_addvl, 0b01);
+  INSN(sve_addpl, 0b11);
+#undef INSN
+
+// SVE inc/dec register by element count
+#define INSN(NAME, op) \
+  void NAME(Register Xdn, SIMD_RegVariant T, unsigned imm4 = 1, int pattern = 0b11111) { \
+    starti;                                                                              \
+    assert(T != Q, "invalid size");                                                      \
+    f(0b00000100, 31, 24), f(T, 23, 22), f(0b11, 21, 20);                                \
+    f(imm4 - 1, 19, 16), f(0b11100, 15, 11), f(op, 10), f(pattern, 9, 5), rf(Xdn, 0);    \
+  }
+
+  INSN(sve_inc, 0);
+  INSN(sve_dec, 1);
+#undef INSN
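+
+  // Illustrative: sve_inc(x0, B) emits INCB x0 (pattern ALL, multiplier 1,
+  // encoded as imm4 - 1 = 0), adding the byte lane count to x0.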
+
+  // SVE predicate count
+  void sve_cntp(Register Xd, SIMD_RegVariant T, PRegister Pg, PRegister Pn) {
+    starti;
+    assert(T != Q, "invalid size");
+    f(0b00100101, 31, 24), f(T, 23, 22), f(0b10000010, 21, 14);
+    prf(Pg, 10), f(0, 9), prf(Pn, 5), rf(Xd, 0);
+  }
+
+  // SVE dup scalar
+  void sve_dup(FloatRegister Zd, SIMD_RegVariant T, Register Rn) {
+    starti;
+    assert(T != Q, "invalid size");
+    f(0b00000101, 31, 24), f(T, 23, 22), f(0b100000001110, 21, 10);
+    srf(Rn, 5), rf(Zd, 0);
+  }
+
+  // SVE dup imm
+  void sve_dup(FloatRegister Zd, SIMD_RegVariant T, int imm8) {
+    starti;
+    assert(T != Q, "invalid size");
+    int sh = 0;
+    if (imm8 <= 127 && imm8 >= -128) {
+      sh = 0;
+    } else if (T != B && imm8 <= 32512 && imm8 >= -32768 && (imm8 & 0xff) == 0) {
+      sh = 1;
+      imm8 = (imm8 >> 8);
+    } else {
+      guarantee(false, "invalid immediate");
+    }
+    f(0b00100101, 31, 24), f(T, 23, 22), f(0b11100011, 21, 14);
+    f(sh, 13), sf(imm8, 12, 5), rf(Zd, 0);
+  }
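+
+  // How the immediate form above selects the shifted encoding (illustrative):
+  //   sve_dup(z0, B, 100);     // fits in imm8: sh = 0, imm8 = 100
+  //   sve_dup(z1, H, 0x1200);  // low byte clear: sh = 1, imm8 = 0x12
+  //   sve_dup(z2, B, 0x1200);  // guarantee fails: B has no shifted form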
+
+  void sve_ptrue(PRegister pd, SIMD_RegVariant esize, int pattern = 0b11111) {
+    starti;
+    f(0b00100101, 31, 24), f(esize, 23, 22), f(0b011000111000, 21, 10);
+    f(pattern, 9, 5), f(0b0, 4), prf(pd, 0);
+  }
+
   Assembler(CodeBuffer* code) : AbstractAssembler(code) {
   }
 
--- a/src/hotspot/cpu/aarch64/gc/z/zBarrierSetAssembler_aarch64.cpp	Wed Sep 02 20:33:29 2020 -0700
+++ b/src/hotspot/cpu/aarch64/gc/z/zBarrierSetAssembler_aarch64.cpp	Tue Sep 08 15:28:06 2020 +0800
@@ -431,8 +431,12 @@
     ZSetupArguments setup_arguments(masm, stub);
     __ mov(rscratch1, stub->slow_path());
     __ blr(rscratch1);
+    if (UseSVE > 0) {
+      // Reinitialize the ptrue predicate register, in case the external runtime
+      // call clobbers ptrue reg, as we may return to SVE compiled code.
+      __ reinitialize_ptrue();
+    }
   }
-
   // Stub exit
   __ b(*stub->continuation());
 }
--- a/src/hotspot/cpu/aarch64/globals_aarch64.hpp	Wed Sep 02 20:33:29 2020 -0700
+++ b/src/hotspot/cpu/aarch64/globals_aarch64.hpp	Tue Sep 08 15:28:06 2020 +0800
@@ -99,6 +99,9 @@
           "Avoid generating unaligned memory accesses")                 \
   product(bool, UseLSE, false,                                          \
           "Use LSE instructions")                                       \
+  product(uint, UseSVE, 0,                                              \
+          "Highest supported SVE instruction set version")              \
+          range(0, 2)                                                   \
   product(bool, UseBlockZeroing, true,                                  \
           "Use DC ZVA for block zeroing")                               \
   product(intx, BlockZeroingLowLimit, 256,                              \
--- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp	Wed Sep 02 20:33:29 2020 -0700
+++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp	Tue Sep 08 15:28:06 2020 +0800
@@ -2117,9 +2117,16 @@
 }
 
 // Push lots of registers in the bit set supplied.  Don't push sp.
-// Return the number of words pushed
+// Return the number of dwords pushed
 int MacroAssembler::push_fp(unsigned int bitset, Register stack) {
   int words_pushed = 0;
+  bool use_sve = false;
+  int sve_vector_size_in_bytes = 0;
+
+#ifdef COMPILER2
+  use_sve = Matcher::supports_scalable_vector();
+  sve_vector_size_in_bytes = Matcher::scalable_vector_reg_size(T_BYTE);
+#endif
 
   // Scan bitset to accumulate register pairs
   unsigned char regs[32];
@@ -2134,9 +2141,19 @@
     return 0;
   }
 
+  // SVE
+  if (use_sve && sve_vector_size_in_bytes > 16) {
+    sub(stack, stack, sve_vector_size_in_bytes * count);
+    for (int i = 0; i < count; i++) {
+      sve_str(as_FloatRegister(regs[i]), Address(stack, i));
+    }
+    return count * sve_vector_size_in_bytes / 8;
+  }
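+  // (Illustration: with a 32-byte vector length, pushing 3 registers reserves
+  // 3 * 32 = 96 bytes and the function returns 96 / 8 = 12 dwords.)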
+
+  // NEON
   if (count == 1) {
     strq(as_FloatRegister(regs[0]), Address(pre(stack, -wordSize * 2)));
-    return 1;
+    return 2;
   }
 
   bool odd = (count & 1) == 1;
@@ -2157,12 +2174,19 @@
   }
 
   assert(words_pushed == count, "oops, pushed(%d) != count(%d)", words_pushed, count);
-  return count;
-}
-
+  return count * 2;
+}
+
+// Return the number of dwords popped
 int MacroAssembler::pop_fp(unsigned int bitset, Register stack) {
   int words_pushed = 0;
-
+  bool use_sve = false;
+  int sve_vector_size_in_bytes = 0;
+
+#ifdef COMPILER2
+  use_sve = Matcher::supports_scalable_vector();
+  sve_vector_size_in_bytes = Matcher::scalable_vector_reg_size(T_BYTE);
+#endif
   // Scan bitset to accumulate register pairs
   unsigned char regs[32];
   int count = 0;
@@ -2176,9 +2200,19 @@
     return 0;
   }
 
+  // SVE
+  if (use_sve && sve_vector_size_in_bytes > 16) {
+    for (int i = count - 1; i >= 0; i--) {
+      sve_ldr(as_FloatRegister(regs[i]), Address(stack, i));
+    }
+    add(stack, stack, sve_vector_size_in_bytes * count);
+    return count * sve_vector_size_in_bytes / 8;
+  }
+
+  // NEON
   if (count == 1) {
     ldrq(as_FloatRegister(regs[0]), Address(post(stack, wordSize * 2)));
-    return 1;
+    return 2;
   }
 
   bool odd = (count & 1) == 1;
@@ -2199,7 +2233,7 @@
 
   assert(words_pushed == count, "oops, pushed(%d) != count(%d)", words_pushed, count);
 
-  return count;
+  return count * 2;
 }
 
 #ifdef ASSERT
@@ -2647,23 +2681,39 @@
   pop(RegSet::range(r0, r18) - RegSet::of(rscratch1, rscratch2) - exclude, sp);
 }
 
-void MacroAssembler::push_CPU_state(bool save_vectors) {
-  int step = (save_vectors ? 8 : 4) * wordSize;
+void MacroAssembler::push_CPU_state(bool save_vectors, bool use_sve,
+                                    int sve_vector_size_in_bytes) {
   push(0x3fffffff, sp);         // integer registers except lr & sp
-  mov(rscratch1, -step);
-  sub(sp, sp, step);
-  for (int i = 28; i >= 4; i -= 4) {
-    st1(as_FloatRegister(i), as_FloatRegister(i+1), as_FloatRegister(i+2),
-        as_FloatRegister(i+3), save_vectors ? T2D : T1D, Address(post(sp, rscratch1)));
+  if (save_vectors && use_sve && sve_vector_size_in_bytes > 16) {
+    sub(sp, sp, sve_vector_size_in_bytes * FloatRegisterImpl::number_of_registers);
+    for (int i = 0; i < FloatRegisterImpl::number_of_registers; i++) {
+      sve_str(as_FloatRegister(i), Address(sp, i));
+    }
+  } else {
+    int step = (save_vectors ? 8 : 4) * wordSize;
+    mov(rscratch1, -step);
+    sub(sp, sp, step);
+    for (int i = 28; i >= 4; i -= 4) {
+      st1(as_FloatRegister(i), as_FloatRegister(i+1), as_FloatRegister(i+2),
+          as_FloatRegister(i+3), save_vectors ? T2D : T1D, Address(post(sp, rscratch1)));
+    }
+    st1(v0, v1, v2, v3, save_vectors ? T2D : T1D, sp);
   }
-  st1(v0, v1, v2, v3, save_vectors ? T2D : T1D, sp);
-}
-
-void MacroAssembler::pop_CPU_state(bool restore_vectors) {
-  int step = (restore_vectors ? 8 : 4) * wordSize;
-  for (int i = 0; i <= 28; i += 4)
-    ld1(as_FloatRegister(i), as_FloatRegister(i+1), as_FloatRegister(i+2),
-        as_FloatRegister(i+3), restore_vectors ? T2D : T1D, Address(post(sp, step)));
+}
+
+void MacroAssembler::pop_CPU_state(bool restore_vectors, bool use_sve,
+                                   int sve_vector_size_in_bytes) {
+  if (restore_vectors && use_sve && sve_vector_size_in_bytes > 16) {
+    for (int i = FloatRegisterImpl::number_of_registers - 1; i >= 0; i--) {
+      sve_ldr(as_FloatRegister(i), Address(sp, i));
+    }
+    add(sp, sp, sve_vector_size_in_bytes * FloatRegisterImpl::number_of_registers);
+  } else {
+    int step = (restore_vectors ? 8 : 4) * wordSize;
+    for (int i = 0; i <= 28; i += 4)
+      ld1(as_FloatRegister(i), as_FloatRegister(i+1), as_FloatRegister(i+2),
+          as_FloatRegister(i+3), restore_vectors ? T2D : T1D, Address(post(sp, step)));
+  }
   pop(0x3fffffff, sp);         // integer registers except lr & sp
 }
 
@@ -2712,6 +2762,21 @@
   return Address(base, offset);
 }
 
+Address MacroAssembler::sve_spill_address(int sve_reg_size_in_bytes, int offset, Register tmp) {
+  assert(offset >= 0, "spill to negative address?");
+
+  Register base = sp;
+
+  // An immediate offset in the range 0 to 255, which is multiplied
+  // by the current vector or predicate register size in bytes.
+  if (offset % sve_reg_size_in_bytes == 0 && offset < ((1<<8)*sve_reg_size_in_bytes)) {
+    return Address(base, offset / sve_reg_size_in_bytes);
+  }
+
+  add(tmp, base, offset);
+  return Address(tmp);
+}
+
 // Checks whether offset is aligned.
 // Returns true if it is, else false.
 bool MacroAssembler::merge_alignment_check(Register base,
@@ -5221,3 +5286,24 @@
     membar(Assembler::AnyAny);
   }
 }
+
+void MacroAssembler::verify_sve_vector_length() {
+  Label verify_ok;
+  assert(UseSVE > 0, "should only be used for SVE");
+  movw(rscratch1, zr);
+  sve_inc(rscratch1, B);
+  subsw(zr, rscratch1, VM_Version::get_initial_sve_vector_length());
+  br(EQ, verify_ok);
+  stop("Error: SVE vector length has changed since jvm startup");
+  bind(verify_ok);
+}
+
+void MacroAssembler::verify_ptrue() {
+  Label verify_ok;
+  assert(UseSVE > 0, "should only be used for SVE");
+  sve_cntp(rscratch1, B, ptrue, ptrue); // get true elements count.
+  sve_dec(rscratch1, B);
+  cbz(rscratch1, verify_ok);
+  stop("Error: the preserved predicate register (p7) elements are not all true");
+  bind(verify_ok);
+}
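+
+// Why the check above works (illustrative trace, 256-bit vector length,
+// p7 all-true):
+//   sve_cntp(rscratch1, B, ptrue, ptrue)  // rscratch1 = 32 active byte lanes
+//   sve_dec(rscratch1, B)                 // subtract 32, the byte lanes per vector
+//   cbz(rscratch1, verify_ok)             // zero iff every lane of p7 was set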
--- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp	Wed Sep 02 20:33:29 2020 -0700
+++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp	Tue Sep 08 15:28:06 2020 +0800
@@ -873,8 +873,10 @@
 
   DEBUG_ONLY(void verify_heapbase(const char* msg);)
 
-  void push_CPU_state(bool save_vectors = false);
-  void pop_CPU_state(bool restore_vectors = false) ;
+  void push_CPU_state(bool save_vectors = false, bool use_sve = false,
+                      int sve_vector_size_in_bytes = 0);
+  void pop_CPU_state(bool restore_vectors = false, bool use_sve = false,
+                      int sve_vector_size_in_bytes = 0);
 
   // Round up to a power of two
   void round_to(Register reg, int modulus);
@@ -954,6 +956,11 @@
 
   Address argument_address(RegisterOrConstant arg_slot, int extra_slot_offset = 0);
 
+  void verify_sve_vector_length();
+  void reinitialize_ptrue() {
+    sve_ptrue(ptrue, B);
+  }
+  void verify_ptrue();
 
   // Debugging
 
@@ -1303,6 +1310,7 @@
   // Returns an address on the stack which is reachable with a ldr/str of size
   // Uses rscratch2 if the address is not directly reachable
   Address spill_address(int size, int offset, Register tmp=rscratch2);
+  Address sve_spill_address(int sve_reg_size_in_bytes, int offset, Register tmp=rscratch2);
 
   bool merge_alignment_check(Register base, size_t size, int64_t cur_offset, int64_t prev_offset) const;
 
@@ -1326,6 +1334,9 @@
   void spill(FloatRegister Vx, SIMD_RegVariant T, int offset) {
     str(Vx, T, spill_address(1 << (int)T, offset));
   }
+  void spill_sve_vector(FloatRegister Zx, int offset, int vector_reg_size_in_bytes) {
+    sve_str(Zx, sve_spill_address(vector_reg_size_in_bytes, offset));
+  }
   void unspill(Register Rx, bool is64, int offset) {
     if (is64) {
       ldr(Rx, spill_address(8, offset));
@@ -1336,6 +1347,9 @@
   void unspill(FloatRegister Vx, SIMD_RegVariant T, int offset) {
     ldr(Vx, T, spill_address(1 << (int)T, offset));
   }
+  void unspill_sve_vector(FloatRegister Zx, int offset, int vector_reg_size_in_bytes) {
+    sve_ldr(Zx, sve_spill_address(vector_reg_size_in_bytes, offset));
+  }
   void spill_copy128(int src_offset, int dst_offset,
                      Register tmp1=rscratch1, Register tmp2=rscratch2) {
     if (src_offset < 512 && (src_offset & 7) == 0 &&
@@ -1349,7 +1363,15 @@
       spill(tmp1, true, dst_offset+8);
     }
   }
-
+  void spill_copy_sve_vector_stack_to_stack(int src_offset, int dst_offset,
+                                            int sve_vec_reg_size_in_bytes) {
+    assert(sve_vec_reg_size_in_bytes % 16 == 0, "unexpected sve vector reg size");
+    for (int i = 0; i < sve_vec_reg_size_in_bytes / 16; i++) {
+      spill_copy128(src_offset, dst_offset);
+      src_offset += 16;
+      dst_offset += 16;
+    }
+  }
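+  // (Illustration: with a 32-byte SVE vector,
+  // spill_copy_sve_vector_stack_to_stack(0, 64, 32) issues two spill_copy128
+  // calls, moving [sp+0, sp+32) to [sp+64, sp+96).)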
   void cache_wb(Address line);
   void cache_wbsync(bool is_pre);
 };
--- a/src/hotspot/cpu/aarch64/register_aarch64.cpp	Wed Sep 02 20:33:29 2020 -0700
+++ b/src/hotspot/cpu/aarch64/register_aarch64.cpp	Tue Sep 08 15:28:06 2020 +0800
@@ -1,6 +1,6 @@
 /*
- * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2014, Red Hat Inc. All rights reserved.
+ * Copyright (c) 2000, 2020, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -33,6 +33,9 @@
   = ConcreteRegisterImpl::max_gpr +
     FloatRegisterImpl::number_of_registers * FloatRegisterImpl::max_slots_per_register;
 
+const int ConcreteRegisterImpl::max_pr
+  = ConcreteRegisterImpl::max_fpr + PRegisterImpl::number_of_registers;
+
 const char* RegisterImpl::name() const {
   const char* names[number_of_registers] = {
     "c_rarg0", "c_rarg1", "c_rarg2", "c_rarg3", "c_rarg4", "c_rarg5", "c_rarg6", "c_rarg7",
@@ -54,3 +57,11 @@
   };
   return is_valid() ? names[encoding()] : "noreg";
 }
+
+const char* PRegisterImpl::name() const {
+  const char* names[number_of_registers] = {
+    "p0", "p1", "p2", "p3", "p4", "p5", "p6", "p7",
+    "p8", "p9", "p10", "p11", "p12", "p13", "p14", "p15"
+  };
+  return is_valid() ? names[encoding()] : "noreg";
+}
--- a/src/hotspot/cpu/aarch64/register_aarch64.hpp	Wed Sep 02 20:33:29 2020 -0700
+++ b/src/hotspot/cpu/aarch64/register_aarch64.hpp	Tue Sep 08 15:28:06 2020 +0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2000, 2020, Oracle and/or its affiliates. All rights reserved.
  * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
@@ -129,9 +129,10 @@
  public:
   enum {
     number_of_registers = 32,
-    max_slots_per_register = 4,
+    max_slots_per_register = 8,
     save_slots_per_register = 2,
-    extra_save_slots_per_register = max_slots_per_register - save_slots_per_register
+    slots_per_neon_register = 4,
+    extra_save_slots_per_neon_register = slots_per_neon_register - save_slots_per_register
   };
 
   // construction
@@ -187,6 +188,88 @@
 CONSTANT_REGISTER_DECLARATION(FloatRegister, v30    , (30));
 CONSTANT_REGISTER_DECLARATION(FloatRegister, v31    , (31));
 
+// SVE vector registers, shared with the SIMD&FP v0-v31. Vn maps to Zn[127:0].
+CONSTANT_REGISTER_DECLARATION(FloatRegister, z0     , ( 0));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, z1     , ( 1));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, z2     , ( 2));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, z3     , ( 3));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, z4     , ( 4));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, z5     , ( 5));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, z6     , ( 6));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, z7     , ( 7));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, z8     , ( 8));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, z9     , ( 9));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, z10    , (10));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, z11    , (11));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, z12    , (12));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, z13    , (13));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, z14    , (14));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, z15    , (15));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, z16    , (16));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, z17    , (17));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, z18    , (18));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, z19    , (19));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, z20    , (20));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, z21    , (21));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, z22    , (22));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, z23    , (23));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, z24    , (24));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, z25    , (25));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, z26    , (26));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, z27    , (27));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, z28    , (28));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, z29    , (29));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, z30    , (30));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, z31    , (31));
+
+
+class PRegisterImpl;
+typedef PRegisterImpl* PRegister;
+inline PRegister as_PRegister(int encoding) {
+  return (PRegister)(intptr_t)encoding;
+}
+
+// The implementation of predicate registers for the architecture
+class PRegisterImpl: public AbstractRegisterImpl {
+ public:
+  enum {
+    number_of_registers = 16,
+    max_slots_per_register = 1
+  };
+
+  // construction
+  inline friend PRegister as_PRegister(int encoding);
+
+  VMReg as_VMReg();
+
+  // derived registers, offsets, and addresses
+  PRegister successor() const     { return as_PRegister(encoding() + 1); }
+
+  // accessors
+  int   encoding() const          { assert(is_valid(), "invalid register"); return (intptr_t)this; }
+  int   encoding_nocheck() const  { return (intptr_t)this; }
+  bool  is_valid() const          { return 0 <= (intptr_t)this && (intptr_t)this < number_of_registers; }
+  const char* name() const;
+};
+
+// The predicate registers of SVE.
+CONSTANT_REGISTER_DECLARATION(PRegister, p0,  ( 0));
+CONSTANT_REGISTER_DECLARATION(PRegister, p1,  ( 1));
+CONSTANT_REGISTER_DECLARATION(PRegister, p2,  ( 2));
+CONSTANT_REGISTER_DECLARATION(PRegister, p3,  ( 3));
+CONSTANT_REGISTER_DECLARATION(PRegister, p4,  ( 4));
+CONSTANT_REGISTER_DECLARATION(PRegister, p5,  ( 5));
+CONSTANT_REGISTER_DECLARATION(PRegister, p6,  ( 6));
+CONSTANT_REGISTER_DECLARATION(PRegister, p7,  ( 7));
+CONSTANT_REGISTER_DECLARATION(PRegister, p8,  ( 8));
+CONSTANT_REGISTER_DECLARATION(PRegister, p9,  ( 9));
+CONSTANT_REGISTER_DECLARATION(PRegister, p10, (10));
+CONSTANT_REGISTER_DECLARATION(PRegister, p11, (11));
+CONSTANT_REGISTER_DECLARATION(PRegister, p12, (12));
+CONSTANT_REGISTER_DECLARATION(PRegister, p13, (13));
+CONSTANT_REGISTER_DECLARATION(PRegister, p14, (14));
+CONSTANT_REGISTER_DECLARATION(PRegister, p15, (15));
+
 // Need to know the total number of registers of all sorts for SharedInfo.
 // Define a class that exports it.
 class ConcreteRegisterImpl : public AbstractRegisterImpl {
@@ -199,12 +282,14 @@
 
     number_of_registers = (RegisterImpl::max_slots_per_register * RegisterImpl::number_of_registers +
                            FloatRegisterImpl::max_slots_per_register * FloatRegisterImpl::number_of_registers +
+                           PRegisterImpl::max_slots_per_register * PRegisterImpl::number_of_registers +
                            1) // flags
   };
 
   // added to make it compile
   static const int max_gpr;
   static const int max_fpr;
+  static const int max_pr;
 };
 
 // A set of registers
--- a/src/hotspot/cpu/aarch64/register_definitions_aarch64.cpp	Wed Sep 02 20:33:29 2020 -0700
+++ b/src/hotspot/cpu/aarch64/register_definitions_aarch64.cpp	Tue Sep 08 15:28:06 2020 +0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2002, 2018, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2002, 2020, Oracle and/or its affiliates. All rights reserved.
  * Copyright (c) 2014, Red Hat Inc. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
@@ -154,3 +154,55 @@
 REGISTER_DEFINITION(Register, rheapbase);
 
 REGISTER_DEFINITION(Register, r31_sp);
+
+REGISTER_DEFINITION(FloatRegister, z0);
+REGISTER_DEFINITION(FloatRegister, z1);
+REGISTER_DEFINITION(FloatRegister, z2);
+REGISTER_DEFINITION(FloatRegister, z3);
+REGISTER_DEFINITION(FloatRegister, z4);
+REGISTER_DEFINITION(FloatRegister, z5);
+REGISTER_DEFINITION(FloatRegister, z6);
+REGISTER_DEFINITION(FloatRegister, z7);
+REGISTER_DEFINITION(FloatRegister, z8);
+REGISTER_DEFINITION(FloatRegister, z9);
+REGISTER_DEFINITION(FloatRegister, z10);
+REGISTER_DEFINITION(FloatRegister, z11);
+REGISTER_DEFINITION(FloatRegister, z12);
+REGISTER_DEFINITION(FloatRegister, z13);
+REGISTER_DEFINITION(FloatRegister, z14);
+REGISTER_DEFINITION(FloatRegister, z15);
+REGISTER_DEFINITION(FloatRegister, z16);
+REGISTER_DEFINITION(FloatRegister, z17);
+REGISTER_DEFINITION(FloatRegister, z18);
+REGISTER_DEFINITION(FloatRegister, z19);
+REGISTER_DEFINITION(FloatRegister, z20);
+REGISTER_DEFINITION(FloatRegister, z21);
+REGISTER_DEFINITION(FloatRegister, z22);
+REGISTER_DEFINITION(FloatRegister, z23);
+REGISTER_DEFINITION(FloatRegister, z24);
+REGISTER_DEFINITION(FloatRegister, z25);
+REGISTER_DEFINITION(FloatRegister, z26);
+REGISTER_DEFINITION(FloatRegister, z27);
+REGISTER_DEFINITION(FloatRegister, z28);
+REGISTER_DEFINITION(FloatRegister, z29);
+REGISTER_DEFINITION(FloatRegister, z30);
+REGISTER_DEFINITION(FloatRegister, z31);
+
+REGISTER_DEFINITION(PRegister, p0);
+REGISTER_DEFINITION(PRegister, p1);
+REGISTER_DEFINITION(PRegister, p2);
+REGISTER_DEFINITION(PRegister, p3);
+REGISTER_DEFINITION(PRegister, p4);
+REGISTER_DEFINITION(PRegister, p5);
+REGISTER_DEFINITION(PRegister, p6);
+REGISTER_DEFINITION(PRegister, p7);
+REGISTER_DEFINITION(PRegister, p8);
+REGISTER_DEFINITION(PRegister, p9);
+REGISTER_DEFINITION(PRegister, p10);
+REGISTER_DEFINITION(PRegister, p11);
+REGISTER_DEFINITION(PRegister, p12);
+REGISTER_DEFINITION(PRegister, p13);
+REGISTER_DEFINITION(PRegister, p14);
+REGISTER_DEFINITION(PRegister, p15);
+
+REGISTER_DEFINITION(PRegister, ptrue);
--- a/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp	Wed Sep 02 20:33:29 2020 -0700
+++ b/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp	Tue Sep 08 15:28:06 2020 +0800
@@ -115,11 +115,28 @@
 };
 
 OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors) {
+  bool use_sve = false;
+  int sve_vector_size_in_bytes = 0;
+  int sve_vector_size_in_slots = 0;
+
+#ifdef COMPILER2
+  use_sve = Matcher::supports_scalable_vector();
+  sve_vector_size_in_bytes = Matcher::scalable_vector_reg_size(T_BYTE);
+  sve_vector_size_in_slots = Matcher::scalable_vector_reg_size(T_FLOAT);
+#endif
+
 #if COMPILER2_OR_JVMCI
   if (save_vectors) {
+    int vect_words = 0;
+    int extra_save_slots_per_register = 0;
     // Save upper half of vector registers
-    int vect_words = FloatRegisterImpl::number_of_registers * FloatRegisterImpl::extra_save_slots_per_register /
-                     VMRegImpl::slots_per_word;
+    if (use_sve) {
+      extra_save_slots_per_register = sve_vector_size_in_slots - FloatRegisterImpl::save_slots_per_register;
+    } else {
+      extra_save_slots_per_register = FloatRegisterImpl::extra_save_slots_per_neon_register;
+    }
+    vect_words = FloatRegisterImpl::number_of_registers * extra_save_slots_per_register /
+                 VMRegImpl::slots_per_word;
     additional_frame_words += vect_words;
   }
 #else
@@ -138,7 +155,7 @@
 
   // Save Integer and Float registers.
   __ enter();
-  __ push_CPU_state(save_vectors);
+  __ push_CPU_state(save_vectors, use_sve, sve_vector_size_in_bytes);
 
   // Set an oopmap for the call site.  This oopmap will map all
   // oop-registers and debug-info registers as callee-saved.  This
@@ -162,8 +179,13 @@
 
   for (int i = 0; i < FloatRegisterImpl::number_of_registers; i++) {
     FloatRegister r = as_FloatRegister(i);
-    int sp_offset = save_vectors ? (FloatRegisterImpl::max_slots_per_register * i) :
-                                   (FloatRegisterImpl::save_slots_per_register * i);
+    int sp_offset = 0;
+    if (save_vectors) {
+      sp_offset = use_sve ? (sve_vector_size_in_slots * i) :
+                            (FloatRegisterImpl::slots_per_neon_register * i);
+    } else {
+      sp_offset = FloatRegisterImpl::save_slots_per_register * i;
+    }
     oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset),
                               r->as_VMReg());
   }
@@ -172,10 +194,15 @@
 }
 
 void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_vectors) {
-#if !COMPILER2_OR_JVMCI
+#ifdef COMPILER2
+  __ pop_CPU_state(restore_vectors, Matcher::supports_scalable_vector(),
+                   Matcher::scalable_vector_reg_size(T_BYTE));
+#else
+#if !INCLUDE_JVMCI
   assert(!restore_vectors, "vectors are generated only by C2 and JVMCI");
 #endif
   __ pop_CPU_state(restore_vectors);
+#endif
   __ leave();
 
 }
@@ -1842,6 +1869,11 @@
   // Force this write out before the read below
   __ dmb(Assembler::ISH);
 
+  if (UseSVE > 0) {
+    // Make sure that JNI code does not change the SVE vector length.
+    __ verify_sve_vector_length();
+  }
+
   // check for safepoint operation in progress and/or pending suspend requests
   Label safepoint_in_progress, safepoint_in_progress_done;
   {
@@ -2774,6 +2806,12 @@
   __ maybe_isb();
   __ membar(Assembler::LoadLoad | Assembler::LoadStore);
 
+  if (UseSVE > 0 && save_vectors) {
+    // Reinitialize the ptrue predicate register, in case the external runtime
+    // call clobbers ptrue reg, as we may return to SVE compiled code.
+    __ reinitialize_ptrue();
+  }
+
   __ ldr(rscratch1, Address(rthread, Thread::pending_exception_offset()));
   __ cbz(rscratch1, noException);
 
--- a/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp	Wed Sep 02 20:33:29 2020 -0700
+++ b/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp	Tue Sep 08 15:28:06 2020 +0800
@@ -488,6 +488,11 @@
     __ call_VM_leaf(CAST_FROM_FN_PTR(address,
                          SharedRuntime::exception_handler_for_return_address),
                     rthread, c_rarg1);
+    if (UseSVE > 0) {
+      // Reinitialize the ptrue predicate register, in case the external runtime
+      // call clobbers ptrue reg, as we may return to SVE compiled code.
+      __ reinitialize_ptrue();
+    }
     // we should not really care that lr is no longer the callee
     // address. we saved the value the handler needs in r19 so we can
     // just copy it to r3. however, the C2 handler will push its own
@@ -5028,6 +5033,12 @@
     __ reset_last_Java_frame(true);
     __ maybe_isb();
 
+    if (UseSVE > 0) {
+      // Reinitialize the ptrue predicate register, in case the external runtime
+      // call clobbers ptrue reg, as we may return to SVE compiled code.
+      __ reinitialize_ptrue();
+    }
+
     __ leave();
 
     // check for pending exceptions
--- a/src/hotspot/cpu/aarch64/templateInterpreterGenerator_aarch64.cpp	Wed Sep 02 20:33:29 2020 -0700
+++ b/src/hotspot/cpu/aarch64/templateInterpreterGenerator_aarch64.cpp	Tue Sep 08 15:28:06 2020 +0800
@@ -1372,6 +1372,11 @@
   __ push(dtos);
   __ push(ltos);
 
+  if (UseSVE > 0) {
+    // Make sure that JNI code does not change the SVE vector length.
+    __ verify_sve_vector_length();
+  }
+
   // change thread state
   __ mov(rscratch1, _thread_in_native_trans);
   __ lea(rscratch2, Address(rthread, JavaThread::thread_state_offset()));
--- a/src/hotspot/cpu/aarch64/vm_version_aarch64.cpp	Wed Sep 02 20:33:29 2020 -0700
+++ b/src/hotspot/cpu/aarch64/vm_version_aarch64.cpp	Tue Sep 08 15:28:06 2020 +0800
@@ -32,12 +32,14 @@
 #include "runtime/os.hpp"
 #include "runtime/stubCodeGenerator.hpp"
 #include "runtime/vm_version.hpp"
+#include "utilities/formatBuffer.hpp"
 #include "utilities/macros.hpp"
 
 #include OS_HEADER_INLINE(os)
 
+#include <asm/hwcap.h>
 #include <sys/auxv.h>
-#include <asm/hwcap.h>
+#include <sys/prctl.h>
 
 #ifndef HWCAP_AES
 #define HWCAP_AES   (1<<3)
@@ -67,6 +69,20 @@
 #define HWCAP_SHA512 (1 << 21)
 #endif
 
+#ifndef HWCAP_SVE
+#define HWCAP_SVE (1 << 22)
+#endif
+
+#ifndef HWCAP2_SVE2
+#define HWCAP2_SVE2 (1 << 1)
+#endif
+
+#ifndef PR_SVE_GET_VL
+// For old toolchains which do not have SVE related macros defined.
+#define PR_SVE_SET_VL   50
+#define PR_SVE_GET_VL   51
+#endif
+
 int VM_Version::_cpu;
 int VM_Version::_model;
 int VM_Version::_model2;
@@ -74,6 +90,7 @@
 int VM_Version::_revision;
 int VM_Version::_stepping;
 bool VM_Version::_dcpop;
+int VM_Version::_initial_sve_vector_length;
 VM_Version::PsrInfo VM_Version::_psr_info   = { 0, };
 
 static BufferBlob* stub_blob;
@@ -116,7 +133,6 @@
   }
 };
 
-
 void VM_Version::get_processor_features() {
   _supports_cx8 = true;
   _supports_atomic_getset4 = true;
@@ -167,6 +183,7 @@
   }
 
   uint64_t auxv = getauxval(AT_HWCAP);
+  uint64_t auxv2 = getauxval(AT_HWCAP2);
 
   char buf[512];
 
@@ -277,6 +294,12 @@
     }
   }
 
+  if (_cpu == CPU_ARM) {
+    if (FLAG_IS_DEFAULT(UseSignumIntrinsic)) {
+      FLAG_SET_DEFAULT(UseSignumIntrinsic, true);
+    }
+  }
+
   if (_cpu == CPU_ARM && (_model == 0xd07 || _model2 == 0xd07)) _features |= CPU_STXR_PREFETCH;
   // If an olde style /proc/cpuinfo (cpu_lines == 1) then if _model is an A57 (0xd07)
   // we assume the worst and assume we could be on a big little system and have
@@ -292,6 +315,8 @@
   if (auxv & HWCAP_SHA2)  strcat(buf, ", sha256");
   if (auxv & HWCAP_SHA512) strcat(buf, ", sha512");
   if (auxv & HWCAP_ATOMICS) strcat(buf, ", lse");
+  if (auxv & HWCAP_SVE) strcat(buf, ", sve");
+  if (auxv2 & HWCAP2_SVE2) strcat(buf, ", sve2");
 
   _features_string = os::strdup(buf);
 
@@ -431,6 +456,18 @@
     FLAG_SET_DEFAULT(UseBlockZeroing, false);
   }
 
+  if (auxv & HWCAP_SVE) {
+    if (FLAG_IS_DEFAULT(UseSVE)) {
+      FLAG_SET_DEFAULT(UseSVE, (auxv2 & HWCAP2_SVE2) ? 2 : 1);
+    }
+    if (UseSVE > 0) {
+      _initial_sve_vector_length = prctl(PR_SVE_GET_VL);
+    }
+  } else if (UseSVE > 0) {
+    warning("UseSVE specified, but not supported on current CPU. Disabling SVE.");
+    FLAG_SET_DEFAULT(UseSVE, 0);
+  }
+
   // This machine allows unaligned memory accesses
   if (FLAG_IS_DEFAULT(UseUnalignedAccesses)) {
     FLAG_SET_DEFAULT(UseUnalignedAccesses, true);
@@ -465,12 +502,47 @@
     UseMontgomerySquareIntrinsic = true;
   }
 
-  int min_vector_size = 8;
+  if (UseSVE > 0) {
+    if (FLAG_IS_DEFAULT(MaxVectorSize)) {
+      MaxVectorSize = _initial_sve_vector_length;
+    } else if (MaxVectorSize < 16) {
+      warning("SVE does not support vector length less than 16 bytes. Disabling SVE.");
+      UseSVE = 0;
+    } else if ((MaxVectorSize % 16) == 0 && is_power_of_2(MaxVectorSize)) {
+      int new_vl = prctl(PR_SVE_SET_VL, MaxVectorSize);
+      _initial_sve_vector_length = new_vl;
+      // If MaxVectorSize is larger than the largest SVE vector length the system
+      // supports, the prctl() call above will set the task vector length to that
+      // largest supported value, so we also update MaxVectorSize to match.
+      if (new_vl < 0) {
+        vm_exit_during_initialization(
+          err_msg("Current system does not support SVE vector length for MaxVectorSize: %d",
+                  (int)MaxVectorSize));
+      } else if (new_vl != MaxVectorSize) {
+        warning("Current system only supports max SVE vector length %d. Set MaxVectorSize to %d",
+                new_vl, new_vl);
+      }
+      MaxVectorSize = new_vl;
+    } else {
+      vm_exit_during_initialization(err_msg("Unsupported MaxVectorSize: %d", (int)MaxVectorSize));
+    }
+  }
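+
+  // Standalone sketch of the kernel interface used above (Linux-only,
+  // illustrative): prctl(PR_SVE_GET_VL) returns the task vector length in
+  // bytes, possibly OR'ed with flag bits in the upper half, so a cautious
+  // caller masks it:
+  //   int vl = prctl(PR_SVE_GET_VL);
+  //   if (vl > 0) { vl &= 0xffff; /* current SVE VL in bytes */ }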
 
-  if (!FLAG_IS_DEFAULT(MaxVectorSize)) {
-    if (MaxVectorSize < min_vector_size) {
-      warning("MaxVectorSize must be at least %i on this platform", min_vector_size);
-      FLAG_SET_DEFAULT(MaxVectorSize, min_vector_size);
+  if (UseSVE == 0) {  // NEON
+    int min_vector_size = 8;
+    int max_vector_size = 16;
+    if (!FLAG_IS_DEFAULT(MaxVectorSize)) {
+      if (!is_power_of_2(MaxVectorSize)) {
+        vm_exit_during_initialization(err_msg("Unsupported MaxVectorSize: %d", (int)MaxVectorSize));
+      } else if (MaxVectorSize < min_vector_size) {
+        warning("MaxVectorSize must be at least %i on this platform", min_vector_size);
+        FLAG_SET_DEFAULT(MaxVectorSize, min_vector_size);
+      } else if (MaxVectorSize > max_vector_size) {
+        warning("MaxVectorSize must be at most %i on this platform", max_vector_size);
+        FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
+      }
+    } else {
+      FLAG_SET_DEFAULT(MaxVectorSize, 16);
     }
   }
 
--- a/src/hotspot/cpu/aarch64/vm_version_aarch64.hpp	Wed Sep 02 20:33:29 2020 -0700
+++ b/src/hotspot/cpu/aarch64/vm_version_aarch64.hpp	Tue Sep 08 15:28:06 2020 +0800
@@ -41,6 +41,8 @@
   static int _revision;
   static int _stepping;
   static bool _dcpop;
+  static int _initial_sve_vector_length;
+
   struct PsrInfo {
     uint32_t dczid_el0;
     uint32_t ctr_el0;
@@ -106,6 +108,7 @@
   static int cpu_variant()                    { return _variant; }
   static int cpu_revision()                   { return _revision; }
   static bool supports_dcpop()                { return _dcpop; }
+  static int get_initial_sve_vector_length()  { return _initial_sve_vector_length; }
   static ByteSize dczid_el0_offset() { return byte_offset_of(PsrInfo, dczid_el0); }
   static ByteSize ctr_el0_offset()   { return byte_offset_of(PsrInfo, ctr_el0); }
   static bool is_zva_enabled() {
--- a/src/hotspot/cpu/aarch64/vmreg_aarch64.inline.hpp	Wed Sep 02 20:33:29 2020 -0700
+++ b/src/hotspot/cpu/aarch64/vmreg_aarch64.inline.hpp	Tue Sep 08 15:28:06 2020 +0800
@@ -1,6 +1,6 @@
 /*
- * Copyright (c) 2006, 2019, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2014, Red Hat Inc. All rights reserved.
+ * Copyright (c) 2006, 2020, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -36,4 +36,8 @@
                              ConcreteRegisterImpl::max_gpr);
 }
 
+inline VMReg PRegisterImpl::as_VMReg() {
+  return VMRegImpl::as_VMReg(encoding() + ConcreteRegisterImpl::max_fpr);
+}
+
 #endif // CPU_AARCH64_VMREG_AARCH64_INLINE_HPP
--- a/src/hotspot/cpu/arm/arm.ad	Wed Sep 02 20:33:29 2020 -0700
+++ b/src/hotspot/cpu/arm/arm.ad	Tue Sep 08 15:28:06 2020 +0800
@@ -1010,6 +1010,14 @@
   return MaxVectorSize;
 }
 
+const bool Matcher::supports_scalable_vector() {
+  return false;
+}
+
+const int Matcher::scalable_vector_reg_size(const BasicType bt) {
+  return -1;
+}
+
 // Vector ideal reg corresponding to specified size in bytes
 const uint Matcher::vector_ideal_reg(int size) {
   assert(MaxVectorSize >= size, "");
--- a/src/hotspot/cpu/ppc/ppc.ad	Wed Sep 02 20:33:29 2020 -0700
+++ b/src/hotspot/cpu/ppc/ppc.ad	Tue Sep 08 15:28:06 2020 +0800
@@ -2383,6 +2383,14 @@
   return max_vector_size(bt); // Same as max.
 }
 
+const bool Matcher::supports_scalable_vector() {
+  return false;
+}
+
+const int Matcher::scalable_vector_reg_size(const BasicType bt) {
+  return -1;
+}
+
 // PPC implementation uses VSX load/store instructions (if
 // SuperwordUseVSX) which support 4 byte but not arbitrary alignment
 const bool Matcher::misaligned_vectors_ok() {
--- a/src/hotspot/cpu/s390/s390.ad	Wed Sep 02 20:33:29 2020 -0700
+++ b/src/hotspot/cpu/s390/s390.ad	Tue Sep 08 15:28:06 2020 +0800
@@ -1614,6 +1614,14 @@
   return max_vector_size(bt); // Same as max.
 }
 
+const bool Matcher::supports_scalable_vector() {
+  return false;
+}
+
+const int Matcher::scalable_vector_reg_size(const BasicType bt) {
+  return -1;
+}
+
 // z/Architecture does support misaligned store/load at minimal extra cost.
 const bool Matcher::misaligned_vectors_ok() {
   return true;
--- a/src/hotspot/cpu/x86/x86.ad	Wed Sep 02 20:33:29 2020 -0700
+++ b/src/hotspot/cpu/x86/x86.ad	Tue Sep 08 15:28:06 2020 +0800
@@ -1,5 +1,5 @@
 //
-// Copyright (c) 2011, 2019, Oracle and/or its affiliates. All rights reserved.
+// Copyright (c) 2011, 2020, Oracle and/or its affiliates. All rights reserved.
 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 //
 // This code is free software; you can redistribute it and/or modify it
@@ -1872,6 +1872,14 @@
   return MIN2(size,max_size);
 }
 
+const bool Matcher::supports_scalable_vector() {
+  return false;
+}
+
+const int Matcher::scalable_vector_reg_size(const BasicType bt) {
+  return -1;
+}
+
 // Vector ideal reg corresponding to specified size in bytes
 const uint Matcher::vector_ideal_reg(int size) {
   assert(MaxVectorSize >= size, "");
--- a/src/hotspot/cpu/x86/x86_64.ad	Wed Sep 02 20:33:29 2020 -0700
+++ b/src/hotspot/cpu/x86/x86_64.ad	Tue Sep 08 15:28:06 2020 +0800
@@ -2834,7 +2834,7 @@
       RAX_H_num     // Op_RegL
     };
     // Excluded flags and vector registers.
-    assert(ARRAY_SIZE(hi) == _last_machine_leaf - 6, "missing type");
+    assert(ARRAY_SIZE(hi) == _last_machine_leaf - 8, "missing type");
     return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
   %}
 %}
--- a/src/hotspot/os/linux/cgroupSubsystem_linux.cpp	Wed Sep 02 20:33:29 2020 -0700
+++ b/src/hotspot/os/linux/cgroupSubsystem_linux.cpp	Tue Sep 08 15:28:06 2020 +0800
@@ -267,7 +267,7 @@
     // as to avoid memory stomping of the _mount_path pointer later on in the cgroup v1
     // block in the hybrid case.
     //
-    if (is_cgroupsV2 && sscanf(p, "%*d %*d %*d:%*d %*s %s %*[^-]- %s cgroup2 %*s", tmp_mount_point, tmp_fs_type) == 2) {
+    if (is_cgroupsV2 && sscanf(p, "%*d %*d %*d:%*d %*s %s %*[^-]- %s %*s %*s", tmp_mount_point, tmp_fs_type) == 2) {
       // we likely have an early match return (e.g. cgroup fs match), be sure we have cgroup2 as fstype
       if (!cgroupv2_mount_point_found && strcmp("cgroup2", tmp_fs_type) == 0) {
         cgroupv2_mount_point_found = true;
@@ -289,7 +289,7 @@
      * Example for host:
      * 34 28 0:29 / /sys/fs/cgroup/memory rw,nosuid,nodev,noexec,relatime shared:16 - cgroup cgroup rw,memory
      */
-    if (sscanf(p, "%*d %*d %*d:%*d %s %s %*[^-]- %s cgroup %s", tmproot, tmpmount, tmp_fs_type, tmpcgroups) == 4) {
+    if (sscanf(p, "%*d %*d %*d:%*d %s %s %*[^-]- %s %*s %s", tmproot, tmpmount, tmp_fs_type, tmpcgroups) == 4) {
       if (strcmp("cgroup", tmp_fs_type) != 0) {
         // Skip cgroup2 fs lines on hybrid or unified hierarchy.
         continue;
--- a/src/hotspot/share/adlc/archDesc.cpp	Wed Sep 02 20:33:29 2020 -0700
+++ b/src/hotspot/share/adlc/archDesc.cpp	Tue Sep 08 15:28:06 2020 +0800
@@ -1,5 +1,5 @@
 //
-// Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved.
+// Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved.
 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 //
 // This code is free software; you can redistribute it and/or modify it
@@ -934,6 +934,7 @@
   // Match Vector types.
   if (strncmp(idealOp, "Vec",3)==0) {
     switch(last_char) {
+    case 'A':  return "TypeVect::VECTA";
     case 'S':  return "TypeVect::VECTS";
     case 'D':  return "TypeVect::VECTD";
     case 'X':  return "TypeVect::VECTX";
@@ -944,6 +945,10 @@
     }
   }
 
+  if (strncmp(idealOp, "RegVMask", 8) == 0) {
+    return "Type::BOTTOM";
+  }
+
   // !!!!!
   switch(last_char) {
   case 'I':    return "TypeInt::INT";
--- a/src/hotspot/share/adlc/formssel.cpp	Wed Sep 02 20:33:29 2020 -0700
+++ b/src/hotspot/share/adlc/formssel.cpp	Tue Sep 08 15:28:06 2020 +0800
@@ -3943,6 +3943,8 @@
          strcmp(opType,"RegL")==0 ||
          strcmp(opType,"RegF")==0 ||
          strcmp(opType,"RegD")==0 ||
+         strcmp(opType,"RegVMask")==0 ||
+         strcmp(opType,"VecA")==0 ||
          strcmp(opType,"VecS")==0 ||
          strcmp(opType,"VecD")==0 ||
          strcmp(opType,"VecX")==0 ||
--- a/src/hotspot/share/ci/ciEnv.cpp	Wed Sep 02 20:33:29 2020 -0700
+++ b/src/hotspot/share/ci/ciEnv.cpp	Tue Sep 08 15:28:06 2020 +0800
@@ -761,7 +761,7 @@
   InstanceKlass* accessor_klass = accessor->get_instanceKlass();
   Klass* holder_klass = holder->get_Klass();
   Method* dest_method;
-  LinkInfo link_info(holder_klass, name, sig, accessor_klass, LinkInfo::AccessCheck::required, tag);
+  LinkInfo link_info(holder_klass, name, sig, accessor_klass, LinkInfo::AccessCheck::required, LinkInfo::LoaderConstraintCheck::required, tag);
   switch (bc) {
   case Bytecodes::_invokestatic:
     dest_method =
--- a/src/hotspot/share/ci/ciMethod.cpp	Wed Sep 02 20:33:29 2020 -0700
+++ b/src/hotspot/share/ci/ciMethod.cpp	Tue Sep 08 15:28:06 2020 +0800
@@ -809,7 +809,8 @@
    Symbol* h_signature = signature()->get_symbol();
 
    LinkInfo link_info(resolved, h_name, h_signature, caller_klass,
-                      check_access ? LinkInfo::AccessCheck::required : LinkInfo::AccessCheck::skip);
+                      check_access ? LinkInfo::AccessCheck::required : LinkInfo::AccessCheck::skip,
+                      check_access ? LinkInfo::LoaderConstraintCheck::required : LinkInfo::LoaderConstraintCheck::skip);
    Method* m = NULL;
    // Only do exact lookup if receiver klass has been linked.  Otherwise,
    // the vtable has not been setup, and the LinkResolver will fail.
--- a/src/hotspot/share/classfile/javaClasses.hpp	Wed Sep 02 20:33:29 2020 -0700
+++ b/src/hotspot/share/classfile/javaClasses.hpp	Tue Sep 08 15:28:06 2020 +0800
@@ -1135,7 +1135,11 @@
     MN_NESTMATE_CLASS        = 0x00000001,
     MN_HIDDEN_CLASS          = 0x00000002,
     MN_STRONG_LOADER_LINK    = 0x00000004,
-    MN_ACCESS_VM_ANNOTATIONS = 0x00000008
+    MN_ACCESS_VM_ANNOTATIONS = 0x00000008,
+    // Lookup modes
+    MN_MODULE_MODE           = 0x00000010,
+    MN_UNCONDITIONAL_MODE    = 0x00000020,
+    MN_TRUSTED_MODE          = -1
   };
 
   // Accessors for code generation:
--- a/src/hotspot/share/classfile/systemDictionary.cpp	Wed Sep 02 20:33:29 2020 -0700
+++ b/src/hotspot/share/classfile/systemDictionary.cpp	Tue Sep 08 15:28:06 2020 +0800
@@ -2830,7 +2830,7 @@
     // There's special logic on JDK side to handle them
     // (see MethodHandles.linkMethodHandleConstant() and MethodHandles.findVirtualForMH()).
   } else {
-    MethodHandles::resolve_MemberName(mname, caller, /*speculative_resolve*/false, CHECK_(empty));
+    MethodHandles::resolve_MemberName(mname, caller, 0, false /*speculative_resolve*/, CHECK_(empty));
   }
 
   // After method/field resolution succeeded, it's safe to resolve MH signature as well.
--- a/src/hotspot/share/classfile/vmSymbols.cpp	Wed Sep 02 20:33:29 2020 -0700
+++ b/src/hotspot/share/classfile/vmSymbols.cpp	Tue Sep 08 15:28:06 2020 +0800
@@ -815,6 +815,14 @@
   case vmIntrinsics::_isWhitespace:
     if (!UseCharacterCompareIntrinsics) return true;
     break;
+  case vmIntrinsics::_dcopySign:
+  case vmIntrinsics::_fcopySign:
+    if (!InlineMathNatives || !UseCopySignIntrinsic) return true;
+    break;
+  case vmIntrinsics::_dsignum:
+  case vmIntrinsics::_fsignum:
+    if (!InlineMathNatives || !UseSignumIntrinsic) return true;
+    break;
 #endif // COMPILER2
   default:
     return false;
--- a/src/hotspot/share/classfile/vmSymbols.hpp	Wed Sep 02 20:33:29 2020 -0700
+++ b/src/hotspot/share/classfile/vmSymbols.hpp	Tue Sep 08 15:28:06 2020 +0800
@@ -789,6 +789,8 @@
   do_name(negateExact_name,"negateExact")                                                                               \
   do_name(subtractExact_name,"subtractExact")                                                                           \
   do_name(fma_name, "fma")                                                                                              \
+  do_name(copySign_name, "copySign")                                                                                    \
+  do_name(signum_name,"signum")                                                                                         \
                                                                                                                         \
   do_intrinsic(_dabs,                     java_lang_Math,         abs_name,   double_double_signature,           F_S)   \
   do_intrinsic(_fabs,                     java_lang_Math,         abs_name,   float_float_signature,           F_S)   \
@@ -827,6 +829,10 @@
   do_intrinsic(_minF,                     java_lang_Math,         min_name,           float2_float_signature,    F_S)   \
   do_intrinsic(_maxD,                     java_lang_Math,         max_name,           double2_double_signature,  F_S)   \
   do_intrinsic(_minD,                     java_lang_Math,         min_name,           double2_double_signature,  F_S)   \
+  do_intrinsic(_dcopySign,                java_lang_Math,         copySign_name,      double2_double_signature,  F_S)   \
+  do_intrinsic(_fcopySign,                java_lang_Math,         copySign_name,      float2_float_signature,    F_S)   \
+  do_intrinsic(_dsignum,                  java_lang_Math,         signum_name,        double_double_signature,   F_S)   \
+  do_intrinsic(_fsignum,                  java_lang_Math,         signum_name,        float_float_signature,     F_S)   \
                                                                                                                         \
   do_intrinsic(_floatToRawIntBits,        java_lang_Float,        floatToRawIntBits_name,   float_int_signature, F_S)   \
    do_name(     floatToRawIntBits_name,                          "floatToRawIntBits")                                   \
--- a/src/hotspot/share/interpreter/linkResolver.cpp	Wed Sep 02 20:33:29 2020 -0700
+++ b/src/hotspot/share/interpreter/linkResolver.cpp	Tue Sep 08 15:28:06 2020 +0800
@@ -241,6 +241,7 @@
 
   // Coming from the constant pool always checks access
   _check_access  = true;
+  _check_loader_constraints = true;
 }
 
 LinkInfo::LinkInfo(const constantPoolHandle& pool, int index, TRAPS) {
@@ -256,17 +257,20 @@
 
   // Coming from the constant pool always checks access
   _check_access  = true;
+  _check_loader_constraints = true;
 }
 
 #ifndef PRODUCT
 void LinkInfo::print() {
   ResourceMark rm;
-  tty->print_cr("Link resolved_klass=%s name=%s signature=%s current_klass=%s check_access=%s",
+  tty->print_cr("Link resolved_klass=%s name=%s signature=%s current_klass=%s check_access=%s check_loader_constraints=%s",
                 _resolved_klass->name()->as_C_string(),
                 _name->as_C_string(),
                 _signature->as_C_string(),
                 _current_klass == NULL ? "(none)" : _current_klass->name()->as_C_string(),
-                _check_access ? "true" : "false");
+                _check_access ? "true" : "false",
+                _check_loader_constraints ? "true" : "false");
+
 }
 #endif // PRODUCT
 //------------------------------------------------------------------------------------------------------------------------
@@ -795,7 +799,8 @@
                                resolved_method->method_holder(),
                                resolved_method,
                                CHECK_NULL);
-
+  }
+  if (link_info.check_loader_constraints()) {
     // check loader constraints
     check_method_loader_constraints(link_info, resolved_method, "method", CHECK_NULL);
   }
@@ -891,7 +896,8 @@
                                resolved_method->method_holder(),
                                resolved_method,
                                CHECK_NULL);
-
+  }
+  if (link_info.check_loader_constraints()) {
     check_method_loader_constraints(link_info, resolved_method, "interface method", CHECK_NULL);
   }
 
@@ -1055,7 +1061,7 @@
     }
   }
 
-  if ((sel_klass != current_klass) && (current_klass != NULL)) {
+  if (link_info.check_loader_constraints() && (sel_klass != current_klass) && (current_klass != NULL)) {
     check_field_loader_constraints(field, sig, current_klass, sel_klass, CHECK);
   }
 
@@ -1089,7 +1095,8 @@
     // Use updated LinkInfo to reresolve with resolved method holder
     LinkInfo new_info(resolved_klass, link_info.name(), link_info.signature(),
                       link_info.current_klass(),
-                      link_info.check_access() ? LinkInfo::AccessCheck::required : LinkInfo::AccessCheck::skip);
+                      link_info.check_access() ? LinkInfo::AccessCheck::required : LinkInfo::AccessCheck::skip,
+                      link_info.check_loader_constraints() ? LinkInfo::LoaderConstraintCheck::required : LinkInfo::LoaderConstraintCheck::skip);
     resolved_method = linktime_resolve_static_method(new_info, CHECK);
   }
 
@@ -1250,7 +1257,7 @@
         ss.print("'");
         THROW_MSG(vmSymbols::java_lang_AbstractMethodError(), ss.as_string());
       // check loader constraints if found a different method
-      } else if (sel_method() != resolved_method()) {
+      } else if (link_info.check_loader_constraints() && sel_method() != resolved_method()) {
         check_method_loader_constraints(link_info, sel_method, "method", CHECK);
       }
     }
--- a/src/hotspot/share/interpreter/linkResolver.hpp	Wed Sep 02 20:33:29 2020 -0700
+++ b/src/hotspot/share/interpreter/linkResolver.hpp	Tue Sep 08 15:28:06 2020 +0800
@@ -141,10 +141,12 @@
   Klass*      _current_klass;   // class that owns the constant pool
   methodHandle _current_method;  // sending method
   bool        _check_access;
+  bool        _check_loader_constraints;
   constantTag _tag;
 
  public:
   enum class AccessCheck { required, skip };
+  enum class LoaderConstraintCheck { required, skip };
 
   LinkInfo(const constantPoolHandle& pool, int index, const methodHandle& current_method, TRAPS);
   LinkInfo(const constantPoolHandle& pool, int index, TRAPS);
@@ -152,33 +154,38 @@
   // Condensed information from other call sites within the vm.
   LinkInfo(Klass* resolved_klass, Symbol* name, Symbol* signature, Klass* current_klass,
            AccessCheck check_access = AccessCheck::required,
+           LoaderConstraintCheck check_loader_constraints = LoaderConstraintCheck::required,
            constantTag tag = JVM_CONSTANT_Invalid) :
     _name(name),
     _signature(signature), _resolved_klass(resolved_klass), _current_klass(current_klass), _current_method(methodHandle()),
-    _check_access(check_access == AccessCheck::required), _tag(tag) {}
+    _check_access(check_access == AccessCheck::required),
+    _check_loader_constraints(check_loader_constraints == LoaderConstraintCheck::required), _tag(tag) {}
 
   LinkInfo(Klass* resolved_klass, Symbol* name, Symbol* signature, const methodHandle& current_method,
            AccessCheck check_access = AccessCheck::required,
+           LoaderConstraintCheck check_loader_constraints = LoaderConstraintCheck::required,
            constantTag tag = JVM_CONSTANT_Invalid) :
     _name(name),
     _signature(signature), _resolved_klass(resolved_klass), _current_klass(current_method->method_holder()), _current_method(current_method),
-    _check_access(check_access == AccessCheck::required), _tag(tag) {}
+    _check_access(check_access == AccessCheck::required),
+    _check_loader_constraints(check_loader_constraints == LoaderConstraintCheck::required), _tag(tag) {}
+
 
   // Case where we just find the method and don't check access against the current class
   LinkInfo(Klass* resolved_klass, Symbol*name, Symbol* signature) :
     _name(name),
     _signature(signature), _resolved_klass(resolved_klass), _current_klass(NULL), _current_method(methodHandle()),
-    _check_access(false), _tag(JVM_CONSTANT_Invalid) {}
+    _check_access(false), _check_loader_constraints(false), _tag(JVM_CONSTANT_Invalid) {}
 
   // accessors
-  Symbol* name() const               { return _name; }
-  Symbol* signature() const          { return _signature; }
-  Klass* resolved_klass() const      { return _resolved_klass; }
-  Klass* current_klass() const       { return _current_klass; }
-  Method* current_method() const     { return _current_method(); }
-  constantTag tag() const            { return _tag; }
-  bool check_access() const          { return _check_access; }
-
+  Symbol* name() const                  { return _name; }
+  Symbol* signature() const             { return _signature; }
+  Klass* resolved_klass() const         { return _resolved_klass; }
+  Klass* current_klass() const          { return _current_klass; }
+  Method* current_method() const        { return _current_method(); }
+  constantTag tag() const               { return _tag; }
+  bool check_access() const             { return _check_access; }
+  bool check_loader_constraints() const { return _check_loader_constraints; }
   void         print()  PRODUCT_RETURN;
 };
 
--- a/src/hotspot/share/jfr/recorder/stacktrace/jfrStackTrace.cpp	Wed Sep 02 20:33:29 2020 -0700
+++ b/src/hotspot/share/jfr/recorder/stacktrace/jfrStackTrace.cpp	Tue Sep 08 15:28:06 2020 +0800
@@ -180,6 +180,7 @@
   u4 count = 0;
   _reached_root = true;
 
+  _hash = 1;
   while (!st.at_end()) {
     if (count >= _max_frames) {
       _reached_root = false;
@@ -201,7 +202,9 @@
     }
     const int lineno = method->line_number_from_bci(bci);
     // Can we determine if it's inlined?
-    _hash = (_hash << 2) + (unsigned int)(((size_t)mid >> 2) + (bci << 4) + type);
+    _hash = (_hash * 31) + mid;
+    _hash = (_hash * 31) + bci;
+    _hash = (_hash * 31) + type;
     _frames[count] = JfrStackFrame(mid, bci, type, lineno, method->method_holder());
     st.samples_next();
     count++;
--- a/src/hotspot/share/jvmci/jvmciJavaClasses.cpp	Wed Sep 02 20:33:29 2020 -0700
+++ b/src/hotspot/share/jvmci/jvmciJavaClasses.cpp	Tue Sep 08 15:28:06 2020 +0800
@@ -90,7 +90,7 @@
 #ifndef PRODUCT
 static void check_resolve_method(const char* call_type, Klass* resolved_klass, Symbol* method_name, Symbol* method_signature, TRAPS) {
   Method* method;
-  LinkInfo link_info(resolved_klass, method_name, method_signature, NULL, LinkInfo::AccessCheck::skip);
+  LinkInfo link_info(resolved_klass, method_name, method_signature, NULL, LinkInfo::AccessCheck::skip, LinkInfo::LoaderConstraintCheck::skip);
   if (strcmp(call_type, "call_static") == 0) {
     method = LinkResolver::resolve_static_call_or_null(link_info);
   } else if (strcmp(call_type, "call_virtual") == 0) {
--- a/src/hotspot/share/jvmci/jvmciRuntime.cpp	Wed Sep 02 20:33:29 2020 -0700
+++ b/src/hotspot/share/jvmci/jvmciRuntime.cpp	Tue Sep 08 15:28:06 2020 +0800
@@ -1327,7 +1327,7 @@
   assert(check_klass_accessibility(accessor, holder), "holder not accessible");
 
   Method* dest_method;
-  LinkInfo link_info(holder, name, sig, accessor, LinkInfo::AccessCheck::required, tag);
+  LinkInfo link_info(holder, name, sig, accessor, LinkInfo::AccessCheck::required, LinkInfo::LoaderConstraintCheck::required, tag);
   switch (bc) {
   case Bytecodes::_invokestatic:
     dest_method =
--- a/src/hotspot/share/opto/c2compiler.cpp	Wed Sep 02 20:33:29 2020 -0700
+++ b/src/hotspot/share/opto/c2compiler.cpp	Tue Sep 08 15:28:06 2020 +0800
@@ -458,6 +458,18 @@
   case vmIntrinsics::_floor:
     if (!Matcher::match_rule_supported(Op_RoundDoubleMode)) return false;
     break;
+  case vmIntrinsics::_dcopySign:
+    if (!Matcher::match_rule_supported(Op_CopySignD)) return false;
+    break;
+  case vmIntrinsics::_fcopySign:
+    if (!Matcher::match_rule_supported(Op_CopySignF)) return false;
+    break;
+  case vmIntrinsics::_dsignum:
+    if (!Matcher::match_rule_supported(Op_SignumD)) return false;
+    break;
+  case vmIntrinsics::_fsignum:
+    if (!Matcher::match_rule_supported(Op_SignumF)) return false;
+    break;
   case vmIntrinsics::_hashCode:
   case vmIntrinsics::_identityHashCode:
   case vmIntrinsics::_getClass:
--- a/src/hotspot/share/opto/chaitin.cpp	Wed Sep 02 20:33:29 2020 -0700
+++ b/src/hotspot/share/opto/chaitin.cpp	Tue Sep 08 15:28:06 2020 +0800
@@ -77,6 +77,7 @@
   if( _is_oop ) tty->print("Oop ");
   if( _is_float ) tty->print("Float ");
   if( _is_vector ) tty->print("Vector ");
+  if( _is_scalable ) tty->print("Scalable ");
   if( _was_spilled1 ) tty->print("Spilled ");
   if( _was_spilled2 ) tty->print("Spilled2 ");
   if( _direct_conflict ) tty->print("Direct_conflict ");
@@ -644,7 +645,15 @@
           // Live ranges record the highest register in their mask.
           // We want the low register for the AD file writer's convenience.
           OptoReg::Name hi = lrg.reg(); // Get hi register
-          OptoReg::Name lo = OptoReg::add(hi, (1-lrg.num_regs())); // Find lo
+          int num_regs = lrg.num_regs();
+          if (lrg.is_scalable() && OptoReg::is_stack(hi)) {
+            // For scalable vector registers allocated in physical registers,
+            // num_regs is RegMask::SlotsPerVecA, the fixed width used in the
+            // reg mask of a scalable vector. For registers allocated on the
+            // stack, we need the actual num_regs, which reflects the physical
+            // length of the scalable register.
+            num_regs = lrg.scalable_reg_slots();
+          }
+          OptoReg::Name lo = OptoReg::add(hi, (1-num_regs)); // Find lo
           // We have to use pair [lo,lo+1] even for wide vectors because
           // the rest of code generation works only with pairs. It is safe
           // since for registers encoding only 'lo' is used.
@@ -802,8 +811,19 @@
         // Check for vector live range (only if vector register is used).
         // On SPARC vector uses RegD which could be misaligned so it is not
         // processes as vector in RA.
-        if (RegMask::is_vector(ireg))
+        if (RegMask::is_vector(ireg)) {
           lrg._is_vector = 1;
+          if (ireg == Op_VecA) {
+            assert(Matcher::supports_scalable_vector(), "scalable vector should be supported");
+            lrg._is_scalable = 1;
+            // For a scalable vector allocated in a physical register,
+            // num_regs is RegMask::SlotsPerVecA, the reg-mask width,
+            // which may not match the actual physical register size.
+            // If it is allocated on the stack, we need the actual
+            // physical length of the scalable vector register.
+            lrg.set_scalable_reg_slots(Matcher::scalable_vector_reg_size(T_FLOAT));
+          }
+        }
         assert(n_type->isa_vect() == NULL || lrg._is_vector || ireg == Op_RegD || ireg == Op_RegL,
                "vector must be in vector registers");
 
@@ -905,6 +925,13 @@
           lrg.set_num_regs(1);
           lrg.set_reg_pressure(1);
           break;
+        case Op_VecA:
+          assert(Matcher::supports_scalable_vector(), "does not support scalable vector");
+          assert(RegMask::num_registers(Op_VecA) == RegMask::SlotsPerVecA, "sanity");
+          assert(lrgmask.is_aligned_sets(RegMask::SlotsPerVecA), "vector should be aligned");
+          lrg.set_num_regs(RegMask::SlotsPerVecA);
+          lrg.set_reg_pressure(1);
+          break;
         case Op_VecS:
           assert(Matcher::vector_size_supported(T_BYTE,4), "sanity");
           assert(RegMask::num_registers(Op_VecS) == RegMask::SlotsPerVecS, "sanity");
@@ -1305,6 +1332,46 @@
   return false;
 }
 
+static OptoReg::Name find_first_set(LRG &lrg, RegMask mask, int chunk) {
+  int num_regs = lrg.num_regs();
+  OptoReg::Name assigned = mask.find_first_set(lrg, num_regs);
+
+  if (lrg.is_scalable()) {
+    // a physical register is found
+    if (chunk == 0 && OptoReg::is_reg(assigned)) {
+      return assigned;
+    }
+
+    // find available stack slots for scalable register
+    if (lrg._is_vector) {
+      num_regs = lrg.scalable_reg_slots();
+      // If the actual scalable vector register is exactly SlotsPerVecA * 32 bits wide
+      if (num_regs == RegMask::SlotsPerVecA) {
+        return assigned;
+      }
+
+      // The mask has been cleared out by clear_to_sets(SlotsPerVecA) before
+      // choose_color, but that alignment does not hold for the scalable size.
+      // We have to find scalable_reg_slots() adjacent bits instead of
+      // SlotsPerVecA bits.
+      assigned = mask.find_first_set(lrg, num_regs); // find highest valid reg
+      while (OptoReg::is_valid(assigned) && RegMask::can_represent(assigned)) {
+        // Verify the found reg has scalable_reg_slots() bits set.
+        if (mask.is_valid_reg(assigned, num_regs)) {
+          return assigned;
+        } else {
+          // Remove more for each iteration
+          mask.Remove(assigned - num_regs + 1); // Unmask the lowest reg
+          mask.clear_to_sets(RegMask::SlotsPerVecA); // Align by SlotsPerVecA bits
+          assigned = mask.find_first_set(lrg, num_regs);
+        }
+      }
+      return OptoReg::Bad; // will cause chunk change, and retry next chunk
+    }
+  }
+
+  return assigned;
+}
+
 // Choose a color using the biasing heuristic
 OptoReg::Name PhaseChaitin::bias_color( LRG &lrg, int chunk ) {
 
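
The retry loop in the new find_first_set() probes SlotsPerVecA-aligned candidates and keeps unmasking until it finds scalable_reg_slots() adjacent free slots, or returns OptoReg::Bad to force a chunk change. A toy, self-contained model of that search over a 64-slot mask (an illustration of the strategy, not HotSpot's RegMask API; 8 stands in for SlotsPerVecA):

    #include <cstdint>

    // True if the n slots ending at index hi are all free in the mask.
    static bool run_free(uint64_t mask, int hi, int n) {
      for (int i = 0; i < n; i++) {
        if (((mask >> (hi - i)) & 1) == 0) return false;
      }
      return true;
    }

    // Highest slot of the first n-slot run whose top index closes an 8-aligned
    // set (what clear_to_sets(8) guarantees); -1 models OptoReg::Bad.
    static int find_scalable_slots(uint64_t mask, int n) {
      for (int hi = 7; hi < 64; hi += 8) {
        if (hi >= n - 1 && run_free(mask, hi, n)) return hi;
      }
      return -1;
    }
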
@@ -1338,7 +1405,7 @@
       RegMask tempmask = lrg.mask();
       tempmask.AND(lrgs(copy_lrg).mask());
       tempmask.clear_to_sets(lrg.num_regs());
-      OptoReg::Name reg = tempmask.find_first_set(lrg.num_regs());
+      OptoReg::Name reg = find_first_set(lrg, tempmask, chunk);
       if (OptoReg::is_valid(reg))
         return reg;
     }
@@ -1347,7 +1414,7 @@
   // If no bias info exists, just go with the register selection ordering
   if (lrg._is_vector || lrg.num_regs() == 2) {
     // Find an aligned set
-    return OptoReg::add(lrg.mask().find_first_set(lrg.num_regs()),chunk);
+    return OptoReg::add(find_first_set(lrg, lrg.mask(), chunk), chunk);
   }
 
   // CNC - Fun hack.  Alternate 1st and 2nd selection.  Enables post-allocate
@@ -1402,7 +1469,6 @@
     LRG *lrg = &lrgs(lidx);
     _simplified = lrg->_next;
 
-
 #ifndef PRODUCT
     if (trace_spilling()) {
       ttyLocker ttyl;
@@ -1484,7 +1550,6 @@
       // Bump register mask up to next stack chunk
       chunk += RegMask::CHUNK_SIZE;
       lrg->Set_All();
-
       goto retry_next_chunk;
     }
 
@@ -1509,12 +1574,21 @@
       int n_regs = lrg->num_regs();
       assert(!lrg->_is_vector || !lrg->_fat_proj, "sanity");
       if (n_regs == 1 || !lrg->_fat_proj) {
-        assert(!lrg->_is_vector || n_regs <= RegMask::SlotsPerVecZ, "sanity");
+        if (Matcher::supports_scalable_vector()) {
+          assert(!lrg->_is_vector || n_regs <= RegMask::SlotsPerVecA, "sanity");
+        } else {
+          assert(!lrg->_is_vector || n_regs <= RegMask::SlotsPerVecZ, "sanity");
+        }
         lrg->Clear();           // Clear the mask
         lrg->Insert(reg);       // Set regmask to match selected reg
         // For vectors and pairs, also insert the low bit of the pair
-        for (int i = 1; i < n_regs; i++)
+        // We always choose the high bit, then insert the low bits to match the actual register size.
+        if (lrg->is_scalable() && OptoReg::is_stack(lrg->reg())) { // stack
+          n_regs = lrg->scalable_reg_slots();
+        }
+        for (int i = 1; i < n_regs; i++) {
           lrg->Insert(OptoReg::add(reg,-i));
+        }
         lrg->set_mask_size(n_regs);
       } else {                  // Else fatproj
         // mask must be equal to fatproj bits, by definition
--- a/src/hotspot/share/opto/chaitin.hpp	Wed Sep 02 20:33:29 2020 -0700
+++ b/src/hotspot/share/opto/chaitin.hpp	Tue Sep 08 15:28:06 2020 +0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -114,7 +114,9 @@
     _msize_valid=1;
     if (_is_vector) {
       assert(!_fat_proj, "sanity");
-      assert(_mask.is_aligned_sets(_num_regs), "mask is not aligned, adjacent sets");
+      if (!(_is_scalable && OptoReg::is_stack(_reg))) {
+        assert(_mask.is_aligned_sets(_num_regs), "mask is not aligned, adjacent sets");
+      }
     } else if (_num_regs == 2 && !_fat_proj) {
       assert(_mask.is_aligned_pairs(), "mask is not aligned, adjacent pairs");
     }
@@ -137,14 +139,37 @@
   void Remove( OptoReg::Name reg ) { _mask.Remove(reg);  debug_only(_msize_valid=0;) }
   void clear_to_sets()  { _mask.clear_to_sets(_num_regs); debug_only(_msize_valid=0;) }
 
+private:
   // Number of registers this live range uses when it colors
-private:
   uint16_t _num_regs;           // 2 for Longs and Doubles, 1 for all else
                                 // except _num_regs is kill count for fat_proj
+
+  // For a scalable register, num_regs may not be the actual physical register size.
+  // We need the actual physical length of a scalable register when it is
+  // spilled. One slot is 32 bits wide.
+  uint _scalable_reg_slots;     // Actual scalable register length in slots.
+                                // Meaningful only when _is_scalable is true.
 public:
   int num_regs() const { return _num_regs; }
   void set_num_regs( int reg ) { assert( _num_regs == reg || !_num_regs, "" ); _num_regs = reg; }
 
+  uint scalable_reg_slots() { return _scalable_reg_slots; }
+  void set_scalable_reg_slots(uint slots) {
+    assert(_is_scalable, "scalable register");
+    assert(slots > 0, "slot count of scalable register is not valid");
+    _scalable_reg_slots = slots;
+  }
+
+  bool is_scalable() {
+#ifdef ASSERT
+    if (_is_scalable) {
+      // Should only be a vector for now, but it could also be a RegVMask in future.
+      assert(_is_vector && (_num_regs == RegMask::SlotsPerVecA), "unexpected scalable reg");
+    }
+#endif
+    return _is_scalable;
+  }
+
 private:
   // Number of physical registers this live range uses when it colors
   // Architecture and register-set dependent
@@ -170,6 +195,8 @@
   uint   _is_oop:1,             // Live-range holds an oop
          _is_float:1,           // True if in float registers
          _is_vector:1,          // True if in vector registers
+         _is_scalable:1,        // True if register size is scalable
+                                //      e.g. Arm SVE vector/predicate registers.
          _was_spilled1:1,       // True if prior spilling on def
          _was_spilled2:1,       // True if twice prior spilling on def
          _is_bound:1,           // live range starts life with no
--- a/src/hotspot/share/opto/classes.hpp	Wed Sep 02 20:33:29 2020 -0700
+++ b/src/hotspot/share/opto/classes.hpp	Tue Sep 08 15:28:06 2020 +0800
@@ -292,6 +292,10 @@
 shmacro(ShenandoahEnqueueBarrier)
 shmacro(ShenandoahLoadReferenceBarrier)
 macro(SCMemProj)
+macro(CopySignD)
+macro(CopySignF)
+macro(SignumD)
+macro(SignumF)
 macro(SqrtD)
 macro(SqrtF)
 macro(Start)
--- a/src/hotspot/share/opto/intrinsicnode.cpp	Wed Sep 02 20:33:29 2020 -0700
+++ b/src/hotspot/share/opto/intrinsicnode.cpp	Tue Sep 08 15:28:06 2020 +0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2015, 2020, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -98,3 +98,17 @@
   return bottom_type();
 }
 
+//------------------------------CopySign-----------------------------------------
+CopySignDNode* CopySignDNode::make(PhaseGVN& gvn, Node* in1, Node* in2) {
+  return new CopySignDNode(in1, in2, gvn.makecon(TypeD::ZERO));
+}
+
+//------------------------------Signum-------------------------------------------
+SignumDNode* SignumDNode::make(PhaseGVN& gvn, Node* in) {
+  return new SignumDNode(in, gvn.makecon(TypeD::ZERO), gvn.makecon(TypeD::ONE));
+}
+
+SignumFNode* SignumFNode::make(PhaseGVN& gvn, Node* in) {
+  return new SignumFNode(in, gvn.makecon(TypeF::ZERO), gvn.makecon(TypeF::ONE));
+}
+
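
The extra ZERO/ONE constant inputs wired in by the Signum factories let the backend match rules consume the boundary values directly. The scalar semantics the new nodes implement are those of Math.copySign and Math.signum; a libm-based sketch of the double flavor (assuming IEEE 754 doubles; this is not the C2 node code):

    #include <cmath>

    double copy_sign_d(double magnitude, double sign) {
      return std::copysign(magnitude, sign);
    }

    // signum: zeros and NaNs pass through, everything else maps to +/-1.0
    // carrying the input's sign.
    double signum_d(double x) {
      return (x == 0.0 || std::isnan(x)) ? x : std::copysign(1.0, x);
    }
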
--- a/src/hotspot/share/opto/intrinsicnode.hpp	Wed Sep 02 20:33:29 2020 -0700
+++ b/src/hotspot/share/opto/intrinsicnode.hpp	Tue Sep 08 15:28:06 2020 +0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2015, 2020, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -27,6 +27,7 @@
 
 #include "opto/node.hpp"
 #include "opto/opcodes.hpp"
+#include "opto/connode.hpp"
 
 
 //----------------------PartialSubtypeCheckNode--------------------------------
@@ -216,4 +217,44 @@
   virtual uint ideal_reg() const { return Op_RegI; }
 };
 
+//------------------------------CopySign-----------------------------------------
+class CopySignDNode : public Node {
+ protected:
+  CopySignDNode(Node* in1, Node* in2, Node* in3) : Node(0, in1, in2, in3) {}
+ public:
+  static CopySignDNode* make(PhaseGVN& gvn, Node* in1, Node* in2);
+  virtual int Opcode() const;
+  virtual const Type* bottom_type() const { return Type::DOUBLE; }
+  virtual uint ideal_reg() const { return Op_RegD; }
+};
+
+class CopySignFNode : public Node {
+ public:
+  CopySignFNode(Node* in1, Node* in2) : Node(0, in1, in2) {}
+  virtual int Opcode() const;
+  virtual const Type* bottom_type() const { return Type::FLOAT; }
+  virtual uint ideal_reg() const { return Op_RegF; }
+};
+
+//------------------------------Signum-------------------------------------------
+class SignumDNode : public Node {
+ protected:
+  SignumDNode(Node* in1, Node* in2, Node* in3) : Node(0, in1, in2, in3) {}
+ public:
+  static SignumDNode* make(PhaseGVN& gvn, Node* in);
+  virtual int Opcode() const;
+  virtual const Type* bottom_type() const { return Type::DOUBLE; }
+  virtual uint ideal_reg() const { return Op_RegD; }
+};
+
+class SignumFNode : public Node {
+ protected:
+  SignumFNode(Node* in1, Node* in2, Node* in3) : Node(0, in1, in2, in3) {}
+ public:
+  static SignumFNode* make(PhaseGVN& gvn, Node* in);
+  virtual int Opcode() const;
+  virtual const Type* bottom_type() const { return Type::FLOAT; }
+  virtual uint ideal_reg() const { return Op_RegF; }
+};
+
 #endif // SHARE_OPTO_INTRINSICNODE_HPP
--- a/src/hotspot/share/opto/library_call.cpp	Wed Sep 02 20:33:29 2020 -0700
+++ b/src/hotspot/share/opto/library_call.cpp	Tue Sep 08 15:28:06 2020 +0800
@@ -264,7 +264,11 @@
   case vmIntrinsics::_dexp:
   case vmIntrinsics::_dlog:
   case vmIntrinsics::_dlog10:
-  case vmIntrinsics::_dpow:                     return inline_math_native(intrinsic_id());
+  case vmIntrinsics::_dpow:
+  case vmIntrinsics::_dcopySign:
+  case vmIntrinsics::_fcopySign:
+  case vmIntrinsics::_dsignum:
+  case vmIntrinsics::_fsignum:                  return inline_math_native(intrinsic_id());
 
   case vmIntrinsics::_min:
   case vmIntrinsics::_max:                      return inline_min_max(intrinsic_id());
@@ -1567,6 +1571,8 @@
   case vmIntrinsics::_ceil:   n = RoundDoubleModeNode::make(_gvn, arg, RoundDoubleModeNode::rmode_ceil); break;
   case vmIntrinsics::_floor:  n = RoundDoubleModeNode::make(_gvn, arg, RoundDoubleModeNode::rmode_floor); break;
   case vmIntrinsics::_rint:   n = RoundDoubleModeNode::make(_gvn, arg, RoundDoubleModeNode::rmode_rint); break;
+  case vmIntrinsics::_dcopySign: n = CopySignDNode::make(_gvn, arg, round_double_node(argument(2))); break;
+  case vmIntrinsics::_dsignum: n = SignumDNode::make(_gvn, arg); break;
   default:  fatal_unexpected_iid(id);  break;
   }
   set_result(_gvn.transform(n));
@@ -1584,6 +1590,8 @@
   case vmIntrinsics::_fabs:   n = new AbsFNode(                arg);  break;
   case vmIntrinsics::_iabs:   n = new AbsINode(                arg);  break;
   case vmIntrinsics::_labs:   n = new AbsLNode(                arg);  break;
+  case vmIntrinsics::_fcopySign: n = new CopySignFNode(arg, argument(1)); break;
+  case vmIntrinsics::_fsignum: n = SignumFNode::make(_gvn, arg); break;
   default:  fatal_unexpected_iid(id);  break;
   }
   set_result(_gvn.transform(n));
@@ -1668,6 +1676,11 @@
   }
 #undef FN_PTR
 
+  case vmIntrinsics::_dcopySign: return inline_double_math(id);
+  case vmIntrinsics::_fcopySign: return inline_math(id);
+  case vmIntrinsics::_dsignum: return inline_double_math(id);
+  case vmIntrinsics::_fsignum: return inline_math(id);
+
    // These intrinsics are not yet correctly implemented
   case vmIntrinsics::_datan2:
     return false;
--- a/src/hotspot/share/opto/matcher.cpp	Wed Sep 02 20:33:29 2020 -0700
+++ b/src/hotspot/share/opto/matcher.cpp	Tue Sep 08 15:28:06 2020 +0800
@@ -88,6 +88,7 @@
   idealreg2spillmask  [Op_RegF] = NULL;
   idealreg2spillmask  [Op_RegD] = NULL;
   idealreg2spillmask  [Op_RegP] = NULL;
+  idealreg2spillmask  [Op_VecA] = NULL;
   idealreg2spillmask  [Op_VecS] = NULL;
   idealreg2spillmask  [Op_VecD] = NULL;
   idealreg2spillmask  [Op_VecX] = NULL;
@@ -101,6 +102,7 @@
   idealreg2debugmask  [Op_RegF] = NULL;
   idealreg2debugmask  [Op_RegD] = NULL;
   idealreg2debugmask  [Op_RegP] = NULL;
+  idealreg2debugmask  [Op_VecA] = NULL;
   idealreg2debugmask  [Op_VecS] = NULL;
   idealreg2debugmask  [Op_VecD] = NULL;
   idealreg2debugmask  [Op_VecX] = NULL;
@@ -114,6 +116,7 @@
   idealreg2mhdebugmask[Op_RegF] = NULL;
   idealreg2mhdebugmask[Op_RegD] = NULL;
   idealreg2mhdebugmask[Op_RegP] = NULL;
+  idealreg2mhdebugmask[Op_VecA] = NULL;
   idealreg2mhdebugmask[Op_VecS] = NULL;
   idealreg2mhdebugmask[Op_VecD] = NULL;
   idealreg2mhdebugmask[Op_VecX] = NULL;
@@ -427,7 +430,7 @@
   return rms;
 }
 
-#define NOF_STACK_MASKS (3*11)
+#define NOF_STACK_MASKS (3*12)
 
 // Create the initial stack mask used by values spilling to the stack.
 // Disallow any debug info in outgoing argument areas by setting the
@@ -463,23 +466,26 @@
   idealreg2mhdebugmask[Op_RegD] = &rms[16];
   idealreg2mhdebugmask[Op_RegP] = &rms[17];
 
-  idealreg2spillmask  [Op_VecS] = &rms[18];
-  idealreg2spillmask  [Op_VecD] = &rms[19];
-  idealreg2spillmask  [Op_VecX] = &rms[20];
-  idealreg2spillmask  [Op_VecY] = &rms[21];
-  idealreg2spillmask  [Op_VecZ] = &rms[22];
+  idealreg2spillmask  [Op_VecA] = &rms[18];
+  idealreg2spillmask  [Op_VecS] = &rms[19];
+  idealreg2spillmask  [Op_VecD] = &rms[20];
+  idealreg2spillmask  [Op_VecX] = &rms[21];
+  idealreg2spillmask  [Op_VecY] = &rms[22];
+  idealreg2spillmask  [Op_VecZ] = &rms[23];
 
-  idealreg2debugmask  [Op_VecS] = &rms[23];
-  idealreg2debugmask  [Op_VecD] = &rms[24];
-  idealreg2debugmask  [Op_VecX] = &rms[25];
-  idealreg2debugmask  [Op_VecY] = &rms[26];
-  idealreg2debugmask  [Op_VecZ] = &rms[27];
+  idealreg2debugmask  [Op_VecA] = &rms[24];
+  idealreg2debugmask  [Op_VecS] = &rms[25];
+  idealreg2debugmask  [Op_VecD] = &rms[26];
+  idealreg2debugmask  [Op_VecX] = &rms[27];
+  idealreg2debugmask  [Op_VecY] = &rms[28];
+  idealreg2debugmask  [Op_VecZ] = &rms[29];
 
-  idealreg2mhdebugmask[Op_VecS] = &rms[28];
-  idealreg2mhdebugmask[Op_VecD] = &rms[29];
-  idealreg2mhdebugmask[Op_VecX] = &rms[30];
-  idealreg2mhdebugmask[Op_VecY] = &rms[31];
-  idealreg2mhdebugmask[Op_VecZ] = &rms[32];
+  idealreg2mhdebugmask[Op_VecA] = &rms[30];
+  idealreg2mhdebugmask[Op_VecS] = &rms[31];
+  idealreg2mhdebugmask[Op_VecD] = &rms[32];
+  idealreg2mhdebugmask[Op_VecX] = &rms[33];
+  idealreg2mhdebugmask[Op_VecY] = &rms[34];
+  idealreg2mhdebugmask[Op_VecZ] = &rms[35];
 
   OptoReg::Name i;
 
@@ -506,6 +512,7 @@
   // Keep spill masks aligned.
   aligned_stack_mask.clear_to_pairs();
   assert(aligned_stack_mask.is_AllStack(), "should be infinite stack");
+  RegMask scalable_stack_mask = aligned_stack_mask;
 
   *idealreg2spillmask[Op_RegP] = *idealreg2regmask[Op_RegP];
 #ifdef _LP64
@@ -591,28 +598,47 @@
     *idealreg2spillmask[Op_VecZ] = RegMask::Empty;
   }
 
-   if (UseFPUForSpilling) {
-     // This mask logic assumes that the spill operations are
-     // symmetric and that the registers involved are the same size.
-     // On sparc for instance we may have to use 64 bit moves will
-     // kill 2 registers when used with F0-F31.
-     idealreg2spillmask[Op_RegI]->OR(*idealreg2regmask[Op_RegF]);
-     idealreg2spillmask[Op_RegF]->OR(*idealreg2regmask[Op_RegI]);
+  if (Matcher::supports_scalable_vector()) {
+    int k = 1;
+    OptoReg::Name in = OptoReg::add(_in_arg_limit, -1);
+    // Exclude the last input arg stack slots to avoid spilling vector registers
+    // there; otherwise vector spills could stomp over stack slots in the caller's frame.
+    for (; (in >= init_in) && (k < scalable_vector_reg_size(T_FLOAT)); k++) {
+      scalable_stack_mask.Remove(in);
+      in = OptoReg::add(in, -1);
+    }
+
+    // For VecA
+     scalable_stack_mask.clear_to_sets(RegMask::SlotsPerVecA);
+     assert(scalable_stack_mask.is_AllStack(), "should be infinite stack");
+    *idealreg2spillmask[Op_VecA] = *idealreg2regmask[Op_VecA];
+     idealreg2spillmask[Op_VecA]->OR(scalable_stack_mask);
+  } else {
+    *idealreg2spillmask[Op_VecA] = RegMask::Empty;
+  }
+
+  if (UseFPUForSpilling) {
+    // This mask logic assumes that the spill operations are
+    // symmetric and that the registers involved are the same size.
+    // On sparc for instance we may have to use 64 bit moves will
+    // kill 2 registers when used with F0-F31.
+    idealreg2spillmask[Op_RegI]->OR(*idealreg2regmask[Op_RegF]);
+    idealreg2spillmask[Op_RegF]->OR(*idealreg2regmask[Op_RegI]);
 #ifdef _LP64
-     idealreg2spillmask[Op_RegN]->OR(*idealreg2regmask[Op_RegF]);
-     idealreg2spillmask[Op_RegL]->OR(*idealreg2regmask[Op_RegD]);
-     idealreg2spillmask[Op_RegD]->OR(*idealreg2regmask[Op_RegL]);
-     idealreg2spillmask[Op_RegP]->OR(*idealreg2regmask[Op_RegD]);
+    idealreg2spillmask[Op_RegN]->OR(*idealreg2regmask[Op_RegF]);
+    idealreg2spillmask[Op_RegL]->OR(*idealreg2regmask[Op_RegD]);
+    idealreg2spillmask[Op_RegD]->OR(*idealreg2regmask[Op_RegL]);
+    idealreg2spillmask[Op_RegP]->OR(*idealreg2regmask[Op_RegD]);
 #else
-     idealreg2spillmask[Op_RegP]->OR(*idealreg2regmask[Op_RegF]);
+    idealreg2spillmask[Op_RegP]->OR(*idealreg2regmask[Op_RegF]);
 #ifdef ARM
-     // ARM has support for moving 64bit values between a pair of
-     // integer registers and a double register
-     idealreg2spillmask[Op_RegL]->OR(*idealreg2regmask[Op_RegD]);
-     idealreg2spillmask[Op_RegD]->OR(*idealreg2regmask[Op_RegL]);
+    // ARM has support for moving 64bit values between a pair of
+    // integer registers and a double register
+    idealreg2spillmask[Op_RegL]->OR(*idealreg2regmask[Op_RegD]);
+    idealreg2spillmask[Op_RegD]->OR(*idealreg2regmask[Op_RegL]);
 #endif
 #endif
-   }
+  }
 
   // Make up debug masks.  Any spill slot plus callee-save (SOE) registers.
   // Caller-save (SOC, AS) registers are assumed to be trashable by the various
@@ -624,6 +650,7 @@
   *idealreg2debugmask  [Op_RegD] = *idealreg2spillmask[Op_RegD];
   *idealreg2debugmask  [Op_RegP] = *idealreg2spillmask[Op_RegP];
 
+  *idealreg2debugmask  [Op_VecA] = *idealreg2spillmask[Op_VecA];
   *idealreg2debugmask  [Op_VecS] = *idealreg2spillmask[Op_VecS];
   *idealreg2debugmask  [Op_VecD] = *idealreg2spillmask[Op_VecD];
   *idealreg2debugmask  [Op_VecX] = *idealreg2spillmask[Op_VecX];
@@ -637,6 +664,7 @@
   *idealreg2mhdebugmask[Op_RegD] = *idealreg2spillmask[Op_RegD];
   *idealreg2mhdebugmask[Op_RegP] = *idealreg2spillmask[Op_RegP];
 
+  *idealreg2mhdebugmask[Op_VecA] = *idealreg2spillmask[Op_VecA];
   *idealreg2mhdebugmask[Op_VecS] = *idealreg2spillmask[Op_VecS];
   *idealreg2mhdebugmask[Op_VecD] = *idealreg2spillmask[Op_VecD];
   *idealreg2mhdebugmask[Op_VecX] = *idealreg2spillmask[Op_VecX];
@@ -656,6 +684,7 @@
   idealreg2debugmask[Op_RegD]->SUBTRACT(*caller_save_mask);
   idealreg2debugmask[Op_RegP]->SUBTRACT(*caller_save_mask);
 
+  idealreg2debugmask[Op_VecA]->SUBTRACT(*caller_save_mask);
   idealreg2debugmask[Op_VecS]->SUBTRACT(*caller_save_mask);
   idealreg2debugmask[Op_VecD]->SUBTRACT(*caller_save_mask);
   idealreg2debugmask[Op_VecX]->SUBTRACT(*caller_save_mask);
@@ -669,6 +698,7 @@
   idealreg2mhdebugmask[Op_RegD]->SUBTRACT(*mh_caller_save_mask);
   idealreg2mhdebugmask[Op_RegP]->SUBTRACT(*mh_caller_save_mask);
 
+  idealreg2mhdebugmask[Op_VecA]->SUBTRACT(*mh_caller_save_mask);
   idealreg2mhdebugmask[Op_VecS]->SUBTRACT(*mh_caller_save_mask);
   idealreg2mhdebugmask[Op_VecD]->SUBTRACT(*mh_caller_save_mask);
   idealreg2mhdebugmask[Op_VecX]->SUBTRACT(*mh_caller_save_mask);
@@ -929,6 +959,7 @@
   idealreg2regmask[Op_RegF] = regmask_for_ideal_register(Op_RegF, ret);
   idealreg2regmask[Op_RegD] = regmask_for_ideal_register(Op_RegD, ret);
   idealreg2regmask[Op_RegL] = regmask_for_ideal_register(Op_RegL, ret);
+  idealreg2regmask[Op_VecA] = regmask_for_ideal_register(Op_VecA, ret);
   idealreg2regmask[Op_VecS] = regmask_for_ideal_register(Op_VecS, ret);
   idealreg2regmask[Op_VecD] = regmask_for_ideal_register(Op_VecD, ret);
   idealreg2regmask[Op_VecX] = regmask_for_ideal_register(Op_VecX, ret);
@@ -1614,7 +1645,6 @@
     }
   }
 
-
   // Call DFA to match this node, and return
   svec->DFA( n->Opcode(), n );
 
@@ -2323,6 +2353,23 @@
       n->del_req(3);
       break;
     }
+    case Op_MulAddS2I: {
+      Node* pair1 = new BinaryNode(n->in(1), n->in(2));
+      Node* pair2 = new BinaryNode(n->in(3), n->in(4));
+      n->set_req(1, pair1);
+      n->set_req(2, pair2);
+      n->del_req(4);
+      n->del_req(3);
+      break;
+    }
+    case Op_CopySignD:
+    case Op_SignumF:
+    case Op_SignumD: {
+      Node* pair = new BinaryNode(n->in(2), n->in(3));
+      n->set_req(2, pair);
+      n->del_req(3);
+      break;
+    }
     case Op_VectorBlend:
     case Op_VectorInsert: {
       Node* pair = new BinaryNode(n->in(1), n->in(2));
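
The matcher's DFA only sees binary shapes, so ideal nodes with three or four data inputs are reshaped before matching: trailing inputs are folded into BinaryNode pairs, which is exactly what the new Op_CopySignD/Op_SignumF/Op_SignumD case does. A toy model of the reshaping (a stand-in node type, not HotSpot's Node API; slot 0 is reserved, matching the 1-based req slots above):

    #include <string>
    #include <vector>

    struct Toy {
      std::string op;
      std::vector<Toy*> in;   // in[0] reserved, data inputs start at in[1]
    };

    // (Signum x zero one) becomes (Signum x (Binary zero one)).
    void pack_ternary(Toy* n) {
      Toy* pair = new Toy{"Binary", {nullptr, n->in[2], n->in[3]}};
      n->in[2] = pair;
      n->in.pop_back();       // del_req(3)
    }
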
@@ -2337,23 +2384,14 @@
       n->del_req(MemNode::ValueIn+1);
       break;
     }
-    case Op_MulAddS2I: {
-      Node* pair1 = new BinaryNode(n->in(1), n->in(2));
-      Node* pair2 = new BinaryNode(n->in(3), n->in(4));
-      n->set_req(1, pair1);
-      n->set_req(2, pair2);
-      n->del_req(4);
-      n->del_req(3);
-      break;
-    }
     case Op_VectorMaskCmp: {
       n->set_req(1, new BinaryNode(n->in(1), n->in(2)));
       n->set_req(2, n->in(3));
       n->del_req(3);
       break;
-    }
     default:
       break;
+    }
   }
 }
 
@@ -2483,7 +2521,7 @@
 const RegMask* Matcher::regmask_for_ideal_register(uint ideal_reg, Node* ret) {
   const Type* t = Type::mreg2type[ideal_reg];
   if (t == NULL) {
-    assert(ideal_reg >= Op_VecS && ideal_reg <= Op_VecZ, "not a vector: %d", ideal_reg);
+    assert(ideal_reg >= Op_VecA && ideal_reg <= Op_VecZ, "not a vector: %d", ideal_reg);
     return NULL; // not supported
   }
   Node* fp  = ret->in(TypeFunc::FramePtr);
@@ -2500,6 +2538,7 @@
     case Op_RegD: spill = new LoadDNode(NULL, mem, fp, atp, t,                 mo); break;
     case Op_RegL: spill = new LoadLNode(NULL, mem, fp, atp, t->is_long(),      mo); break;
 
+    case Op_VecA: // fall-through
     case Op_VecS: // fall-through
     case Op_VecD: // fall-through
     case Op_VecX: // fall-through
--- a/src/hotspot/share/opto/matcher.hpp	Wed Sep 02 20:33:29 2020 -0700
+++ b/src/hotspot/share/opto/matcher.hpp	Tue Sep 08 15:28:06 2020 +0800
@@ -338,6 +338,10 @@
             Matcher::min_vector_size(bt) <= size);
   }
 
+  static const bool supports_scalable_vector();
+  // Actual max scalable vector register length, in elements of the given type.
+  static const int scalable_vector_reg_size(const BasicType bt);
+
   // Vector ideal reg
   static const uint vector_ideal_reg(int len);
 
--- a/src/hotspot/share/opto/opcodes.cpp	Wed Sep 02 20:33:29 2020 -0700
+++ b/src/hotspot/share/opto/opcodes.cpp	Tue Sep 08 15:28:06 2020 +0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1998, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1998, 2020, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -38,12 +38,14 @@
   "RegF",
   "RegD",
   "RegL",
-  "RegFlags",
+  "VecA",
   "VecS",
   "VecD",
   "VecX",
   "VecY",
   "VecZ",
+  "RegVMask",
+  "RegFlags",
   "_last_machine_leaf",
 #include "classes.hpp"
   "_last_class_name",
--- a/src/hotspot/share/opto/opcodes.hpp	Wed Sep 02 20:33:29 2020 -0700
+++ b/src/hotspot/share/opto/opcodes.hpp	Tue Sep 08 15:28:06 2020 +0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -37,11 +37,13 @@
   macro(RegF)                   // Machine float   register
   macro(RegD)                   // Machine double  register
   macro(RegL)                   // Machine long    register
+  macro(VecA)                   // Machine vectora register
   macro(VecS)                   // Machine vectors register
   macro(VecD)                   // Machine vectord register
   macro(VecX)                   // Machine vectorx register
   macro(VecY)                   // Machine vectory register
   macro(VecZ)                   // Machine vectorz register
+  macro(RegVMask)               // Vector mask/predicate register
   macro(RegFlags)               // Machine flags   register
   _last_machine_leaf,           // Split between regular opcodes and machine
 #include "classes.hpp"
--- a/src/hotspot/share/opto/postaloc.cpp	Wed Sep 02 20:33:29 2020 -0700
+++ b/src/hotspot/share/opto/postaloc.cpp	Tue Sep 08 15:28:06 2020 +0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1998, 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1998, 2020, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -266,9 +266,9 @@
   Node *val = skip_copies(n->in(k));
   if (val == x) return blk_adjust; // No progress?
 
-  int n_regs = RegMask::num_registers(val->ideal_reg());
   uint val_idx = _lrg_map.live_range_id(val);
   OptoReg::Name val_reg = lrgs(val_idx).reg();
+  int n_regs = RegMask::num_registers(val->ideal_reg(), lrgs(val_idx));
 
   // See if it happens to already be in the correct register!
   // (either Phi's direct register, or the common case of the name
@@ -305,8 +305,26 @@
     }
 
     Node *vv = value[reg];
+    // For a scalable register, the number of registers may differ between
+    // "val_reg" and "reg", e.g. when "val" resides in a register
+    // but "reg" is located on the stack.
+    if (lrgs(val_idx).is_scalable()) {
+      assert(val->ideal_reg() == Op_VecA, "scalable vector register");
+      if (OptoReg::is_stack(reg)) {
+        n_regs = lrgs(val_idx).scalable_reg_slots();
+      } else {
+        n_regs = RegMask::SlotsPerVecA;
+      }
+    }
     if (n_regs > 1) { // Doubles and vectors check for aligned-adjacent set
-      uint last = (n_regs-1); // Looking for the last part of a set
+      uint last;
+      if (lrgs(val_idx).is_scalable()) {
+        assert(val->ideal_reg() == Op_VecA, "scalable vector register");
+        // For a scalable vector register, the regmask is always aligned to SlotsPerVecA bits
+        last = RegMask::SlotsPerVecA - 1;
+      } else {
+        last = (n_regs-1); // Looking for the last part of a set
+      }
       if ((reg&last) != last) continue; // Wrong part of a set
       if (!register_contains_value(vv, reg, n_regs, value)) continue; // Different value
     }
@@ -591,7 +609,7 @@
       uint k;
       Node *phi = block->get_node(j);
       uint pidx = _lrg_map.live_range_id(phi);
-      OptoReg::Name preg = lrgs(_lrg_map.live_range_id(phi)).reg();
+      OptoReg::Name preg = lrgs(pidx).reg();
 
       // Remove copies remaining on edges.  Check for junk phi.
       Node *u = NULL;
@@ -619,7 +637,7 @@
       if( pidx ) {
         value.map(preg,phi);
         regnd.map(preg,phi);
-        int n_regs = RegMask::num_registers(phi->ideal_reg());
+        int n_regs = RegMask::num_registers(phi->ideal_reg(), lrgs(pidx));
         for (int l = 1; l < n_regs; l++) {
           OptoReg::Name preg_lo = OptoReg::add(preg,-l);
           value.map(preg_lo,phi);
@@ -663,7 +681,7 @@
             regnd.map(ureg,   def);
             // Record other half of doubles
             uint def_ideal_reg = def->ideal_reg();
-            int n_regs = RegMask::num_registers(def_ideal_reg);
+            int n_regs = RegMask::num_registers(def_ideal_reg, lrgs(_lrg_map.live_range_id(def)));
             for (int l = 1; l < n_regs; l++) {
               OptoReg::Name ureg_lo = OptoReg::add(ureg,-l);
               if (!value[ureg_lo] &&
@@ -707,7 +725,7 @@
       }
 
       uint n_ideal_reg = n->ideal_reg();
-      int n_regs = RegMask::num_registers(n_ideal_reg);
+      int n_regs = RegMask::num_registers(n_ideal_reg, lrgs(lidx));
       if (n_regs == 1) {
         // If Node 'n' does not change the value mapped by the register,
         // then 'n' is a useless copy.  Do not update the register->node
--- a/src/hotspot/share/opto/regmask.cpp	Wed Sep 02 20:33:29 2020 -0700
+++ b/src/hotspot/share/opto/regmask.cpp	Tue Sep 08 15:28:06 2020 +0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -24,6 +24,7 @@
 
 #include "precompiled.hpp"
 #include "opto/ad.hpp"
+#include "opto/chaitin.hpp"
 #include "opto/compile.hpp"
 #include "opto/matcher.hpp"
 #include "opto/node.hpp"
@@ -59,30 +60,47 @@
 
 //=============================================================================
 bool RegMask::is_vector(uint ireg) {
-  return (ireg == Op_VecS || ireg == Op_VecD ||
+  return (ireg == Op_VecA || ireg == Op_VecS || ireg == Op_VecD ||
           ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ );
 }
 
 int RegMask::num_registers(uint ireg) {
     switch(ireg) {
       case Op_VecZ:
-        return 16;
+        return SlotsPerVecZ;
       case Op_VecY:
-        return 8;
+        return SlotsPerVecY;
       case Op_VecX:
-        return 4;
+        return SlotsPerVecX;
       case Op_VecD:
+        return SlotsPerVecD;
       case Op_RegD:
       case Op_RegL:
 #ifdef _LP64
       case Op_RegP:
 #endif
         return 2;
+      case Op_VecA:
+        assert(Matcher::supports_scalable_vector(), "does not support scalable vector");
+        return SlotsPerVecA;
     }
     // Op_VecS and the rest ideal registers.
     return 1;
 }
 
+int RegMask::num_registers(uint ireg, LRG &lrg) {
+  int n_regs = num_registers(ireg);
+
+  // "assigned" is the OptoReg selected by the register allocator.
+  OptoReg::Name assigned = lrg.reg();
+  assert(OptoReg::is_valid(assigned), "should be valid opto register");
+
+  if (lrg.is_scalable() && OptoReg::is_stack(assigned)) {
+    n_regs = lrg.scalable_reg_slots();
+  }
+  return n_regs;
+}
+
 // Clear out partial bits; leave only bit pairs
 void RegMask::clear_to_pairs() {
   assert(valid_watermarks(), "sanity");
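
The new two-argument num_registers() is the single decision point for the register/stack asymmetry: once the allocator has picked a location, a stack-bound scalable live range is measured by the hardware width, anything else by the mask width. Reduced to its decision (illustrative parameters, not the HotSpot signature):

    // Spill/occupancy slot count for an assigned live range.
    int slots_for(int mask_width, int hw_slots, bool scalable, bool on_stack) {
      return (scalable && on_stack) ? hw_slots : mask_width;
    }
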
@@ -157,6 +175,16 @@
   }
   return false;
 }
+// Check whether a given register number, together with its size, is valid
+// for the current regmask, where reg is the highest slot of the set.
+bool RegMask::is_valid_reg(OptoReg::Name reg, const int size) const {
+  for (int i = 0; i < size; i++) {
+    if (!Member(reg - i)) {
+      return false;
+    }
+  }
+  return true;
+}
 
 // Only indices that are powers of 2 are accessed, so index 3 is only filled in for storage.
 static int low_bits[5] = { 0x55555555, 0x11111111, 0x01010101, 0x00000000, 0x00010001 };
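
The new is_valid_reg() restated outside of RegMask, for clarity (std::bitset stands in for the mask words; not HotSpot code):

    #include <bitset>

    // All `size` slots ending at `reg` (the highest index of the set) must be
    // present in the mask.
    bool is_valid_reg(const std::bitset<128>& mask, int reg, int size) {
      for (int i = 0; i < size; i++) {
        if (!mask.test(reg - i)) return false;
      }
      return true;
    }
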
@@ -164,8 +192,13 @@
 // Find the lowest-numbered register set in the mask.  Return the
 // HIGHEST register number in the set, or BAD if no sets.
 // Works also for size 1.
-OptoReg::Name RegMask::find_first_set(const int size) const {
-  assert(is_aligned_sets(size), "mask is not aligned, adjacent sets");
+OptoReg::Name RegMask::find_first_set(LRG &lrg, const int size) const {
+  if (lrg.is_scalable()) {
+    // For a scalable vector register, the regmask is aligned to SlotsPerVecA bits.
+    assert(is_aligned_sets(SlotsPerVecA), "mask is not aligned, adjacent sets");
+  } else {
+    assert(is_aligned_sets(size), "mask is not aligned, adjacent sets");
+  }
   assert(valid_watermarks(), "sanity");
   for (int i = _lwm; i <= _hwm; i++) {
     if (_A[i]) {                // Found some bits
@@ -245,12 +278,16 @@
     while (bits) {              // Check bits for pairing
       int bit = bits & -bits;   // Extract low bit
       // A low bit outside low_bits_mask means the set is mis-aligned.
-      if ((bit & low_bits_mask) == 0) return false;
+      if ((bit & low_bits_mask) == 0) {
+        return false;
+      }
       // Do extra work since (bit << size) may overflow.
       int hi_bit = bit << (size-1); // high bit
       int set = hi_bit + ((hi_bit-1) & ~(bit-1));
       // Check for aligned adjacent bits in this set
-      if ((bits & set) != set) return false;
+      if ((bits & set) != set) {
+        return false;
+      }
       bits -= set;  // Remove this set
     }
   }
--- a/src/hotspot/share/opto/regmask.hpp	Wed Sep 02 20:33:29 2020 -0700
+++ b/src/hotspot/share/opto/regmask.hpp	Tue Sep 08 15:28:06 2020 +0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -30,6 +30,8 @@
 #include "utilities/count_leading_zeros.hpp"
 #include "utilities/count_trailing_zeros.hpp"
 
+class LRG;
+
 //-------------Non-zero bit search methods used by RegMask---------------------
 // Find lowest 1, undefined if empty/0
 static int find_lowest_bit(uint32_t mask) {
@@ -91,11 +93,13 @@
   // requirement is internal to the allocator, and independent of any
   // particular platform.
   enum { SlotsPerLong = 2,
+         SlotsPerVecA = 8,
          SlotsPerVecS = 1,
          SlotsPerVecD = 2,
          SlotsPerVecX = 4,
          SlotsPerVecY = 8,
-         SlotsPerVecZ = 16 };
+         SlotsPerVecZ = 16 };
 
   // A constructor only used by the ADLC output.  All mask fields are filled
   // in directly.  Calls to this look something like RM(1,2,3,4);
@@ -219,10 +223,14 @@
   // Test for a single adjacent set of ideal register's size.
   bool is_bound(uint ireg) const;
 
+  // Check whether a given register number, together with its size, is valid
+  // for the current regmask, where reg is the highest slot of the set.
+  bool is_valid_reg(OptoReg::Name reg, const int size) const;
+
   // Find the lowest-numbered register set in the mask.  Return the
   // HIGHEST register number in the set, or BAD if no sets.
   // Assert that the mask contains only bit sets.
-  OptoReg::Name find_first_set(const int size) const;
+  OptoReg::Name find_first_set(LRG &lrg, const int size) const;
 
   // Clear out partial bits; leave only aligned adjacent bit sets of size.
   void clear_to_sets(const int size);
@@ -236,6 +244,7 @@
 
   static bool is_vector(uint ireg);
   static int num_registers(uint ireg);
+  static int num_registers(uint ireg, LRG &lrg);
 
   // Fast overlap test.  Non-zero if any registers in common.
   int overlap(const RegMask &rm) const {
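
SlotsPerVecA = 8 fixes the mask-level footprint of a VecA at 8 32-bit slots (256 bits) regardless of the hardware vector length; longer registers are accounted for only at spill time through scalable_reg_slots(). A worked example of the slot arithmetic (assuming a 512-bit SVE implementation):

    // 32-bit slots needed to spill a scalable register of a given bit width.
    int spill_slots(int vector_bits) { return vector_bits / 32; }
    // spill_slots(512) == 16, while the mask-level SlotsPerVecA stays 8.
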
--- a/src/hotspot/share/opto/superword.cpp	Wed Sep 02 20:33:29 2020 -0700
+++ b/src/hotspot/share/opto/superword.cpp	Tue Sep 08 15:28:06 2020 +0800
@@ -94,8 +94,11 @@
 //------------------------------transform_loop---------------------------
 void SuperWord::transform_loop(IdealLoopTree* lpt, bool do_optimization) {
   assert(UseSuperWord, "should be");
-  // Do vectors exist on this architecture?
-  if (Matcher::vector_width_in_bytes(T_BYTE) < 2) return;
+  // SuperWord only works with power of two vector sizes.
+  int vector_width = Matcher::vector_width_in_bytes(T_BYTE);
+  if (vector_width < 2 || !is_power_of_2(vector_width)) {
+    return;
+  }
 
   assert(lpt->_head->is_CountedLoop(), "must be");
   CountedLoopNode *cl = lpt->_head->as_CountedLoop();
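
SVE permits hardware vector lengths that are multiples of 128 bits without being powers of two (e.g. 384-bit), while SuperWord's packing logic assumes power-of-two lane counts throughout; hence the entry check is tightened from "at least 2 bytes" to "at least 2 bytes and a power of two". The guard in isolation:

    // SLP packing requires power-of-two vector widths; a 384-bit (48-byte)
    // implementation must bail out here.
    static bool slp_vector_width_ok(int width_bytes) {
      return width_bytes >= 2 && (width_bytes & (width_bytes - 1)) == 0;
    }
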
--- a/src/hotspot/share/opto/type.cpp	Wed Sep 02 20:33:29 2020 -0700
+++ b/src/hotspot/share/opto/type.cpp	Tue Sep 08 15:28:06 2020 +0800
@@ -74,6 +74,7 @@
   { Bad,             T_ILLEGAL,    "vectory:",      false, 0,                    relocInfo::none          },  // VectorY
   { Bad,             T_ILLEGAL,    "vectorz:",      false, 0,                    relocInfo::none          },  // VectorZ
 #else // all other
+  { Bad,             T_ILLEGAL,    "vectora:",      false, Op_VecA,              relocInfo::none          },  // VectorA
   { Bad,             T_ILLEGAL,    "vectors:",      false, Op_VecS,              relocInfo::none          },  // VectorS
   { Bad,             T_ILLEGAL,    "vectord:",      false, Op_VecD,              relocInfo::none          },  // VectorD
   { Bad,             T_ILLEGAL,    "vectorx:",      false, Op_VecX,              relocInfo::none          },  // VectorX
@@ -654,6 +655,10 @@
   // get_zero_type() should not happen for T_CONFLICT
   _zero_type[T_CONFLICT]= NULL;
 
+  if (Matcher::supports_scalable_vector()) {
+    TypeVect::VECTA = TypeVect::make(T_BYTE, Matcher::scalable_vector_reg_size(T_BYTE));
+  }
+
   // Vector predefined types, it needs initialized _const_basic_type[].
   if (Matcher::vector_size_supported(T_BYTE,4)) {
     TypeVect::VECTS = TypeVect::make(T_BYTE,4);
@@ -670,6 +675,8 @@
   if (Matcher::vector_size_supported(T_FLOAT,16)) {
     TypeVect::VECTZ = TypeVect::make(T_FLOAT,16);
   }
+
+  mreg2type[Op_VecA] = TypeVect::VECTA;
   mreg2type[Op_VecS] = TypeVect::VECTS;
   mreg2type[Op_VecD] = TypeVect::VECTD;
   mreg2type[Op_VecX] = TypeVect::VECTX;
@@ -989,6 +996,7 @@
 
   Bad,          // Tuple - handled in v-call
   Bad,          // Array - handled in v-call
+  Bad,          // VectorA - handled in v-call
   Bad,          // VectorS - handled in v-call
   Bad,          // VectorD - handled in v-call
   Bad,          // VectorX - handled in v-call
@@ -1897,7 +1905,6 @@
 const TypeTuple *TypeTuple::INT_CC_PAIR;
 const TypeTuple *TypeTuple::LONG_CC_PAIR;
 
-
 //------------------------------make-------------------------------------------
 // Make a TypeTuple from the range of a method signature
 const TypeTuple *TypeTuple::make_range(ciSignature* sig) {
@@ -2268,6 +2275,7 @@
 
 //==============================TypeVect=======================================
 // Convenience common pre-built types.
+const TypeVect *TypeVect::VECTA = NULL; // vector length agnostic
 const TypeVect *TypeVect::VECTS = NULL; //  32-bit vectors
 const TypeVect *TypeVect::VECTD = NULL; //  64-bit vectors
 const TypeVect *TypeVect::VECTX = NULL; // 128-bit vectors
@@ -2278,10 +2286,11 @@
 const TypeVect* TypeVect::make(const Type *elem, uint length) {
   BasicType elem_bt = elem->array_element_basic_type();
   assert(is_java_primitive(elem_bt), "only primitive types in vector");
-  assert(length > 1 && is_power_of_2(length), "vector length is power of 2");
   assert(Matcher::vector_size_supported(elem_bt, length), "length in range");
   int size = length * type2aelembytes(elem_bt);
   switch (Matcher::vector_ideal_reg(size)) {
+  case Op_VecA:
+    return (TypeVect*)(new TypeVectA(elem, length))->hashcons();
   case Op_VecS:
     return (TypeVect*)(new TypeVectS(elem, length))->hashcons();
   case Op_RegL:
@@ -2313,7 +2322,7 @@
 
   default:                      // All else is a mistake
     typerr(t);
-
+  case VectorA:
   case VectorS:
   case VectorD:
   case VectorX:
@@ -2368,6 +2377,8 @@
 #ifndef PRODUCT
 void TypeVect::dump2(Dict &d, uint depth, outputStream *st) const {
   switch (base()) {
+  case VectorA:
+    st->print("vectora["); break;
   case VectorS:
     st->print("vectors["); break;
   case VectorD:
--- a/src/hotspot/share/opto/type.hpp	Wed Sep 02 20:33:29 2020 -0700
+++ b/src/hotspot/share/opto/type.hpp	Tue Sep 08 15:28:06 2020 +0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -53,6 +53,7 @@
 class   TypeAry;
 class   TypeTuple;
 class   TypeVect;
+class     TypeVectA;
 class     TypeVectS;
 class     TypeVectD;
 class     TypeVectX;
@@ -87,6 +88,7 @@
 
     Tuple,                      // Method signature or object layout
     Array,                      // Array types
+    VectorA,                    // (Scalable) Vector types, vector-length agnostic
     VectorS,                    //  32bit Vector types
     VectorD,                    //  64bit Vector types
     VectorX,                    // 128bit Vector types
@@ -765,6 +767,7 @@
   virtual const Type *xmeet( const Type *t) const;
   virtual const Type *xdual() const;     // Compute dual right now.
 
+  static const TypeVect *VECTA;
   static const TypeVect *VECTS;
   static const TypeVect *VECTD;
   static const TypeVect *VECTX;
@@ -776,6 +779,11 @@
 #endif
 };
 
+class TypeVectA : public TypeVect {
+  friend class TypeVect;
+  TypeVectA(const Type* elem, uint length) : TypeVect(VectorA, elem, length) {}
+};
+
 class TypeVectS : public TypeVect {
   friend class TypeVect;
   TypeVectS(const Type* elem, uint length) : TypeVect(VectorS, elem, length) {}
@@ -1630,12 +1638,12 @@
 }
 
 inline const TypeVect *Type::is_vect() const {
-  assert( _base >= VectorS && _base <= VectorZ, "Not a Vector" );
+  assert( _base >= VectorA && _base <= VectorZ, "Not a Vector" );
   return (TypeVect*)this;
 }
 
 inline const TypeVect *Type::isa_vect() const {
-  return (_base >= VectorS && _base <= VectorZ) ? (TypeVect*)this : NULL;
+  return (_base >= VectorA && _base <= VectorZ) ? (TypeVect*)this : NULL;
 }
 
 inline const TypePtr *Type::is_ptr() const {
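
Both casts rely purely on enum adjacency: VectorA must sit immediately before VectorS so the closed range [VectorA, VectorZ] covers exactly the vector bases. A compile-time statement of that invariant (a toy enum mirroring the ordering, not HotSpot's full Base list):

    enum Base { Tuple, Array, VectorA, VectorS, VectorD, VectorX, VectorY,
                VectorZ };
    static_assert(VectorA + 1 == VectorS,
                  "is_vect()/isa_vect() range checks assume this adjacency");

    bool is_vector_base(int b) { return b >= VectorA && b <= VectorZ; }
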
--- a/src/hotspot/share/opto/vectornode.cpp	Wed Sep 02 20:33:29 2020 -0700
+++ b/src/hotspot/share/opto/vectornode.cpp	Tue Sep 08 15:28:06 2020 +0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2007, 2017, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2007, 2020, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -1085,7 +1085,7 @@
       (vlen > 1) && is_power_of_2(vlen) &&
       Matcher::vector_size_supported(bt, vlen)) {
     int vopc = ReductionNode::opcode(opc, bt);
-    return vopc != opc && Matcher::match_rule_supported(vopc);
+    return vopc != opc && Matcher::match_rule_supported_vector(vopc, vlen, bt);
   }
   return false;
 }
--- a/src/hotspot/share/prims/jvmtiEnv.cpp	Wed Sep 02 20:33:29 2020 -0700
+++ b/src/hotspot/share/prims/jvmtiEnv.cpp	Tue Sep 08 15:28:06 2020 +0800
@@ -1714,15 +1714,18 @@
     // shall be posted for this PopFrame.
 
     // It is only safe to perform the direct operation on the current
-    // thread. All other usage needs to use a vm-safepoint-op for safety.
-    if (java_thread == JavaThread::current()) {
-      state->update_for_pop_top_frame();
-    } else {
-      VM_UpdateForPopTopFrame op(state);
-      VMThread::execute(&op);
-      jvmtiError err = op.result();
-      if (err != JVMTI_ERROR_NONE) {
-        return err;
+    // thread. All other usage needs to use a handshake for safety.
+    {
+      MutexLocker mu(JvmtiThreadState_lock);
+      if (java_thread == JavaThread::current()) {
+        state->update_for_pop_top_frame();
+      } else {
+        UpdateForPopTopFrameClosure op(state);
+        bool executed = Handshake::execute_direct(&op, java_thread);
+        jvmtiError err = executed ? op.result() : JVMTI_ERROR_THREAD_NOT_ALIVE;
+        if (err != JVMTI_ERROR_NONE) {
+          return err;
+        }
       }
     }
 
@@ -1796,13 +1799,14 @@
 
   // It is only safe to perform the direct operation on the current
-  // thread. All other usage needs to use a vm-safepoint-op for safety.
+  // thread. All other usage needs to use a handshake for safety.
+  MutexLocker mu(JvmtiThreadState_lock);
   if (java_thread == JavaThread::current()) {
     int frame_number = state->count_frames() - depth;
     state->env_thread_state(this)->set_frame_pop(frame_number);
   } else {
-    VM_SetFramePop op(this, state, depth);
-    VMThread::execute(&op);
-    err = op.result();
+    SetFramePopClosure op(this, state, depth);
+    bool executed = Handshake::execute_direct(&op, java_thread);
+    err = executed ? op.result() : JVMTI_ERROR_THREAD_NOT_ALIVE;
   }
   return err;
 } /* end NotifyFramePop */
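
In both PopFrame and NotifyFramePop the result now defaults to JVMTI_ERROR_THREAD_NOT_ALIVE and is only flipped to JVMTI_ERROR_NONE inside do_thread(), so a handshake that never executes reports the dead target naturally. A hedged sketch of the calling convention (stand-in types; the real code uses HandshakeClosure, JavaThread and Handshake::execute_direct):

    enum Err { OK, THREAD_NOT_ALIVE };

    struct ClosureLike {
      Err result_ = THREAD_NOT_ALIVE;   // only do_thread() flips this to OK
      void do_thread(void* /*target*/) { result_ = OK; }
      Err result() const { return result_; }
    };

    // Run directly on the current thread, otherwise handshake; if the
    // handshake is never executed the default result already says why.
    Err run_or_handshake(ClosureLike& op, void* target, void* self,
                         bool (*execute_direct)(ClosureLike*, void*)) {
      if (target == self) { op.do_thread(target); return op.result(); }
      bool executed = execute_direct(&op, target);
      return executed ? op.result() : THREAD_NOT_ALIVE;
    }
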
--- a/src/hotspot/share/prims/jvmtiEnvBase.cpp	Wed Sep 02 20:33:29 2020 -0700
+++ b/src/hotspot/share/prims/jvmtiEnvBase.cpp	Tue Sep 08 15:28:06 2020 +0800
@@ -1504,25 +1504,23 @@
 }
 
 void
-VM_UpdateForPopTopFrame::doit() {
+UpdateForPopTopFrameClosure::do_thread(Thread *target) {
   JavaThread* jt = _state->get_thread();
-  ThreadsListHandle tlh;
-  if (jt != NULL && tlh.includes(jt) && !jt->is_exiting() && jt->threadObj() != NULL) {
+  assert(jt == target, "just checking");
+  if (!jt->is_exiting() && jt->threadObj() != NULL) {
     _state->update_for_pop_top_frame();
-  } else {
-    _result = JVMTI_ERROR_THREAD_NOT_ALIVE;
+    _result = JVMTI_ERROR_NONE;
   }
 }
 
 void
-VM_SetFramePop::doit() {
+SetFramePopClosure::do_thread(Thread *target) {
   JavaThread* jt = _state->get_thread();
-  ThreadsListHandle tlh;
-  if (jt != NULL && tlh.includes(jt) && !jt->is_exiting() && jt->threadObj() != NULL) {
+  assert(jt == target, "just checking");
+  if (!jt->is_exiting() && jt->threadObj() != NULL) {
     int frame_number = _state->count_frames() - _depth;
     _state->env_thread_state((JvmtiEnvBase*)_env)->set_frame_pop(frame_number);
-  } else {
-    _result = JVMTI_ERROR_THREAD_NOT_ALIVE;
+    _result = JVMTI_ERROR_NONE;
   }
 }
 
--- a/src/hotspot/share/prims/jvmtiEnvBase.hpp	Wed Sep 02 20:33:29 2020 -0700
+++ b/src/hotspot/share/prims/jvmtiEnvBase.hpp	Tue Sep 08 15:28:06 2020 +0800
@@ -336,24 +336,23 @@
   JvmtiEnv* next(JvmtiEnvBase* env) { return env->next_environment(); }
 };
 
-// VM operation to update for pop top frame.
-class VM_UpdateForPopTopFrame : public VM_Operation {
+// HandshakeClosure to update for pop top frame.
+class UpdateForPopTopFrameClosure : public HandshakeClosure {
 private:
   JvmtiThreadState* _state;
   jvmtiError _result;
 
 public:
-  VM_UpdateForPopTopFrame(JvmtiThreadState* state) {
-    _state = state;
-    _result = JVMTI_ERROR_NONE;
-  }
-  VMOp_Type type() const { return VMOp_UpdateForPopTopFrame; }
+  UpdateForPopTopFrameClosure(JvmtiThreadState* state)
+    : HandshakeClosure("UpdateForPopTopFrame"),
+      _state(state),
+      _result(JVMTI_ERROR_THREAD_NOT_ALIVE) {}
   jvmtiError result() { return _result; }
-  void doit();
+  void do_thread(Thread *target);
 };
 
-// VM operation to set frame pop.
-class VM_SetFramePop : public VM_Operation {
+// HandshakeClosure to set frame pop.
+class SetFramePopClosure : public HandshakeClosure {
 private:
   JvmtiEnv *_env;
   JvmtiThreadState* _state;
@@ -361,15 +360,14 @@
   jvmtiError _result;
 
 public:
-  VM_SetFramePop(JvmtiEnv *env, JvmtiThreadState* state, jint depth) {
-    _env = env;
-    _state = state;
-    _depth = depth;
-    _result = JVMTI_ERROR_NONE;
-  }
-  VMOp_Type type() const { return VMOp_SetFramePop; }
+  SetFramePopClosure(JvmtiEnv *env, JvmtiThreadState* state, jint depth)
+    : HandshakeClosure("SetFramePop"),
+      _env(env),
+      _state(state),
+      _depth(depth),
+      _result(JVMTI_ERROR_THREAD_NOT_ALIVE) {}
   jvmtiError result() { return _result; }
-  void doit();
+  void do_thread(Thread *target);
 };
 
 
--- a/src/hotspot/share/prims/jvmtiEnvThreadState.cpp	Wed Sep 02 20:33:29 2020 -0700
+++ b/src/hotspot/share/prims/jvmtiEnvThreadState.cpp	Tue Sep 08 15:28:06 2020 +0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2017, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -191,8 +191,11 @@
 
 
 JvmtiFramePops* JvmtiEnvThreadState::get_frame_pops() {
-  assert(get_thread() == Thread::current() || SafepointSynchronize::is_at_safepoint(),
-         "frame pop data only accessible from same thread or at safepoint");
+#ifdef ASSERT
+  Thread *current = Thread::current();
+#endif
+  assert(get_thread() == current || current == get_thread()->active_handshaker(),
+         "frame pop data only accessible from same thread or direct handshake");
   if (_frame_pops == NULL) {
     _frame_pops = new JvmtiFramePops();
     assert(_frame_pops != NULL, "_frame_pops != NULL");
@@ -206,32 +209,44 @@
 }
 
 void JvmtiEnvThreadState::set_frame_pop(int frame_number) {
-  assert(get_thread() == Thread::current() || SafepointSynchronize::is_at_safepoint(),
-         "frame pop data only accessible from same thread or at safepoint");
+#ifdef ASSERT
+  Thread *current = Thread::current();
+#endif
+  assert(get_thread() == current || current == get_thread()->active_handshaker(),
+         "frame pop data only accessible from same thread or direct handshake");
   JvmtiFramePop fpop(frame_number);
   JvmtiEventController::set_frame_pop(this, fpop);
 }
 
 
 void JvmtiEnvThreadState::clear_frame_pop(int frame_number) {
-  assert(get_thread() == Thread::current() || SafepointSynchronize::is_at_safepoint(),
-         "frame pop data only accessible from same thread or at safepoint");
+#ifdef ASSERT
+  Thread *current = Thread::current();
+#endif
+  assert(get_thread() == current || current == get_thread()->active_handshaker(),
+         "frame pop data only accessible from same thread or direct handshake");
   JvmtiFramePop fpop(frame_number);
   JvmtiEventController::clear_frame_pop(this, fpop);
 }
 
 
 void JvmtiEnvThreadState::clear_to_frame_pop(int frame_number)  {
-  assert(get_thread() == Thread::current() || SafepointSynchronize::is_at_safepoint(),
-         "frame pop data only accessible from same thread or at safepoint");
+#ifdef ASSERT
+  Thread *current = Thread::current();
+#endif
+  assert(get_thread() == current || current == get_thread()->active_handshaker(),
+         "frame pop data only accessible from same thread or direct handshake");
   JvmtiFramePop fpop(frame_number);
   JvmtiEventController::clear_to_frame_pop(this, fpop);
 }
 
 
 bool JvmtiEnvThreadState::is_frame_pop(int cur_frame_number) {
-  assert(get_thread() == Thread::current() || SafepointSynchronize::is_at_safepoint(),
-         "frame pop data only accessible from same thread or at safepoint");
+#ifdef ASSERT
+  Thread *current = Thread::current();
+#endif
+  assert(get_thread() == current || current == get_thread()->active_handshaker(),
+         "frame pop data only accessible from same thread or direct handshake");
   if (!get_thread()->is_interp_only_mode() || _frame_pops == NULL) {
     return false;
   }
@@ -240,25 +255,25 @@
 }
 
 
-class VM_GetCurrentLocation : public VM_Operation {
+class GetCurrentLocationClosure : public HandshakeClosure {
  private:
-   JavaThread *_thread;
    jmethodID _method_id;
    int _bci;
 
  public:
-  VM_GetCurrentLocation(JavaThread *thread) {
-     _thread = thread;
-   }
-  VMOp_Type type() const { return VMOp_GetCurrentLocation; }
-  void doit() {
-    ResourceMark rmark; // _thread != Thread::current()
-    RegisterMap rm(_thread, false);
+  GetCurrentLocationClosure()
+    : HandshakeClosure("GetCurrentLocation"),
+      _method_id(NULL),
+      _bci(0) {}
+  void do_thread(Thread *target) {
+    JavaThread *jt = (JavaThread *)target;
+    ResourceMark rmark; // jt != Thread::current()
+    RegisterMap rm(jt, false);
-    // There can be a race condition between a VM_Operation reaching a safepoint
-    // and the target thread exiting from Java execution.
-    // We must recheck the last Java frame still exists.
+    // There can be a race condition between the handshake being processed
+    // and the target thread exiting from Java execution.
+    // We must recheck that the last Java frame still exists.
-    if (!_thread->is_exiting() && _thread->has_last_Java_frame()) {
-      javaVFrame* vf = _thread->last_java_vframe(&rm);
+    if (!jt->is_exiting() && jt->has_last_Java_frame()) {
+      javaVFrame* vf = jt->last_java_vframe(&rm);
       assert(vf != NULL, "must have last java frame");
       Method* method = vf->method();
       _method_id = method->jmethod_id();
@@ -307,9 +322,15 @@
       jmethodID method_id;
       int bci;
       // The java thread stack may not be walkable for a running thread
-      // so get current location at safepoint.
-      VM_GetCurrentLocation op(_thread);
-      VMThread::execute(&op);
+      // so get the current location with a direct handshake.
+      GetCurrentLocationClosure op;
+      Thread *current = Thread::current();
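+      // The closure can run in place when the current thread is the target
+      // or is already handshaking with it; otherwise request a direct
+      // handshake.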
+      if (current == _thread || _thread->active_handshaker() == current) {
+        op.do_thread(_thread);
+      } else {
+        bool executed = Handshake::execute_direct(&op, _thread);
+        guarantee(executed, "Direct handshake failed. Target thread is not alive?");
+      }
       op.get_current_location(&method_id, &bci);
       set_current_location(method_id, bci);
     }
--- a/src/hotspot/share/prims/jvmtiEventController.cpp	Wed Sep 02 20:33:29 2020 -0700
+++ b/src/hotspot/share/prims/jvmtiEventController.cpp	Tue Sep 08 15:28:06 2020 +0800
@@ -331,10 +331,14 @@
   EC_TRACE(("[%s] # Entering interpreter only mode",
             JvmtiTrace::safe_get_thread_name(state->get_thread())));
   EnterInterpOnlyModeClosure hs;
-  if (SafepointSynchronize::is_at_safepoint()) {
-    hs.do_thread(state->get_thread());
+  assert(state->get_thread()->is_Java_thread(), "just checking");
+  JavaThread *target = (JavaThread *)state->get_thread();
+  Thread *current = Thread::current();
+  if (target == current || target->active_handshaker() == current) {
+    hs.do_thread(target);
   } else {
-    Handshake::execute_direct(&hs, state->get_thread());
+    bool executed = Handshake::execute_direct(&hs, target);
+    guarantee(executed, "Direct handshake failed. Target thread is not alive?");
   }
 }
 
@@ -980,21 +984,21 @@
 
 void
 JvmtiEventController::set_frame_pop(JvmtiEnvThreadState *ets, JvmtiFramePop fpop) {
-  MutexLocker mu(SafepointSynchronize::is_at_safepoint() ? NULL : JvmtiThreadState_lock);
+  assert_lock_strong(JvmtiThreadState_lock);
   JvmtiEventControllerPrivate::set_frame_pop(ets, fpop);
 }
 
 
 void
 JvmtiEventController::clear_frame_pop(JvmtiEnvThreadState *ets, JvmtiFramePop fpop) {
-  MutexLocker mu(SafepointSynchronize::is_at_safepoint() ? NULL : JvmtiThreadState_lock);
+  assert_lock_strong(JvmtiThreadState_lock);
   JvmtiEventControllerPrivate::clear_frame_pop(ets, fpop);
 }
 
 
 void
 JvmtiEventController::clear_to_frame_pop(JvmtiEnvThreadState *ets, JvmtiFramePop fpop) {
-  MutexLocker mu(SafepointSynchronize::is_at_safepoint() ? NULL : JvmtiThreadState_lock);
+  assert_lock_strong(JvmtiThreadState_lock);
   JvmtiEventControllerPrivate::clear_to_frame_pop(ets, fpop);
 }
 
--- a/src/hotspot/share/prims/jvmtiExport.cpp	Wed Sep 02 20:33:29 2020 -0700
+++ b/src/hotspot/share/prims/jvmtiExport.cpp	Tue Sep 08 15:28:06 2020 +0800
@@ -1645,7 +1645,10 @@
           }
         }
         // remove the frame's entry
-        ets->clear_frame_pop(cur_frame_number);
+        {
+          MutexLocker mu(JvmtiThreadState_lock);
+          ets->clear_frame_pop(cur_frame_number);
+        }
       }
     }
   }
--- a/src/hotspot/share/prims/jvmtiThreadState.cpp	Wed Sep 02 20:33:29 2020 -0700
+++ b/src/hotspot/share/prims/jvmtiThreadState.cpp	Tue Sep 08 15:28:06 2020 +0800
@@ -272,9 +272,9 @@
 }
 
 int JvmtiThreadState::cur_stack_depth() {
-  guarantee(SafepointSynchronize::is_at_safepoint() ||
-    (JavaThread *)Thread::current() == get_thread(),
-    "must be current thread or at safepoint");
+  Thread *current = Thread::current();
+  guarantee(current == get_thread() || current == get_thread()->active_handshaker(),
+            "must be current thread or direct handshake");
 
   if (!is_interp_only_mode() || _cur_stack_depth == UNKNOWN_STACK_DEPTH) {
     _cur_stack_depth = count_frames();
--- a/src/hotspot/share/prims/methodHandles.cpp	Wed Sep 02 20:33:29 2020 -0700
+++ b/src/hotspot/share/prims/methodHandles.cpp	Tue Sep 08 15:28:06 2020 +0800
@@ -132,6 +132,9 @@
   REFERENCE_KIND_MASK  = java_lang_invoke_MemberName::MN_REFERENCE_KIND_MASK,
   SEARCH_SUPERCLASSES  = java_lang_invoke_MemberName::MN_SEARCH_SUPERCLASSES,
   SEARCH_INTERFACES    = java_lang_invoke_MemberName::MN_SEARCH_INTERFACES,
+  LM_UNCONDITIONAL     = java_lang_invoke_MemberName::MN_UNCONDITIONAL_MODE,
+  LM_MODULE            = java_lang_invoke_MemberName::MN_MODULE_MODE,
+  LM_TRUSTED           = java_lang_invoke_MemberName::MN_TRUSTED_MODE,
   ALL_KINDS      = IS_METHOD | IS_CONSTRUCTOR | IS_FIELD | IS_TYPE
 };
 
@@ -672,11 +675,10 @@
   return NULL;
 }
 
-
 // An unresolved member name is a mere symbolic reference.
 // Resolving it plants a vmtarget/vmindex in it,
 // which refers directly to JVM internals.
-Handle MethodHandles::resolve_MemberName(Handle mname, Klass* caller,
+Handle MethodHandles::resolve_MemberName(Handle mname, Klass* caller, int lookup_mode,
                                          bool speculative_resolve, TRAPS) {
   Handle empty;
   assert(java_lang_invoke_MemberName::is_instance(mname()), "");
@@ -745,16 +747,21 @@
   TempNewSymbol type = lookup_signature(type_str(), (mh_invoke_id != vmIntrinsics::_none), CHECK_(empty));
   if (type == NULL)  return empty;  // no such signature exists in the VM
 
+  // skip the access check if this is a trusted lookup
   LinkInfo::AccessCheck access_check = caller != NULL ?
                                               LinkInfo::AccessCheck::required :
                                               LinkInfo::AccessCheck::skip;
+  // skip the loader constraint check if this is a trusted or a public lookup
+  LinkInfo::LoaderConstraintCheck loader_constraint_check = (caller != NULL && (lookup_mode & LM_UNCONDITIONAL) == 0) ?
+                                              LinkInfo::LoaderConstraintCheck::required :
+                                              LinkInfo::LoaderConstraintCheck::skip;
 
   // Time to do the lookup.
   switch (flags & ALL_KINDS) {
   case IS_METHOD:
     {
       CallInfo result;
-      LinkInfo link_info(defc, name, type, caller, access_check);
+      LinkInfo link_info(defc, name, type, caller, access_check, loader_constraint_check);
       {
         assert(!HAS_PENDING_EXCEPTION, "");
         if (ref_kind == JVM_REF_invokeStatic) {
@@ -795,7 +802,7 @@
   case IS_CONSTRUCTOR:
     {
       CallInfo result;
-      LinkInfo link_info(defc, name, type, caller, access_check);
+      LinkInfo link_info(defc, name, type, caller, access_check, loader_constraint_check);
       {
         assert(!HAS_PENDING_EXCEPTION, "");
         if (name == vmSymbols::object_initializer_name()) {
@@ -820,7 +827,7 @@
       fieldDescriptor result; // find_field initializes fd if found
       {
         assert(!HAS_PENDING_EXCEPTION, "");
-        LinkInfo link_info(defc, name, type, caller, LinkInfo::AccessCheck::skip);
+        LinkInfo link_info(defc, name, type, caller, LinkInfo::AccessCheck::skip, loader_constraint_check);
         LinkResolver::resolve_field(result, link_info, Bytecodes::_nop, false, THREAD);
         if (HAS_PENDING_EXCEPTION) {
           if (speculative_resolve) {
@@ -1117,6 +1124,9 @@
     template(java_lang_invoke_MemberName,MN_HIDDEN_CLASS) \
     template(java_lang_invoke_MemberName,MN_STRONG_LOADER_LINK) \
     template(java_lang_invoke_MemberName,MN_ACCESS_VM_ANNOTATIONS) \
+    template(java_lang_invoke_MemberName,MN_MODULE_MODE) \
+    template(java_lang_invoke_MemberName,MN_UNCONDITIONAL_MODE) \
+    template(java_lang_invoke_MemberName,MN_TRUSTED_MODE) \
     /*end*/
 
 #define IGNORE_REQ(req_expr) /* req_expr */
@@ -1190,13 +1200,17 @@
 
 // void resolve(MemberName self, Class<?> caller)
 JVM_ENTRY(jobject, MHN_resolve_Mem(JNIEnv *env, jobject igcls, jobject mname_jh, jclass caller_jh,
-    jboolean speculative_resolve)) {
+    jint lookup_mode, jboolean speculative_resolve)) {
   if (mname_jh == NULL) { THROW_MSG_NULL(vmSymbols::java_lang_InternalError(), "mname is null"); }
   Handle mname(THREAD, JNIHandles::resolve_non_null(mname_jh));
 
   // The trusted Java code that calls this method should already have performed
   // access checks on behalf of the given caller.  But, we can verify this.
-  if (VerifyMethodHandles && caller_jh != NULL &&
+  // This only verifies from the context of the lookup class. It does not
+  // verify the lookup context for a Lookup object teleported from one module
+  // to another. Such a Lookup object can only access the intersection of the
+  // sets of classes accessible from both the lookup class and the previous
+  // lookup class.
+  if (VerifyMethodHandles && (lookup_mode & LM_TRUSTED) == LM_TRUSTED && caller_jh != NULL &&
       java_lang_invoke_MemberName::clazz(mname()) != NULL) {
     Klass* reference_klass = java_lang_Class::as_Klass(java_lang_invoke_MemberName::clazz(mname()));
     if (reference_klass != NULL && reference_klass->is_objArray_klass()) {
@@ -1207,18 +1221,25 @@
     if (reference_klass != NULL && reference_klass->is_instance_klass()) {
       // Emulate LinkResolver::check_klass_accessability.
       Klass* caller = java_lang_Class::as_Klass(JNIHandles::resolve_non_null(caller_jh));
-      if (caller != SystemDictionary::Object_klass()
+      // Perform the access check on behalf of the caller unless this is a
+      // public lookup, i.e. unless the lookup mode is UNCONDITIONAL.
+      if ((lookup_mode & LM_UNCONDITIONAL) == 0
           && Reflection::verify_class_access(caller,
                                              InstanceKlass::cast(reference_klass),
                                              true) != Reflection::ACCESS_OK) {
-        THROW_MSG_NULL(vmSymbols::java_lang_InternalError(), reference_klass->external_name());
+        ResourceMark rm(THREAD);
+        stringStream ss;
+        ss.print("caller %s tried to access %s", caller->class_in_module_of_loader(),
+                 reference_klass->class_in_module_of_loader());
+        THROW_MSG_NULL(vmSymbols::java_lang_InternalError(), ss.as_string());
       }
     }
   }
 
   Klass* caller = caller_jh == NULL ? NULL :
                      java_lang_Class::as_Klass(JNIHandles::resolve_non_null(caller_jh));
-  Handle resolved = MethodHandles::resolve_MemberName(mname, caller, speculative_resolve == JNI_TRUE,
+  Handle resolved = MethodHandles::resolve_MemberName(mname, caller, lookup_mode,
+                                                      speculative_resolve == JNI_TRUE,
                                                       CHECK_NULL);
 
   if (resolved.is_null()) {
@@ -1518,7 +1539,7 @@
 static JNINativeMethod MHN_methods[] = {
   {CC "init",                      CC "(" MEM "" OBJ ")V",                   FN_PTR(MHN_init_Mem)},
   {CC "expand",                    CC "(" MEM ")V",                          FN_PTR(MHN_expand_Mem)},
-  {CC "resolve",                   CC "(" MEM "" CLS "Z)" MEM,               FN_PTR(MHN_resolve_Mem)},
+  {CC "resolve",                   CC "(" MEM "" CLS "IZ)" MEM,              FN_PTR(MHN_resolve_Mem)},
   //  static native int getNamedCon(int which, Object[] name)
   {CC "getNamedCon",               CC "(I[" OBJ ")I",                        FN_PTR(MHN_getNamedCon)},
   //  static native int getMembers(Class<?> defc, String matchName, String matchSig,
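
As an illustration of the lookup modes now passed through MHN_resolve_Mem (a
standalone sketch, not part of the changeset; the class name LookupModeDemo is
made up):

import java.lang.invoke.MethodHandle;
import java.lang.invoke.MethodHandles;
import java.lang.invoke.MethodType;

public class LookupModeDemo {
    public static void main(String[] args) throws Throwable {
        // A full-power lookup passes its caller class down to the VM, which
        // performs the access and loader-constraint checks added above.
        MethodHandle len = MethodHandles.lookup().findVirtual(
                String.class, "length", MethodType.methodType(int.class));
        System.out.println((int) len.invokeExact("hello"));   // 5

        // A public lookup carries only the UNCONDITIONAL bit; the VM sees
        // LM_UNCONDITIONAL and skips the loader constraint check instead of
        // being handed Object.class as a stand-in caller.
        MethodHandle sig = MethodHandles.publicLookup().findStatic(
                Math.class, "signum",
                MethodType.methodType(double.class, double.class));
        System.out.println((double) sig.invokeExact(-3.5));   // -1.0
    }
}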
--- a/src/hotspot/share/prims/methodHandles.hpp	Wed Sep 02 20:33:29 2020 -0700
+++ b/src/hotspot/share/prims/methodHandles.hpp	Tue Sep 08 15:28:06 2020 +0800
@@ -60,7 +60,7 @@
 
  public:
   // working with member names
-  static Handle resolve_MemberName(Handle mname, Klass* caller,
+  static Handle resolve_MemberName(Handle mname, Klass* caller, int lookup_mode,
                                    bool speculative_resolve, TRAPS); // compute vmtarget/vmindex from name/type
   static void expand_MemberName(Handle mname, int suppress, TRAPS);  // expand defc/name/type if missing
   static oop init_MemberName(Handle mname_h, Handle target_h, TRAPS); // compute vmtarget/vmindex from target
--- a/src/hotspot/share/runtime/globals.hpp	Wed Sep 02 20:33:29 2020 -0700
+++ b/src/hotspot/share/runtime/globals.hpp	Tue Sep 08 15:28:06 2020 +0800
@@ -347,6 +347,12 @@
   diagnostic(bool, UseVectorizedMismatchIntrinsic, false,                   \
           "Enables intrinsification of ArraysSupport.vectorizedMismatch()") \
                                                                             \
+  diagnostic(bool, UseCopySignIntrinsic, false,                             \
+          "Enables intrinsification of Math.copySign")                      \
+                                                                            \
+  diagnostic(bool, UseSignumIntrinsic, false,                               \
+          "Enables intrinsification of Math.signum")                        \
+                                                                            \
   diagnostic(ccstrlist, DisableIntrinsic, "",                               \
          "do not expand intrinsics whose (internal) names appear here")     \
                                                                             \
--- a/src/hotspot/share/runtime/handshake.cpp	Wed Sep 02 20:33:29 2020 -0700
+++ b/src/hotspot/share/runtime/handshake.cpp	Tue Sep 08 15:28:06 2020 +0800
@@ -383,9 +383,9 @@
   _operation_direct(NULL),
   _handshake_turn_sem(1),
   _processing_sem(1),
-  _thread_in_process_handshake(false)
+  _thread_in_process_handshake(false),
+  _active_handshaker(NULL)
 {
-  DEBUG_ONLY(_active_handshaker = NULL;)
 }
 
 void HandshakeState::set_operation(HandshakeOperation* op) {
@@ -510,9 +510,9 @@
   if (can_process_handshake()) {
     guarantee(!_processing_sem.trywait(), "we should already own the semaphore");
     log_trace(handshake)("Processing handshake by %s", Thread::current()->is_VM_thread() ? "VMThread" : "Handshaker");
-    DEBUG_ONLY(_active_handshaker = Thread::current();)
+    _active_handshaker = Thread::current();
     op->do_handshake(_handshakee);
-    DEBUG_ONLY(_active_handshaker = NULL;)
+    _active_handshaker = NULL;
     // Disarm after we have executed the operation.
     clear_handshake(is_direct);
     pr = _success;
--- a/src/hotspot/share/runtime/handshake.hpp	Wed Sep 02 20:33:29 2020 -0700
+++ b/src/hotspot/share/runtime/handshake.hpp	Tue Sep 08 15:28:06 2020 +0800
@@ -106,11 +106,8 @@
   };
   ProcessResult try_process(HandshakeOperation* op);
 
-#ifdef ASSERT
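+  // The thread currently executing a handshake operation on the handshakee,
+  // or NULL. Now kept in product builds as well, since JVMTI guarantees
+  // read it via active_handshaker().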
   Thread* _active_handshaker;
   Thread* active_handshaker() const { return _active_handshaker; }
-#endif
-
 };
 
 #endif // SHARE_RUNTIME_HANDSHAKE_HPP
--- a/src/hotspot/share/runtime/thread.cpp	Wed Sep 02 20:33:29 2020 -0700
+++ b/src/hotspot/share/runtime/thread.cpp	Tue Sep 08 15:28:06 2020 +0800
@@ -2613,9 +2613,17 @@
 void JavaThread::java_suspend_self_with_safepoint_check() {
   assert(this == Thread::current(), "invariant");
   JavaThreadState state = thread_state();
-  set_thread_state(_thread_blocked);
-  java_suspend_self();
-  set_thread_state_fence(state);
+
+  do {
+    set_thread_state(_thread_blocked);
+    java_suspend_self();
+    // The current thread could have been suspended again. We have to check
+    // for a suspend request after restoring the saved state. Without this
+    // the current thread might return to _thread_in_Java and execute
+    // bytecodes for an arbitrarily long time.
+    set_thread_state_fence(state);
+  } while (is_external_suspend());
+
   // Since we are not using a regular thread-state transition helper here,
   // we must manually emit the instruction barrier after leaving a safe state.
   OrderAccess::cross_modify_fence();
--- a/src/hotspot/share/runtime/thread.hpp	Wed Sep 02 20:33:29 2020 -0700
+++ b/src/hotspot/share/runtime/thread.hpp	Tue Sep 08 15:28:06 2020 +0800
@@ -1365,11 +1365,9 @@
     return _handshake.try_process(op);
   }
 
-#ifdef ASSERT
   Thread* active_handshaker() const {
     return _handshake.active_handshaker();
   }
-#endif
 
   // Suspend/resume support for JavaThread
  private:
--- a/src/hotspot/share/runtime/vmOperations.hpp	Wed Sep 02 20:33:29 2020 -0700
+++ b/src/hotspot/share/runtime/vmOperations.hpp	Tue Sep 08 15:28:06 2020 +0800
@@ -76,14 +76,11 @@
   template(PopulateDumpSharedSpace)               \
   template(JNIFunctionTableCopier)                \
   template(RedefineClasses)                       \
-  template(UpdateForPopTopFrame)                  \
-  template(SetFramePop)                           \
   template(GetObjectMonitorUsage)                 \
   template(GetAllStackTraces)                     \
   template(GetThreadListStackTraces)              \
   template(ChangeBreakpoints)                     \
   template(GetOrSetLocal)                         \
-  template(GetCurrentLocation)                    \
   template(ChangeSingleStep)                      \
   template(HeapWalkOperation)                     \
   template(HeapIterateOperation)                  \
--- a/src/hotspot/share/runtime/vmStructs.cpp	Wed Sep 02 20:33:29 2020 -0700
+++ b/src/hotspot/share/runtime/vmStructs.cpp	Tue Sep 08 15:28:06 2020 +0800
@@ -1849,6 +1849,10 @@
   declare_c2_type(OverflowMulLNode, OverflowLNode)                        \
   declare_c2_type(FmaDNode, Node)                                         \
   declare_c2_type(FmaFNode, Node)                                         \
+  declare_c2_type(CopySignDNode, Node)                                    \
+  declare_c2_type(CopySignFNode, Node)                                    \
+  declare_c2_type(SignumDNode, Node)                                      \
+  declare_c2_type(SignumFNode, Node)                                      \
   declare_c2_type(LoadVectorGatherNode, LoadVectorNode)                   \
   declare_c2_type(StoreVectorScatterNode, StoreVectorNode)                \
   declare_c2_type(VectorLoadMaskNode, VectorNode)                         \
--- a/src/java.base/share/classes/java/lang/Math.java	Wed Sep 02 20:33:29 2020 -0700
+++ b/src/java.base/share/classes/java/lang/Math.java	Tue Sep 08 15:28:06 2020 +0800
@@ -1977,6 +1977,7 @@
      * @author Joseph D. Darcy
      * @since 1.5
      */
+    @HotSpotIntrinsicCandidate
     public static double signum(double d) {
         return (d == 0.0 || Double.isNaN(d))?d:copySign(1.0, d);
     }
@@ -1998,6 +1999,7 @@
      * @author Joseph D. Darcy
      * @since 1.5
      */
+    @HotSpotIntrinsicCandidate
     public static float signum(float f) {
         return (f == 0.0f || Float.isNaN(f))?f:copySign(1.0f, f);
     }
@@ -2218,6 +2220,7 @@
      * and the sign of {@code sign}.
      * @since 1.6
      */
+    @HotSpotIntrinsicCandidate
     public static double copySign(double magnitude, double sign) {
         return Double.longBitsToDouble((Double.doubleToRawLongBits(sign) &
                                         (DoubleConsts.SIGN_BIT_MASK)) |
@@ -2241,6 +2244,7 @@
      * and the sign of {@code sign}.
      * @since 1.6
      */
+    @HotSpotIntrinsicCandidate
     public static float copySign(float magnitude, float sign) {
         return Float.intBitsToFloat((Float.floatToRawIntBits(sign) &
                                      (FloatConsts.SIGN_BIT_MASK)) |
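
A small standalone sketch (hypothetical class name) of the edge cases the two
newly intrinsified methods encode; the intrinsics themselves are opt-in via
-XX:+UnlockDiagnosticVMOptions -XX:+UseSignumIntrinsic -XX:+UseCopySignIntrinsic:

public class SignumCopySignDemo {
    public static void main(String[] args) {
        // signum returns its argument for zeros and NaN, otherwise +/-1.0,
        // so the sign of zero is preserved
        System.out.println(Math.signum(42.0));        // 1.0
        System.out.println(Math.signum(-0.0));        // -0.0
        System.out.println(Math.signum(Double.NaN));  // NaN

        // copySign combines the magnitude with the raw sign bit of 'sign',
        // which is why -0.0 counts as negative here
        System.out.println(Math.copySign(3.0f, -0.0f)); // -3.0
        System.out.println(Math.copySign(-3.0, 1.0));   // 3.0
    }
}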
--- a/src/java.base/share/classes/java/lang/invoke/ClassSpecializer.java	Wed Sep 02 20:33:29 2020 -0700
+++ b/src/java.base/share/classes/java/lang/invoke/ClassSpecializer.java	Tue Sep 08 15:28:06 2020 +0800
@@ -46,6 +46,7 @@
 import java.util.concurrent.ConcurrentHashMap;
 import java.util.function.Function;
 
+import static java.lang.invoke.GenerateJLIClassesHelper.traceSpeciesType;
 import static java.lang.invoke.LambdaForm.*;
 import static java.lang.invoke.MethodHandleNatives.Constants.REF_getStatic;
 import static java.lang.invoke.MethodHandleNatives.Constants.REF_putStatic;
@@ -475,15 +476,8 @@
             Class<?> salvage = null;
             try {
                 salvage = BootLoader.loadClassOrNull(className);
-                if (TRACE_RESOLVE && salvage != null) {
-                    // Used by jlink species pregeneration plugin, see
-                    // jdk.tools.jlink.internal.plugins.GenerateJLIClassesPlugin
-                    System.out.println("[SPECIES_RESOLVE] " + className + " (salvaged)");
-                }
+                traceSpeciesType(className, salvage);
             } catch (Error ex) {
-                if (TRACE_RESOLVE) {
-                    System.out.println("[SPECIES_FRESOLVE] " + className + " (Error) " + ex.getMessage());
-                }
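+                // ignore: pre-generated species lookup is best-effort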
             }
             final Class<? extends T> speciesCode;
             if (salvage != null) {
@@ -494,19 +488,12 @@
                 // Not pregenerated, generate the class
                 try {
                     speciesCode = generateConcreteSpeciesCode(className, speciesData);
-                    if (TRACE_RESOLVE) {
-                        // Used by jlink species pregeneration plugin, see
-                        // jdk.tools.jlink.internal.plugins.GenerateJLIClassesPlugin
-                        System.out.println("[SPECIES_RESOLVE] " + className + " (generated)");
-                    }
+                    traceSpeciesType(className, salvage);
                     // This operation causes a lot of churn:
                     linkSpeciesDataToCode(speciesData, speciesCode);
                     // This operation commits the relation, but causes little churn:
                     linkCodeToSpeciesData(speciesCode, speciesData, false);
                 } catch (Error ex) {
-                    if (TRACE_RESOLVE) {
-                        System.out.println("[SPECIES_RESOLVE] " + className + " (Error #2)" );
-                    }
                     // We can get here if there is a race condition loading a class.
                     // Or maybe we are out of resources.  Back out of the CHM.get and retry.
                     throw ex;
--- a/src/java.base/share/classes/java/lang/invoke/DelegatingMethodHandle.java	Wed Sep 02 20:33:29 2020 -0700
+++ b/src/java.base/share/classes/java/lang/invoke/DelegatingMethodHandle.java	Tue Sep 08 15:28:06 2020 +0800
@@ -28,7 +28,7 @@
 import java.util.Arrays;
 import static java.lang.invoke.LambdaForm.*;
 import static java.lang.invoke.LambdaForm.Kind.*;
-import static java.lang.invoke.MethodHandleNatives.Constants.REF_invokeVirtual;
+import static java.lang.invoke.MethodHandleNatives.Constants.*;
 import static java.lang.invoke.MethodHandleStatics.*;
 
 /**
@@ -177,7 +177,7 @@
                     MethodType.methodType(MethodHandle.class), REF_invokeVirtual);
             NF_getTarget = new NamedFunction(
                     MemberName.getFactory()
-                            .resolveOrFail(REF_invokeVirtual, member, DelegatingMethodHandle.class, NoSuchMethodException.class));
+                            .resolveOrFail(REF_invokeVirtual, member, DelegatingMethodHandle.class, LM_TRUSTED, NoSuchMethodException.class));
         } catch (ReflectiveOperationException ex) {
             throw newInternalError(ex);
         }
--- a/src/java.base/share/classes/java/lang/invoke/DirectMethodHandle.java	Wed Sep 02 20:33:29 2020 -0700
+++ b/src/java.base/share/classes/java/lang/invoke/DirectMethodHandle.java	Tue Sep 08 15:28:06 2020 +0800
@@ -63,7 +63,7 @@
             member.isMethod() && !member.isAbstract()) {
             // Check for corner case: invokeinterface of Object method
             MemberName m = new MemberName(Object.class, member.getName(), member.getMethodType(), member.getReferenceKind());
-            m = MemberName.getFactory().resolveOrNull(m.getReferenceKind(), m, null);
+            m = MemberName.getFactory().resolveOrNull(m.getReferenceKind(), m, null, LM_TRUSTED);
             if (m != null && m.isPublic()) {
                 assert(member.getReferenceKind() == m.getReferenceKind());  // else this.form is wrong
                 member = m;
@@ -260,7 +260,8 @@
                     .changeReturnType(void.class);          // <init> returns void
         MemberName linker = new MemberName(MethodHandle.class, linkerName, mtypeWithArg, REF_invokeStatic);
         try {
-            linker = IMPL_NAMES.resolveOrFail(REF_invokeStatic, linker, null, NoSuchMethodException.class);
+            linker = IMPL_NAMES.resolveOrFail(REF_invokeStatic, linker, null, LM_TRUSTED,
+                                              NoSuchMethodException.class);
         } catch (ReflectiveOperationException ex) {
             throw newInternalError(ex);
         }
@@ -771,7 +772,8 @@
             linkerType = MethodType.methodType(void.class, Object.class, long.class, ft);
         MemberName linker = new MemberName(Unsafe.class, kind.methodName, linkerType, REF_invokeVirtual);
         try {
-            linker = IMPL_NAMES.resolveOrFail(REF_invokeVirtual, linker, null, NoSuchMethodException.class);
+            linker = IMPL_NAMES.resolveOrFail(REF_invokeVirtual, linker, null, LM_TRUSTED,
+                                              NoSuchMethodException.class);
         } catch (ReflectiveOperationException ex) {
             throw newInternalError(ex);
         }
@@ -914,13 +916,15 @@
                 case NF_UNSAFE:
                     MemberName member = new MemberName(MethodHandleStatics.class, "UNSAFE", Unsafe.class, REF_getField);
                     return new NamedFunction(
-                            MemberName.getFactory()
-                                    .resolveOrFail(REF_getField, member, DirectMethodHandle.class, NoSuchMethodException.class));
+                            MemberName.getFactory().resolveOrFail(REF_getField, member,
+                                                                  DirectMethodHandle.class, LM_TRUSTED,
+                                                                  NoSuchMethodException.class));
                 case NF_checkReceiver:
                     member = new MemberName(DirectMethodHandle.class, "checkReceiver", OBJ_OBJ_TYPE, REF_invokeVirtual);
                     return new NamedFunction(
-                        MemberName.getFactory()
-                            .resolveOrFail(REF_invokeVirtual, member, DirectMethodHandle.class, NoSuchMethodException.class));
+                            MemberName.getFactory().resolveOrFail(REF_invokeVirtual, member,
+                                                                  DirectMethodHandle.class, LM_TRUSTED,
+                                                                  NoSuchMethodException.class));
                 default:
                     throw newInternalError("Unknown function: " + func);
             }
@@ -934,8 +938,9 @@
     {
         MemberName member = new MemberName(DirectMethodHandle.class, name, type, REF_invokeStatic);
         return new NamedFunction(
-            MemberName.getFactory()
-                .resolveOrFail(REF_invokeStatic, member, DirectMethodHandle.class, NoSuchMethodException.class));
+                MemberName.getFactory().resolveOrFail(REF_invokeStatic, member,
+                                                      DirectMethodHandle.class, LM_TRUSTED,
+                                                      NoSuchMethodException.class));
     }
 
     static {
--- a/src/java.base/share/classes/java/lang/invoke/GenerateJLIClassesHelper.java	Wed Sep 02 20:33:29 2020 -0700
+++ b/src/java.base/share/classes/java/lang/invoke/GenerateJLIClassesHelper.java	Tue Sep 08 15:28:06 2020 +0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016, 2020, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -32,16 +32,328 @@
 import java.util.ArrayList;
 import java.util.HashSet;
 import java.util.Map;
+import java.util.Set;
+import java.util.TreeMap;
+import java.util.TreeSet;
+import java.util.stream.Stream;
 
-import static java.lang.invoke.MethodTypeForm.LF_INVINTERFACE;
-import static java.lang.invoke.MethodTypeForm.LF_INVVIRTUAL;
+import static java.lang.invoke.LambdaForm.basicTypeSignature;
+import static java.lang.invoke.LambdaForm.shortenSignature;
+import static java.lang.invoke.LambdaForm.BasicType.*;
+import static java.lang.invoke.MethodHandleStatics.TRACE_RESOLVE;
+import static java.lang.invoke.MethodTypeForm.*;
+import static java.lang.invoke.LambdaForm.Kind.*;
 
 /**
  * Helper class to assist the GenerateJLIClassesPlugin to get access to
  * generate classes ahead of time.
  */
 class GenerateJLIClassesHelper {
+    private static final String LF_RESOLVE = "[LF_RESOLVE]";
+    private static final String SPECIES_RESOLVE = "[SPECIES_RESOLVE]";
 
+    static void traceLambdaForm(String name, MethodType type, Class<?> holder, MemberName resolvedMember) {
+        if (TRACE_RESOLVE) {
+            System.out.println(LF_RESOLVE + " " + holder.getName() + " " + name + " " +
+                    shortenSignature(basicTypeSignature(type)) +
+                    (resolvedMember != null ? " (success)" : " (fail)"));
+        }
+    }
+
+    static void traceSpeciesType(String cn, Class<?> salvage) {
+        if (TRACE_RESOLVE) {
+            System.out.println(SPECIES_RESOLVE + " " + cn + (salvage != null ? " (salvaged)" : " (generated)"));
+        }
+    }
+
+    // Map from DirectMethodHandle method name to the corresponding LambdaForm index
+    static final Map<String, Integer> DMH_METHOD_TYPE_MAP =
+            Map.of(
+                    DIRECT_INVOKE_VIRTUAL.methodName,     LF_INVVIRTUAL,
+                    DIRECT_INVOKE_STATIC.methodName,      LF_INVSTATIC,
+                    DIRECT_INVOKE_SPECIAL.methodName,     LF_INVSPECIAL,
+                    DIRECT_NEW_INVOKE_SPECIAL.methodName, LF_NEWINVSPECIAL,
+                    DIRECT_INVOKE_INTERFACE.methodName,   LF_INVINTERFACE,
+                    DIRECT_INVOKE_STATIC_INIT.methodName, LF_INVSTATIC_INIT,
+                    DIRECT_INVOKE_SPECIAL_IFC.methodName, LF_INVSPECIAL_IFC
+            );
+
+    static final String DIRECT_HOLDER = "java/lang/invoke/DirectMethodHandle$Holder";
+    static final String DELEGATING_HOLDER = "java/lang/invoke/DelegatingMethodHandle$Holder";
+    static final String BASIC_FORMS_HOLDER = "java/lang/invoke/LambdaForm$Holder";
+    static final String INVOKERS_HOLDER = "java/lang/invoke/Invokers$Holder";
+    static final String INVOKERS_HOLDER_CLASS_NAME = INVOKERS_HOLDER.replace('/', '.');
+    static final String BMH_SPECIES_PREFIX = "java.lang.invoke.BoundMethodHandle$Species_";
+
+    static class HolderClassBuilder {
+
+        private final TreeSet<String> speciesTypes = new TreeSet<>();
+        private final TreeSet<String> invokerTypes = new TreeSet<>();
+        private final TreeSet<String> callSiteTypes = new TreeSet<>();
+        private final Map<String, Set<String>> dmhMethods = new TreeMap<>();
+
+        HolderClassBuilder addSpeciesType(String type) {
+            speciesTypes.add(expandSignature(type));
+            return this;
+        }
+
+        HolderClassBuilder addInvokerType(String methodType) {
+            validateMethodType(methodType);
+            invokerTypes.add(methodType);
+            return this;
+        }
+
+        HolderClassBuilder addCallSiteType(String csType) {
+            validateMethodType(csType);
+            callSiteTypes.add(csType);
+            return this;
+        }
+
+        Map<String, byte[]> build() {
+            int count = 0;
+            for (Set<String> entry : dmhMethods.values()) {
+                count += entry.size();
+            }
+            MethodType[] directMethodTypes = new MethodType[count];
+            int[] dmhTypes = new int[count];
+            int index = 0;
+            for (Map.Entry<String, Set<String>> entry : dmhMethods.entrySet()) {
+                String dmhType = entry.getKey();
+                for (String type : entry.getValue()) {
+                    // The DMH type to actually ask for is retrieved by removing
+                    // the first argument, which must be Object.class
+                    MethodType mt = asMethodType(type);
+                    if (mt.parameterCount() < 1 ||
+                            mt.parameterType(0) != Object.class) {
+                        throw new RuntimeException(
+                                "DMH type parameter must start with L: " + dmhType + " " + type);
+                    }
+
+                    // Adapt the method type of the LF to retrieve
+                    directMethodTypes[index] = mt.dropParameterTypes(0, 1);
+
+                    // invokeVirtual and invokeInterface must have a leading Object
+                    // parameter, i.e., the receiver
+                    dmhTypes[index] = DMH_METHOD_TYPE_MAP.get(dmhType);
+                    if (dmhTypes[index] == LF_INVINTERFACE || dmhTypes[index] == LF_INVVIRTUAL) {
+                        if (mt.parameterCount() < 2 ||
+                                mt.parameterType(1) != Object.class) {
+                            throw new RuntimeException(
+                                    "DMH type parameter must start with LL: " + dmhType + " " + type);
+                        }
+                    }
+                    index++;
+                }
+            }
+
+            // The invoker type to ask for is retrieved by removing the first
+            // and the last argument, which must both be Object.class
+            MethodType[] invokerMethodTypes = new MethodType[invokerTypes.size()];
+            index = 0;
+            for (String invokerType : invokerTypes) {
+                MethodType mt = asMethodType(invokerType);
+                final int lastParam = mt.parameterCount() - 1;
+                if (mt.parameterCount() < 2 ||
+                        mt.parameterType(0) != Object.class ||
+                        mt.parameterType(lastParam) != Object.class) {
+                    throw new RuntimeException(
+                            "Invoker type parameter must start and end with Object: " + invokerType);
+                }
+                mt = mt.dropParameterTypes(lastParam, lastParam + 1);
+                invokerMethodTypes[index] = mt.dropParameterTypes(0, 1);
+                index++;
+            }
+
+            // The callSite type to ask for is retrieved by removing the last
+            // argument, which must be Object.class
+            MethodType[] callSiteMethodTypes = new MethodType[callSiteTypes.size()];
+            index = 0;
+            for (String callSiteType : callSiteTypes) {
+                MethodType mt = asMethodType(callSiteType);
+                final int lastParam = mt.parameterCount() - 1;
+                if (mt.parameterCount() < 1 ||
+                        mt.parameterType(lastParam) != Object.class) {
+                    throw new RuntimeException(
+                            "CallSite type parameter must end with Object: " + callSiteType);
+                }
+                callSiteMethodTypes[index] = mt.dropParameterTypes(lastParam, lastParam + 1);
+                index++;
+            }
+
+            Map<String, byte[]> result = new TreeMap<>();
+            result.put(DIRECT_HOLDER,
+                       generateDirectMethodHandleHolderClassBytes(
+                            DIRECT_HOLDER, directMethodTypes, dmhTypes));
+            result.put(DELEGATING_HOLDER,
+                       generateDelegatingMethodHandleHolderClassBytes(
+                            DELEGATING_HOLDER, directMethodTypes));
+            result.put(INVOKERS_HOLDER,
+                       generateInvokersHolderClassBytes(INVOKERS_HOLDER,
+                            invokerMethodTypes, callSiteMethodTypes));
+            result.put(BASIC_FORMS_HOLDER,
+                       generateBasicFormsClassBytes(BASIC_FORMS_HOLDER));
+
+            speciesTypes.forEach(types -> {
+                Map.Entry<String, byte[]> entry = generateConcreteBMHClassBytes(types);
+                result.put(entry.getKey(), entry.getValue());
+            });
+
+            // clear builder
+            speciesTypes.clear();
+            invokerTypes.clear();
+            callSiteTypes.clear();
+            dmhMethods.clear();
+
+            return result;
+        }
+
+        private static MethodType asMethodType(String basicSignatureString) {
+            String[] parts = basicSignatureString.split("_");
+            assert (parts.length == 2);
+            assert (parts[1].length() == 1);
+            String parameters = expandSignature(parts[0]);
+            Class<?> rtype = simpleType(parts[1].charAt(0));
+            if (parameters.isEmpty()) {
+                return MethodType.methodType(rtype);
+            } else {
+                Class<?>[] ptypes = new Class<?>[parameters.length()];
+                for (int i = 0; i < ptypes.length; i++) {
+                    ptypes[i] = simpleType(parameters.charAt(i));
+                }
+                return MethodType.methodType(rtype, ptypes);
+            }
+        }
+
+        private void addDMHMethodType(String dmh, String methodType) {
+            validateMethodType(methodType);
+            Set<String> methodTypes = dmhMethods.get(dmh);
+            if (methodTypes == null) {
+                methodTypes = new TreeSet<>();
+                dmhMethods.put(dmh, methodTypes);
+            }
+            methodTypes.add(methodType);
+        }
+
+        private static void validateMethodType(String type) {
+            String[] typeParts = type.split("_");
+            // check return type (second part)
+            if (typeParts.length != 2 || typeParts[1].length() != 1
+                    || !isBasicTypeChar(typeParts[1].charAt(0))) {
+                throw new RuntimeException(
+                        "Method type signature must be of form [LJIFD]*_[LJIFDV]");
+            }
+            // expand and check arguments (first part)
+            expandSignature(typeParts[0]);
+        }
+
+        // Expand a compressed signature: "LL" stays "LL", "L3" becomes "LLL"
+        private static String expandSignature(String signature) {
+            StringBuilder sb = new StringBuilder();
+            char last = 'X';
+            int count = 0;
+            for (int i = 0; i < signature.length(); i++) {
+                char c = signature.charAt(i);
+                if (c >= '0' && c <= '9') {
+                    count *= 10;
+                    count += (c - '0');
+                } else {
+                    requireBasicType(c);
+                    for (int j = 1; j < count; j++) {
+                        sb.append(last);
+                    }
+                    sb.append(c);
+                    last = c;
+                    count = 0;
+                }
+            }
+
+            // ended with a number, e.g., "L2": append last char count - 1 times
+            if (count > 1) {
+                requireBasicType(last);
+                for (int j = 1; j < count; j++) {
+                    sb.append(last);
+                }
+            }
+            return sb.toString();
+        }
+
+        private static void requireBasicType(char c) {
+            if (!isArgBasicTypeChar(c)) {
+                throw new RuntimeException(
+                        "Character " + c + " must correspond to a basic field type: LIJFD");
+            }
+        }
+
+        private static Class<?> simpleType(char c) {
+            if (isBasicTypeChar(c)) {
+                return LambdaForm.BasicType.basicType(c).basicTypeClass();
+            }
+            switch (c) {
+                case 'Z':
+                case 'B':
+                case 'S':
+                case 'C':
+                    throw new IllegalArgumentException("Not a valid primitive: " + c +
+                            " (use I instead)");
+                default:
+                    throw new IllegalArgumentException("Not a primitive: " + c);
+            }
+        }
+    }
+
+    /*
+     * Returns a map from class names in internal form to the corresponding
+     * class bytes, built from the given stream of SPECIES_RESOLVE and
+     * LF_RESOLVE trace lines.
+     *
+     * Used by GenerateJLIClassesPlugin to pre-generate holder classes during
+     * the jlink phase.
+     */
+    static Map<String, byte[]> generateHolderClasses(Stream<String> traces)  {
+        HolderClassBuilder builder = new HolderClassBuilder();
+        traces.map(line -> line.split(" "))
+                .forEach(parts -> {
+                    switch (parts[0]) {
+                        case SPECIES_RESOLVE:
+                            // Allow for new types of species data classes being resolved here
+                            assert parts.length == 3;
+                            if (parts[1].startsWith(BMH_SPECIES_PREFIX)) {
+                                String species = parts[1].substring(BMH_SPECIES_PREFIX.length());
+                                if (!"L".equals(species)) {
+                                    builder.addSpeciesType(species);
+                                }
+                            }
+                            break;
+                        case LF_RESOLVE:
+                            assert parts.length > 3;
+                            String methodType = parts[3];
+                            if (parts[1].equals(INVOKERS_HOLDER_CLASS_NAME)) {
+                                if ("linkToTargetMethod".equals(parts[2]) ||
+                                        "linkToCallSite".equals(parts[2])) {
+                                    builder.addCallSiteType(methodType);
+                                } else {
+                                    builder.addInvokerType(methodType);
+                                }
+                            } else if (parts[1].contains("DirectMethodHandle")) {
+                                String dmh = parts[2];
+                                // ignore getObject etc for now (generated by default)
+                                if (DMH_METHOD_TYPE_MAP.containsKey(dmh)) {
+                                    builder.addDMHMethodType(dmh, methodType);
+                                }
+                            }
+                            break;
+                        default:
+                            break; // ignore
+                    }
+                });
+
+        return builder.build();
+    }
+
+    /**
+     * Returns a {@code byte[]} representation of a class implementing
+     * the zero and identity forms of all {@code LambdaForm.BasicType}s.
+     */
     static byte[] generateBasicFormsClassBytes(String className) {
         ArrayList<LambdaForm> forms = new ArrayList<>();
         ArrayList<String> names = new ArrayList<>();
@@ -68,6 +380,11 @@
                 forms.toArray(new LambdaForm[0]));
     }
 
+    /**
+     * Returns a {@code byte[]} representation of a class implementing
+     * the DirectMethodHandle forms for each pairwise combination of
+     * {@code MethodType} and an {@code int} representing the method kind.
+     */
     static byte[] generateDirectMethodHandleHolderClassBytes(String className,
             MethodType[] methodTypes, int[] types) {
         ArrayList<LambdaForm> forms = new ArrayList<>();
@@ -115,6 +432,11 @@
                 forms.toArray(new LambdaForm[0]));
     }
 
+    /**
+     * Returns a {@code byte[]} representation of a class implementing
+     * the DelegatingMethodHandle forms for each {@code MethodType} in the
+     * {@code methodTypes} argument.
+     */
     static byte[] generateDelegatingMethodHandleHolderClassBytes(String className,
             MethodType[] methodTypes) {
 
@@ -145,6 +467,11 @@
                 forms.toArray(new LambdaForm[0]));
     }
 
+    /**
+     * Returns a {@code byte[]} representation of a class implementing
+     * the invoker forms for the set of supplied {@code invokerMethodTypes}
+     * and {@code callSiteMethodTypes}.
+     */
     static byte[] generateInvokersHolderClassBytes(String className,
             MethodType[] invokerMethodTypes, MethodType[] callSiteMethodTypes) {
 
@@ -193,10 +520,7 @@
      * Generate customized code for a set of LambdaForms of specified types into
      * a class with a specified name.
      */
-    private static byte[] generateCodeBytesForLFs(String className,
-            String[] names, LambdaForm[] forms) {
-
-
+    private static byte[] generateCodeBytesForLFs(String className, String[] names, LambdaForm[] forms) {
         ClassWriter cw = new ClassWriter(ClassWriter.COMPUTE_MAXS + ClassWriter.COMPUTE_FRAMES);
         cw.visit(Opcodes.V1_8, Opcodes.ACC_PRIVATE + Opcodes.ACC_FINAL + Opcodes.ACC_SUPER,
                 className, null, InvokerBytecodeGenerator.INVOKER_SUPER_NAME, null);
@@ -229,10 +553,14 @@
                 DelegatingMethodHandle.NF_getTarget);
     }
 
+    /**
+     * Returns a {@code byte[]} representation of a {@code BoundMethodHandle}
+     * species class implementing the signature defined by {@code types}.
+     */
     @SuppressWarnings({"rawtypes", "unchecked"})
     static Map.Entry<String, byte[]> generateConcreteBMHClassBytes(final String types) {
         for (char c : types.toCharArray()) {
-            if ("LIJFD".indexOf(c) < 0) {
+            if (!isArgBasicTypeChar(c)) {
                 throw new IllegalArgumentException("All characters must "
                         + "correspond to a basic field type: LIJFD");
             }
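
The compressed basic-type signatures handled above read as follows: "LL_L"
denotes (Object,Object)Object, and a digit repeats the preceding type
character. A standalone re-implementation sketch of expandSignature
(illustrative names, not the JDK's code):

public class SignatureExpandDemo {
    private static final String BASIC = "LIJFD"; // Object, int, long, float, double

    // Mirrors expandSignature: "LL" -> "LL", "L3" -> "LLL", "L4I2" -> "LLLLII"
    static String expand(String sig) {
        StringBuilder sb = new StringBuilder();
        char last = 'X';
        int count = 0;
        for (char c : sig.toCharArray()) {
            if (c >= '0' && c <= '9') {
                count = count * 10 + (c - '0');
            } else {
                if (BASIC.indexOf(c) < 0)
                    throw new IllegalArgumentException("not a basic type: " + c);
                for (int j = 1; j < count; j++) sb.append(last); // repeat previous
                sb.append(c);
                last = c;
                count = 0;
            }
        }
        if (count > 1) { // trailing count, e.g. "L2": repeat the last char
            for (int j = 1; j < count; j++) sb.append(last);
        }
        return sb.toString();
    }

    public static void main(String[] args) {
        System.out.println(expand("LL"));    // LL
        System.out.println(expand("L3"));    // LLL
        System.out.println(expand("L4I2"));  // LLLLII
    }
}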
--- a/src/java.base/share/classes/java/lang/invoke/InvokerBytecodeGenerator.java	Wed Sep 02 20:33:29 2020 -0700
+++ b/src/java.base/share/classes/java/lang/invoke/InvokerBytecodeGenerator.java	Tue Sep 08 15:28:06 2020 +0800
@@ -46,6 +46,7 @@
 import java.util.List;
 import java.util.stream.Stream;
 
+import static java.lang.invoke.GenerateJLIClassesHelper.traceLambdaForm;
 import static java.lang.invoke.LambdaForm.BasicType;
 import static java.lang.invoke.LambdaForm.BasicType.*;
 import static java.lang.invoke.LambdaForm.*;
@@ -322,7 +323,9 @@
     private static MemberName resolveInvokerMember(Class<?> invokerClass, String name, MethodType type) {
         MemberName member = new MemberName(invokerClass, name, type, REF_invokeStatic);
         try {
-            member = MEMBERNAME_FACTORY.resolveOrFail(REF_invokeStatic, member, HOST_CLASS, ReflectiveOperationException.class);
+            member = MEMBERNAME_FACTORY.resolveOrFail(REF_invokeStatic, member,
+                                                      HOST_CLASS, LM_TRUSTED,
+                                                      ReflectiveOperationException.class);
         } catch (ReflectiveOperationException e) {
             throw newInternalError(e);
         }
@@ -693,11 +696,8 @@
 
     private static MemberName resolveFrom(String name, MethodType type, Class<?> holder) {
         MemberName member = new MemberName(holder, name, type, REF_invokeStatic);
-        MemberName resolvedMember = MemberName.getFactory().resolveOrNull(REF_invokeStatic, member, holder);
-        if (TRACE_RESOLVE) {
-            System.out.println("[LF_RESOLVE] " + holder.getName() + " " + name + " " +
-                    shortenSignature(basicTypeSignature(type)) + (resolvedMember != null ? " (success)" : " (fail)") );
-        }
+        MemberName resolvedMember = MemberName.getFactory().resolveOrNull(REF_invokeStatic, member, holder, LM_TRUSTED);
+        traceLambdaForm(name, type, holder, resolvedMember);
         return resolvedMember;
     }
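
The consolidated trace output is what generateHolderClasses consumes. Running
any method-handle-heavy program with the existing property
-Djava.lang.invoke.MethodHandle.TRACE_RESOLVE=true prints those lines; a
minimal driver (hypothetical class name, and the sample trace line in the
comment is only indicative of the format):

import java.lang.invoke.MethodHandle;
import java.lang.invoke.MethodHandles;
import java.lang.invoke.MethodType;

public class TraceResolveDemo {
    public static void main(String[] args) throws Throwable {
        // Forces a few LambdaForms to be resolved; with TRACE_RESOLVE enabled,
        // lines such as
        //   [LF_RESOLVE] java.lang.invoke.DirectMethodHandle$Holder invokeStatic LII_I (success)
        // appear on stdout and can be fed to jlink's --generate-jli-classes plugin.
        MethodHandle max = MethodHandles.lookup().findStatic(
                Math.class, "max",
                MethodType.methodType(int.class, int.class, int.class));
        System.out.println((int) max.invokeExact(1, 2)); // 2
    }
}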
 
--- a/src/java.base/share/classes/java/lang/invoke/Invokers.java	Wed Sep 02 20:33:29 2020 -0700
+++ b/src/java.base/share/classes/java/lang/invoke/Invokers.java	Tue Sep 08 15:28:06 2020 +0800
@@ -661,7 +661,7 @@
         MemberName member = new MemberName(Invokers.class, name, type, REF_invokeStatic);
         return new NamedFunction(
                 MemberName.getFactory()
-                        .resolveOrFail(REF_invokeStatic, member, Invokers.class, NoSuchMethodException.class));
+                        .resolveOrFail(REF_invokeStatic, member, Invokers.class, LM_TRUSTED, NoSuchMethodException.class));
     }
 
     private static class Lazy {
--- a/src/java.base/share/classes/java/lang/invoke/LambdaForm.java	Wed Sep 02 20:33:29 2020 -0700
+++ b/src/java.base/share/classes/java/lang/invoke/LambdaForm.java	Tue Sep 08 15:28:06 2020 +0800
@@ -40,7 +40,7 @@
 import java.util.HashMap;
 
 import static java.lang.invoke.LambdaForm.BasicType.*;
-import static java.lang.invoke.MethodHandleNatives.Constants.REF_invokeStatic;
+import static java.lang.invoke.MethodHandleNatives.Constants.*;
 import static java.lang.invoke.MethodHandleStatics.*;
 
 /**
@@ -1758,10 +1758,10 @@
             MemberName idMem = new MemberName(LambdaForm.class, "identity_"+btChar, idType, REF_invokeStatic);
             MemberName zeMem = null;
             try {
-                idMem = IMPL_NAMES.resolveOrFail(REF_invokeStatic, idMem, null, NoSuchMethodException.class);
+                idMem = IMPL_NAMES.resolveOrFail(REF_invokeStatic, idMem, null, LM_TRUSTED, NoSuchMethodException.class);
                 if (!isVoid) {
                     zeMem = new MemberName(LambdaForm.class, "zero_"+btChar, zeType, REF_invokeStatic);
-                    zeMem = IMPL_NAMES.resolveOrFail(REF_invokeStatic, zeMem, null, NoSuchMethodException.class);
+                    zeMem = IMPL_NAMES.resolveOrFail(REF_invokeStatic, zeMem, null, LM_TRUSTED, NoSuchMethodException.class);
                 }
             } catch (IllegalAccessException|NoSuchMethodException ex) {
                 throw newInternalError(ex);
--- a/src/java.base/share/classes/java/lang/invoke/MemberName.java	Wed Sep 02 20:33:29 2020 -0700
+++ b/src/java.base/share/classes/java/lang/invoke/MemberName.java	Tue Sep 08 15:28:06 2020 +0800
@@ -1064,7 +1064,7 @@
          *  If lookup fails or access is not permitted, null is returned.
          *  Otherwise a fresh copy of the given member is returned, with modifier bits filled in.
          */
-        private MemberName resolve(byte refKind, MemberName ref, Class<?> lookupClass,
+        private MemberName resolve(byte refKind, MemberName ref, Class<?> lookupClass, int allowedModes,
                                    boolean speculativeResolve) {
             MemberName m = ref.clone();  // JVM will side-effect the ref
             assert(refKind == m.getReferenceKind());
@@ -1084,7 +1084,7 @@
                 //
                 // REFC view on PTYPES doesn't matter, since it is used only as a starting point for resolution and doesn't
                 // participate in method selection.
-                m = MethodHandleNatives.resolve(m, lookupClass, speculativeResolve);
+                m = MethodHandleNatives.resolve(m, lookupClass, allowedModes, speculativeResolve);
                 if (m == null && speculativeResolve) {
                     return null;
                 }
@@ -1108,10 +1108,12 @@
          *  Otherwise a fresh copy of the given member is returned, with modifier bits filled in.
          */
         public <NoSuchMemberException extends ReflectiveOperationException>
-                MemberName resolveOrFail(byte refKind, MemberName m, Class<?> lookupClass,
-                                 Class<NoSuchMemberException> nsmClass)
+                MemberName resolveOrFail(byte refKind, MemberName m,
+                                         Class<?> lookupClass, int allowedModes,
+                                         Class<NoSuchMemberException> nsmClass)
                 throws IllegalAccessException, NoSuchMemberException {
-            MemberName result = resolve(refKind, m, lookupClass, false);
+            assert lookupClass != null || allowedModes == LM_TRUSTED;
+            MemberName result = resolve(refKind, m, lookupClass, allowedModes, false);
             if (result.isResolved())
                 return result;
             ReflectiveOperationException ex = result.makeAccessException();
@@ -1124,8 +1126,9 @@
          *  If lookup fails or access is not permitted, return null.
          *  Otherwise a fresh copy of the given member is returned, with modifier bits filled in.
          */
-        public MemberName resolveOrNull(byte refKind, MemberName m, Class<?> lookupClass) {
-            MemberName result = resolve(refKind, m, lookupClass, true);
+        public MemberName resolveOrNull(byte refKind, MemberName m, Class<?> lookupClass, int allowedModes) {
+            assert lookupClass != null || allowedModes == LM_TRUSTED;
+            MemberName result = resolve(refKind, m, lookupClass, allowedModes, true);
             if (result != null && result.isResolved())
                 return result;
             return null;
--- a/src/java.base/share/classes/java/lang/invoke/MethodHandleImpl.java	Wed Sep 02 20:33:29 2020 -0700
+++ b/src/java.base/share/classes/java/lang/invoke/MethodHandleImpl.java	Tue Sep 08 15:28:06 2020 +0800
@@ -1764,41 +1764,8 @@
             }
 
             @Override
-            public byte[] generateDirectMethodHandleHolderClassBytes(
-                    String className, MethodType[] methodTypes, int[] types) {
-                return GenerateJLIClassesHelper
-                        .generateDirectMethodHandleHolderClassBytes(
-                                className, methodTypes, types);
-            }
-
-            @Override
-            public byte[] generateDelegatingMethodHandleHolderClassBytes(
-                    String className, MethodType[] methodTypes) {
-                return GenerateJLIClassesHelper
-                        .generateDelegatingMethodHandleHolderClassBytes(
-                                className, methodTypes);
-            }
-
-            @Override
-            public Map.Entry<String, byte[]> generateConcreteBMHClassBytes(
-                    final String types) {
-                return GenerateJLIClassesHelper
-                        .generateConcreteBMHClassBytes(types);
-            }
-
-            @Override
-            public byte[] generateBasicFormsClassBytes(final String className) {
-                return GenerateJLIClassesHelper
-                        .generateBasicFormsClassBytes(className);
-            }
-
-            @Override
-            public byte[] generateInvokersHolderClassBytes(final String className,
-                    MethodType[] invokerMethodTypes,
-                    MethodType[] callSiteMethodTypes) {
-                return GenerateJLIClassesHelper
-                        .generateInvokersHolderClassBytes(className,
-                                invokerMethodTypes, callSiteMethodTypes);
+            public Map<String, byte[]> generateHolderClasses(Stream<String> traces) {
+                return GenerateJLIClassesHelper.generateHolderClasses(traces);
             }
 
             @Override
--- a/src/java.base/share/classes/java/lang/invoke/MethodHandleNatives.java	Wed Sep 02 20:33:29 2020 -0700
+++ b/src/java.base/share/classes/java/lang/invoke/MethodHandleNatives.java	Tue Sep 08 15:28:06 2020 +0800
@@ -51,7 +51,7 @@
 
     static native void init(MemberName self, Object ref);
     static native void expand(MemberName self);
-    static native MemberName resolve(MemberName self, Class<?> caller,
+    static native MemberName resolve(MemberName self, Class<?> caller, int lookupMode,
             boolean speculativeResolve) throws LinkageError, ClassNotFoundException;
     static native int getMembers(Class<?> defc, String matchName, String matchSig,
             int matchFlags, Class<?> caller, int skip, MemberName[] results);
@@ -149,6 +149,15 @@
             HIDDEN_CLASS              = 0x00000002,
             STRONG_LOADER_LINK        = 0x00000004,
             ACCESS_VM_ANNOTATIONS     = 0x00000008;
+
+        /**
+         * Lookup modes
+         */
+        static final int
+            LM_MODULE        = Lookup.MODULE,
+            LM_UNCONDITIONAL = Lookup.UNCONDITIONAL,
+            LM_TRUSTED       = -1;
+
     }
 
     static boolean refKindIsValid(int refKind) {
@@ -561,7 +570,7 @@
                     guardType, REF_invokeStatic);
 
             linker = MemberName.getFactory().resolveOrNull(REF_invokeStatic, linker,
-                                                           VarHandleGuards.class);
+                                                           VarHandleGuards.class, LM_TRUSTED);
             if (linker != null) {
                 return linker;
             }
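
The new constants mirror the public Lookup mode bits, which a quick probe
(hypothetical class name) makes concrete:

import java.lang.invoke.MethodHandles;
import java.lang.invoke.MethodHandles.Lookup;

public class LookupModeBitsDemo {
    public static void main(String[] args) {
        // MODULE and UNCONDITIONAL are the two mode bits forwarded to the VM;
        // LM_TRUSTED is -1 (all bits set) and never escapes java.base.
        System.out.printf("MODULE        = 0x%02x%n", Lookup.MODULE);        // 0x10
        System.out.printf("UNCONDITIONAL = 0x%02x%n", Lookup.UNCONDITIONAL); // 0x20
        System.out.printf("full lookup   = 0x%02x%n", MethodHandles.lookup().lookupModes());
        System.out.printf("public lookup = 0x%02x%n", MethodHandles.publicLookup().lookupModes());
    }
}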
--- a/src/java.base/share/classes/java/lang/invoke/MethodHandles.java	Wed Sep 02 20:33:29 2020 -0700
+++ b/src/java.base/share/classes/java/lang/invoke/MethodHandles.java	Tue Sep 08 15:28:06 2020 +0800
@@ -1409,14 +1409,7 @@
 
         // This is just for calling out to MethodHandleImpl.
         private Class<?> lookupClassOrNull() {
-            if (allowedModes == TRUSTED) {
-                return null;
-            }
-            if (allowedModes == UNCONDITIONAL) {
-                // use Object as the caller to pass to VM doing resolution
-                return Object.class;
-            }
-            return lookupClass;
+            return (allowedModes == TRUSTED) ? null : lookupClass;
         }
 
         /** Tells which access-protection classes of members this lookup object can produce.
@@ -3442,7 +3435,7 @@
             checkSymbolicClass(refc);  // do this before attempting to resolve
             Objects.requireNonNull(name);