changeset 57531:a6688d292f1a

8167065: Add intrinsic support for double precision shifting on x86_64 Reviewed-by: kvn
author svkamath
date Mon, 23 Dec 2019 14:42:21 -0800
parents d05fcdf25717
children f0634d11b7de
files src/hotspot/cpu/x86/assembler_x86.cpp src/hotspot/cpu/x86/assembler_x86.hpp src/hotspot/cpu/x86/stubGenerator_x86_64.cpp src/hotspot/cpu/x86/vm_version_x86.cpp src/hotspot/cpu/x86/vm_version_x86.hpp src/hotspot/share/aot/aotCodeHeap.cpp src/hotspot/share/classfile/vmSymbols.cpp src/hotspot/share/classfile/vmSymbols.hpp src/hotspot/share/jvmci/vmStructs_jvmci.cpp src/hotspot/share/opto/c2compiler.cpp src/hotspot/share/opto/escape.cpp src/hotspot/share/opto/library_call.cpp src/hotspot/share/opto/runtime.cpp src/hotspot/share/opto/runtime.hpp src/hotspot/share/runtime/stubRoutines.cpp src/hotspot/share/runtime/stubRoutines.hpp src/hotspot/share/runtime/vmStructs.cpp src/java.base/share/classes/java/math/BigInteger.java src/jdk.aot/share/classes/jdk.tools.jaotc.binformat/src/jdk/tools/jaotc/binformat/BinaryContainer.java src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot.test/src/org/graalvm/compiler/hotspot/test/CheckGraalIntrinsics.java test/hotspot/jtreg/compiler/intrinsics/bigInteger/TestShift.java test/micro/org/openjdk/bench/java/math/BigIntegers.java
diffstat 22 files changed, 628 insertions(+), 50 deletions(-) [+]
line wrap: on
line diff
--- a/src/hotspot/cpu/x86/assembler_x86.cpp	Sat Dec 21 15:27:37 2019 +0800
+++ b/src/hotspot/cpu/x86/assembler_x86.cpp	Mon Dec 23 14:42:21 2019 -0800
@@ -4257,8 +4257,8 @@
 
 void Assembler::vpshufd(XMMRegister dst, XMMRegister src, int mode, int vector_len) {
   assert(vector_len == AVX_128bit? VM_Version::supports_avx() :
-         vector_len == AVX_256bit? VM_Version::supports_avx2() :
-         0, "");
+         (vector_len == AVX_256bit? VM_Version::supports_avx2() :
+         (vector_len == AVX_512bit? VM_Version::supports_evex() : 0)), "");
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
@@ -4737,6 +4737,36 @@
   emit_int8((unsigned char)(0xE8 | encode));
 }
 
+void Assembler::shldl(Register dst, Register src) {  // SHLD dst, src, CL (count is implicit in CL)
+  const int modrm = prefix_and_encode(src->encoding(), dst->encoding());  // src -> ModRM.reg, dst -> ModRM.rm
+  emit_int8(0x0F);                               // two-byte opcode escape
+  emit_int8((unsigned char)0xA5);                // opcode: 0F A5 /r
+  emit_int8((unsigned char)(0xC0 | modrm));      // register-direct form (mod = 11b)
+}
+
+void Assembler::shldl(Register dst, Register src, int8_t imm8) {  // SHLD dst, src, imm8
+  const int modrm = prefix_and_encode(src->encoding(), dst->encoding());  // src -> ModRM.reg, dst -> ModRM.rm
+  emit_int8(0x0F);                               // two-byte opcode escape
+  emit_int8((unsigned char)0xA4);                // opcode: 0F A4 /r ib
+  emit_int8((unsigned char)(0xC0 | modrm));      // register-direct form (mod = 11b)
+  emit_int8(imm8);                               // ib: immediate shift count
+}
+
+void Assembler::shrdl(Register dst, Register src) {  // SHRD dst, src, CL (count is implicit in CL)
+  const int modrm = prefix_and_encode(src->encoding(), dst->encoding());  // src -> ModRM.reg, dst -> ModRM.rm
+  emit_int8(0x0F);                               // two-byte opcode escape
+  emit_int8((unsigned char)0xAD);                // opcode: 0F AD /r
+  emit_int8((unsigned char)(0xC0 | modrm));      // register-direct form (mod = 11b)
+}
+
+void Assembler::shrdl(Register dst, Register src, int8_t imm8) {  // SHRD dst, src, imm8
+  const int modrm = prefix_and_encode(src->encoding(), dst->encoding());  // src -> ModRM.reg, dst -> ModRM.rm
+  emit_int8(0x0F);                               // two-byte opcode escape
+  emit_int8((unsigned char)0xAC);                // opcode: 0F AC /r ib
+  emit_int8((unsigned char)(0xC0 | modrm));      // register-direct form (mod = 11b)
+  emit_int8(imm8);                               // ib: immediate shift count
+}
+
 // copies a single word from [esi] to [edi]
 void Assembler::smovl() {
   emit_int8((unsigned char)0xA5);
@@ -6513,6 +6543,23 @@
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
+void Assembler::vpshldvd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
+  assert(VM_Version::supports_vbmi2(), "requires vbmi2");
+  InstructionAttr attrs(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+  attrs.set_is_evex_instruction();  // always use the EVEX encoding
+  const int modrm = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attrs);
+  emit_int8(0x71);  // opcode: EVEX.66.0F38.W0 71 /r (VPSHLDVD)
+  emit_int8((unsigned char)(0xC0 | modrm));  // register-direct form (mod = 11b)
+}
+
+void Assembler::vpshrdvd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
+  assert(VM_Version::supports_vbmi2(), "requires vbmi2");
+  InstructionAttr attrs(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+  attrs.set_is_evex_instruction();  // always use the EVEX encoding
+  const int modrm = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attrs);
+  emit_int8(0x73);  // opcode: EVEX.66.0F38.W0 73 /r (VPSHRDVD)
+  emit_int8((unsigned char)(0xC0 | modrm));  // register-direct form (mod = 11b)
+}
 
 void Assembler::pandn(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
@@ -8109,26 +8156,6 @@
   emit_int8((unsigned char)(0xE0 | dst->encoding()));
 }
 
-void Assembler::shldl(Register dst, Register src) {
-  emit_int8(0x0F);
-  emit_int8((unsigned char)0xA5);
-  emit_int8((unsigned char)(0xC0 | src->encoding() << 3 | dst->encoding()));
-}
-
-// 0F A4 / r ib
-void Assembler::shldl(Register dst, Register src, int8_t imm8) {
-  emit_int8(0x0F);
-  emit_int8((unsigned char)0xA4);
-  emit_int8((unsigned char)(0xC0 | src->encoding() << 3 | dst->encoding()));
-  emit_int8(imm8);
-}
-
-void Assembler::shrdl(Register dst, Register src) {
-  emit_int8(0x0F);
-  emit_int8((unsigned char)0xAD);
-  emit_int8((unsigned char)(0xC0 | src->encoding() << 3 | dst->encoding()));
-}
-
 #else // LP64
 
 void Assembler::set_byte_if_not_zero(Register dst) {
--- a/src/hotspot/cpu/x86/assembler_x86.hpp	Sat Dec 21 15:27:37 2019 +0800
+++ b/src/hotspot/cpu/x86/assembler_x86.hpp	Mon Dec 23 14:42:21 2019 -0800
@@ -1838,6 +1838,8 @@
 
   void shldl(Register dst, Register src);
   void shldl(Register dst, Register src, int8_t imm8);
+  void shrdl(Register dst, Register src);
+  void shrdl(Register dst, Register src, int8_t imm8);
 
   void shll(Register dst, int imm8);
   void shll(Register dst);
@@ -1845,8 +1847,6 @@
   void shlq(Register dst, int imm8);
   void shlq(Register dst);
 
-  void shrdl(Register dst, Register src);
-
   void shrl(Register dst, int imm8);
   void shrl(Register dst);
 
@@ -2140,6 +2140,9 @@
   void evpsraq(XMMRegister dst, XMMRegister src, int shift, int vector_len);
   void evpsraq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
 
+  void vpshldvd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
+  void vpshrdvd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
+
   // And packed integers
   void pand(XMMRegister dst, XMMRegister src);
   void vpand(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
--- a/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp	Sat Dec 21 15:27:37 2019 +0800
+++ b/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp	Mon Dec 23 14:42:21 2019 -0800
@@ -5694,6 +5694,247 @@
     return start;
   }
 
+  // Stub for BigInteger.shiftRightImplWorker(newArr, oldArr, newIdx, shiftCount, numIter).
+  // Working from the highest index down, it computes for every i in [0, numIter):
+  //   newArr[newIdx + i] = (oldArr[i + 1] >>> shiftCount) | (oldArr[i] << (32 - shiftCount))
+  // with shrd for one or two words at a time and, above a size threshold, vpshrdvd for 16 words at a time.
+  address generate_bigIntegerRightShift() {
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", "bigIntegerRightShiftWorker");
+    address start = __ pc();
+    Label Shift512Loop, ShiftTwo, ShiftTwoLoop, ShiftOne, Exit;
+    // For Unix, the arguments are as follows: rdi, rsi, rdx, rcx, r8.
+    const Register newArr = rdi;
+    const Register oldArr = rsi;
+    const Register newIdx = rdx;
+    const Register shiftCount = rcx;  // It was intentional to have shiftCount in rcx since it is used implicitly for shift.
+    const Register totalNumIter = r8;
+
+    // For windows, we use r9 and r10 as temps to save rdi and rsi. Thus we cannot allocate them for our temps.
+    // For everything else, we prefer using r9 and r10 since we do not have to save them before use.
+    const Register tmp1 = r11;                    // Caller save.
+    const Register tmp2 = rax;                    // Caller save.
+    const Register tmp3 = WINDOWS_ONLY(r12) NOT_WINDOWS(r9);   // Windows: Callee save. Linux: Caller save.
+    const Register tmp4 = WINDOWS_ONLY(r13) NOT_WINDOWS(r10);  // Windows: Callee save. Linux: Caller save.
+    const Register tmp5 = r14;                    // Callee save.
+
+    const XMMRegister x0 = xmm0;
+    const XMMRegister x1 = xmm1;
+    const XMMRegister x2 = xmm2;
+
+    BLOCK_COMMENT("Entry:");
+    __ enter(); // required for proper stackwalking of RuntimeStub frame
+
+#ifdef _WINDOWS
+    setup_arg_regs(4);
+    // For windows, since last argument is on stack, we need to move it to the appropriate register.
+    __ movl(totalNumIter, Address(rsp, 6 * wordSize));
+    // Save callee save registers.
+    __ push(tmp3);
+    __ push(tmp4);
+#endif
+    __ push(tmp5);
+
+    // Rename temps used throughout the code.
+    const Register idx = tmp1;   // index into oldArr
+    const Register nIdx = tmp2;  // index into newArr (always idx + newIdx)
+
+    // Start right shift from end of the array.
+    // For example, if #iteration = 4 and newIdx = 1
+    // then dest[4] = src[4] >>> shiftCount | src[3] << (32 - shiftCount)
+    // if #iteration = 4 and newIdx = 0
+    // then dest[3] = src[4] >>> shiftCount | src[3] << (32 - shiftCount)
+    __ movl(idx, totalNumIter);
+    __ movl(nIdx, idx);
+    __ addl(nIdx, newIdx);
+
+    // If VBMI2 is available, take the 512-bit loop only when numIter >= max(AVX3Threshold/64, 16).
+    // Otherwise go to ShiftTwo, which processes two words per iteration.
+    if (VM_Version::supports_vbmi2()) {
+      __ cmpl(totalNumIter, (AVX3Threshold/64));  // 32-bit compare: numIter is a Java int
+      __ jcc(Assembler::less, ShiftTwo);
+
+      if (AVX3Threshold < 16 * 64) {
+        __ cmpl(totalNumIter, 16);                // one vector iteration needs at least 16 words
+        __ jcc(Assembler::less, ShiftTwo);
+      }
+      __ evpbroadcastd(x0, shiftCount, Assembler::AVX_512bit);  // shift count in every 32-bit lane
+      __ subl(idx, 16);
+      __ subl(nIdx, 16);
+      __ BIND(Shift512Loop);
+      __ evmovdqul(x2, Address(oldArr, idx, Address::times_4, 4), Assembler::AVX_512bit);  // oldArr[idx+1 .. idx+16]
+      __ evmovdqul(x1, Address(oldArr, idx, Address::times_4), Assembler::AVX_512bit);     // oldArr[idx .. idx+15]
+      __ vpshrdvd(x2, x1, x0, Assembler::AVX_512bit);
+      __ evmovdqul(Address(newArr, nIdx, Address::times_4), x2, Assembler::AVX_512bit);
+      __ subl(nIdx, 16);
+      __ subl(idx, 16);
+      __ jcc(Assembler::greaterEqual, Shift512Loop);  // flags come from subl(idx, 16)
+      __ addl(idx, 16);   // undo the final over-subtraction: idx = words still to process
+      __ addl(nIdx, 16);
+    }
+    __ BIND(ShiftTwo);
+    __ cmpl(idx, 2);
+    __ jcc(Assembler::less, ShiftOne);
+    __ subl(idx, 2);
+    __ subl(nIdx, 2);
+    __ BIND(ShiftTwoLoop);
+    __ movl(tmp5, Address(oldArr, idx, Address::times_4, 8));
+    __ movl(tmp4, Address(oldArr, idx, Address::times_4, 4));
+    __ movl(tmp3, Address(oldArr, idx, Address::times_4));
+    __ shrdl(tmp5, tmp4);  // must precede the next shrdl, which overwrites tmp4
+    __ shrdl(tmp4, tmp3);
+    __ movl(Address(newArr, nIdx, Address::times_4, 4), tmp5);
+    __ movl(Address(newArr, nIdx, Address::times_4), tmp4);
+    __ subl(nIdx, 2);
+    __ subl(idx, 2);
+    __ jcc(Assembler::greaterEqual, ShiftTwoLoop);  // flags come from subl(idx, 2)
+    __ addl(idx, 2);   // undo the final over-subtraction: idx is now 0 or 1
+    __ addl(nIdx, 2);
+
+    // Do the last iteration
+    __ BIND(ShiftOne);
+    __ cmpl(idx, 1);
+    __ jcc(Assembler::less, Exit);
+    __ subl(idx, 1);
+    __ subl(nIdx, 1);
+    __ movl(tmp4, Address(oldArr, idx, Address::times_4, 4));
+    __ movl(tmp3, Address(oldArr, idx, Address::times_4));
+    __ shrdl(tmp4, tmp3);
+    __ movl(Address(newArr, nIdx, Address::times_4), tmp4);
+    __ BIND(Exit);
+    // Restore callee save registers.
+    __ pop(tmp5);
+#ifdef _WINDOWS
+    __ pop(tmp4);
+    __ pop(tmp3);
+    restore_arg_regs();
+#endif
+    __ leave(); // required for proper stackwalking of RuntimeStub frame
+    __ ret(0);
+    return start;
+  }
+
+   /**
+   *  Stub for BigInteger.shiftLeftImplWorker. For every i in [0, numIter):
+   *    newArr[newIdx + i] = (oldArr[i] << shiftCount) | (oldArr[i + 1] >>> (32 - shiftCount))
+   *
+   *  Arguments:
+   *    c_rarg0   - newArr address
+   *    c_rarg1   - oldArr address
+   *    c_rarg2   - newIdx
+   *    c_rarg3   - shiftCount
+   *    c_rarg4   - numIter (not Win64)
+   *    rsp40     - numIter (Win64: passed on the stack; read below, after enter(),
+   *                from rsp + 6 * wordSize)
+   */
+  address generate_bigIntegerLeftShift() {
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this,  "StubRoutines", "bigIntegerLeftShiftWorker");
+    address start = __ pc();
+    Label Shift512Loop, ShiftTwo, ShiftTwoLoop, ShiftOne, Exit;
+    // For Unix, the arguments are as follows: rdi, rsi, rdx, rcx, r8.
+    const Register newArr = rdi;
+    const Register oldArr = rsi;
+    const Register newIdx = rdx;
+    const Register shiftCount = rcx;  // It was intentional to have shiftCount in rcx since it is used implicitly for shift.
+    const Register totalNumIter = r8;
+    // For windows, we use r9 and r10 as temps to save rdi and rsi. Thus we cannot allocate them for our temps.
+    // For everything else, we prefer using r9 and r10 since we do not have to save them before use.
+    const Register tmp1 = r11;                    // Caller save.
+    const Register tmp2 = rax;                    // Caller save.
+    const Register tmp3 = WINDOWS_ONLY(r12) NOT_WINDOWS(r9);   // Windows: Callee save. Linux: Caller save.
+    const Register tmp4 = WINDOWS_ONLY(r13) NOT_WINDOWS(r10);  // Windows: Callee save. Linux: Caller save.
+    const Register tmp5 = r14;                    // Callee save.
+
+    const XMMRegister x0 = xmm0;
+    const XMMRegister x1 = xmm1;
+    const XMMRegister x2 = xmm2;
+    BLOCK_COMMENT("Entry:");
+    __ enter(); // required for proper stackwalking of RuntimeStub frame
+
+#ifdef _WINDOWS
+    setup_arg_regs(4);
+    // For windows, since last argument is on stack, we need to move it to the appropriate register.
+    __ movl(totalNumIter, Address(rsp, 6 * wordSize));
+    // Save callee save registers.
+    __ push(tmp3);
+    __ push(tmp4);
+#endif
+    __ push(tmp5);
+
+    // Rename temps used throughout the code
+    const Register idx = tmp1;         // shared index into oldArr and the interior newArr pointer
+    const Register numIterTmp = tmp2;  // words still to process
+
+    // Start idx from zero.
+    __ xorl(idx, idx);
+    // Compute interior pointer for new array. We do this so that we can use same index for both old and new arrays.
+    __ lea(newArr, Address(newArr, newIdx, Address::times_4));
+    __ movl(numIterTmp, totalNumIter);
+
+    // If VBMI2 is available, take the 512-bit loop only when numIter >= max(AVX3Threshold/64, 16).
+    // Otherwise go to ShiftTwo, which shifts two words per iteration.
+    if (VM_Version::supports_vbmi2()) {
+      __ cmpl(totalNumIter, (AVX3Threshold/64));
+      __ jcc(Assembler::less, ShiftTwo);
+
+      if (AVX3Threshold < 16 * 64) {
+        __ cmpl(totalNumIter, 16);  // one vector iteration needs at least 16 words
+        __ jcc(Assembler::less, ShiftTwo);
+      }
+      __ evpbroadcastd(x0, shiftCount, Assembler::AVX_512bit);  // shift count in every 32-bit lane
+      __ subl(numIterTmp, 16);  // bias the counter so the loop can exit on the sign of the subtraction
+      __ BIND(Shift512Loop);
+      __ evmovdqul(x1, Address(oldArr, idx, Address::times_4), Assembler::AVX_512bit);       // oldArr[idx .. idx+15]
+      __ evmovdqul(x2, Address(oldArr, idx, Address::times_4, 0x4), Assembler::AVX_512bit);  // oldArr[idx+1 .. idx+16]
+      __ vpshldvd(x1, x2, x0, Assembler::AVX_512bit);
+      __ evmovdqul(Address(newArr, idx, Address::times_4), x1, Assembler::AVX_512bit);
+      __ addl(idx, 16);
+      __ subl(numIterTmp, 16);
+      __ jcc(Assembler::greaterEqual, Shift512Loop);  // flags come from subl(numIterTmp, 16)
+      __ addl(numIterTmp, 16);  // undo the bias: numIterTmp = words still to process
+    }
+    __ BIND(ShiftTwo);
+    __ cmpl(totalNumIter, 1);  // tests the original count, which is never modified
+    __ jcc(Assembler::less, Exit);
+    __ movl(tmp3, Address(oldArr, idx, Address::times_4));  // tmp3 carries the current source word across iterations
+    __ subl(numIterTmp, 2);
+    __ jcc(Assembler::less, ShiftOne);
+
+    __ BIND(ShiftTwoLoop);
+    __ movl(tmp4, Address(oldArr, idx, Address::times_4, 0x4));
+    __ movl(tmp5, Address(oldArr, idx, Address::times_4, 0x8));
+    __ shldl(tmp3, tmp4);  // must precede the next shldl, which overwrites tmp4
+    __ shldl(tmp4, tmp5);
+    __ movl(Address(newArr, idx, Address::times_4), tmp3);
+    __ movl(Address(newArr, idx, Address::times_4, 0x4), tmp4);
+    __ movl(tmp3, tmp5);  // oldArr[idx + 2] is the first source word of the next iteration
+    __ addl(idx, 2);
+    __ subl(numIterTmp, 2);
+    __ jcc(Assembler::greaterEqual, ShiftTwoLoop);  // flags come from subl(numIterTmp, 2)
+
+    // Do the last iteration
+    __ BIND(ShiftOne);
+    __ addl(numIterTmp, 2);  // undo the bias of the two-word loop: numIterTmp is now 0 or 1
+    __ cmpl(numIterTmp, 1);
+    __ jcc(Assembler::less, Exit);
+    __ movl(tmp4, Address(oldArr, idx, Address::times_4, 0x4));
+    __ shldl(tmp3, tmp4);
+    __ movl(Address(newArr, idx, Address::times_4), tmp3);
+
+    __ BIND(Exit);
+    // Restore callee save registers.
+    __ pop(tmp5);
+#ifdef _WINDOWS
+    __ pop(tmp4);
+    __ pop(tmp3);
+    restore_arg_regs();
+#endif
+    __ leave(); // required for proper stackwalking of RuntimeStub frame
+    __ ret(0);
+    return start;
+  }
+
   address generate_libmExp() {
     StubCodeMark mark(this, "StubRoutines", "libmExp");
 
@@ -6314,6 +6555,10 @@
     if (UseMulAddIntrinsic) {
       StubRoutines::_mulAdd = generate_mulAdd();
     }
+    if (VM_Version::supports_vbmi2()) {
+      StubRoutines::_bigIntegerRightShiftWorker = generate_bigIntegerRightShift();
+      StubRoutines::_bigIntegerLeftShiftWorker = generate_bigIntegerLeftShift();
+    }
 #ifndef _WINDOWS
     if (UseMontgomeryMultiplyIntrinsic) {
       StubRoutines::_montgomeryMultiply
--- a/src/hotspot/cpu/x86/vm_version_x86.cpp	Sat Dec 21 15:27:37 2019 +0800
+++ b/src/hotspot/cpu/x86/vm_version_x86.cpp	Mon Dec 23 14:42:21 2019 -0800
@@ -694,6 +694,7 @@
     _features &= ~CPU_AVX512_VPCLMULQDQ;
     _features &= ~CPU_VAES;
     _features &= ~CPU_VNNI;
+    _features &= ~CPU_VBMI2;
   }
 
   if (UseAVX < 2)
@@ -716,7 +717,7 @@
   }
 
   char buf[256];
-  jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
+  jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
                cores_per_cpu(), threads_per_core(),
                cpu_family(), _model, _stepping,
                (supports_cmov() ? ", cmov" : ""),
@@ -749,7 +750,10 @@
                (supports_adx() ? ", adx" : ""),
                (supports_evex() ? ", evex" : ""),
                (supports_sha() ? ", sha" : ""),
-               (supports_fma() ? ", fma" : ""));
+               (supports_fma() ? ", fma" : ""),
+               (supports_vbmi2() ? ", vbmi2" : ""),
+               (supports_vaes() ? ", vaes" : ""),
+               (supports_vnni() ? ", vnni" : ""));
   _features_string = os::strdup(buf);
 
   // UseSSE is set to the smaller of what hardware supports and what
--- a/src/hotspot/cpu/x86/vm_version_x86.hpp	Sat Dec 21 15:27:37 2019 +0800
+++ b/src/hotspot/cpu/x86/vm_version_x86.hpp	Mon Dec 23 14:42:21 2019 -0800
@@ -345,6 +345,8 @@
 #define CPU_FLUSH ((uint64_t)UCONST64(0x20000000000))  // flush instruction
 #define CPU_FLUSHOPT ((uint64_t)UCONST64(0x40000000000)) // flushopt instruction
 #define CPU_CLWB ((uint64_t)UCONST64(0x80000000000))   // clwb instruction
+#define CPU_VBMI2 ((uint64_t)UCONST64(0x100000000000))   // AVX-512 VBMI2 (provides the vpshldvd/vpshrdvd double-shift instructions)
+
 
 enum Extended_Family {
     // AMD
@@ -567,6 +569,8 @@
           result |= CPU_VAES;
         if (_cpuid_info.sef_cpuid7_ecx.bits.avx512_vnni != 0)
           result |= CPU_VNNI;
+        if (_cpuid_info.sef_cpuid7_ecx.bits.avx512_vbmi2 != 0)
+          result |= CPU_VBMI2;
       }
     }
     if (_cpuid_info.sef_cpuid7_ebx.bits.bmi1 != 0)
@@ -858,6 +862,7 @@
   static bool supports_avx512_vpclmulqdq() { return (_features & CPU_AVX512_VPCLMULQDQ) != 0; }
   static bool supports_vaes()       { return (_features & CPU_VAES) != 0; }
   static bool supports_vnni()       { return (_features & CPU_VNNI) != 0; }
+  static bool supports_vbmi2()      { return (_features & CPU_VBMI2) != 0; }  // true when the CPU_VBMI2 feature bit is set
 
   // Intel features
   static bool is_intel_family_core() { return is_intel() &&
--- a/src/hotspot/share/aot/aotCodeHeap.cpp	Sat Dec 21 15:27:37 2019 +0800
+++ b/src/hotspot/share/aot/aotCodeHeap.cpp	Mon Dec 23 14:42:21 2019 -0800
@@ -555,6 +555,8 @@
     SET_AOT_GLOBAL_SYMBOL_VALUE("_aot_stub_routines_montgomeryMultiply",  address, StubRoutines::_montgomeryMultiply);
     SET_AOT_GLOBAL_SYMBOL_VALUE("_aot_stub_routines_montgomerySquare", address, StubRoutines::_montgomerySquare);
     SET_AOT_GLOBAL_SYMBOL_VALUE("_aot_stub_routines_vectorizedMismatch", address, StubRoutines::_vectorizedMismatch);
+    SET_AOT_GLOBAL_SYMBOL_VALUE("_aot_stub_routines_bigIntegerRightShiftWorker", address, StubRoutines::_bigIntegerRightShiftWorker);
+    SET_AOT_GLOBAL_SYMBOL_VALUE("_aot_stub_routines_bigIntegerLeftShiftWorker", address, StubRoutines::_bigIntegerLeftShiftWorker);
 
     SET_AOT_GLOBAL_SYMBOL_VALUE("_aot_stub_routines_throw_delayed_StackOverflowError_entry", address, StubRoutines::_throw_delayed_StackOverflowError_entry);
 
--- a/src/hotspot/share/classfile/vmSymbols.cpp	Sat Dec 21 15:27:37 2019 +0800
+++ b/src/hotspot/share/classfile/vmSymbols.cpp	Mon Dec 23 14:42:21 2019 -0800
@@ -837,6 +837,9 @@
   case vmIntrinsics::_montgomerySquare:
     if (!UseMontgomerySquareIntrinsic) return true;
     break;
+  case vmIntrinsics::_bigIntegerRightShiftWorker:
+  case vmIntrinsics::_bigIntegerLeftShiftWorker:
+    break;
   case vmIntrinsics::_addExactI:
   case vmIntrinsics::_addExactL:
   case vmIntrinsics::_decrementExactI:
--- a/src/hotspot/share/classfile/vmSymbols.hpp	Sat Dec 21 15:27:37 2019 +0800
+++ b/src/hotspot/share/classfile/vmSymbols.hpp	Mon Dec 23 14:42:21 2019 -0800
@@ -565,6 +565,7 @@
   template(char_StringBuffer_signature,               "(C)Ljava/lang/StringBuffer;")                              \
   template(int_String_signature,                      "(I)Ljava/lang/String;")                                    \
   template(boolean_boolean_int_signature,             "(ZZ)I")                                                    \
+  template(big_integer_shift_worker_signature,        "([I[IIII)V")                                               \
   template(reflect_method_signature,                  "Ljava/lang/reflect/Method;")                                                    \
   /* signature symbols needed by intrinsics */                                                                    \
   VM_INTRINSICS_DO(VM_INTRINSIC_IGNORE, VM_SYMBOL_IGNORE, VM_SYMBOL_IGNORE, template, VM_ALIAS_IGNORE)            \
@@ -1007,6 +1008,12 @@
    do_name(     montgomerySquare_name,                             "implMontgomerySquare")                              \
    do_signature(montgomerySquare_signature,                        "([I[IIJ[I)[I")                                      \
                                                                                                                         \
+  do_intrinsic(_bigIntegerRightShiftWorker, java_math_BigInteger, rightShift_name, big_integer_shift_worker_signature, F_S) \
+   do_name(     rightShift_name,                                 "shiftRightImplWorker")                                \
+                                                                                                                        \
+  do_intrinsic(_bigIntegerLeftShiftWorker, java_math_BigInteger, leftShift_name, big_integer_shift_worker_signature, F_S) \
+   do_name(     leftShift_name,                                 "shiftLeftImplWorker")                                  \
+                                                                                                                        \
   do_class(jdk_internal_util_ArraysSupport, "jdk/internal/util/ArraysSupport")                                                          \
   do_intrinsic(_vectorizedMismatch, jdk_internal_util_ArraysSupport, vectorizedMismatch_name, vectorizedMismatch_signature, F_S)\
    do_name(vectorizedMismatch_name, "vectorizedMismatch")                                                               \
--- a/src/hotspot/share/jvmci/vmStructs_jvmci.cpp	Sat Dec 21 15:27:37 2019 +0800
+++ b/src/hotspot/share/jvmci/vmStructs_jvmci.cpp	Mon Dec 23 14:42:21 2019 -0800
@@ -322,6 +322,8 @@
   static_field(StubRoutines,                _montgomeryMultiply,                              address)                               \
   static_field(StubRoutines,                _montgomerySquare,                                address)                               \
   static_field(StubRoutines,                _vectorizedMismatch,                              address)                               \
+  static_field(StubRoutines,                _bigIntegerRightShiftWorker,                      address)                               \
+  static_field(StubRoutines,                _bigIntegerLeftShiftWorker,                       address)                               \
                                                                                                                                      \
   nonstatic_field(Thread,                   _tlab,                                            ThreadLocalAllocBuffer)                \
   nonstatic_field(Thread,                   _allocated_bytes,                                 jlong)                                 \
--- a/src/hotspot/share/opto/c2compiler.cpp	Sat Dec 21 15:27:37 2019 +0800
+++ b/src/hotspot/share/opto/c2compiler.cpp	Mon Dec 23 14:42:21 2019 -0800
@@ -628,6 +628,8 @@
   case vmIntrinsics::_mulAdd:
   case vmIntrinsics::_montgomeryMultiply:
   case vmIntrinsics::_montgomerySquare:
+  case vmIntrinsics::_bigIntegerRightShiftWorker:
+  case vmIntrinsics::_bigIntegerLeftShiftWorker:
   case vmIntrinsics::_vectorizedMismatch:
   case vmIntrinsics::_ghash_processBlocks:
   case vmIntrinsics::_base64_encodeBlock:
--- a/src/hotspot/share/opto/escape.cpp	Sat Dec 21 15:27:37 2019 +0800
+++ b/src/hotspot/share/opto/escape.cpp	Mon Dec 23 14:42:21 2019 -0800
@@ -1006,6 +1006,8 @@
                   strcmp(call->as_CallLeaf()->_name, "mulAdd") == 0 ||
                   strcmp(call->as_CallLeaf()->_name, "montgomery_multiply") == 0 ||
                   strcmp(call->as_CallLeaf()->_name, "montgomery_square") == 0 ||
+                  strcmp(call->as_CallLeaf()->_name, "bigIntegerRightShiftWorker") == 0 ||
+                  strcmp(call->as_CallLeaf()->_name, "bigIntegerLeftShiftWorker") == 0 ||
                   strcmp(call->as_CallLeaf()->_name, "vectorizedMismatch") == 0)
                  ))) {
             call->dump();
--- a/src/hotspot/share/opto/library_call.cpp	Sat Dec 21 15:27:37 2019 +0800
+++ b/src/hotspot/share/opto/library_call.cpp	Mon Dec 23 14:42:21 2019 -0800
@@ -327,6 +327,7 @@
   bool inline_mulAdd();
   bool inline_montgomeryMultiply();
   bool inline_montgomerySquare();
+  bool inline_bigIntegerShift(bool isRightShift);
   bool inline_vectorizedMismatch();
   bool inline_fma(vmIntrinsics::ID id);
   bool inline_character_compare(vmIntrinsics::ID id);
@@ -845,6 +846,11 @@
   case vmIntrinsics::_montgomerySquare:
     return inline_montgomerySquare();
 
+  case vmIntrinsics::_bigIntegerRightShiftWorker:
+    return inline_bigIntegerShift(true);
+  case vmIntrinsics::_bigIntegerLeftShiftWorker:
+    return inline_bigIntegerShift(false);
+
   case vmIntrinsics::_vectorizedMismatch:
     return inline_vectorizedMismatch();
 
@@ -5253,6 +5259,60 @@
   return true;
 }
 
+// Intrinsic for BigInteger.shiftRightImplWorker / shiftLeftImplWorker.
+// Emits a leaf runtime call to the matching StubRoutines entry. Returns false without
+// emitting anything if the stub is absent or an argument is not a known int[] array.
+bool LibraryCallKit::inline_bigIntegerShift(bool isRightShift) {
+  address stubAddr = isRightShift ? StubRoutines::bigIntegerRightShift()
+                                  : StubRoutines::bigIntegerLeftShift();
+  if (stubAddr == NULL) {
+    return false; // Intrinsic's stub is not implemented on this platform
+  }
+  const char* stubName = isRightShift ? "bigIntegerRightShiftWorker" : "bigIntegerLeftShiftWorker";
+
+  assert(callee()->signature()->size() == 5, "expected 5 arguments");
+
+  Node* newArr = argument(0);
+  Node* oldArr = argument(1);
+  Node* newIdx = argument(2);
+  Node* shiftCount = argument(3);
+  Node* numIter = argument(4);
+
+  const Type* newArr_type = newArr->Value(&_gvn);
+  const TypeAryPtr* top_newArr = newArr_type->isa_aryptr();
+  const Type* oldArr_type = oldArr->Value(&_gvn);
+  const TypeAryPtr* top_oldArr = oldArr_type->isa_aryptr();
+  if (top_newArr == NULL || top_newArr->klass() == NULL || top_oldArr == NULL
+      || top_oldArr->klass() == NULL) {
+    return false;
+  }
+
+  BasicType newArr_elem = top_newArr->klass()->as_array_klass()->element_type()->basic_type();
+  BasicType oldArr_elem = top_oldArr->klass()->as_array_klass()->element_type()->basic_type();
+  if (newArr_elem != T_INT || oldArr_elem != T_INT) {
+    return false;
+  }
+
+  // Make the call
+  {
+    // The stub takes raw addresses of element 0, not array oops.
+    Node* newArr_start = array_element_address(newArr, intcon(0), newArr_elem);
+    Node* oldArr_start = array_element_address(oldArr, intcon(0), oldArr_elem);
+
+    make_runtime_call(RC_LEAF,
+                      OptoRuntime::bigIntegerShift_Type(),
+                      stubAddr,
+                      stubName,
+                      TypePtr::BOTTOM,
+                      newArr_start,
+                      oldArr_start,
+                      newIdx,
+                      shiftCount,
+                      numIter);
+  }
+  return true;
+}
+
 //-------------inline_vectorizedMismatch------------------------------
 bool LibraryCallKit::inline_vectorizedMismatch() {
   assert(UseVectorizedMismatchIntrinsic, "not implementated on this platform");
--- a/src/hotspot/share/opto/runtime.cpp	Sat Dec 21 15:27:37 2019 +0800
+++ b/src/hotspot/share/opto/runtime.cpp	Mon Dec 23 14:42:21 2019 -0800
@@ -1111,6 +1111,25 @@
   return TypeFunc::make(domain, range);
 }
 
+const TypeFunc * OptoRuntime::bigIntegerShift_Type() {
+  // Domain: (newArr, oldArr, newIdx, shiftCount, numIter).
+  int argcnt = 5;
+  const Type** in_fields = TypeTuple::fields(argcnt);
+  int argp = TypeFunc::Parms;
+  in_fields[argp++] = TypePtr::NOTNULL;  // newArr
+  in_fields[argp++] = TypePtr::NOTNULL;  // oldArr
+  in_fields[argp++] = TypeInt::INT;      // newIdx
+  in_fields[argp++] = TypeInt::INT;      // shiftCount
+  in_fields[argp++] = TypeInt::INT;      // numIter
+  assert(argp == TypeFunc::Parms + argcnt, "correct decoding");
+  const TypeTuple* domain = TypeTuple::make(TypeFunc::Parms + argcnt, in_fields);
+
+  // Range: no result value; the single allocated slot is cleared.
+  const Type** out_fields = TypeTuple::fields(1);
+  out_fields[TypeFunc::Parms + 0] = NULL;
+  return TypeFunc::make(domain, TypeTuple::make(TypeFunc::Parms, out_fields));
+}
+
 const TypeFunc* OptoRuntime::vectorizedMismatch_Type() {
   // create input type (domain)
   int num_args = 4;
--- a/src/hotspot/share/opto/runtime.hpp	Sat Dec 21 15:27:37 2019 +0800
+++ b/src/hotspot/share/opto/runtime.hpp	Mon Dec 23 14:42:21 2019 -0800
@@ -289,6 +289,8 @@
 
   static const TypeFunc* mulAdd_Type();
 
+  static const TypeFunc* bigIntegerShift_Type();
+
   static const TypeFunc* vectorizedMismatch_Type();
 
   static const TypeFunc* ghash_processBlocks_Type();
--- a/src/hotspot/share/runtime/stubRoutines.cpp	Sat Dec 21 15:27:37 2019 +0800
+++ b/src/hotspot/share/runtime/stubRoutines.cpp	Mon Dec 23 14:42:21 2019 -0800
@@ -157,6 +157,8 @@
 address StubRoutines::_mulAdd = NULL;
 address StubRoutines::_montgomeryMultiply = NULL;
 address StubRoutines::_montgomerySquare = NULL;
+address StubRoutines::_bigIntegerRightShiftWorker = NULL;
+address StubRoutines::_bigIntegerLeftShiftWorker = NULL;
 
 address StubRoutines::_vectorizedMismatch = NULL;
 
--- a/src/hotspot/share/runtime/stubRoutines.hpp	Sat Dec 21 15:27:37 2019 +0800
+++ b/src/hotspot/share/runtime/stubRoutines.hpp	Mon Dec 23 14:42:21 2019 -0800
@@ -239,6 +239,8 @@
   static address _mulAdd;
   static address _montgomeryMultiply;
   static address _montgomerySquare;
+  static address _bigIntegerRightShiftWorker;
+  static address _bigIntegerLeftShiftWorker;
 
   static address _vectorizedMismatch;
 
@@ -414,6 +416,8 @@
   static address mulAdd()              { return _mulAdd; }
   static address montgomeryMultiply()  { return _montgomeryMultiply; }
   static address montgomerySquare()    { return _montgomerySquare; }
+  static address bigIntegerRightShift() { return _bigIntegerRightShiftWorker; }  // NULL if no stub was generated
+  static address bigIntegerLeftShift()  { return _bigIntegerLeftShiftWorker; }  // NULL if no stub was generated
 
   static address vectorizedMismatch()  { return _vectorizedMismatch; }
 
--- a/src/hotspot/share/runtime/vmStructs.cpp	Sat Dec 21 15:27:37 2019 +0800
+++ b/src/hotspot/share/runtime/vmStructs.cpp	Mon Dec 23 14:42:21 2019 -0800
@@ -602,6 +602,8 @@
      static_field(StubRoutines,                _updateBytesCRC32C,                            address)                               \
      static_field(StubRoutines,                _multiplyToLen,                                address)                               \
      static_field(StubRoutines,                _squareToLen,                                  address)                               \
+     static_field(StubRoutines,                _bigIntegerRightShiftWorker,                   address)                               \
+     static_field(StubRoutines,                _bigIntegerLeftShiftWorker,                    address)                               \
      static_field(StubRoutines,                _mulAdd,                                       address)                               \
      static_field(StubRoutines,                _dexp,                                         address)                               \
      static_field(StubRoutines,                _dlog,                                         address)                               \
--- a/src/java.base/share/classes/java/math/BigInteger.java	Sat Dec 21 15:27:37 2019 +0800
+++ b/src/java.base/share/classes/java/math/BigInteger.java	Mon Dec 23 14:42:21 2019 -0800
@@ -42,6 +42,7 @@
 import jdk.internal.math.FloatConsts;
 import jdk.internal.HotSpotIntrinsicCandidate;
 import jdk.internal.vm.annotation.Stable;
+import jdk.internal.vm.annotation.ForceInline;
 
 /**
  * Immutable arbitrary-precision integers.  All operations behave as if
@@ -2621,12 +2622,8 @@
 
     // shifts a up to len right n bits assumes no leading zeros, 0<n<32
     static void primitiveRightShift(int[] a, int len, int n) {
-        int n2 = 32 - n;
-        for (int i=len-1, c=a[i]; i > 0; i--) {
-            int b = c;
-            c = a[i-1];
-            a[i] = (c << n2) | (b >>> n);
-        }
+        Objects.checkFromToIndex(0, len, a.length); // throws unless 0 <= len <= a.length
+        shiftRightImplWorker(a, a, 1, n, len-1);    // in place: rewrites a[len-1] down to a[1]; a[0] is finished below
         a[0] >>>= n;
     }
 
@@ -2634,13 +2631,8 @@
     static void primitiveLeftShift(int[] a, int len, int n) {
         if (len == 0 || n == 0)
             return;
-
-        int n2 = 32 - n;
-        for (int i=0, c=a[i], m=i+len-1; i < m; i++) {
-            int b = c;
-            c = a[i+1];
-            a[i] = (b << n) | (c >>> n2);
-        }
+        Objects.checkFromToIndex(0, len, a.length); // throws unless 0 <= len <= a.length
+        shiftLeftImplWorker(a, a, 0, n, len-1);     // in place: rewrites a[0] up to a[len-2]; a[len-1] is finished below
         a[len-1] <<= n;
     }
 
@@ -3353,14 +3345,25 @@
             } else {
                 newMag = new int[magLen + nInts];
             }
-            int j=0;
-            while (j < magLen-1)
-                newMag[i++] = mag[j++] << nBits | mag[j] >>> nBits2;
-            newMag[i] = mag[j] << nBits;
+            int numIter = magLen - 1;
+            Objects.checkFromToIndex(0, numIter + 1, mag.length);
+            Objects.checkFromToIndex(i, numIter + i + 1, newMag.length);
+            shiftLeftImplWorker(newMag, mag, i, nBits, numIter);
+            newMag[numIter + i] = mag[numIter] << nBits;
         }
         return newMag;
     }
 
+    /** Stores numIter left-shifted words into newArr from newIdx on; oldArr[numIter] only supplies carry bits. */
+    @ForceInline
+    @HotSpotIntrinsicCandidate
+    private static void shiftLeftImplWorker(int[] newArr, int[] oldArr, int newIdx, int shiftCount, int numIter) {
+        final int carryShift = 32 - shiftCount;
+        for (int src = 0; src < numIter; src++, newIdx++) {
+            newArr[newIdx] = (oldArr[src] << shiftCount) | (oldArr[src + 1] >>> carryShift);
+        }
+    }
+
     /**
      * Returns a BigInteger whose value is {@code (this >> n)}.  Sign
      * extension is performed.  The shift distance, {@code n}, may be
@@ -3415,11 +3418,10 @@
             } else {
                 newMag = new int[magLen - nInts -1];
             }
-
-            int nBits2 = 32 - nBits;
-            int j=0;
-            while (j < magLen - nInts - 1)
-                newMag[i++] = (mag[j++] << nBits2) | (mag[j] >>> nBits);
+            int numIter = magLen - nInts - 1;
+            Objects.checkFromToIndex(0, numIter + 1, mag.length);
+            Objects.checkFromToIndex(i, numIter + i, newMag.length);
+            shiftRightImplWorker(newMag, mag, i, nBits, numIter);
         }
 
         if (signum < 0) {
@@ -3437,6 +3439,17 @@
         return new BigInteger(newMag, signum);
     }
 
+    // Fills newArr from the top index down to newIdx with right-shifted oldArr words; each word takes
+    // its high bits from the next lower oldArr word. With newIdx == 0 the target index trails by one.
+    @ForceInline
+    @HotSpotIntrinsicCandidate
+    private static void shiftRightImplWorker(int[] newArr, int[] oldArr, int newIdx, int shiftCount, int numIter) {
+        final int carryShift = 32 - shiftCount;
+        for (int src = numIter, dst = (newIdx == 0) ? numIter - 1 : numIter; dst >= newIdx; src--, dst--) {
+            newArr[dst] = (oldArr[src] >>> shiftCount) | (oldArr[src - 1] << carryShift);
+        }
+    }
+
     int[] javaIncrement(int[] val) {
         int lastSum = 0;
         for (int i=val.length-1;  i >= 0 && lastSum == 0; i--)
--- a/src/jdk.aot/share/classes/jdk.tools.jaotc.binformat/src/jdk/tools/jaotc/binformat/BinaryContainer.java	Sat Dec 21 15:27:37 2019 +0800
+++ b/src/jdk.aot/share/classes/jdk.tools.jaotc.binformat/src/jdk/tools/jaotc/binformat/BinaryContainer.java	Mon Dec 23 14:42:21 2019 -0800
@@ -229,6 +229,8 @@
         {"StubRoutines::_montgomeryMultiply", "_aot_stub_routines_montgomeryMultiply" },
         {"StubRoutines::_montgomerySquare", "_aot_stub_routines_montgomerySquare" },
         {"StubRoutines::_vectorizedMismatch", "_aot_stub_routines_vectorizedMismatch" },
+        {"StubRoutines::_bigIntegerRightShiftWorker", "_aot_stub_routines_bigIntegerRightShiftWorker" },
+        {"StubRoutines::_bigIntegerLeftShiftWorker", "_aot_stub_routines_bigIntegerLeftShiftWorker" },
 
         {"StubRoutines::_throw_delayed_StackOverflowError_entry", "_aot_stub_routines_throw_delayed_StackOverflowError_entry" },
 
--- a/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot.test/src/org/graalvm/compiler/hotspot/test/CheckGraalIntrinsics.java	Sat Dec 21 15:27:37 2019 +0800
+++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot.test/src/org/graalvm/compiler/hotspot/test/CheckGraalIntrinsics.java	Mon Dec 23 14:42:21 2019 -0800
@@ -416,7 +416,9 @@
         if (isJDK14OrHigher()) {
             add(toBeInvestigated,
                             "com/sun/crypto/provider/ElectronicCodeBook.implECBDecrypt([BII[BI)I",
-                            "com/sun/crypto/provider/ElectronicCodeBook.implECBEncrypt([BII[BI)I");
+                            "com/sun/crypto/provider/ElectronicCodeBook.implECBEncrypt([BII[BI)I",
+                            "java/math/BigInteger.shiftLeftImplWorker([I[IIII)V",
+                            "java/math/BigInteger.shiftRightImplWorker([I[IIII)V");
         }
 
         if (!config.inlineNotify()) {
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/hotspot/jtreg/compiler/intrinsics/bigInteger/TestShift.java	Mon Dec 23 14:42:21 2019 -0800
@@ -0,0 +1,130 @@
+/*
+ * Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/**
+ * @test
+ * @bug 8234692
+ * @summary Add C2 x86 intrinsic for BigInteger::shiftLeft() and BigInteger::shiftRight() methods
+ * @requires vm.compiler2.enabled
+ *
+ * @run main/othervm/timeout=600 -XX:-TieredCompilation -Xbatch
+ *      -XX:CompileCommand=exclude,compiler.intrinsics.bigInteger.TestShift::main
+ *      -XX:CompileCommand=option,compiler.intrinsics.bigInteger.TestShift::base_left_shift,ccstr,DisableIntrinsic,_bigIntegerLeftShiftWorker
+ *      -XX:CompileCommand=option,compiler.intrinsics.bigInteger.TestShift::base_right_shift,ccstr,DisableIntrinsic,_bigIntegerRightShiftWorker
+ *      -XX:CompileCommand=inline,java.math.BigInteger::shiftLeft
+ *      -XX:CompileCommand=inline,java.math.BigInteger::shiftRight
+ *      compiler.intrinsics.bigInteger.TestShift
+ *
+ * @run main/othervm/timeout=600
+ *      -XX:CompileCommand=exclude,compiler.intrinsics.bigInteger.TestShift::main
+ *      -XX:CompileCommand=option,compiler.intrinsics.bigInteger.TestShift::base_left_shift,ccstr,DisableIntrinsic,_bigIntegerLeftShiftWorker
+ *      -XX:CompileCommand=option,compiler.intrinsics.bigInteger.TestShift::base_right_shift,ccstr,DisableIntrinsic,_bigIntegerRightShiftWorker
+ *      -XX:CompileCommand=inline,java.math.BigInteger::shiftLeft
+ *      -XX:CompileCommand=inline,java.math.BigInteger::shiftRight
+ *      compiler.intrinsics.bigInteger.TestShift
+ *
+ */
+
+package compiler.intrinsics.bigInteger;
+
+import java.math.BigInteger;
+import java.util.Arrays;
+import java.util.Random;
+
+public class TestShift {
+
+    // Reference path: the @run lines above pass DisableIntrinsic,_bigIntegerLeftShiftWorker for this method.
+    public static BigInteger base_left_shift(BigInteger op1, int shift) {
+      return op1.shiftLeft(shift);
+    }
+
+    // Path under test: no DisableIntrinsic option targets this method.
+    public static BigInteger new_left_shift(BigInteger op1, int shift) {
+      return op1.shiftLeft(shift);
+    }
+
+    // Reference path: the @run lines above pass DisableIntrinsic,_bigIntegerRightShiftWorker for this method.
+    public static BigInteger base_right_shift(BigInteger op1, int shift) {
+      return op1.shiftRight(shift);
+    }
+
+    // Path under test: no DisableIntrinsic option targets this method.
+    public static BigInteger new_right_shift(BigInteger op1, int shift) {
+      return op1.shiftRight(shift);
+    }
+
+    // Returns true iff both values have identical toByteArray() encodings.
+    public static boolean bytecompare(BigInteger b1, BigInteger b2) {
+      return Arrays.equals(b1.toByteArray(), b2.toByteArray());
+    }
+
+    // Renders toByteArray() as two-digit lowercase hex bytes, each followed by a space.
+    public static String stringify(BigInteger b) {
+      StringBuilder strout = new StringBuilder();
+      for (byte value : b.toByteArray()) {
+        strout.append(String.format("%02x", value)).append(' ');
+      }
+      return strout.toString();
+    }
+
+    /**
+     * Shifts ten random BigIntegers left and right by one random count in [1, 30],
+     * 100000 times each, and compares the base_* and new_* results every time.
+     *
+     * @param args optional: args[0], if present, is parsed as the Random seed
+     * @throws Exception on the first mismatch; the message names seed and shift count
+     */
+    public static void main(String args[]) throws Exception {
+      // System.nanoTime() seeds are unrepeatable, so accept a seed to replay a failure.
+      long seed = (args.length > 0) ? Long.parseLong(args[0]) : System.nanoTime();
+      Random rand = new Random(seed);
+      int shiftCount = rand.nextInt(30) + 1;
+      String runInfo = " (seed=" + seed + ", shiftCount=" + shiftCount + ")";
+
+      BigInteger [] inputbuffer = new BigInteger[10];
+      for(int i = 0; i < inputbuffer.length; i++) {
+        int numbits = rand.nextInt(4096)+32;
+        inputbuffer[i] = new BigInteger(numbits, rand);
+      }
+
+      for (int j = 0; j < 100000; j++) {
+        for (BigInteger input : inputbuffer) {
+          BigInteger expected = base_left_shift(input, shiftCount);
+          BigInteger actual = new_left_shift(input, shiftCount);
+          if (!bytecompare(expected, actual)) {
+            System.out.println("mismatch for input:" + stringify(input) + "\n" + "expected left shift result:" + stringify(expected) + "\n" +
+                               "calculated left shift result:" + stringify(actual));
+            throw new Exception("Failed" + runInfo);
+          }
+
+          expected = base_right_shift(input, shiftCount);
+          actual = new_right_shift(input, shiftCount);
+          if (!bytecompare(expected, actual)) {
+            System.out.println("mismatch for input:" + stringify(input) + "\n" + "expected right shift result:" + stringify(expected) + "\n" +
+                               "calculated right shift result:" + stringify(actual));
+            throw new Exception("Failed" + runInfo);
+          }
+        }
+      }
+    }
+}
--- a/test/micro/org/openjdk/bench/java/math/BigIntegers.java	Sat Dec 21 15:27:37 2019 +0800
+++ b/test/micro/org/openjdk/bench/java/math/BigIntegers.java	Mon Dec 23 14:42:21 2019 -0800
@@ -45,7 +45,7 @@
 @State(Scope.Thread)
 public class BigIntegers {
 
-    private BigInteger[] hugeArray, largeArray, smallArray;
+    private BigInteger[] hugeArray, largeArray, smallArray, shiftArray;
     public String[] dummyStringArray;
     public Object[] dummyArr;
     private static final int TESTSIZE = 1000;
@@ -53,6 +53,7 @@
     @Setup
     public void setup() {
         Random r = new Random(1123);
+        int numbits = r.nextInt(16384);
 
         hugeArray = new BigInteger[TESTSIZE]; /*
          * Huge numbers larger than
@@ -67,6 +68,10 @@
          * Small number less than
          * MAX_INT
          */
+        shiftArray = new BigInteger[TESTSIZE]; /*
+         * Each array entry is at most 16k bits
+         * in size
+         */
 
         dummyStringArray = new String[TESTSIZE];
         dummyArr = new Object[TESTSIZE];
@@ -78,6 +83,7 @@
                     + ((long) value + (long) Integer.MAX_VALUE));
             largeArray[i] = new BigInteger("" + ((long) value + (long) Integer.MAX_VALUE));
             smallArray[i] = new BigInteger("" + ((long) value / 1000));
+            shiftArray[i] = new BigInteger(numbits, r);
         }
     }
 
@@ -137,4 +143,38 @@
         }
         bh.consume(tmp);
     }
+
+    /**
+     * Invokes the shiftLeft method of BigInteger on every entry of shiftArray,
+     * using a single random shift count in [1, 30] per invocation.
+     */
+    @Benchmark
+    @OperationsPerInvocation(TESTSIZE)
+    public void testLeftShift(Blackhole bh) {
+        Random rand = new Random();
+        int shift = rand.nextInt(30) + 1;
+        for (BigInteger s : shiftArray) {
+            // Shift each entry on its own. Chaining the result through one variable
+            // would grow it by shift bits per step and never touch entries after the
+            // first, while still being counted as TESTSIZE operations.
+            bh.consume(s.shiftLeft(shift));
+        }
+    }
+
+    /**
+     * Invokes the shiftRight method of BigInteger on every entry of shiftArray,
+     * using a single random shift count in [1, 30] per invocation.
+     */
+    @Benchmark
+    @OperationsPerInvocation(TESTSIZE)
+    public void testRightShift(Blackhole bh) {
+        Random rand = new Random();
+        int shift = rand.nextInt(30) + 1;
+        for (BigInteger s : shiftArray) {
+            // Shift each entry on its own. Chaining the result through one variable
+            // would shrink it toward zero and never touch entries after the first,
+            // while still being counted as TESTSIZE operations.
+            bh.consume(s.shiftRight(shift));
+        }
+    }
 }