changeset 57871:db30f31b9a8e

7175279: Don't use x87 FPU on x86-64 Reviewed-by: kvn, roland
author vlivanov
date Thu, 30 Jan 2020 00:46:43 +0300
parents ea066e5bdfd6
children 029fe1db925d
files src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp src/hotspot/cpu/aarch64/c1_LIRGenerator_aarch64.cpp src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp src/hotspot/cpu/sparc/c1_LIRAssembler_sparc.cpp src/hotspot/cpu/x86/assembler_x86.cpp src/hotspot/cpu/x86/assembler_x86.hpp src/hotspot/cpu/x86/c1_CodeStubs_x86.cpp src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp src/hotspot/cpu/x86/c1_LIRAssembler_x86.hpp src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp src/hotspot/cpu/x86/c1_LinearScan_x86.cpp src/hotspot/cpu/x86/c1_MacroAssembler_x86.cpp src/hotspot/cpu/x86/c1_Runtime1_x86.cpp src/hotspot/cpu/x86/macroAssembler_x86.cpp src/hotspot/cpu/x86/macroAssembler_x86.hpp src/hotspot/cpu/x86/methodHandles_x86.cpp src/hotspot/cpu/x86/stubGenerator_x86_64.cpp src/hotspot/cpu/x86/templateInterpreterGenerator_x86_64.cpp src/hotspot/cpu/x86/x86_64.ad src/hotspot/share/c1/c1_CodeStubs.hpp src/hotspot/share/c1/c1_LIR.cpp src/hotspot/share/c1/c1_LIR.hpp src/hotspot/share/c1/c1_LIRAssembler.cpp src/hotspot/share/c1/c1_LIRAssembler.hpp src/hotspot/share/c1/c1_LinearScan.cpp src/hotspot/share/c1/c1_LinearScan.hpp
diffstat 28 files changed, 385 insertions(+), 413 deletions(-) [+]
line wrap: on
line diff
--- a/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp	Wed Jan 29 22:37:17 2020 +0100
+++ b/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp	Thu Jan 30 00:46:43 2020 +0300
@@ -138,18 +138,6 @@
   }
 }
 
-void LIR_Assembler::set_24bit_FPU() { Unimplemented(); }
-
-void LIR_Assembler::reset_FPU() { Unimplemented(); }
-
-void LIR_Assembler::fpop() { Unimplemented(); }
-
-void LIR_Assembler::fxch(int i) { Unimplemented(); }
-
-void LIR_Assembler::fld(int i) { Unimplemented(); }
-
-void LIR_Assembler::ffree(int i) { Unimplemented(); }
-
 void LIR_Assembler::breakpoint() { Unimplemented(); }
 
 void LIR_Assembler::push(LIR_Opr opr) { Unimplemented(); }
--- a/src/hotspot/cpu/aarch64/c1_LIRGenerator_aarch64.cpp	Wed Jan 29 22:37:17 2020 +0100
+++ b/src/hotspot/cpu/aarch64/c1_LIRGenerator_aarch64.cpp	Thu Jan 30 00:46:43 2020 +0300
@@ -1127,7 +1127,6 @@
   // arguments of lir_convert
   LIR_Opr conv_input = input;
   LIR_Opr conv_result = result;
-  ConversionStub* stub = NULL;
 
   __ convert(x->op(), conv_input, conv_result);
 
--- a/src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp	Wed Jan 29 22:37:17 2020 +0100
+++ b/src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp	Thu Jan 30 00:46:43 2020 +0300
@@ -86,30 +86,6 @@
 //--------------fpu register translations-----------------------
 
 
-void LIR_Assembler::set_24bit_FPU() {
-  ShouldNotReachHere();
-}
-
-void LIR_Assembler::reset_FPU() {
-  ShouldNotReachHere();
-}
-
-void LIR_Assembler::fpop() {
-  Unimplemented();
-}
-
-void LIR_Assembler::fxch(int i) {
-  Unimplemented();
-}
-
-void LIR_Assembler::fld(int i) {
-  Unimplemented();
-}
-
-void LIR_Assembler::ffree(int i) {
-  Unimplemented();
-}
-
 void LIR_Assembler::breakpoint() {
   __ breakpoint();
 }
--- a/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp	Wed Jan 29 22:37:17 2020 +0100
+++ b/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp	Thu Jan 30 00:46:43 2020 +0300
@@ -1726,12 +1726,6 @@
 }
 
 
-void LIR_Assembler::fpop() {
-  Unimplemented();
-  // do nothing
-}
-
-
 void LIR_Assembler::intrinsic_op(LIR_Code code, LIR_Opr value, LIR_Opr thread, LIR_Opr dest, LIR_Op* op) {
   switch (code) {
     case lir_sqrt: {
@@ -2691,16 +2685,6 @@
   }
 }
 
-
-void LIR_Assembler::set_24bit_FPU() {
-  Unimplemented();
-}
-
-void LIR_Assembler::reset_FPU() {
-  Unimplemented();
-}
-
-
 void LIR_Assembler::breakpoint() {
   __ illtrap();
 }
@@ -2894,19 +2878,6 @@
 }
 
 
-void LIR_Assembler::fxch(int i) {
-  Unimplemented();
-}
-
-void LIR_Assembler::fld(int i) {
-  Unimplemented();
-}
-
-void LIR_Assembler::ffree(int i) {
-  Unimplemented();
-}
-
-
 void LIR_Assembler::rt_call(LIR_Opr result, address dest,
                             const LIR_OprList* args, LIR_Opr tmp, CodeEmitInfo* info) {
   // Stubs: Called via rt_call, but dest is a stub address (no function descriptor).
--- a/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp	Wed Jan 29 22:37:17 2020 +0100
+++ b/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp	Thu Jan 30 00:46:43 2020 +0300
@@ -1698,10 +1698,6 @@
   }
 }
 
-void LIR_Assembler::fpop() {
-  // do nothing
-}
-
 void LIR_Assembler::intrinsic_op(LIR_Code code, LIR_Opr value, LIR_Opr thread, LIR_Opr dest, LIR_Op* op) {
   switch (code) {
     case lir_sqrt: {
@@ -2739,14 +2735,6 @@
   }
 }
 
-void LIR_Assembler::set_24bit_FPU() {
-  ShouldNotCallThis(); // x86 only
-}
-
-void LIR_Assembler::reset_FPU() {
-  ShouldNotCallThis(); // x86 only
-}
-
 void LIR_Assembler::breakpoint() {
   Unimplemented();
   //  __ breakpoint_trap();
@@ -2887,18 +2875,6 @@
   }
 }
 
-void LIR_Assembler::fxch(int i) {
-  ShouldNotCallThis(); // x86 only
-}
-
-void LIR_Assembler::fld(int i) {
-  ShouldNotCallThis(); // x86 only
-}
-
-void LIR_Assembler::ffree(int i) {
-  ShouldNotCallThis(); // x86 only
-}
-
 void LIR_Assembler::rt_call(LIR_Opr result, address dest,
                             const LIR_OprList* args, LIR_Opr tmp, CodeEmitInfo* info) {
   assert(!tmp->is_valid(), "don't need temporary");
--- a/src/hotspot/cpu/sparc/c1_LIRAssembler_sparc.cpp	Wed Jan 29 22:37:17 2020 +0100
+++ b/src/hotspot/cpu/sparc/c1_LIRAssembler_sparc.cpp	Thu Jan 30 00:46:43 2020 +0300
@@ -1732,12 +1732,6 @@
   }
 }
 
-
-void LIR_Assembler::fpop() {
-  // do nothing
-}
-
-
 void LIR_Assembler::intrinsic_op(LIR_Code code, LIR_Opr value, LIR_Opr thread, LIR_Opr dest, LIR_Op* op) {
   switch (code) {
     case lir_tan: {
@@ -2658,16 +2652,6 @@
   }
 }
 
-void LIR_Assembler::set_24bit_FPU() {
-  Unimplemented();
-}
-
-
-void LIR_Assembler::reset_FPU() {
-  Unimplemented();
-}
-
-
 void LIR_Assembler::breakpoint() {
   __ breakpoint_trap();
 }
@@ -3057,19 +3041,6 @@
   }
 }
 
-
-void LIR_Assembler::fxch(int i) {
-  Unimplemented();
-}
-
-void LIR_Assembler::fld(int i) {
-  Unimplemented();
-}
-
-void LIR_Assembler::ffree(int i) {
-  Unimplemented();
-}
-
 void LIR_Assembler::rt_call(LIR_Opr result, address dest,
                             const LIR_OprList* args, LIR_Opr tmp, CodeEmitInfo* info) {
 
--- a/src/hotspot/cpu/x86/assembler_x86.cpp	Wed Jan 29 22:37:17 2020 +0100
+++ b/src/hotspot/cpu/x86/assembler_x86.cpp	Thu Jan 30 00:46:43 2020 +0300
@@ -7330,9 +7330,7 @@
  emit_int8(0x48 | dst->encoding());
 }
 
-#endif // _LP64
-
-// 64bit typically doesn't use the x87 but needs to for the trig funcs
+// 64bit doesn't use the x87
 
 void Assembler::fabs() {
   emit_int8((unsigned char)0xD9);
@@ -7767,6 +7765,7 @@
   emit_int8((unsigned char)0xD9);
   emit_int8((unsigned char)0xEA);
 }
+#endif // !_LP64
 
 // SSE SIMD prefix byte values corresponding to VexSimdPrefix encoding.
 static int simd_pre[4] = { 0, 0x66, 0xF3, 0xF2 };
@@ -8834,6 +8833,18 @@
   emit_operand(dst, src);
 }
 
+void Assembler::cvttsd2siq(Register dst, Address src) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  // F2 REX.W 0F 2C /r
+  // CVTTSD2SI r64, xmm1/m64
+  InstructionMark im(this);
+  emit_int8((unsigned char)0xF2);
+  prefix(REX_W);
+  emit_int8(0x0F);
+  emit_int8(0x2C);
+  emit_operand(dst, src);
+}
+
 void Assembler::cvttsd2siq(Register dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
--- a/src/hotspot/cpu/x86/assembler_x86.hpp	Wed Jan 29 22:37:17 2020 +0100
+++ b/src/hotspot/cpu/x86/assembler_x86.hpp	Thu Jan 30 00:46:43 2020 +0300
@@ -1110,6 +1110,7 @@
   // Convert with Truncation Scalar Double-Precision Floating-Point Value to Doubleword Integer
   void cvttsd2sil(Register dst, Address src);
   void cvttsd2sil(Register dst, XMMRegister src);
+  void cvttsd2siq(Register dst, Address src);
   void cvttsd2siq(Register dst, XMMRegister src);
 
   // Convert with Truncation Scalar Single-Precision Floating-Point Value to Doubleword Integer
@@ -1137,6 +1138,7 @@
 
   void emms();
 
+#ifndef _LP64
   void fabs();
 
   void fadd(int i);
@@ -1270,17 +1272,18 @@
 
   void fxch(int i = 1);
 
+  void fyl2x();
+  void frndint();
+  void f2xm1();
+  void fldl2e();
+#endif // !_LP64
+
   void fxrstor(Address src);
   void xrstor(Address src);
 
   void fxsave(Address dst);
   void xsave(Address dst);
 
-  void fyl2x();
-  void frndint();
-  void f2xm1();
-  void fldl2e();
-
   void hlt();
 
   void idivl(Register src);
--- a/src/hotspot/cpu/x86/c1_CodeStubs_x86.cpp	Wed Jan 29 22:37:17 2020 +0100
+++ b/src/hotspot/cpu/x86/c1_CodeStubs_x86.cpp	Thu Jan 30 00:46:43 2020 +0300
@@ -37,6 +37,7 @@
 
 #define __ ce->masm()->
 
+#ifndef _LP64
 float ConversionStub::float_zero = 0.0;
 double ConversionStub::double_zero = 0.0;
 
@@ -52,7 +53,6 @@
     __ comisd(input()->as_xmm_double_reg(),
               ExternalAddress((address)&double_zero));
   } else {
-    LP64_ONLY(ShouldNotReachHere());
     __ push(rax);
     __ ftst();
     __ fnstsw_ax();
@@ -76,6 +76,7 @@
   __ bind(do_return);
   __ jmp(_continuation);
 }
+#endif // !_LP64
 
 void CounterOverflowStub::emit_code(LIR_Assembler* ce) {
   __ bind(_entry);
--- a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp	Wed Jan 29 22:37:17 2020 +0100
+++ b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp	Thu Jan 30 00:46:43 2020 +0300
@@ -158,15 +158,7 @@
   }
 }
 
-
-void LIR_Assembler::set_24bit_FPU() {
-  __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
-}
-
-void LIR_Assembler::reset_FPU() {
-  __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
-}
-
+#ifndef _LP64
 void LIR_Assembler::fpop() {
   __ fpop();
 }
@@ -182,6 +174,7 @@
 void LIR_Assembler::ffree(int i) {
   __ ffree(i);
 }
+#endif // !_LP64
 
 void LIR_Assembler::breakpoint() {
   __ int3();
@@ -670,6 +663,7 @@
                    InternalAddress(float_constant(c->as_jfloat())));
         }
       } else {
+#ifndef _LP64
         assert(dest->is_single_fpu(), "must be");
         assert(dest->fpu_regnr() == 0, "dest must be TOS");
         if (c->is_zero_float()) {
@@ -679,6 +673,9 @@
         } else {
           __ fld_s (InternalAddress(float_constant(c->as_jfloat())));
         }
+#else
+        ShouldNotReachHere();
+#endif // !_LP64
       }
       break;
     }
@@ -692,6 +689,7 @@
                     InternalAddress(double_constant(c->as_jdouble())));
         }
       } else {
+#ifndef _LP64
         assert(dest->is_double_fpu(), "must be");
         assert(dest->fpu_regnrLo() == 0, "dest must be TOS");
         if (c->is_zero_double()) {
@@ -701,6 +699,9 @@
         } else {
           __ fld_d (InternalAddress(double_constant(c->as_jdouble())));
         }
+#else
+        ShouldNotReachHere();
+#endif // !_LP64
       }
       break;
     }
@@ -892,6 +893,7 @@
     }
 #endif // LP64
 
+#ifndef _LP64
     // special moves from fpu-register to xmm-register
     // necessary for method results
   } else if (src->is_single_xmm() && !dest->is_single_xmm()) {
@@ -907,6 +909,12 @@
     __ fstp_d(Address(rsp, 0));
     __ movdbl(dest->as_xmm_double_reg(), Address(rsp, 0));
 
+  // move between fpu-registers (no instruction necessary because of fpu-stack)
+  } else if (dest->is_single_fpu() || dest->is_double_fpu()) {
+    assert(src->is_single_fpu() || src->is_double_fpu(), "must match");
+    assert(src->fpu() == dest->fpu(), "currently should be nothing to do");
+#endif // !_LP64
+
     // move between xmm-registers
   } else if (dest->is_single_xmm()) {
     assert(src->is_single_xmm(), "must match");
@@ -915,10 +923,6 @@
     assert(src->is_double_xmm(), "must match");
     __ movdbl(dest->as_xmm_double_reg(), src->as_xmm_double_reg());
 
-    // move between fpu-registers (no instruction necessary because of fpu-stack)
-  } else if (dest->is_single_fpu() || dest->is_double_fpu()) {
-    assert(src->is_single_fpu() || src->is_double_fpu(), "must match");
-    assert(src->fpu() == dest->fpu(), "currently should be nothing to do");
   } else {
     ShouldNotReachHere();
   }
@@ -953,6 +957,7 @@
     Address dst_addr = frame_map()->address_for_slot(dest->double_stack_ix());
     __ movdbl(dst_addr, src->as_xmm_double_reg());
 
+#ifndef _LP64
   } else if (src->is_single_fpu()) {
     assert(src->fpu_regnr() == 0, "argument must be on TOS");
     Address dst_addr = frame_map()->address_for_slot(dest->single_stack_ix());
@@ -964,6 +969,7 @@
     Address dst_addr = frame_map()->address_for_slot(dest->double_stack_ix());
     if (pop_fpu_stack)     __ fstp_d (dst_addr);
     else                   __ fst_d  (dst_addr);
+#endif // !_LP64
 
   } else {
     ShouldNotReachHere();
@@ -998,6 +1004,10 @@
   int null_check_here = code_offset();
   switch (type) {
     case T_FLOAT: {
+#ifdef _LP64
+      assert(src->is_single_xmm(), "not a float");
+      __ movflt(as_Address(to_addr), src->as_xmm_float_reg());
+#else
       if (src->is_single_xmm()) {
         __ movflt(as_Address(to_addr), src->as_xmm_float_reg());
       } else {
@@ -1006,10 +1016,15 @@
         if (pop_fpu_stack)      __ fstp_s(as_Address(to_addr));
         else                    __ fst_s (as_Address(to_addr));
       }
+#endif // _LP64
       break;
     }
 
     case T_DOUBLE: {
+#ifdef _LP64
+      assert(src->is_double_xmm(), "not a double");
+      __ movdbl(as_Address(to_addr), src->as_xmm_double_reg());
+#else
       if (src->is_double_xmm()) {
         __ movdbl(as_Address(to_addr), src->as_xmm_double_reg());
       } else {
@@ -1018,6 +1033,7 @@
         if (pop_fpu_stack)      __ fstp_d(as_Address(to_addr));
         else                    __ fst_d (as_Address(to_addr));
       }
+#endif // _LP64
       break;
     }
 
@@ -1134,6 +1150,7 @@
     Address src_addr = frame_map()->address_for_slot(src->double_stack_ix());
     __ movdbl(dest->as_xmm_double_reg(), src_addr);
 
+#ifndef _LP64
   } else if (dest->is_single_fpu()) {
     assert(dest->fpu_regnr() == 0, "dest must be TOS");
     Address src_addr = frame_map()->address_for_slot(src->single_stack_ix());
@@ -1143,6 +1160,7 @@
     assert(dest->fpu_regnrLo() == 0, "dest must be TOS");
     Address src_addr = frame_map()->address_for_slot(src->double_stack_ix());
     __ fld_d(src_addr);
+#endif // _LP64
 
   } else {
     ShouldNotReachHere();
@@ -1226,9 +1244,13 @@
       if (dest->is_single_xmm()) {
         __ movflt(dest->as_xmm_float_reg(), from_addr);
       } else {
+#ifndef _LP64
         assert(dest->is_single_fpu(), "must be");
         assert(dest->fpu_regnr() == 0, "dest must be TOS");
         __ fld_s(from_addr);
+#else
+        ShouldNotReachHere();
+#endif // !LP64
       }
       break;
     }
@@ -1237,9 +1259,13 @@
       if (dest->is_double_xmm()) {
         __ movdbl(dest->as_xmm_double_reg(), from_addr);
       } else {
+#ifndef _LP64
         assert(dest->is_double_fpu(), "must be");
         assert(dest->fpu_regnrLo() == 0, "dest must be TOS");
         __ fld_d(from_addr);
+#else
+        ShouldNotReachHere();
+#endif // !LP64
       }
       break;
     }
@@ -1495,6 +1521,47 @@
       break;
 
 
+#ifdef _LP64
+    case Bytecodes::_f2d:
+      __ cvtss2sd(dest->as_xmm_double_reg(), src->as_xmm_float_reg());
+      break;
+
+    case Bytecodes::_d2f:
+      __ cvtsd2ss(dest->as_xmm_float_reg(), src->as_xmm_double_reg());
+      break;
+
+    case Bytecodes::_i2f:
+      __ cvtsi2ssl(dest->as_xmm_float_reg(), src->as_register());
+      break;
+
+    case Bytecodes::_i2d:
+      __ cvtsi2sdl(dest->as_xmm_double_reg(), src->as_register());
+      break;
+
+    case Bytecodes::_l2f:
+      __ cvtsi2ssq(dest->as_xmm_float_reg(), src->as_register_lo());
+      break;
+
+    case Bytecodes::_l2d:
+      __ cvtsi2sdq(dest->as_xmm_double_reg(), src->as_register_lo());
+      break;
+
+    case Bytecodes::_f2i:
+      __ convert_f2i(dest->as_register(), src->as_xmm_float_reg());
+      break;
+
+    case Bytecodes::_d2i:
+      __ convert_d2i(dest->as_register(), src->as_xmm_double_reg());
+      break;
+
+    case Bytecodes::_f2l:
+      __ convert_f2l(dest->as_register_lo(), src->as_xmm_float_reg());
+      break;
+
+    case Bytecodes::_d2l:
+      __ convert_d2l(dest->as_register_lo(), src->as_xmm_double_reg());
+      break;
+#else
     case Bytecodes::_f2d:
     case Bytecodes::_d2f:
       if (dest->is_single_xmm()) {
@@ -1520,6 +1587,15 @@
       }
       break;
 
+    case Bytecodes::_l2f:
+    case Bytecodes::_l2d:
+      assert(!dest->is_xmm_register(), "result in xmm register not supported (no SSE instruction present)");
+      assert(dest->fpu() == 0, "result must be on TOS");
+      __ movptr(Address(rsp, 0),          src->as_register_lo());
+      __ movl(Address(rsp, BytesPerWord), src->as_register_hi());
+      __ fild_d(Address(rsp, 0));
+      // float result is rounded later through spilling
+
     case Bytecodes::_f2i:
     case Bytecodes::_d2i:
       if (src->is_single_xmm()) {
@@ -1533,7 +1609,6 @@
         __ movl(dest->as_register(), Address(rsp, 0));
         __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
       }
-
       // IA32 conversion instructions do not match JLS for overflow, underflow and NaN -> fixup in stub
       assert(op->stub() != NULL, "stub required");
       __ cmpl(dest->as_register(), 0x80000000);
@@ -1541,17 +1616,6 @@
       __ bind(*op->stub()->continuation());
       break;
 
-    case Bytecodes::_l2f:
-    case Bytecodes::_l2d:
-      assert(!dest->is_xmm_register(), "result in xmm register not supported (no SSE instruction present)");
-      assert(dest->fpu() == 0, "result must be on TOS");
-
-      __ movptr(Address(rsp, 0),            src->as_register_lo());
-      NOT_LP64(__ movl(Address(rsp, BytesPerWord), src->as_register_hi()));
-      __ fild_d(Address(rsp, 0));
-      // float result is rounded later through spilling
-      break;
-
     case Bytecodes::_f2l:
     case Bytecodes::_d2l:
       assert(!src->is_xmm_register(), "input in xmm register not supported (no SSE instruction present)");
@@ -1563,6 +1627,7 @@
         __ call(RuntimeAddress(Runtime1::entry_for(Runtime1::fpu2long_stub_id)));
       }
       break;
+#endif // _LP64
 
     default: ShouldNotReachHere();
   }
@@ -2222,6 +2287,7 @@
       }
     }
 
+#ifndef _LP64
   } else if (left->is_single_fpu()) {
     assert(dest->is_single_fpu(),  "fpu stack allocation required");
 
@@ -2297,6 +2363,7 @@
       __ fld_x(ExternalAddress(StubRoutines::addr_fpu_subnormal_bias2()));
       __ fmulp(dest->fpu_regnrLo() + 1);
     }
+#endif // !_LP64
 
   } else if (left->is_single_stack() || left->is_address()) {
     assert(left == dest, "left and dest must be equal");
@@ -2339,6 +2406,7 @@
   }
 }
 
+#ifndef _LP64
 void LIR_Assembler::arith_fpu_implementation(LIR_Code code, int left_index, int right_index, int dest_index, bool pop_fpu_stack) {
   assert(pop_fpu_stack  || (left_index     == dest_index || right_index     == dest_index), "invalid LIR");
   assert(!pop_fpu_stack || (left_index - 1 == dest_index || right_index - 1 == dest_index), "invalid LIR");
@@ -2396,6 +2464,7 @@
       ShouldNotReachHere();
   }
 }
+#endif // _LP64
 
 
 void LIR_Assembler::intrinsic_op(LIR_Code code, LIR_Opr value, LIR_Opr tmp, LIR_Opr dest, LIR_Op* op) {
@@ -2425,6 +2494,7 @@
       default      : ShouldNotReachHere();
     }
 
+#ifndef _LP64
   } else if (value->is_double_fpu()) {
     assert(value->fpu_regnrLo() == 0 && dest->fpu_regnrLo() == 0, "both must be on TOS");
     switch(code) {
@@ -2432,6 +2502,7 @@
       case lir_sqrt  : __ fsqrt(); break;
       default      : ShouldNotReachHere();
     }
+#endif // !_LP64
   } else {
     Unimplemented();
   }
@@ -2740,10 +2811,12 @@
       ShouldNotReachHere();
     }
 
+#ifndef _LP64
   } else if(opr1->is_single_fpu() || opr1->is_double_fpu()) {
     assert(opr1->is_fpu_register() && opr1->fpu() == 0, "currently left-hand side must be on TOS (relax this restriction)");
     assert(opr2->is_fpu_register(), "both must be registers");
     __ fcmp(noreg, opr2->fpu(), op->fpu_pop_count() > 0, op->fpu_pop_count() > 1);
+#endif // LP64
 
   } else if (opr1->is_address() && opr2->is_constant()) {
     LIR_Const* c = opr2->as_constant_ptr();
@@ -2787,12 +2860,16 @@
       __ cmpsd2int(left->as_xmm_double_reg(), right->as_xmm_double_reg(), dst->as_register(), code == lir_ucmp_fd2i);
 
     } else {
+#ifdef _LP64
+      ShouldNotReachHere();
+#else
       assert(left->is_single_fpu() || left->is_double_fpu(), "must be");
       assert(right->is_single_fpu() || right->is_double_fpu(), "must match");
 
       assert(left->fpu() == 0, "left must be on TOS");
       __ fcmp2int(dst->as_register(), code == lir_ucmp_fd2i, right->fpu(),
                   op->fpu_pop_count() > 0, op->fpu_pop_count() > 1);
+#endif // LP64
     }
   } else {
     assert(code == lir_cmp_l2i, "check");
@@ -3809,10 +3886,12 @@
       __ xorpd(dest->as_xmm_double_reg(),
                ExternalAddress((address)double_signflip_pool));
     }
+#ifndef _LP64
   } else if (left->is_single_fpu() || left->is_double_fpu()) {
     assert(left->fpu() == 0, "arg must be on TOS");
     assert(dest->fpu() == 0, "dest must be TOS");
     __ fchs();
+#endif // !_LP64
 
   } else {
     ShouldNotReachHere();
@@ -3882,6 +3961,7 @@
       ShouldNotReachHere();
     }
 
+#ifndef _LP64
   } else if (src->is_double_fpu()) {
     assert(src->fpu_regnrLo() == 0, "must be TOS");
     if (dest->is_double_stack()) {
@@ -3901,6 +3981,8 @@
     } else {
       ShouldNotReachHere();
     }
+#endif // !_LP64
+
   } else {
     ShouldNotReachHere();
   }
--- a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.hpp	Wed Jan 29 22:37:17 2020 +0100
+++ b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.hpp	Thu Jan 30 00:46:43 2020 +0300
@@ -29,8 +29,6 @@
 
   Address::ScaleFactor array_element_size(BasicType type) const;
 
-  void arith_fpu_implementation(LIR_Code code, int left_index, int right_index, int dest_index, bool pop_fpu_stack);
-
   // helper functions which checks for overflow and sets bailout if it
   // occurs.  Always returns a valid embeddable pointer but in the
   // bailout case the pointer won't be to unique storage.
@@ -62,4 +60,13 @@
   void store_parameter(jobject c,   int offset_from_esp_in_words);
   void store_parameter(Metadata* c, int offset_from_esp_in_words);
 
+#ifndef _LP64
+  void arith_fpu_implementation(LIR_Code code, int left_index, int right_index, int dest_index, bool pop_fpu_stack);
+
+  void fpop();
+  void fxch(int i);
+  void fld(int i);
+  void ffree(int i);
+#endif // !_LP64
+
 #endif // CPU_X86_C1_LIRASSEMBLER_X86_HPP
--- a/src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp	Wed Jan 29 22:37:17 2020 +0100
+++ b/src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp	Thu Jan 30 00:46:43 2020 +0300
@@ -386,6 +386,42 @@
     tmp = new_register(T_DOUBLE);
   }
 
+#ifdef _LP64
+  if (x->op() == Bytecodes::_frem || x->op() == Bytecodes::_drem) {
+    // frem and drem are implemented as a direct call into the runtime.
+    LIRItem left(x->x(), this);
+    LIRItem right(x->y(), this);
+
+    BasicType bt = as_BasicType(x->type());
+    BasicTypeList signature(2);
+    signature.append(bt);
+    signature.append(bt);
+    CallingConvention* cc = frame_map()->c_calling_convention(&signature);
+
+    const LIR_Opr result_reg = result_register_for(x->type());
+    left.load_item_force(cc->at(0));
+    right.load_item_force(cc->at(1));
+
+    address entry = NULL;
+    switch (x->op()) {
+      case Bytecodes::_frem:
+        entry = CAST_FROM_FN_PTR(address, SharedRuntime::frem);
+        break;
+      case Bytecodes::_drem:
+        entry = CAST_FROM_FN_PTR(address, SharedRuntime::drem);
+        break;
+      default:
+        ShouldNotReachHere();
+    }
+
+    LIR_Opr result = rlock_result(x);
+    __ call_runtime_leaf(entry, getThreadTemp(), result_reg, cc->args());
+    __ move(result_reg, result);
+  } else {
+    arithmetic_op_fpu(x->op(), reg, left.result(), right.result(), x->is_strictfp(), tmp);
+    set_result(x, round_item(reg));
+  }
+#else
   if ((UseSSE >= 1 && x->op() == Bytecodes::_frem) || (UseSSE >= 2 && x->op() == Bytecodes::_drem)) {
     // special handling for frem and drem: no SSE instruction, so must use FPU with temporary fpu stack slots
     LIR_Opr fpu0, fpu1;
@@ -404,8 +440,8 @@
   } else {
     arithmetic_op_fpu(x->op(), reg, left.result(), right.result(), x->is_strictfp(), tmp);
   }
-
   set_result(x, round_item(reg));
+#endif // _LP64
 }
 
 
@@ -444,9 +480,6 @@
     case Bytecodes::_ldiv:
       entry = CAST_FROM_FN_PTR(address, SharedRuntime::ldiv);
       break; // check if dividend is 0 is done elsewhere
-    case Bytecodes::_lmul:
-      entry = CAST_FROM_FN_PTR(address, SharedRuntime::lmul);
-      break;
     default:
       ShouldNotReachHere();
     }
@@ -1145,6 +1178,15 @@
 }
 
 void LIRGenerator::do_Convert(Convert* x) {
+#ifdef _LP64
+  LIRItem value(x->value(), this);
+  value.load_item();
+  LIR_Opr input = value.result();
+  LIR_Opr result = rlock(x);
+  __ convert(x->op(), input, result);
+  assert(result->is_virtual(), "result must be virtual register");
+  set_result(x, result);
+#else
   // flags that vary for the different operations and different SSE-settings
   bool fixed_input = false, fixed_result = false, round_result = false, needs_stub = false;
 
@@ -1203,6 +1245,7 @@
 
   assert(result->is_virtual(), "result must be virtual register");
   set_result(x, result);
+#endif // _LP64
 }
 
 
--- a/src/hotspot/cpu/x86/c1_LinearScan_x86.cpp	Wed Jan 29 22:37:17 2020 +0100
+++ b/src/hotspot/cpu/x86/c1_LinearScan_x86.cpp	Thu Jan 30 00:46:43 2020 +0300
@@ -28,6 +28,11 @@
 #include "utilities/bitMap.inline.hpp"
 
 
+#ifdef _LP64
+void LinearScan::allocate_fpu_stack() {
+  // No FPU stack used on x86-64
+}
+#else
 //----------------------------------------------------------------------
 // Allocation of FPU stack slots (Intel x86 only)
 //----------------------------------------------------------------------
@@ -815,12 +820,6 @@
 #ifndef PRODUCT
 void FpuStackAllocator::check_invalid_lir_op(LIR_Op* op) {
   switch (op->code()) {
-    case lir_24bit_FPU:
-    case lir_reset_FPU:
-    case lir_ffree:
-      assert(false, "operations not allowed in lir. If one of these operations is needed, check if they have fpu operands");
-      break;
-
     case lir_fpop_raw:
     case lir_fxch:
     case lir_fld:
@@ -1139,3 +1138,4 @@
 
   return changed;
 }
+#endif // _LP64
--- a/src/hotspot/cpu/x86/c1_MacroAssembler_x86.cpp	Wed Jan 29 22:37:17 2020 +0100
+++ b/src/hotspot/cpu/x86/c1_MacroAssembler_x86.cpp	Thu Jan 30 00:46:43 2020 +0300
@@ -325,12 +325,12 @@
   if (PreserveFramePointer) {
     mov(rbp, rsp);
   }
-#ifdef TIERED
-  // c2 leaves fpu stack dirty. Clean it on entry
+#if !defined(_LP64) && defined(TIERED)
   if (UseSSE < 2 ) {
+    // c2 leaves fpu stack dirty. Clean it on entry
     empty_FPU_stack();
   }
-#endif // TIERED
+#endif // !_LP64 && TIERED
   decrement(rsp, frame_size_in_bytes); // does not emit code for frame_size == 0
 
   BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
@@ -357,7 +357,7 @@
   }
   if (C1Breakpoint)int3();
   // build frame
-  verify_FPU(0, "method_entry");
+  IA32_ONLY( verify_FPU(0, "method_entry"); )
 }
 
 void C1_MacroAssembler::load_parameter(int offset_in_words, Register reg) {
--- a/src/hotspot/cpu/x86/c1_Runtime1_x86.cpp	Wed Jan 29 22:37:17 2020 +0100
+++ b/src/hotspot/cpu/x86/c1_Runtime1_x86.cpp	Thu Jan 30 00:46:43 2020 +0300
@@ -427,6 +427,7 @@
 #endif
 
   if (save_fpu_registers) {
+#ifndef _LP64
     if (UseSSE < 2) {
       // save FPU stack
       __ fnsave(Address(rsp, fpu_state_off * VMRegImpl::stack_slot_size));
@@ -454,6 +455,7 @@
         offset += 8;
       }
     }
+#endif // !_LP64
 
     if (UseSSE >= 2) {
       // save XMM registers
@@ -473,6 +475,7 @@
         __ movdbl(Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + offset), xmm_name);
         offset += 8;
       }
+#ifndef _LP64
     } else if (UseSSE == 1) {
       // save XMM registers as float because double not supported without SSE2(num MMX == num fpu)
       int offset = 0;
@@ -481,26 +484,37 @@
         __ movflt(Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + offset), xmm_name);
         offset += 8;
       }
+#endif // !_LP64
     }
   }
 
   // FPU stack must be empty now
-  __ verify_FPU(0, "save_live_registers");
+  NOT_LP64( __ verify_FPU(0, "save_live_registers"); )
 }
 
 #undef __
 #define __ sasm->
 
 static void restore_fpu(C1_MacroAssembler* sasm, bool restore_fpu_registers) {
+#ifdef _LP64
+  if (restore_fpu_registers) {
+    // restore XMM registers
+    int xmm_bypass_limit = FrameMap::nof_xmm_regs;
+    if (UseAVX < 3) {
+      xmm_bypass_limit = xmm_bypass_limit / 2;
+    }
+    int offset = 0;
+    for (int n = 0; n < xmm_bypass_limit; n++) {
+      XMMRegister xmm_name = as_XMMRegister(n);
+      __ movdbl(xmm_name, Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + offset));
+      offset += 8;
+    }
+  }
+#else
   if (restore_fpu_registers) {
     if (UseSSE >= 2) {
       // restore XMM registers
       int xmm_bypass_limit = FrameMap::nof_xmm_regs;
-#ifdef _LP64
-      if (UseAVX < 3) {
-        xmm_bypass_limit = xmm_bypass_limit / 2;
-      }
-#endif
       int offset = 0;
       for (int n = 0; n < xmm_bypass_limit; n++) {
         XMMRegister xmm_name = as_XMMRegister(n);
@@ -523,11 +537,11 @@
       // check that FPU stack is really empty
       __ verify_FPU(0, "restore_live_registers");
     }
-
   } else {
     // check that FPU stack is really empty
     __ verify_FPU(0, "restore_live_registers");
   }
+#endif // _LP64
 
 #ifdef ASSERT
   {
@@ -699,12 +713,12 @@
   default:  ShouldNotReachHere();
   }
 
-#ifdef TIERED
-  // C2 can leave the fpu stack dirty
+#if !defined(_LP64) && defined(TIERED)
   if (UseSSE < 2) {
+    // C2 can leave the fpu stack dirty
     __ empty_FPU_stack();
   }
-#endif // TIERED
+#endif // !_LP64 && TIERED
 
   // verify that only rax, and rdx is valid at this time
   __ invalidate_registers(false, true, true, false, true, true);
@@ -806,7 +820,7 @@
 #endif
 
   // clear the FPU stack in case any FPU results are left behind
-  __ empty_FPU_stack();
+  NOT_LP64( __ empty_FPU_stack(); )
 
   // save exception_oop in callee-saved register to preserve it during runtime calls
   __ verify_not_null_oop(exception_oop);
@@ -1477,11 +1491,23 @@
 
     case fpu2long_stub_id:
       {
+#ifdef _LP64
+        Label done;
+        __ cvttsd2siq(rax, Address(rsp, wordSize));
+        __ cmp64(rax, ExternalAddress((address) StubRoutines::x86::double_sign_flip()));
+        __ jccb(Assembler::notEqual, done);
+        __ movq(rax, Address(rsp, wordSize));
+        __ subptr(rsp, 8);
+        __ movq(Address(rsp, 0), rax);
+        __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2l_fixup())));
+        __ pop(rax);
+        __ bind(done);
+        __ ret(0);
+#else
         // rax, and rdx are destroyed, but should be free since the result is returned there
         // preserve rsi,ecx
         __ push(rsi);
         __ push(rcx);
-        LP64_ONLY(__ push(rdx);)
 
         // check for NaN
         Label return0, do_return, return_min_jlong, do_convert;
@@ -1526,46 +1552,29 @@
         __ fldz();
         __ fcomp_d(value_low_word);
         __ fnstsw_ax();
-#ifdef _LP64
-        __ testl(rax, 0x4100);  // ZF & CF == 0
-        __ jcc(Assembler::equal, return_min_jlong);
-#else
         __ sahf();
         __ jcc(Assembler::above, return_min_jlong);
-#endif // _LP64
         // return max_jlong
-#ifndef _LP64
         __ movl(rdx, 0x7fffffff);
         __ movl(rax, 0xffffffff);
-#else
-        __ mov64(rax, CONST64(0x7fffffffffffffff));
-#endif // _LP64
         __ jmp(do_return);
 
         __ bind(return_min_jlong);
-#ifndef _LP64
         __ movl(rdx, 0x80000000);
         __ xorl(rax, rax);
-#else
-        __ mov64(rax, UCONST64(0x8000000000000000));
-#endif // _LP64
         __ jmp(do_return);
 
         __ bind(return0);
         __ fpop();
-#ifndef _LP64
         __ xorptr(rdx,rdx);
         __ xorptr(rax,rax);
-#else
-        __ xorptr(rax, rax);
-#endif // _LP64
 
         __ bind(do_return);
         __ addptr(rsp, 32);
-        LP64_ONLY(__ pop(rdx);)
         __ pop(rcx);
         __ pop(rsi);
         __ ret(0);
+#endif // _LP64
       }
       break;
 
--- a/src/hotspot/cpu/x86/macroAssembler_x86.cpp	Wed Jan 29 22:37:17 2020 +0100
+++ b/src/hotspot/cpu/x86/macroAssembler_x86.cpp	Thu Jan 30 00:46:43 2020 +0300
@@ -349,11 +349,6 @@
   pop(rsi);
 }
 
-void MacroAssembler::pop_fTOS() {
-  fld_d(Address(rsp, 0));
-  addl(rsp, 2 * wordSize);
-}
-
 void MacroAssembler::push_callee_saved_registers() {
   push(rsi);
   push(rdi);
@@ -361,12 +356,6 @@
   push(rcx);
 }
 
-void MacroAssembler::push_fTOS() {
-  subl(rsp, 2 * wordSize);
-  fstp_d(Address(rsp, 0));
-}
-
-
 void MacroAssembler::pushoop(jobject obj) {
   push_literal32((int32_t)obj, oop_Relocation::spec_for_immediate());
 }
@@ -2735,8 +2724,7 @@
   }
 }
 
-// !defined(COMPILER2) is because of stupid core builds
-#if !defined(_LP64) || defined(COMPILER1) || !defined(COMPILER2) || INCLUDE_JVMCI
+#ifndef _LP64
 void MacroAssembler::empty_FPU_stack() {
   if (VM_Version::supports_mmx()) {
     emms();
@@ -2744,7 +2732,7 @@
     for (int i = 8; i-- > 0; ) ffree(i);
   }
 }
-#endif // !LP64 || C1 || !C2 || INCLUDE_JVMCI
+#endif // !LP64
 
 
 void MacroAssembler::enter() {
@@ -2765,6 +2753,7 @@
   }
 }
 
+#if !defined(_LP64)
 void MacroAssembler::fcmp(Register tmp) {
   fcmp(tmp, 1, true, true);
 }
@@ -2846,6 +2835,29 @@
   Assembler::fldcw(as_Address(src));
 }
 
+void MacroAssembler::fpop() {
+  ffree();
+  fincstp();
+}
+
+void MacroAssembler::fremr(Register tmp) {
+  save_rax(tmp);
+  { Label L;
+    bind(L);
+    fprem();
+    fwait(); fnstsw_ax();
+    sahf();
+    jcc(Assembler::parity, L);
+  }
+  restore_rax(tmp);
+  // Result is in ST0.
+  // Note: fxch & fpop to get rid of ST1
+  // (otherwise FPU stack could overflow eventually)
+  fxch(1);
+  fpop();
+}
+#endif // !LP64
+
 void MacroAssembler::mulpd(XMMRegister dst, AddressLiteral src) {
   if (reachable(src)) {
     Assembler::mulpd(dst, as_Address(src));
@@ -2855,26 +2867,6 @@
   }
 }
 
-void MacroAssembler::increase_precision() {
-  subptr(rsp, BytesPerWord);
-  fnstcw(Address(rsp, 0));
-  movl(rax, Address(rsp, 0));
-  orl(rax, 0x300);
-  push(rax);
-  fldcw(Address(rsp, 0));
-  pop(rax);
-}
-
-void MacroAssembler::restore_precision() {
-  fldcw(Address(rsp, 0));
-  addptr(rsp, BytesPerWord);
-}
-
-void MacroAssembler::fpop() {
-  ffree();
-  fincstp();
-}
-
 void MacroAssembler::load_float(Address src) {
   if (UseSSE >= 1) {
     movflt(xmm0, src);
@@ -2911,28 +2903,6 @@
   }
 }
 
-void MacroAssembler::fremr(Register tmp) {
-  save_rax(tmp);
-  { Label L;
-    bind(L);
-    fprem();
-    fwait(); fnstsw_ax();
-#ifdef _LP64
-    testl(rax, 0x400);
-    jcc(Assembler::notEqual, L);
-#else
-    sahf();
-    jcc(Assembler::parity, L);
-#endif // _LP64
-  }
-  restore_rax(tmp);
-  // Result is in ST0.
-  // Note: fxch & fpop to get rid of ST1
-  // (otherwise FPU stack could overflow eventually)
-  fxch(1);
-  fpop();
-}
-
 // dst = c = a * b + c
 void MacroAssembler::fmad(XMMRegister dst, XMMRegister a, XMMRegister b, XMMRegister c) {
   Assembler::vfmadd231sd(c, a, b);
@@ -5098,6 +5068,7 @@
 }
 
 
+#ifndef _LP64
 static bool _verify_FPU(int stack_depth, char* s, CPU_State* state) {
   static int counter = 0;
   FPU_State* fs = &state->_fpu_state;
@@ -5154,7 +5125,6 @@
   return true;
 }
 
-
 void MacroAssembler::verify_FPU(int stack_depth, const char* s) {
   if (!VerifyFPU) return;
   push_CPU_state();
@@ -5174,6 +5144,7 @@
   }
   pop_CPU_state();
 }
+#endif // !_LP64
 
 void MacroAssembler::restore_cpu_control_state_after_jni() {
   // Either restore the MXCSR register after returning from the JNI Call
@@ -9888,6 +9859,56 @@
 }
 
 #ifdef _LP64
+void MacroAssembler::convert_f2i(Register dst, XMMRegister src) {
+  Label done;
+  cvttss2sil(dst, src);
+  // Conversion instructions do not match JLS for overflow, underflow and NaN -> fixup in stub
+  cmpl(dst, 0x80000000); // float_sign_flip
+  jccb(Assembler::notEqual, done);
+  subptr(rsp, 8);
+  movflt(Address(rsp, 0), src);
+  call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::f2i_fixup())));
+  pop(dst);
+  bind(done);
+}
+
+void MacroAssembler::convert_d2i(Register dst, XMMRegister src) {
+  Label done;
+  cvttsd2sil(dst, src);
+  // Conversion instructions do not match JLS for overflow, underflow and NaN -> fixup in stub
+  cmpl(dst, 0x80000000); // float_sign_flip
+  jccb(Assembler::notEqual, done);
+  subptr(rsp, 8);
+  movdbl(Address(rsp, 0), src);
+  call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2i_fixup())));
+  pop(dst);
+  bind(done);
+}
+
+void MacroAssembler::convert_f2l(Register dst, XMMRegister src) {
+  Label done;
+  cvttss2siq(dst, src);
+  cmp64(dst, ExternalAddress((address) StubRoutines::x86::double_sign_flip()));
+  jccb(Assembler::notEqual, done);
+  subptr(rsp, 8);
+  movflt(Address(rsp, 0), src);
+  call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::f2l_fixup())));
+  pop(dst);
+  bind(done);
+}
+
+void MacroAssembler::convert_d2l(Register dst, XMMRegister src) {
+  Label done;
+  cvttsd2siq(dst, src);
+  cmp64(dst, ExternalAddress((address) StubRoutines::x86::double_sign_flip()));
+  jccb(Assembler::notEqual, done);
+  subptr(rsp, 8);
+  movdbl(Address(rsp, 0), src);
+  call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2l_fixup())));
+  pop(dst);
+  bind(done);
+}
+
 void MacroAssembler::cache_wb(Address line)
 {
   // 64 bit cpus always support clflush
@@ -10000,4 +10021,4 @@
   }
 }
 
-#endif
+#endif // !WIN32 || _LP64
--- a/src/hotspot/cpu/x86/macroAssembler_x86.hpp	Wed Jan 29 22:37:17 2020 +0100
+++ b/src/hotspot/cpu/x86/macroAssembler_x86.hpp	Thu Jan 30 00:46:43 2020 +0300
@@ -426,6 +426,7 @@
   // Division by power of 2, rounding towards 0
   void division_with_shift(Register reg, int shift_value);
 
+#ifndef _LP64
   // Compares the top-most stack entries on the FPU stack and sets the eflags as follows:
   //
   // CF (corresponds to C0) if x < y
@@ -454,6 +455,10 @@
   // tmp is a temporary register, if none is available use noreg
   void fremr(Register tmp);
 
+  // only if +VerifyFPU
+  void verify_FPU(int stack_depth, const char* s = "illegal FPU state");
+#endif // !LP64
+
   // dst = c = a * b + c
   void fmad(XMMRegister dst, XMMRegister a, XMMRegister b, XMMRegister c);
   void fmaf(XMMRegister dst, XMMRegister a, XMMRegister b, XMMRegister c);
@@ -473,9 +478,6 @@
   void jC2 (Register tmp, Label& L);
   void jnC2(Register tmp, Label& L);
 
-  // Pop ST (ffree & fincstp combined)
-  void fpop();
-
   // Load float value from 'address'. If UseSSE >= 1, the value is loaded into
   // register xmm0. Otherwise, the value is loaded onto the FPU stack.
   void load_float(Address src);
@@ -492,13 +494,12 @@
   // from register xmm0. Otherwise, the value is stored from the FPU stack.
   void store_double(Address dst);
 
-  // pushes double TOS element of FPU stack on CPU stack; pops from FPU stack
-  void push_fTOS();
-
-  // pops double TOS element from CPU stack and pushes on FPU stack
-  void pop_fTOS();
+#ifndef _LP64
+  // Pop ST (ffree & fincstp combined)
+  void fpop();
 
   void empty_FPU_stack();
+#endif // !_LP64
 
   void push_IU_state();
   void pop_IU_state();
@@ -609,9 +610,6 @@
 #define verify_method_ptr(reg) _verify_method_ptr(reg, "broken method " #reg, __FILE__, __LINE__)
 #define verify_klass_ptr(reg) _verify_klass_ptr(reg, "broken klass " #reg, __FILE__, __LINE__)
 
-  // only if +VerifyFPU
-  void verify_FPU(int stack_depth, const char* s = "illegal FPU state");
-
   // Verify or restore cpu control state after JNI call
   void restore_cpu_control_state_after_jni();
 
@@ -902,6 +900,7 @@
   void comisd(XMMRegister dst, Address src) { Assembler::comisd(dst, src); }
   void comisd(XMMRegister dst, AddressLiteral src);
 
+#ifndef _LP64
   void fadd_s(Address src)        { Assembler::fadd_s(src); }
   void fadd_s(AddressLiteral src) { Assembler::fadd_s(as_Address(src)); }
 
@@ -920,6 +919,7 @@
 
   void fmul_s(Address src)        { Assembler::fmul_s(src); }
   void fmul_s(AddressLiteral src) { Assembler::fmul_s(as_Address(src)); }
+#endif // !_LP64
 
   void ldmxcsr(Address src) { Assembler::ldmxcsr(src); }
   void ldmxcsr(AddressLiteral src);
@@ -1082,9 +1082,6 @@
                 Register rax, Register rcx, Register rdx, Register tmp);
 #endif
 
-  void increase_precision();
-  void restore_precision();
-
 private:
 
   // these are private because users should be doing movflt/movdbl
@@ -1813,6 +1810,11 @@
                           XMMRegister tmp1, Register tmp2);
 
 #ifdef _LP64
+  void convert_f2i(Register dst, XMMRegister src);
+  void convert_d2i(Register dst, XMMRegister src);
+  void convert_f2l(Register dst, XMMRegister src);
+  void convert_d2l(Register dst, XMMRegister src);
+
   void cache_wb(Address line);
   void cache_wbsync(bool is_pre);
 #endif // _LP64
--- a/src/hotspot/cpu/x86/methodHandles_x86.cpp	Wed Jan 29 22:37:17 2020 +0100
+++ b/src/hotspot/cpu/x86/methodHandles_x86.cpp	Thu Jan 30 00:46:43 2020 +0300
@@ -604,7 +604,10 @@
   // robust stack walking implemented in trace_method_handle_stub.
 
   // save FP result, valid at some call sites (adapter_opt_return_float, ...)
-  __ increment(rsp, -2 * wordSize);
+  __ decrement(rsp, 2 * wordSize);
+#ifdef _LP64
+  __ movdbl(Address(rsp, 0), xmm0);
+#else
   if  (UseSSE >= 2) {
     __ movdbl(Address(rsp, 0), xmm0);
   } else if (UseSSE == 1) {
@@ -612,6 +615,7 @@
   } else {
     __ fst_d(Address(rsp, 0));
   }
+#endif // _LP64
 
   // Incoming state:
   // rcx: method handle
@@ -626,6 +630,9 @@
   __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, trace_method_handle_stub_wrapper), rsp);
   __ increment(rsp, sizeof(MethodHandleStubArguments));
 
+#ifdef _LP64
+  __ movdbl(xmm0, Address(rsp, 0));
+#else
   if  (UseSSE >= 2) {
     __ movdbl(xmm0, Address(rsp, 0));
   } else if (UseSSE == 1) {
@@ -633,6 +640,7 @@
   } else {
     __ fld_d(Address(rsp, 0));
   }
+#endif // _LP64
   __ increment(rsp, 2 * wordSize);
 
   __ popa();
--- a/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp	Wed Jan 29 22:37:17 2020 +0100
+++ b/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp	Thu Jan 30 00:46:43 2020 +0300
@@ -6341,6 +6341,16 @@
 
     StubRoutines::x86::_verify_mxcsr_entry    = generate_verify_mxcsr();
 
+    StubRoutines::x86::_f2i_fixup             = generate_f2i_fixup();
+    StubRoutines::x86::_f2l_fixup             = generate_f2l_fixup();
+    StubRoutines::x86::_d2i_fixup             = generate_d2i_fixup();
+    StubRoutines::x86::_d2l_fixup             = generate_d2l_fixup();
+
+    StubRoutines::x86::_float_sign_mask       = generate_fp_mask("float_sign_mask",  0x7FFFFFFF7FFFFFFF);
+    StubRoutines::x86::_float_sign_flip       = generate_fp_mask("float_sign_flip",  0x8000000080000000);
+    StubRoutines::x86::_double_sign_mask      = generate_fp_mask("double_sign_mask", 0x7FFFFFFFFFFFFFFF);
+    StubRoutines::x86::_double_sign_flip      = generate_fp_mask("double_sign_flip", 0x8000000000000000);
+
     // Build this early so it's available for the interpreter.
     StubRoutines::_throw_StackOverflowError_entry =
       generate_throw_exception("StackOverflowError throw_exception",
@@ -6364,7 +6374,7 @@
       StubRoutines::_crc32c_table_addr = (address)StubRoutines::x86::_crc32c_table;
       StubRoutines::_updateBytesCRC32C = generate_updateBytesCRC32C(supports_clmul);
     }
-    if (VM_Version::supports_sse2() && UseLibmIntrinsic && InlineIntrinsics) {
+    if (UseLibmIntrinsic && InlineIntrinsics) {
       if (vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dsin) ||
           vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dcos) ||
           vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dtan)) {
@@ -6432,15 +6442,6 @@
                                                 throw_NullPointerException_at_call));
 
     // entry points that are platform specific
-    StubRoutines::x86::_f2i_fixup = generate_f2i_fixup();
-    StubRoutines::x86::_f2l_fixup = generate_f2l_fixup();
-    StubRoutines::x86::_d2i_fixup = generate_d2i_fixup();
-    StubRoutines::x86::_d2l_fixup = generate_d2l_fixup();
-
-    StubRoutines::x86::_float_sign_mask  = generate_fp_mask("float_sign_mask",  0x7FFFFFFF7FFFFFFF);
-    StubRoutines::x86::_float_sign_flip  = generate_fp_mask("float_sign_flip",  0x8000000080000000);
-    StubRoutines::x86::_double_sign_mask = generate_fp_mask("double_sign_mask", 0x7FFFFFFFFFFFFFFF);
-    StubRoutines::x86::_double_sign_flip = generate_fp_mask("double_sign_flip", 0x8000000000000000);
     StubRoutines::x86::_vector_float_sign_mask = generate_vector_mask("vector_float_sign_mask", 0x7FFFFFFF7FFFFFFF);
     StubRoutines::x86::_vector_float_sign_flip = generate_vector_mask("vector_float_sign_flip", 0x8000000080000000);
     StubRoutines::x86::_vector_double_sign_mask = generate_vector_mask("vector_double_sign_mask", 0x7FFFFFFFFFFFFFFF);
--- a/src/hotspot/cpu/x86/templateInterpreterGenerator_x86_64.cpp	Wed Jan 29 22:37:17 2020 +0100
+++ b/src/hotspot/cpu/x86/templateInterpreterGenerator_x86_64.cpp	Thu Jan 30 00:46:43 2020 +0300
@@ -432,25 +432,14 @@
     } else {
       __ call_VM_leaf0(CAST_FROM_FN_PTR(address, SharedRuntime::dtan));
     }
+  } else if (kind == Interpreter::java_lang_math_abs) {
+    assert(StubRoutines::x86::double_sign_mask() != NULL, "not initialized");
+    __ movdbl(xmm0, Address(rsp, wordSize));
+    __ andpd(xmm0, ExternalAddress(StubRoutines::x86::double_sign_mask()));
   } else {
-    __ fld_d(Address(rsp, wordSize));
-    switch (kind) {
-    case Interpreter::java_lang_math_abs:
-      __ fabs();
-      break;
-    default:
-      ShouldNotReachHere();
-    }
-
-    // return double result in xmm0 for interpreter and compilers.
-    __ subptr(rsp, 2*wordSize);
-    // Round to 64bit precision
-    __ fstp_d(Address(rsp, 0));
-    __ movdbl(xmm0, Address(rsp, 0));
-    __ addptr(rsp, 2*wordSize);
+    ShouldNotReachHere();
   }
 
-
   __ pop(rax);
   __ mov(rsp, r13);
   __ jmp(rax);
--- a/src/hotspot/cpu/x86/x86_64.ad	Wed Jan 29 22:37:17 2020 +0100
+++ b/src/hotspot/cpu/x86/x86_64.ad	Thu Jan 30 00:46:43 2020 +0300
@@ -10588,25 +10588,9 @@
 %{
   match(Set dst (ConvF2I src));
   effect(KILL cr);
-
-  format %{ "cvttss2sil $dst, $src\t# f2i\n\t"
-            "cmpl    $dst, #0x80000000\n\t"
-            "jne,s   done\n\t"
-            "subq    rsp, #8\n\t"
-            "movss   [rsp], $src\n\t"
-            "call    f2i_fixup\n\t"
-            "popq    $dst\n"
-    "done:   "%}
-  ins_encode %{
-    Label done;
-    __ cvttss2sil($dst$$Register, $src$$XMMRegister);
-    __ cmpl($dst$$Register, 0x80000000);
-    __ jccb(Assembler::notEqual, done);
-    __ subptr(rsp, 8);
-    __ movflt(Address(rsp, 0), $src$$XMMRegister);
-    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::f2i_fixup())));
-    __ pop($dst$$Register);
-    __ bind(done);
+  format %{ "convert_f2i $dst,$src" %}
+  ins_encode %{
+    __ convert_f2i($dst$$Register, $src$$XMMRegister);
   %}
   ins_pipe(pipe_slow);
 %}
@@ -10615,26 +10599,9 @@
 %{
   match(Set dst (ConvF2L src));
   effect(KILL cr);
-
-  format %{ "cvttss2siq $dst, $src\t# f2l\n\t"
-            "cmpq    $dst, [0x8000000000000000]\n\t"
-            "jne,s   done\n\t"
-            "subq    rsp, #8\n\t"
-            "movss   [rsp], $src\n\t"
-            "call    f2l_fixup\n\t"
-            "popq    $dst\n"
-    "done:   "%}
-  ins_encode %{
-    Label done;
-    __ cvttss2siq($dst$$Register, $src$$XMMRegister);
-    __ cmp64($dst$$Register,
-             ExternalAddress((address) StubRoutines::x86::double_sign_flip()));
-    __ jccb(Assembler::notEqual, done);
-    __ subptr(rsp, 8);
-    __ movflt(Address(rsp, 0), $src$$XMMRegister);
-    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::f2l_fixup())));
-    __ pop($dst$$Register);
-    __ bind(done);
+  format %{ "convert_f2l $dst,$src"%}
+  ins_encode %{
+    __ convert_f2l($dst$$Register, $src$$XMMRegister);
   %}
   ins_pipe(pipe_slow);
 %}
@@ -10643,25 +10610,9 @@
 %{
   match(Set dst (ConvD2I src));
   effect(KILL cr);
-
-  format %{ "cvttsd2sil $dst, $src\t# d2i\n\t"
-            "cmpl    $dst, #0x80000000\n\t"
-            "jne,s   done\n\t"
-            "subq    rsp, #8\n\t"
-            "movsd   [rsp], $src\n\t"
-            "call    d2i_fixup\n\t"
-            "popq    $dst\n"
-    "done:   "%}
-  ins_encode %{
-    Label done;
-    __ cvttsd2sil($dst$$Register, $src$$XMMRegister);
-    __ cmpl($dst$$Register, 0x80000000);
-    __ jccb(Assembler::notEqual, done);
-    __ subptr(rsp, 8);
-    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
-    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2i_fixup())));
-    __ pop($dst$$Register);
-    __ bind(done);
+  format %{ "convert_d2i $dst,$src"%}
+  ins_encode %{
+    __ convert_d2i($dst$$Register, $src$$XMMRegister);
   %}
   ins_pipe(pipe_slow);
 %}
@@ -10670,26 +10621,9 @@
 %{
   match(Set dst (ConvD2L src));
   effect(KILL cr);
-
-  format %{ "cvttsd2siq $dst, $src\t# d2l\n\t"
-            "cmpq    $dst, [0x8000000000000000]\n\t"
-            "jne,s   done\n\t"
-            "subq    rsp, #8\n\t"
-            "movsd   [rsp], $src\n\t"
-            "call    d2l_fixup\n\t"
-            "popq    $dst\n"
-    "done:   "%}
-  ins_encode %{
-    Label done;
-    __ cvttsd2siq($dst$$Register, $src$$XMMRegister);
-    __ cmp64($dst$$Register,
-             ExternalAddress((address) StubRoutines::x86::double_sign_flip()));
-    __ jccb(Assembler::notEqual, done);
-    __ subptr(rsp, 8);
-    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
-    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2l_fixup())));
-    __ pop($dst$$Register);
-    __ bind(done);
+  format %{ "convert_d2l $dst,$src"%}
+  ins_encode %{
+    __ convert_d2l($dst$$Register, $src$$XMMRegister);
   %}
   ins_pipe(pipe_slow);
 %}
--- a/src/hotspot/share/c1/c1_CodeStubs.hpp	Wed Jan 29 22:37:17 2020 +0100
+++ b/src/hotspot/share/c1/c1_CodeStubs.hpp	Thu Jan 30 00:46:43 2020 +0300
@@ -123,6 +123,7 @@
  public:
   ConversionStub(Bytecodes::Code bytecode, LIR_Opr input, LIR_Opr result)
     : _bytecode(bytecode), _input(input), _result(result) {
+    NOT_IA32( ShouldNotReachHere(); ) // used only on x86-32
   }
 
   Bytecodes::Code bytecode() { return _bytecode; }
--- a/src/hotspot/share/c1/c1_LIR.cpp	Wed Jan 29 22:37:17 2020 +0100
+++ b/src/hotspot/share/c1/c1_LIR.cpp	Thu Jan 30 00:46:43 2020 +0300
@@ -424,8 +424,6 @@
     case lir_backwardbranch_target:    // result and info always invalid
     case lir_build_frame:              // result and info always invalid
     case lir_fpop_raw:                 // result and info always invalid
-    case lir_24bit_FPU:                // result and info always invalid
-    case lir_reset_FPU:                // result and info always invalid
     case lir_breakpoint:               // result and info always invalid
     case lir_membar:                   // result and info always invalid
     case lir_membar_acquire:           // result and info always invalid
@@ -467,7 +465,6 @@
 // LIR_Op1
     case lir_fxch:           // input always valid, result and info always invalid
     case lir_fld:            // input always valid, result and info always invalid
-    case lir_ffree:          // input always valid, result and info always invalid
     case lir_push:           // input always valid, result and info always invalid
     case lir_pop:            // input always valid, result and info always invalid
     case lir_return:         // input always valid, result and info always invalid
@@ -1649,14 +1646,11 @@
      case lir_osr_entry:             s = "osr_entry";     break;
      case lir_build_frame:           s = "build_frm";     break;
      case lir_fpop_raw:              s = "fpop_raw";      break;
-     case lir_24bit_FPU:             s = "24bit_FPU";     break;
-     case lir_reset_FPU:             s = "reset_FPU";     break;
      case lir_breakpoint:            s = "breakpoint";    break;
      case lir_get_thread:            s = "get_thread";    break;
      // LIR_Op1
      case lir_fxch:                  s = "fxch";          break;
      case lir_fld:                   s = "fld";           break;
-     case lir_ffree:                 s = "ffree";         break;
      case lir_push:                  s = "push";          break;
      case lir_pop:                   s = "pop";           break;
      case lir_null_check:            s = "null_check";    break;
--- a/src/hotspot/share/c1/c1_LIR.hpp	Wed Jan 29 22:37:17 2020 +0100
+++ b/src/hotspot/share/c1/c1_LIR.hpp	Thu Jan 30 00:46:43 2020 +0300
@@ -888,8 +888,6 @@
       , lir_osr_entry
       , lir_build_frame
       , lir_fpop_raw
-      , lir_24bit_FPU
-      , lir_reset_FPU
       , lir_breakpoint
       , lir_rtcall
       , lir_membar
@@ -905,7 +903,6 @@
   , begin_op1
       , lir_fxch
       , lir_fld
-      , lir_ffree
       , lir_push
       , lir_pop
       , lir_null_check
@@ -2232,8 +2229,6 @@
   void unlock_object(LIR_Opr hdr, LIR_Opr obj, LIR_Opr lock, LIR_Opr scratch, CodeStub* stub);
   void lock_object(LIR_Opr hdr, LIR_Opr obj, LIR_Opr lock, LIR_Opr scratch, CodeStub* stub, CodeEmitInfo* info);
 
-  void set_24bit_fpu()                                               { append(new LIR_Op0(lir_24bit_FPU )); }
-  void restore_fpu()                                                 { append(new LIR_Op0(lir_reset_FPU )); }
   void breakpoint()                                                  { append(new LIR_Op0(lir_breakpoint)); }
 
   void arraycopy(LIR_Opr src, LIR_Opr src_pos, LIR_Opr dst, LIR_Opr dst_pos, LIR_Opr length, LIR_Opr tmp, ciArrayKlass* expected_type, int flags, CodeEmitInfo* info) { append(new LIR_OpArrayCopy(src, src_pos, dst, dst_pos, length, tmp, expected_type, flags, info)); }
--- a/src/hotspot/share/c1/c1_LIRAssembler.cpp	Wed Jan 29 22:37:17 2020 +0100
+++ b/src/hotspot/share/c1/c1_LIRAssembler.cpp	Thu Jan 30 00:46:43 2020 +0300
@@ -481,7 +481,7 @@
     compilation()->set_has_method_handle_invokes(true);
   }
 
-#if defined(X86) && defined(TIERED)
+#if defined(IA32) && defined(TIERED)
   // C2 leave fpu stack dirty clean it
   if (UseSSE < 2) {
     int i;
@@ -532,6 +532,7 @@
       safepoint_poll(op->in_opr(), op->info());
       break;
 
+#ifdef IA32
     case lir_fxch:
       fxch(op->in_opr()->as_jint());
       break;
@@ -539,10 +540,7 @@
     case lir_fld:
       fld(op->in_opr()->as_jint());
       break;
-
-    case lir_ffree:
-      ffree(op->in_opr()->as_jint());
-      break;
+#endif // IA32
 
     case lir_branch:
       break;
@@ -636,22 +634,16 @@
       osr_entry();
       break;
 
-    case lir_24bit_FPU:
-      set_24bit_FPU();
+#ifdef IA32
+    case lir_fpop_raw:
+      fpop();
       break;
-
-    case lir_reset_FPU:
-      reset_FPU();
-      break;
+#endif // IA32
 
     case lir_breakpoint:
       breakpoint();
       break;
 
-    case lir_fpop_raw:
-      fpop();
-      break;
-
     case lir_membar:
       membar();
       break;
--- a/src/hotspot/share/c1/c1_LIRAssembler.hpp	Wed Jan 29 22:37:17 2020 +0100
+++ b/src/hotspot/share/c1/c1_LIRAssembler.hpp	Thu Jan 30 00:46:43 2020 +0300
@@ -105,13 +105,6 @@
   ImplicitNullCheckStub* add_debug_info_for_null_check(int pc_offset, CodeEmitInfo* cinfo);
   ImplicitNullCheckStub* add_debug_info_for_null_check_here(CodeEmitInfo* info);
 
-  void set_24bit_FPU();
-  void reset_FPU();
-  void fpop();
-  void fxch(int i);
-  void fld(int i);
-  void ffree(int i);
-
   void breakpoint();
   void push(LIR_Opr opr);
   void pop(LIR_Opr opr);
--- a/src/hotspot/share/c1/c1_LinearScan.cpp	Wed Jan 29 22:37:17 2020 +0100
+++ b/src/hotspot/share/c1/c1_LinearScan.cpp	Thu Jan 30 00:46:43 2020 +0300
@@ -90,7 +90,7 @@
  , _has_call(0)
  , _interval_in_loop(0)  // initialized later with correct length
  , _scope_value_cache(0) // initialized later with correct length
-#ifdef X86
+#ifdef IA32
  , _fpu_stack_allocator(NULL)
 #endif
 {
@@ -2653,13 +2653,15 @@
 #endif
 
   } else if (opr->is_single_fpu()) {
-#ifdef X86
+#ifdef IA32
     // the exact location of fpu stack values is only known
     // during fpu stack allocation, so the stack allocator object
     // must be present
     assert(use_fpu_stack_allocation(), "should not have float stack values without fpu stack allocation (all floats must be SSE2)");
     assert(_fpu_stack_allocator != NULL, "must be present");
     opr = _fpu_stack_allocator->to_fpu_stack(opr);
+#elif defined(AMD64)
+    assert(false, "FPU not used on x86-64");
 #endif
 
     Location::Type loc_type = float_saved_as_double ? Location::float_in_dbl : Location::normal;
@@ -2764,7 +2766,7 @@
       // name for the other half.  *first and *second must represent the
       // least and most significant words, respectively.
 
-#ifdef X86
+#ifdef IA32
       // the exact location of fpu stack values is only known
       // during fpu stack allocation, so the stack allocator object
       // must be present
@@ -2774,6 +2776,9 @@
 
       assert(opr->fpu_regnrLo() == opr->fpu_regnrHi(), "assumed in calculation (only fpu_regnrLo is used)");
 #endif
+#ifdef AMD64
+      assert(false, "FPU not used on x86-64");
+#endif
 #ifdef SPARC
       assert(opr->fpu_regnrLo() == opr->fpu_regnrHi() + 1, "assumed in calculation (only fpu_regnrHi is used)");
 #endif
--- a/src/hotspot/share/c1/c1_LinearScan.hpp	Wed Jan 29 22:37:17 2020 +0100
+++ b/src/hotspot/share/c1/c1_LinearScan.hpp	Thu Jan 30 00:46:43 2020 +0300
@@ -177,7 +177,7 @@
   bool          is_interval_in_loop(int interval, int loop) const { return _interval_in_loop.at(interval, loop); }
 
   // handling of fpu stack allocation (platform dependent, needed for debug information generation)
-#ifdef X86
+#ifdef IA32
   FpuStackAllocator* _fpu_stack_allocator;
   bool use_fpu_stack_allocation() const          { return UseSSE < 2 && has_fpu_registers(); }
 #else