OpenJDK / jdk / hs
changeset 46954:6ad56f307810
8185786: AArch64: disable some address reshapings.
Summary: LoadS/LoadUS's address reshapings are disabled on Arm Cortex-A family for performance.
Reviewed-by: adinn, aph
Contributed-by: zhongwei.yao@linaro.org
author | njian |
---|---|
date | Wed, 16 Aug 2017 14:48:41 +0800 |
parents | 39063b484ec2 |
children | 1a5279da09ee |
files | hotspot/src/cpu/aarch64/vm/aarch64.ad hotspot/src/cpu/aarch64/vm/vm_version_aarch64.hpp |
diffstat | 2 files changed, 24 insertions(+), 4 deletions(-) [+] |
line wrap: on
line diff
--- a/hotspot/src/cpu/aarch64/vm/aarch64.ad Wed Aug 23 10:25:25 2017 +0200
+++ b/hotspot/src/cpu/aarch64/vm/aarch64.ad Wed Aug 16 14:48:41 2017 +0800
@@ -3806,15 +3806,24 @@
       // Any use that can't embed the address computation?
       for (DUIterator_Fast imax, i = addp->fast_outs(imax); i < imax; i++) {
         Node* u = addp->fast_out(i);
-        if (!u->is_Mem() || u->is_LoadVector() || u->is_StoreVector() || u->Opcode() == Op_StoreCM) {
+        if (!u->is_Mem()) {
+          return;
+        }
+        if (u->is_LoadVector() || u->is_StoreVector() || u->Opcode() == Op_StoreCM) {
           return;
         }
+        if (addp2->in(AddPNode::Offset)->Opcode() != Op_ConvI2L) {
+          int scale = 1 << addp2->in(AddPNode::Offset)->in(2)->get_int();
+          if (VM_Version::expensive_load(u->as_Mem()->memory_size(), scale)) {
+            return;
+          }
+        }
       }
-
+
       Node* off = addp->in(AddPNode::Offset);
       Node* addr2 = addp2->in(AddPNode::Address);
       Node* base = addp->in(AddPNode::Base);
-
+
       Node* new_addr = NULL;
       // Check whether the graph already has the new AddP we need
       // before we create one (no GVN available here).
@@ -3828,7 +3837,7 @@
           break;
         }
       }
-
+
       if (new_addr == NULL) {
         new_addr = new AddPNode(base, addr2, off);
       }
--- a/hotspot/src/cpu/aarch64/vm/vm_version_aarch64.hpp Wed Aug 23 10:25:25 2017 +0200
+++ b/hotspot/src/cpu/aarch64/vm/vm_version_aarch64.hpp Wed Aug 16 14:48:41 2017 +0800
@@ -56,6 +56,17 @@
   static void assert_is_initialized() {
   }
 
+  static bool expensive_load(int ld_size, int scale) {
+    if (cpu_family() == CPU_ARM) {
+      // Half-word load with index shift by 1 (aka scale is 2) has
+      // extra cycle latency, e.g. ldrsh w0, [x1,w2,sxtw #1].
+      if (ld_size == 2 && scale == 2) {
+        return true;
+      }
+    }
+    return false;
+  }
+
   enum Family {
     CPU_ARM = 'A',
     CPU_BROADCOM = 'B',