OpenJDK / aarch32-port / jdk9-arm3264 / hotspot
changeset 8297:e0ad42748972
Merge
author | amurillo |
---|---|
date | Fri, 01 May 2015 03:56:01 -0700 |
parents | 2ac9b6b36689 5f2ef612ed74 |
children | a0df4738688e |
files | agent/src/share/classes/sun/jvm/hotspot/memory/SharedHeap.java agent/src/share/classes/sun/jvm/hotspot/runtime/VirtualSpace.java src/cpu/aarch64/vm/aarch64.ad src/cpu/aarch64/vm/interp_masm_aarch64.hpp src/cpu/aarch64/vm/macroAssembler_aarch64.cpp src/cpu/x86/vm/rtmLocking.cpp src/cpu/x86/vm/templateTable_x86_32.hpp src/cpu/x86/vm/templateTable_x86_64.hpp src/share/vm/gc_implementation/shared/parGCAllocBuffer.cpp src/share/vm/gc_implementation/shared/parGCAllocBuffer.hpp src/share/vm/gc_implementation/shared/parGCAllocBuffer.inline.hpp src/share/vm/memory/sharedHeap.cpp src/share/vm/memory/sharedHeap.hpp src/share/vm/oops/instanceClassLoaderKlass.cpp src/share/vm/oops/klassPS.hpp src/share/vm/oops/oop.pcgc.inline.hpp src/share/vm/oops/oop.psgc.inline.hpp src/share/vm/runtime/arguments.cpp src/share/vm/runtime/virtualspace.cpp src/share/vm/runtime/virtualspace.hpp test/sanity/WhiteBox.java |
diffstat | 547 files changed, 18308 insertions(+), 10649 deletions(-) |
--- a/agent/src/os/bsd/libproc_impl.c Thu Apr 30 17:20:25 2015 -0700 +++ b/agent/src/os/bsd/libproc_impl.c Fri May 01 03:56:01 2015 -0700 @@ -215,7 +215,12 @@ return NULL; } - strncpy(newlib->name, libname, sizeof(newlib->name)); + if (strlen(libname) >= sizeof(newlib->name)) { + print_debug("libname %s too long\n", libname); + return NULL; + } + strcpy(newlib->name, libname); + newlib->base = base; if (fd == -1) {
--- a/agent/src/os/linux/libproc_impl.c Thu Apr 30 17:20:25 2015 -0700 +++ b/agent/src/os/linux/libproc_impl.c Fri May 01 03:56:01 2015 -0700 @@ -159,7 +159,12 @@ return NULL; } - strncpy(newlib->name, libname, sizeof(newlib->name)); + if (strlen(libname) >= sizeof(newlib->name)) { + print_debug("libname %s too long\n", libname); + return NULL; + } + strcpy(newlib->name, libname); + newlib->base = base; if (fd == -1) {
--- a/agent/src/share/classes/com/sun/java/swing/action/ActionManager.java Thu Apr 30 17:20:25 2015 -0700 +++ b/agent/src/share/classes/com/sun/java/swing/action/ActionManager.java Fri May 01 03:56:01 2015 -0700 @@ -46,6 +46,11 @@ return manager; } + protected static void setInstance(ActionManager m) + { + manager = m; + } + protected abstract void addActions(); protected void addAction(String cmdname, Action action) @@ -90,6 +95,6 @@ private HashMap actions; private static ActionUtilities utilities = new ActionUtilities(); - protected static ActionManager manager; + private static ActionManager manager; }
--- a/agent/src/share/classes/com/sun/java/swing/ui/CommonToolBar.java Thu Apr 30 17:20:25 2015 -0700 +++ b/agent/src/share/classes/com/sun/java/swing/ui/CommonToolBar.java Fri May 01 03:56:01 2015 -0700 @@ -46,7 +46,7 @@ { this.manager = manager; statusBar = status; - buttonSize = new Dimension(CommonUI.buttconPrefSize); + buttonSize = new Dimension(CommonUI.getButtconPrefSize()); buttonInsets = new Insets(0, 0, 0, 0); addComponents(); }
--- a/agent/src/share/classes/com/sun/java/swing/ui/CommonUI.java Thu Apr 30 17:20:25 2015 -0700 +++ b/agent/src/share/classes/com/sun/java/swing/ui/CommonUI.java Fri May 01 03:56:01 2015 -0700 @@ -373,20 +373,25 @@ comp.setCursor(Cursor.getPredefinedCursor(0)); } - public static final int BUTTON_WIDTH = 100; - public static final int BUTTON_HEIGHT = 26; - public static final int BUTTCON_WIDTH = 28; - public static final int BUTTCON_HEIGHT = 28; - public static final int SM_BUTTON_WIDTH = 72; - public static final int SM_BUTTON_HEIGHT = 26; - public static final int LABEL_WIDTH = 100; - public static final int LABEL_HEIGHT = 20; - public static final int TEXT_WIDTH = 150; - public static final int TEXT_HEIGHT = 20; - public static Dimension buttonPrefSize = new Dimension(100, 26); - public static Dimension buttconPrefSize = new Dimension(28, 28); - public static Dimension smbuttonPrefSize = new Dimension(72, 26); - public static Dimension labelPrefSize = new Dimension(100, 20); - public static Dimension textPrefSize = new Dimension(150, 20); + public static Dimension getButtconPrefSize() + { + return buttconPrefSize; + } + + private static final int BUTTON_WIDTH = 100; + private static final int BUTTON_HEIGHT = 26; + private static final int BUTTCON_WIDTH = 28; + private static final int BUTTCON_HEIGHT = 28; + private static final int SM_BUTTON_WIDTH = 72; + private static final int SM_BUTTON_HEIGHT = 26; + private static final int LABEL_WIDTH = 100; + private static final int LABEL_HEIGHT = 20; + private static final int TEXT_WIDTH = 150; + private static final int TEXT_HEIGHT = 20; + private static final Dimension buttonPrefSize = new Dimension(100, 26); + private static final Dimension buttconPrefSize = new Dimension(28, 28); + private static final Dimension smbuttonPrefSize = new Dimension(72, 26); + private static final Dimension labelPrefSize = new Dimension(100, 20); + private static final Dimension textPrefSize = new Dimension(150, 20); }
--- a/agent/src/share/classes/sun/jvm/hotspot/ci/ciMethodData.java Thu Apr 30 17:20:25 2015 -0700 +++ b/agent/src/share/classes/sun/jvm/hotspot/ci/ciMethodData.java Fri May 01 03:56:01 2015 -0700 @@ -148,7 +148,7 @@ ParametersTypeData<ciKlass,ciMethod> parametersTypeData() { Address base = getAddress().addOffsetTo(origField.getOffset()); int di = (int)parametersTypeDataDi.getValue(base); - if (di == -1) { + if (di == -1 || di == -2) { return null; } DataLayout dataLayout = new DataLayout(dataField.getValue(getAddress()), di);
--- a/agent/src/share/classes/sun/jvm/hotspot/gc_implementation/g1/G1CollectedHeap.java Thu Apr 30 17:20:25 2015 -0700 +++ b/agent/src/share/classes/sun/jvm/hotspot/gc_implementation/g1/G1CollectedHeap.java Fri May 01 03:56:01 2015 -0700 @@ -29,9 +29,9 @@ import java.util.Observer; import sun.jvm.hotspot.debugger.Address; +import sun.jvm.hotspot.gc_interface.CollectedHeap; import sun.jvm.hotspot.gc_interface.CollectedHeapName; import sun.jvm.hotspot.memory.MemRegion; -import sun.jvm.hotspot.memory.SharedHeap; import sun.jvm.hotspot.memory.SpaceClosure; import sun.jvm.hotspot.runtime.VM; import sun.jvm.hotspot.runtime.VMObjectFactory; @@ -41,7 +41,7 @@ // Mirror class for G1CollectedHeap. -public class G1CollectedHeap extends SharedHeap { +public class G1CollectedHeap extends CollectedHeap { // HeapRegionManager _hrm; static private long hrmFieldOffset; // MemRegion _g1_reserved;
--- a/agent/src/share/classes/sun/jvm/hotspot/gc_interface/CollectedHeap.java Thu Apr 30 17:20:25 2015 -0700 +++ b/agent/src/share/classes/sun/jvm/hotspot/gc_interface/CollectedHeap.java Fri May 01 03:56:01 2015 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000, 2005, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -32,7 +32,7 @@ import sun.jvm.hotspot.runtime.*; import sun.jvm.hotspot.types.*; -public class CollectedHeap extends VMObject { +public abstract class CollectedHeap extends VMObject { private static long reservedFieldOffset; static { @@ -73,9 +73,7 @@ return reservedRegion().contains(a); } - public CollectedHeapName kind() { - return CollectedHeapName.ABSTRACT; - } + public abstract CollectedHeapName kind(); public void print() { printOn(System.out); } public void printOn(PrintStream tty) {
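With CollectedHeap made abstract and kind() turned into an abstract method, every concrete heap mirror must identify itself, which is what lets the ABSTRACT and SHARED_HEAP sentinel names disappear in the following diffs. A minimal sketch of that pattern, using simplified stand-in classes rather than the real SA mirror types:

```java
// Simplified stand-ins for the SA mirror classes; the real ones wrap VM addresses.
abstract class CollectedHeap {
    // Forcing subclasses to answer removes the need for an "abstract" sentinel kind.
    public abstract String kind();
}

class GenCollectedHeap extends CollectedHeap {
    @Override
    public String kind() { return "GenCollectedHeap"; }
}

class G1CollectedHeap extends CollectedHeap {
    @Override
    public String kind() { return "G1CollectedHeap"; }
}
```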
--- a/agent/src/share/classes/sun/jvm/hotspot/gc_interface/CollectedHeapName.java Thu Apr 30 17:20:25 2015 -0700 +++ b/agent/src/share/classes/sun/jvm/hotspot/gc_interface/CollectedHeapName.java Fri May 01 03:56:01 2015 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -31,8 +31,6 @@ private CollectedHeapName(String name) { this.name = name; } - public static final CollectedHeapName ABSTRACT = new CollectedHeapName("abstract"); - public static final CollectedHeapName SHARED_HEAP = new CollectedHeapName("SharedHeap"); public static final CollectedHeapName GEN_COLLECTED_HEAP = new CollectedHeapName("GenCollectedHeap"); public static final CollectedHeapName G1_COLLECTED_HEAP = new CollectedHeapName("G1CollectedHeap"); public static final CollectedHeapName PARALLEL_SCAVENGE_HEAP = new CollectedHeapName("ParallelScavengeHeap");
--- a/agent/src/share/classes/sun/jvm/hotspot/memory/GenCollectedHeap.java Thu Apr 30 17:20:25 2015 -0700 +++ b/agent/src/share/classes/sun/jvm/hotspot/memory/GenCollectedHeap.java Fri May 01 03:56:01 2015 -0700 @@ -33,8 +33,7 @@ import sun.jvm.hotspot.types.*; import sun.jvm.hotspot.utilities.*; -public class GenCollectedHeap extends SharedHeap { - private static CIntegerField nGensField; +public class GenCollectedHeap extends CollectedHeap { private static AddressField youngGenField; private static AddressField oldGenField; @@ -54,7 +53,6 @@ private static synchronized void initialize(TypeDataBase db) { Type type = db.lookupType("GenCollectedHeap"); - nGensField = type.getCIntegerField("_n_gens"); youngGenField = type.getAddressField("_young_gen"); oldGenField = type.getAddressField("_old_gen"); @@ -70,7 +68,7 @@ } public int nGens() { - return (int) nGensField.getValue(addr); + return 2; // Young + Old } public Generation getGen(int i) {
--- a/agent/src/share/classes/sun/jvm/hotspot/memory/SharedHeap.java Thu Apr 30 17:20:25 2015 -0700 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,58 +0,0 @@ -/* - * Copyright (c) 2002, 2012, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA - * or visit www.oracle.com if you need additional information or have any - * questions. - * - */ - -package sun.jvm.hotspot.memory; - -import java.io.*; -import java.util.*; - -import sun.jvm.hotspot.debugger.*; -import sun.jvm.hotspot.gc_interface.*; -import sun.jvm.hotspot.runtime.*; -import sun.jvm.hotspot.types.*; - -public abstract class SharedHeap extends CollectedHeap { - private static VirtualConstructor ctor; - - static { - VM.registerVMInitializedObserver(new Observer() { - public void update(Observable o, Object data) { - initialize(VM.getVM().getTypeDataBase()); - } - }); - } - - private static synchronized void initialize(TypeDataBase db) { - Type type = db.lookupType("SharedHeap"); - ctor = new VirtualConstructor(db); - } - - public SharedHeap(Address addr) { - super(addr); - } - - public CollectedHeapName kind() { - return CollectedHeapName.SHARED_HEAP; - } - }
--- a/agent/src/share/classes/sun/jvm/hotspot/memory/Universe.java Thu Apr 30 17:20:25 2015 -0700 +++ b/agent/src/share/classes/sun/jvm/hotspot/memory/Universe.java Fri May 01 03:56:01 2015 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -112,11 +112,7 @@ return ""; } public CollectedHeap heap() { - try { - return (CollectedHeap) heapConstructor.instantiateWrapperFor(collectedHeapField.getValue()); - } catch (WrongTypeException e) { - return new CollectedHeap(collectedHeapField.getValue()); - } + return (CollectedHeap) heapConstructor.instantiateWrapperFor(collectedHeapField.getValue()); } public static long getNarrowOopBase() {
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/agent/src/share/classes/sun/jvm/hotspot/memory/VirtualSpace.java Fri May 01 03:56:01 2015 -0700 @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2000, 2002, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +package sun.jvm.hotspot.memory; + +import java.util.*; +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.runtime.*; +import sun.jvm.hotspot.types.*; + +public class VirtualSpace extends VMObject { + private static AddressField lowField; + private static AddressField highField; + private static AddressField lowBoundaryField; + private static AddressField highBoundaryField; + + static { + VM.registerVMInitializedObserver(new Observer() { + public void update(Observable o, Object data) { + initialize(VM.getVM().getTypeDataBase()); + } + }); + } + + private static synchronized void initialize(TypeDataBase db) { + Type type = db.lookupType("VirtualSpace"); + + lowField = type.getAddressField("_low"); + highField = type.getAddressField("_high"); + lowBoundaryField = type.getAddressField("_low_boundary"); + highBoundaryField = type.getAddressField("_high_boundary"); + } + + public VirtualSpace(Address addr) { + super(addr); + } + + public Address low() { return lowField.getValue(addr); } + public Address high() { return highField.getValue(addr); } + public Address lowBoundary() { return lowBoundaryField.getValue(addr); } + public Address highBoundary() { return highBoundaryField.getValue(addr); } + + /** Testers (all sizes are byte sizes) */ + public long committedSize() { return high().minus(low()); } + public long reservedSize() { return highBoundary().minus(lowBoundary()); } + public long uncommittedSize() { return reservedSize() - committedSize(); } + public boolean contains(Address addr) { return (low().lessThanOrEqual(addr) && addr.lessThan(high())); } +}
--- a/agent/src/share/classes/sun/jvm/hotspot/oops/ConstantPool.java Thu Apr 30 17:20:25 2015 -0700 +++ b/agent/src/share/classes/sun/jvm/hotspot/oops/ConstantPool.java Fri May 01 03:56:01 2015 -0700 @@ -328,7 +328,7 @@ } public Symbol getUnresolvedStringAt(int which) { - return getSymbolAt(which); + return getSlotAt(which).getSymbol(); } // returns null, if not resolved.
--- a/agent/src/share/classes/sun/jvm/hotspot/opto/PhaseCFG.java Thu Apr 30 17:20:25 2015 -0700 +++ b/agent/src/share/classes/sun/jvm/hotspot/opto/PhaseCFG.java Fri May 01 03:56:01 2015 -0700 @@ -42,10 +42,10 @@ private static synchronized void initialize(TypeDataBase db) throws WrongTypeException { Type type = db.lookupType("PhaseCFG"); - numBlocksField = new CIntField(type.getCIntegerField("_num_blocks"), 0); + numBlocksField = new CIntField(type.getCIntegerField("_number_of_blocks"), 0); blocksField = type.getAddressField("_blocks"); bbsField = type.getAddressField("_node_to_block_mapping"); - brootField = type.getAddressField("_broot"); + brootField = type.getAddressField("_root_block"); } private static CIntField numBlocksField;
--- a/agent/src/share/classes/sun/jvm/hotspot/runtime/VM.java Thu Apr 30 17:20:25 2015 -0700 +++ b/agent/src/share/classes/sun/jvm/hotspot/runtime/VM.java Fri May 01 03:56:01 2015 -0700 @@ -259,8 +259,7 @@ saProps = new Properties(); URL url = null; try { - url = VM.class.getClassLoader().getResource("sa.properties"); - saProps.load(new BufferedInputStream(url.openStream())); + saProps.load(VM.class.getResourceAsStream("/sa.properties")); } catch (Exception e) { System.err.println("Unable to load properties " + (url == null ? "null" : url.toString()) +
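The properties file is now read through Class.getResourceAsStream instead of resolving a URL via the class loader and opening a stream by hand. A small sketch of the two approaches, assuming a resource named sa.properties is present on the class path; the class name here is hypothetical:

```java
import java.io.BufferedInputStream;
import java.io.InputStream;
import java.net.URL;
import java.util.Properties;

class ResourceLoadExample {
    // Old approach: ask the class loader for a URL, then open the stream manually.
    // If the resource is missing, url is null and openStream() throws an NPE.
    static Properties loadViaClassLoader() throws Exception {
        Properties props = new Properties();
        URL url = ResourceLoadExample.class.getClassLoader().getResource("sa.properties");
        if (url != null) {
            try (InputStream in = new BufferedInputStream(url.openStream())) {
                props.load(in);
            }
        }
        return props;
    }

    // New approach: one call; the leading '/' makes the resource name absolute.
    static Properties loadViaResourceStream() throws Exception {
        Properties props = new Properties();
        try (InputStream in = ResourceLoadExample.class.getResourceAsStream("/sa.properties")) {
            if (in != null) {
                props.load(in);
            }
        }
        return props;
    }
}
```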
--- a/agent/src/share/classes/sun/jvm/hotspot/runtime/VirtualSpace.java Thu Apr 30 17:20:25 2015 -0700 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,68 +0,0 @@ -/* - * Copyright (c) 2000, 2002, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA - * or visit www.oracle.com if you need additional information or have any - * questions. - * - */ - -package sun.jvm.hotspot.runtime; - -import java.util.*; -import sun.jvm.hotspot.debugger.*; -import sun.jvm.hotspot.types.*; - -public class VirtualSpace extends VMObject { - private static AddressField lowField; - private static AddressField highField; - private static AddressField lowBoundaryField; - private static AddressField highBoundaryField; - - static { - VM.registerVMInitializedObserver(new Observer() { - public void update(Observable o, Object data) { - initialize(VM.getVM().getTypeDataBase()); - } - }); - } - - private static synchronized void initialize(TypeDataBase db) { - Type type = db.lookupType("VirtualSpace"); - - lowField = type.getAddressField("_low"); - highField = type.getAddressField("_high"); - lowBoundaryField = type.getAddressField("_low_boundary"); - highBoundaryField = type.getAddressField("_high_boundary"); - } - - public VirtualSpace(Address addr) { - super(addr); - } - - public Address low() { return lowField.getValue(addr); } - public Address high() { return highField.getValue(addr); } - public Address lowBoundary() { return lowBoundaryField.getValue(addr); } - public Address highBoundary() { return highBoundaryField.getValue(addr); } - - /** Testers (all sizes are byte sizes) */ - public long committedSize() { return high().minus(low()); } - public long reservedSize() { return highBoundary().minus(lowBoundary()); } - public long uncommittedSize() { return reservedSize() - committedSize(); } - public boolean contains(Address addr) { return (low().lessThanOrEqual(addr) && addr.lessThan(high())); } -}
--- a/agent/src/share/classes/sun/jvm/hotspot/runtime/x86/X86Frame.java Thu Apr 30 17:20:25 2015 -0700 +++ b/agent/src/share/classes/sun/jvm/hotspot/runtime/x86/X86Frame.java Fri May 01 03:56:01 2015 -0700 @@ -314,26 +314,17 @@ //------------------------------------------------------------------------------ // frame::adjust_unextended_sp private void adjustUnextendedSP() { - // If we are returning to a compiled MethodHandle call site, the - // saved_fp will in fact be a saved value of the unextended SP. The - // simplest way to tell whether we are returning to such a call site - // is as follows: + // On x86, sites calling method handle intrinsics and lambda forms are treated + // as any other call site. Therefore, no special action is needed when we are + // returning to any of these call sites. CodeBlob cb = cb(); NMethod senderNm = (cb == null) ? null : cb.asNMethodOrNull(); if (senderNm != null) { - // If the sender PC is a deoptimization point, get the original - // PC. For MethodHandle call site the unextended_sp is stored in - // saved_fp. - if (senderNm.isDeoptMhEntry(getPC())) { - // DEBUG_ONLY(verifyDeoptMhOriginalPc(senderNm, getFP())); - raw_unextendedSP = getFP(); - } - else if (senderNm.isDeoptEntry(getPC())) { - // DEBUG_ONLY(verifyDeoptOriginalPc(senderNm, raw_unextendedSp)); - } - else if (senderNm.isMethodHandleReturn(getPC())) { - raw_unextendedSP = getFP(); + // If the sender PC is a deoptimization point, get the original PC. + if (senderNm.isDeoptEntry(getPC()) || + senderNm.isDeoptMhEntry(getPC())) { + // DEBUG_ONLY(verifyDeoptriginalPc(senderNm, raw_unextendedSp)); } } }
--- a/agent/src/share/classes/sun/jvm/hotspot/tools/HeapSummary.java Thu Apr 30 17:20:25 2015 -0700 +++ b/agent/src/share/classes/sun/jvm/hotspot/tools/HeapSummary.java Fri May 01 03:56:01 2015 -0700 @@ -81,53 +81,48 @@ System.out.println(); System.out.println("Heap Usage:"); - if (heap instanceof SharedHeap) { - SharedHeap sharedHeap = (SharedHeap) heap; - if (sharedHeap instanceof GenCollectedHeap) { - GenCollectedHeap genHeap = (GenCollectedHeap) sharedHeap; - for (int n = 0; n < genHeap.nGens(); n++) { - Generation gen = genHeap.getGen(n); - if (gen instanceof sun.jvm.hotspot.memory.DefNewGeneration) { - System.out.println("New Generation (Eden + 1 Survivor Space):"); - printGen(gen); + if (heap instanceof GenCollectedHeap) { + GenCollectedHeap genHeap = (GenCollectedHeap) heap; + for (int n = 0; n < genHeap.nGens(); n++) { + Generation gen = genHeap.getGen(n); + if (gen instanceof sun.jvm.hotspot.memory.DefNewGeneration) { + System.out.println("New Generation (Eden + 1 Survivor Space):"); + printGen(gen); - ContiguousSpace eden = ((DefNewGeneration)gen).eden(); - System.out.println("Eden Space:"); - printSpace(eden); + ContiguousSpace eden = ((DefNewGeneration)gen).eden(); + System.out.println("Eden Space:"); + printSpace(eden); - ContiguousSpace from = ((DefNewGeneration)gen).from(); - System.out.println("From Space:"); - printSpace(from); + ContiguousSpace from = ((DefNewGeneration)gen).from(); + System.out.println("From Space:"); + printSpace(from); - ContiguousSpace to = ((DefNewGeneration)gen).to(); - System.out.println("To Space:"); - printSpace(to); - } else { - System.out.println(gen.name() + ":"); - printGen(gen); - } + ContiguousSpace to = ((DefNewGeneration)gen).to(); + System.out.println("To Space:"); + printSpace(to); + } else { + System.out.println(gen.name() + ":"); + printGen(gen); } - } else if (sharedHeap instanceof G1CollectedHeap) { - G1CollectedHeap g1h = (G1CollectedHeap) sharedHeap; - G1MonitoringSupport g1mm = g1h.g1mm(); - long edenRegionNum = g1mm.edenRegionNum(); - long survivorRegionNum = g1mm.survivorRegionNum(); - HeapRegionSetBase oldSet = g1h.oldSet(); - HeapRegionSetBase humongousSet = g1h.humongousSet(); - long oldRegionNum = oldSet.count().length() - + humongousSet.count().capacity() / HeapRegion.grainBytes(); - printG1Space("G1 Heap:", g1h.n_regions(), - g1h.used(), g1h.capacity()); - System.out.println("G1 Young Generation:"); - printG1Space("Eden Space:", edenRegionNum, - g1mm.edenUsed(), g1mm.edenCommitted()); - printG1Space("Survivor Space:", survivorRegionNum, - g1mm.survivorUsed(), g1mm.survivorCommitted()); - printG1Space("G1 Old Generation:", oldRegionNum, - g1mm.oldUsed(), g1mm.oldCommitted()); - } else { - throw new RuntimeException("unknown SharedHeap type : " + heap.getClass()); } + } else if (heap instanceof G1CollectedHeap) { + G1CollectedHeap g1h = (G1CollectedHeap) heap; + G1MonitoringSupport g1mm = g1h.g1mm(); + long edenRegionNum = g1mm.edenRegionNum(); + long survivorRegionNum = g1mm.survivorRegionNum(); + HeapRegionSetBase oldSet = g1h.oldSet(); + HeapRegionSetBase humongousSet = g1h.humongousSet(); + long oldRegionNum = oldSet.count().length() + + humongousSet.count().capacity() / HeapRegion.grainBytes(); + printG1Space("G1 Heap:", g1h.n_regions(), + g1h.used(), g1h.capacity()); + System.out.println("G1 Young Generation:"); + printG1Space("Eden Space:", edenRegionNum, + g1mm.edenUsed(), g1mm.edenCommitted()); + printG1Space("Survivor Space:", survivorRegionNum, + g1mm.survivorUsed(), g1mm.survivorCommitted()); + printG1Space("G1 
Old Generation:", oldRegionNum, + g1mm.oldUsed(), g1mm.oldCommitted()); } else if (heap instanceof ParallelScavengeHeap) { ParallelScavengeHeap psh = (ParallelScavengeHeap) heap; PSYoungGen youngGen = psh.youngGen();
--- a/agent/src/share/classes/sun/jvm/hotspot/ui/action/HSDBActionManager.java Thu Apr 30 17:20:25 2015 -0700 +++ b/agent/src/share/classes/sun/jvm/hotspot/ui/action/HSDBActionManager.java Fri May 01 03:56:01 2015 -0700 @@ -32,10 +32,12 @@ public class HSDBActionManager extends ActionManager { public static ActionManager getInstance() { - if (manager == null) { - manager = new HSDBActionManager(); + ActionManager m = ActionManager.getInstance(); + if (m == null) { + m = new HSDBActionManager(); + ActionManager.setInstance(m); } - return manager; + return m; } protected void addActions() {
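Because the manager field is now private in ActionManager, the subclass installs its instance through the protected setter instead of assigning the field directly. A minimal sketch of that lazy-initialization handshake, with hypothetical class names standing in for ActionManager and HSDBActionManager:

```java
// Hypothetical simplified version of the singleton handshake between base and subclass.
abstract class BaseManager {
    private static BaseManager instance;

    protected static BaseManager getInstanceOrNull() { return instance; }
    protected static void setInstance(BaseManager m) { instance = m; }
}

class DebuggerManager extends BaseManager {
    public static BaseManager getInstance() {
        BaseManager m = getInstanceOrNull();
        if (m == null) {
            m = new DebuggerManager();
            setInstance(m);          // install lazily on first use
        }
        return m;
    }
}
```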
--- a/agent/src/share/classes/sun/jvm/hotspot/utilities/HeapHprofBinWriter.java Thu Apr 30 17:20:25 2015 -0700 +++ b/agent/src/share/classes/sun/jvm/hotspot/utilities/HeapHprofBinWriter.java Fri May 01 03:56:01 2015 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2004, 2015, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -799,6 +799,18 @@ writeObjectID(klass.getJavaMirror()); ClassData cd = (ClassData) classDataCache.get(klass); + if (cd == null) { + // The class is not present in the system dictionary, probably Lambda. + // Add it to cache here + if (klass instanceof InstanceKlass) { + InstanceKlass ik = (InstanceKlass) klass; + List fields = getInstanceFields(ik); + int instSize = getSizeForFields(fields); + cd = new ClassData(instSize, fields); + classDataCache.put(ik, cd); + } + } + if (Assert.ASSERTS_ENABLED) { Assert.that(cd != null, "can not get class data for " + klass.getName().asString() + klass.getAddress()); }
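The writer now fills its class-data cache on demand when it meets a class that was not seen while walking the system dictionary (typically a lambda class). A hedged sketch of that compute-if-missing caching idea, with hypothetical types standing in for InstanceKlass and ClassData:

```java
import java.util.HashMap;
import java.util.List;
import java.util.Map;

class ClassDataCacheExample {
    // Hypothetical stand-in for the SA ClassData record (instance size + field list).
    record ClassData(int instanceSize, List<String> fields) { }

    private final Map<String, ClassData> classDataCache = new HashMap<>();

    // Returns cached data, computing and caching it if the class was not
    // registered up front (e.g. a lambda class absent from the dictionary).
    ClassData classDataFor(String className, List<String> fields) {
        return classDataCache.computeIfAbsent(className,
                name -> new ClassData(sizeForFields(fields), fields));
    }

    private static int sizeForFields(List<String> fields) {
        // Placeholder size computation; the real writer sums the field sizes.
        return fields.size() * 8;
    }
}
```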
--- a/make/Makefile Thu Apr 30 17:20:25 2015 -0700 +++ b/make/Makefile Fri May 01 03:56:01 2015 -0700 @@ -98,7 +98,7 @@ COMMON_VM_OPTIMIZED_TARGETS=optimized optimized1 docs export_optimized # JDK directory list -JDK_DIRS=bin include jre lib demo +JDK_DIRS=bin include lib demo all: all_product all_fastdebug @@ -373,33 +373,33 @@ $(install-file) $(EXPORT_LIB_DIR)/%.lib: $(C2_BUILD_DIR)/%.lib $(install-file) -$(EXPORT_JRE_BIN_DIR)/%.diz: $(C2_BUILD_DIR)/%.diz +$(EXPORT_BIN_DIR)/%.diz: $(C2_BUILD_DIR)/%.diz $(install-file) -$(EXPORT_JRE_BIN_DIR)/%.dll: $(C2_BUILD_DIR)/%.dll +$(EXPORT_BIN_DIR)/%.dll: $(C2_BUILD_DIR)/%.dll $(install-file) -$(EXPORT_JRE_BIN_DIR)/%.pdb: $(C2_BUILD_DIR)/%.pdb +$(EXPORT_BIN_DIR)/%.pdb: $(C2_BUILD_DIR)/%.pdb $(install-file) -$(EXPORT_JRE_BIN_DIR)/%.map: $(C2_BUILD_DIR)/%.map +$(EXPORT_BIN_DIR)/%.map: $(C2_BUILD_DIR)/%.map $(install-file) # Unix -$(EXPORT_JRE_LIB_ARCH_DIR)/%.$(LIBRARY_SUFFIX): $(C2_BUILD_DIR)/%.$(LIBRARY_SUFFIX) +$(EXPORT_LIB_ARCH_DIR)/%.$(LIBRARY_SUFFIX): $(C2_BUILD_DIR)/%.$(LIBRARY_SUFFIX) $(install-file) $(EXPORT_SERVER_DIR)/%.$(LIBRARY_SUFFIX): $(C2_BUILD_DIR)/%.$(LIBRARY_SUFFIX) $(install-file) $(EXPORT_SERVER_DIR)/64/%.$(LIBRARY_SUFFIX): $(C2_BUILD_DIR)/%.$(LIBRARY_SUFFIX) $(install-file) -$(EXPORT_JRE_LIB_ARCH_DIR)/%.debuginfo: $(C2_BUILD_DIR)/%.debuginfo +$(EXPORT_LIB_ARCH_DIR)/%.debuginfo: $(C2_BUILD_DIR)/%.debuginfo $(install-file) $(EXPORT_SERVER_DIR)/%.debuginfo: $(C2_BUILD_DIR)/%.debuginfo $(install-file) $(EXPORT_SERVER_DIR)/64/%.debuginfo: $(C2_BUILD_DIR)/%.debuginfo $(install-file) -$(EXPORT_JRE_LIB_ARCH_DIR)/%.diz: $(C2_BUILD_DIR)/%.diz +$(EXPORT_LIB_ARCH_DIR)/%.diz: $(C2_BUILD_DIR)/%.diz $(install-file) $(EXPORT_SERVER_DIR)/64/%.diz: $(C2_BUILD_DIR)/%.diz $(install-file) # MacOS X -$(EXPORT_JRE_LIB_ARCH_DIR)/%.dSYM: $(C2_BUILD_DIR)/%.dSYM +$(EXPORT_LIB_ARCH_DIR)/%.dSYM: $(C2_BUILD_DIR)/%.dSYM $(install-dir) $(EXPORT_SERVER_DIR)/%.dSYM: $(C2_BUILD_DIR)/%.dSYM $(install-dir) @@ -423,33 +423,33 @@ $(install-file) $(EXPORT_LIB_DIR)/%.lib: $(C1_BUILD_DIR)/%.lib $(install-file) -$(EXPORT_JRE_BIN_DIR)/%.diz: $(C1_BUILD_DIR)/%.diz +$(EXPORT_BIN_DIR)/%.diz: $(C1_BUILD_DIR)/%.diz $(install-file) -$(EXPORT_JRE_BIN_DIR)/%.dll: $(C1_BUILD_DIR)/%.dll +$(EXPORT_BIN_DIR)/%.dll: $(C1_BUILD_DIR)/%.dll $(install-file) -$(EXPORT_JRE_BIN_DIR)/%.pdb: $(C1_BUILD_DIR)/%.pdb +$(EXPORT_BIN_DIR)/%.pdb: $(C1_BUILD_DIR)/%.pdb $(install-file) -$(EXPORT_JRE_BIN_DIR)/%.map: $(C1_BUILD_DIR)/%.map +$(EXPORT_BIN_DIR)/%.map: $(C1_BUILD_DIR)/%.map $(install-file) # Unix -$(EXPORT_JRE_LIB_ARCH_DIR)/%.$(LIBRARY_SUFFIX): $(C1_BUILD_DIR)/%.$(LIBRARY_SUFFIX) +$(EXPORT_LIB_ARCH_DIR)/%.$(LIBRARY_SUFFIX): $(C1_BUILD_DIR)/%.$(LIBRARY_SUFFIX) $(install-file) $(EXPORT_CLIENT_DIR)/%.$(LIBRARY_SUFFIX): $(C1_BUILD_DIR)/%.$(LIBRARY_SUFFIX) $(install-file) $(EXPORT_CLIENT_DIR)/64/%.$(LIBRARY_SUFFIX): $(C1_BUILD_DIR)/%.$(LIBRARY_SUFFIX) $(install-file) -$(EXPORT_JRE_LIB_ARCH_DIR)/%.debuginfo: $(C1_BUILD_DIR)/%.debuginfo +$(EXPORT_LIB_ARCH_DIR)/%.debuginfo: $(C1_BUILD_DIR)/%.debuginfo $(install-file) $(EXPORT_CLIENT_DIR)/%.debuginfo: $(C1_BUILD_DIR)/%.debuginfo $(install-file) $(EXPORT_CLIENT_DIR)/64/%.debuginfo: $(C1_BUILD_DIR)/%.debuginfo $(install-file) -$(EXPORT_JRE_LIB_ARCH_DIR)/%.diz: $(C1_BUILD_DIR)/%.diz +$(EXPORT_LIB_ARCH_DIR)/%.diz: $(C1_BUILD_DIR)/%.diz $(install-file) $(EXPORT_CLIENT_DIR)/64/%.diz: $(C1_BUILD_DIR)/%.diz $(install-file) # MacOS X -$(EXPORT_JRE_LIB_ARCH_DIR)/%.dSYM: $(C1_BUILD_DIR)/%.dSYM +$(EXPORT_LIB_ARCH_DIR)/%.dSYM: $(C1_BUILD_DIR)/%.dSYM $(install-dir) 
$(EXPORT_CLIENT_DIR)/%.dSYM: $(C1_BUILD_DIR)/%.dSYM $(install-dir) @@ -473,28 +473,28 @@ $(install-file) $(EXPORT_LIB_DIR)/%.lib: $(MINIMAL1_BUILD_DIR)/%.lib $(install-file) -$(EXPORT_JRE_BIN_DIR)/%.diz: $(MINIMAL1_BUILD_DIR)/%.diz +$(EXPORT_BIN_DIR)/%.diz: $(MINIMAL1_BUILD_DIR)/%.diz $(install-file) -$(EXPORT_JRE_BIN_DIR)/%.dll: $(MINIMAL1_BUILD_DIR)/%.dll +$(EXPORT_BIN_DIR)/%.dll: $(MINIMAL1_BUILD_DIR)/%.dll $(install-file) -$(EXPORT_JRE_BIN_DIR)/%.pdb: $(MINIMAL1_BUILD_DIR)/%.pdb +$(EXPORT_BIN_DIR)/%.pdb: $(MINIMAL1_BUILD_DIR)/%.pdb $(install-file) -$(EXPORT_JRE_BIN_DIR)/%.map: $(MINIMAL1_BUILD_DIR)/%.map +$(EXPORT_BIN_DIR)/%.map: $(MINIMAL1_BUILD_DIR)/%.map $(install-file) # Unix -$(EXPORT_JRE_LIB_ARCH_DIR)/%.$(LIBRARY_SUFFIX): $(MINIMAL1_BUILD_DIR)/%.$(LIBRARY_SUFFIX) +$(EXPORT_LIB_ARCH_DIR)/%.$(LIBRARY_SUFFIX): $(MINIMAL1_BUILD_DIR)/%.$(LIBRARY_SUFFIX) $(install-file) $(EXPORT_MINIMAL_DIR)/%.$(LIBRARY_SUFFIX): $(MINIMAL1_BUILD_DIR)/%.$(LIBRARY_SUFFIX) $(install-file) $(EXPORT_MINIMAL_DIR)/64/%.$(LIBRARY_SUFFIX): $(MINIMAL1_BUILD_DIR)/%.$(LIBRARY_SUFFIX) $(install-file) -$(EXPORT_JRE_LIB_ARCH_DIR)/%.debuginfo: $(MINIMAL1_BUILD_DIR)/%.debuginfo +$(EXPORT_LIB_ARCH_DIR)/%.debuginfo: $(MINIMAL1_BUILD_DIR)/%.debuginfo $(install-file) $(EXPORT_MINIMAL_DIR)/%.debuginfo: $(MINIMAL1_BUILD_DIR)/%.debuginfo $(install-file) $(EXPORT_MINIMAL_DIR)/64/%.debuginfo: $(MINIMAL1_BUILD_DIR)/%.debuginfo $(install-file) -$(EXPORT_JRE_LIB_ARCH_DIR)/%.diz: $(MINIMAL1_BUILD_DIR)/%.diz +$(EXPORT_LIB_ARCH_DIR)/%.diz: $(MINIMAL1_BUILD_DIR)/%.diz $(install-file) $(EXPORT_MINIMAL_DIR)/64/%.diz: $(MINIMAL1_BUILD_DIR)/%.diz $(install-file) @@ -509,11 +509,11 @@ $(EXPORT_INCLUDE_DIR)/%: $(ZERO_BUILD_DIR)/../generated/jvmtifiles/% $(install-file) # Unix -$(EXPORT_JRE_LIB_ARCH_DIR)/%.$(LIBRARY_SUFFIX): $(ZERO_BUILD_DIR)/%.$(LIBRARY_SUFFIX) +$(EXPORT_LIB_ARCH_DIR)/%.$(LIBRARY_SUFFIX): $(ZERO_BUILD_DIR)/%.$(LIBRARY_SUFFIX) $(install-file) -$(EXPORT_JRE_LIB_ARCH_DIR)/%.debuginfo: $(ZERO_BUILD_DIR)/%.debuginfo +$(EXPORT_LIB_ARCH_DIR)/%.debuginfo: $(ZERO_BUILD_DIR)/%.debuginfo $(install-file) -$(EXPORT_JRE_LIB_ARCH_DIR)/%.diz: $(ZERO_BUILD_DIR)/%.diz +$(EXPORT_LIB_ARCH_DIR)/%.diz: $(ZERO_BUILD_DIR)/%.diz $(install-file) $(EXPORT_SERVER_DIR)/%.$(LIBRARY_SUFFIX): $(ZERO_BUILD_DIR)/%.$(LIBRARY_SUFFIX) $(install-file) @@ -522,7 +522,7 @@ $(EXPORT_SERVER_DIR)/%.diz: $(ZERO_BUILD_DIR)/%.diz $(install-file) # MacOS X -$(EXPORT_JRE_LIB_ARCH_DIR)/%.dSYM: $(ZERO_BUILD_DIR)/%.dSYM +$(EXPORT_LIB_ARCH_DIR)/%.dSYM: $(ZERO_BUILD_DIR)/%.dSYM $(install-dir) $(EXPORT_SERVER_DIR)/%.dSYM: $(ZERO_BUILD_DIR)/%.dSYM $(install-dir) @@ -536,11 +536,11 @@ $(EXPORT_INCLUDE_DIR)/%: $(CORE_BUILD_DIR)/../generated/jvmtifiles/% $(install-file) # Unix -$(EXPORT_JRE_LIB_ARCH_DIR)/%.$(LIBRARY_SUFFIX): $(CORE_BUILD_DIR)/%.$(LIBRARY_SUFFIX) +$(EXPORT_LIB_ARCH_DIR)/%.$(LIBRARY_SUFFIX): $(CORE_BUILD_DIR)/%.$(LIBRARY_SUFFIX) $(install-file) -$(EXPORT_JRE_LIB_ARCH_DIR)/%.debuginfo: $(CORE_BUILD_DIR)/%.debuginfo +$(EXPORT_LIB_ARCH_DIR)/%.debuginfo: $(CORE_BUILD_DIR)/%.debuginfo $(install-file) -$(EXPORT_JRE_LIB_ARCH_DIR)/%.diz: $(CORE_BUILD_DIR)/%.diz +$(EXPORT_LIB_ARCH_DIR)/%.diz: $(CORE_BUILD_DIR)/%.diz $(install-file) $(EXPORT_SERVER_DIR)/%.$(LIBRARY_SUFFIX): $(CORE_BUILD_DIR)/%.$(LIBRARY_SUFFIX) $(install-file) @@ -558,11 +558,11 @@ $(EXPORT_INCLUDE_DIR)/%: $(SHARK_BUILD_DIR)/../generated/jvmtifiles/% $(install-file) # Unix -$(EXPORT_JRE_LIB_ARCH_DIR)/%.$(LIBRARY_SUFFIX): $(SHARK_BUILD_DIR)/%.$(LIBRARY_SUFFIX) +$(EXPORT_LIB_ARCH_DIR)/%.$(LIBRARY_SUFFIX): 
$(SHARK_BUILD_DIR)/%.$(LIBRARY_SUFFIX) $(install-file) -$(EXPORT_JRE_LIB_ARCH_DIR)/%.debuginfo): $(SHARK_BUILD_DIR)/%.debuginfo +$(EXPORT_LIB_ARCH_DIR)/%.debuginfo): $(SHARK_BUILD_DIR)/%.debuginfo $(install-file) -$(EXPORT_JRE_LIB_ARCH_DIR)/%.diz: $(SHARK_BUILD_DIR)/%.diz +$(EXPORT_LIB_ARCH_DIR)/%.diz: $(SHARK_BUILD_DIR)/%.diz $(install-file) $(EXPORT_SERVER_DIR)/%.$(LIBRARY_SUFFIX): $(SHARK_BUILD_DIR)/%.$(LIBRARY_SUFFIX) $(install-file) @@ -571,7 +571,7 @@ $(EXPORT_SERVER_DIR)/%.diz: $(SHARK_BUILD_DIR)/%.diz $(install-file) # MacOS X -$(EXPORT_JRE_LIB_ARCH_DIR)/%.dSYM: $(SHARK_BUILD_DIR)/%.dSYM +$(EXPORT_LIB_ARCH_DIR)/%.dSYM: $(SHARK_BUILD_DIR)/%.dSYM $(install-dir) $(EXPORT_SERVER_DIR)/%.dSYM: $(SHARK_BUILD_DIR)/%.dSYM $(install-dir)
--- a/make/aix/makefiles/adlc.make Thu Apr 30 17:20:25 2015 -0700 +++ b/make/aix/makefiles/adlc.make Fri May 01 03:56:01 2015 -0700 @@ -1,5 +1,5 @@ # -# Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 1999, 2015, Oracle and/or its affiliates. All rights reserved. # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. # # This code is free software; you can redistribute it and/or modify it @@ -140,13 +140,7 @@ # Note "+="; it is a hook so flags.make can add more flags, like -g or -DFOO. ADLCFLAGS += -q -T -# Normally, debugging is done directly on the ad_<arch>*.cpp files. -# But -g will put #line directives in those files pointing back to <arch>.ad. -# Some builds of gcc 3.2 have a bug that gets tickled by the extra #line directives -# so skip it for 3.2 and ealier. -ifneq "$(shell expr \( $(CC_VER_MAJOR) \> 3 \) \| \( \( $(CC_VER_MAJOR) = 3 \) \& \( $(CC_VER_MINOR) \>= 3 \) \))" "0" ADLCFLAGS += -g -endif ifdef LP64 ADLCFLAGS += -D_LP64
--- a/make/aix/makefiles/defs.make Thu Apr 30 17:20:25 2015 -0700 +++ b/make/aix/makefiles/defs.make Fri May 01 03:56:01 2015 -0700 @@ -184,17 +184,17 @@ EXPORT_LIST += $(EXPORT_DOCS_DIR)/platform/jvmti/jvmti.html # client and server subdirectories have symbolic links to ../libjsig.so -EXPORT_LIST += $(EXPORT_JRE_LIB_ARCH_DIR)/libjsig.$(LIBRARY_SUFFIX) +EXPORT_LIST += $(EXPORT_LIB_ARCH_DIR)/libjsig.$(LIBRARY_SUFFIX) #ifeq ($(ENABLE_FULL_DEBUG_SYMBOLS),1) # ifeq ($(ZIP_DEBUGINFO_FILES),1) -# EXPORT_LIST += $(EXPORT_JRE_LIB_ARCH_DIR)/libjsig.diz +# EXPORT_LIST += $(EXPORT_LIB_ARCH_DIR)/libjsig.diz # else -# EXPORT_LIST += $(EXPORT_JRE_LIB_ARCH_DIR)/libjsig.debuginfo +# EXPORT_LIST += $(EXPORT_LIB_ARCH_DIR)/libjsig.debuginfo # endif #endif -EXPORT_SERVER_DIR = $(EXPORT_JRE_LIB_ARCH_DIR)/server -EXPORT_CLIENT_DIR = $(EXPORT_JRE_LIB_ARCH_DIR)/client -EXPORT_MINIMAL_DIR = $(EXPORT_JRE_LIB_ARCH_DIR)/minimal +EXPORT_SERVER_DIR = $(EXPORT_LIB_ARCH_DIR)/server +EXPORT_CLIENT_DIR = $(EXPORT_LIB_ARCH_DIR)/client +EXPORT_MINIMAL_DIR = $(EXPORT_LIB_ARCH_DIR)/minimal ifeq ($(findstring true, $(JVM_VARIANT_SERVER) $(JVM_VARIANT_ZERO) $(JVM_VARIANT_ZEROSHARK) $(JVM_VARIANT_CORE)), true) EXPORT_LIST += $(EXPORT_SERVER_DIR)/Xusage.txt
--- a/make/aix/makefiles/mapfile-vers-debug Thu Apr 30 17:20:25 2015 -0700 +++ b/make/aix/makefiles/mapfile-vers-debug Fri May 01 03:56:01 2015 -0700 @@ -19,7 +19,7 @@ # Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA # or visit www.oracle.com if you need additional information or have any # questions. -# +# # # Define public interface. @@ -107,6 +107,7 @@ JVM_GetClassTypeAnnotations; JVM_GetDeclaredClasses; JVM_GetDeclaringClass; + JVM_GetSimpleBinaryName; JVM_GetEnclosingMethodInfo; JVM_GetFieldIxModifiers; JVM_GetFieldTypeAnnotations;
--- a/make/aix/makefiles/mapfile-vers-product Thu Apr 30 17:20:25 2015 -0700 +++ b/make/aix/makefiles/mapfile-vers-product Fri May 01 03:56:01 2015 -0700 @@ -19,7 +19,7 @@ # Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA # or visit www.oracle.com if you need additional information or have any # questions. -# +# # # Define public interface. @@ -107,6 +107,7 @@ JVM_GetClassTypeAnnotations; JVM_GetDeclaredClasses; JVM_GetDeclaringClass; + JVM_GetSimpleBinaryName; JVM_GetEnclosingMethodInfo; JVM_GetFieldIxModifiers; JVM_GetInheritedAccessControlContext;
--- a/make/aix/makefiles/ppc64.make Thu Apr 30 17:20:25 2015 -0700 +++ b/make/aix/makefiles/ppc64.make Fri May 01 03:56:01 2015 -0700 @@ -1,6 +1,6 @@ # -# Copyright (c) 2004, 2013, Oracle and/or its affiliates. All rights reserved. -# Copyright 2012, 2013 SAP AG. All rights reserved. +# Copyright (c) 2004, 2015, Oracle and/or its affiliates. All rights reserved. +# Copyright 2012, 2015 SAP AG. All rights reserved. # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. # # This code is free software; you can redistribute it and/or modify it @@ -71,9 +71,6 @@ OPT_CFLAGS/sharedRuntimeTrig.o = $(OPT_CFLAGS/NOOPT) OPT_CFLAGS/sharedRuntimeTrans.o = $(OPT_CFLAGS/NOOPT) -# xlc 10.01 parameters for ipa compile. -QIPA_COMPILE=$(if $(CXX_IS_V10),-qipa) - # Xlc 10.1 parameters for aggressive optimization: # - qhot=level=1: Most aggressive loop optimizations. # - qignerrno: Assume errno is not modified by system calls. @@ -88,7 +85,7 @@ OPT_CFLAGS/synchronizer.o = $(OPT_CFLAGS) -qnoinline # Set all the xlC V10.1 options here. -OPT_CFLAGS += $(QIPA_COMPILE) $(QV10_OPT) $(QV10_OPT_AGGRESSIVE) +OPT_CFLAGS += $(QV10_OPT) $(QV10_OPT_AGGRESSIVE) export OBJECT_MODE=64
--- a/make/aix/makefiles/vm.make Thu Apr 30 17:20:25 2015 -0700 +++ b/make/aix/makefiles/vm.make Fri May 01 03:56:01 2015 -0700 @@ -122,7 +122,7 @@ # By default, link the *.o into the library, not the executable. LINK_INTO$(LINK_INTO) = LIBJVM -JDK_LIBDIR = $(JAVA_HOME)/jre/lib/$(LIBARCH) +JDK_LIBDIR = $(JAVA_HOME)/lib/$(LIBARCH) #---------------------------------------------------------------------- # jvm_db & dtrace
--- a/make/aix/makefiles/xlc.make Thu Apr 30 17:20:25 2015 -0700 +++ b/make/aix/makefiles/xlc.make Fri May 01 03:56:01 2015 -0700 @@ -1,6 +1,6 @@ # -# Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved. -# Copyright (c) 2012, 2013 SAP. All rights reserved. +# Copyright (c) 1999, 2015, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2012, 2015 SAP. All rights reserved. # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. # # This code is free software; you can redistribute it and/or modify it @@ -34,13 +34,17 @@ AS = $(CC) -c -# get xlc version -CXX_VERSION := $(shell $(CXX) -qversion 2>&1 | sed -n 's/.*Version: \([0-9.]*\)/\1/p') +# get xlc version which comes as VV.RR.MMMM.LLLL where 'VV' is the version, +# 'RR' is the release, 'MMMM' is the modification and 'LLLL' is the level. +# We only use 'VV.RR.LLLL' to avoid integer overflows in bash when comparing +# the version numbers (some shells only support 32-bit integer compares!). +CXX_VERSION := $(shell $(CXX) -qversion 2>&1 | \ + sed -n 's/.*Version: \([0-9]\{2\}\).\([0-9]\{2\}\).[0-9]\{4\}.\([0-9]\{4\}\)/\1\2\3/p') # xlc 08.00.0000.0023 and higher supports -qtune=balanced -CXX_SUPPORTS_BALANCED_TUNING=$(shell if [ $(subst .,,$(CXX_VERSION)) -ge 080000000023 ] ; then echo "true" ; fi) +CXX_SUPPORTS_BALANCED_TUNING := $(shell if [ $(CXX_VERSION) -ge 08000023 ] ; then echo "true" ; fi) # xlc 10.01 is used with aggressive optimizations to boost performance -CXX_IS_V10=$(shell if [ $(subst .,,$(CXX_VERSION)) -ge 100100000000 ] ; then echo "true" ; fi) +CXX_IS_V10 := $(shell if [ $(CXX_VERSION) -ge 10010000 ] ; then echo "true" ; fi) # check for precompiled headers support
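The xlC version string VV.RR.MMMM.LLLL is reduced to the VV, RR and LLLL components so the resulting number fits a 32-bit integer compare in the shell. The same normalization, shown as a small Java illustration (a hypothetical helper, not part of the build):

```java
class XlcVersionExample {
    // Turns "10.01.0000.0012" into 10010012 by keeping VV, RR and LLLL,
    // mirroring the sed expression used in xlc.make.
    static int normalize(String version) {
        String[] parts = version.split("\\.");
        return Integer.parseInt(parts[0]) * 1_000_000
             + Integer.parseInt(parts[1]) * 10_000
             + Integer.parseInt(parts[3]);
    }

    public static void main(String[] args) {
        // true: 10.01 is newer than the 08.00.0000.0023 threshold for -qtune=balanced
        System.out.println(normalize("10.01.0000.0012") >= 8_000_023);
    }
}
```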
--- a/make/bsd/makefiles/defs.make Thu Apr 30 17:20:25 2015 -0700 +++ b/make/bsd/makefiles/defs.make Fri May 01 03:56:01 2015 -0700 @@ -265,23 +265,23 @@ EXPORT_LIST += $(EXPORT_DOCS_DIR)/platform/jvmti/jvmti.html # client and server subdirectories have symbolic links to ../libjsig.so -EXPORT_LIST += $(EXPORT_JRE_LIB_ARCH_DIR)/libjsig.$(LIBRARY_SUFFIX) +EXPORT_LIST += $(EXPORT_LIB_ARCH_DIR)/libjsig.$(LIBRARY_SUFFIX) ifeq ($(ENABLE_FULL_DEBUG_SYMBOLS),1) ifeq ($(ZIP_DEBUGINFO_FILES),1) - EXPORT_LIST += $(EXPORT_JRE_LIB_ARCH_DIR)/libjsig.diz + EXPORT_LIST += $(EXPORT_LIB_ARCH_DIR)/libjsig.diz else ifeq ($(OS_VENDOR), Darwin) - EXPORT_LIST += $(EXPORT_JRE_LIB_ARCH_DIR)/libjsig.$(LIBRARY_SUFFIX).dSYM + EXPORT_LIST += $(EXPORT_LIB_ARCH_DIR)/libjsig.$(LIBRARY_SUFFIX).dSYM else - EXPORT_LIST += $(EXPORT_JRE_LIB_ARCH_DIR)/libjsig.debuginfo + EXPORT_LIST += $(EXPORT_LIB_ARCH_DIR)/libjsig.debuginfo endif endif endif -EXPORT_SERVER_DIR = $(EXPORT_JRE_LIB_ARCH_DIR)/server -EXPORT_CLIENT_DIR = $(EXPORT_JRE_LIB_ARCH_DIR)/client -EXPORT_MINIMAL_DIR = $(EXPORT_JRE_LIB_ARCH_DIR)/minimal +EXPORT_SERVER_DIR = $(EXPORT_LIB_ARCH_DIR)/server +EXPORT_CLIENT_DIR = $(EXPORT_LIB_ARCH_DIR)/client +EXPORT_MINIMAL_DIR = $(EXPORT_LIB_ARCH_DIR)/minimal ifeq ($(findstring true, $(JVM_VARIANT_SERVER) $(JVM_VARIANT_ZERO) $(JVM_VARIANT_ZEROSHARK)), true) EXPORT_LIST += $(EXPORT_SERVER_DIR)/Xusage.txt @@ -324,34 +324,34 @@ # Serviceability Binaries # No SA Support for PPC, IA64, ARM or zero -ADD_SA_BINARIES/x86 = $(EXPORT_JRE_LIB_ARCH_DIR)/libsaproc.$(LIBRARY_SUFFIX) \ +ADD_SA_BINARIES/x86 = $(EXPORT_LIB_ARCH_DIR)/libsaproc.$(LIBRARY_SUFFIX) \ $(EXPORT_LIB_DIR)/sa-jdi.jar ifeq ($(ENABLE_FULL_DEBUG_SYMBOLS),1) ifeq ($(ZIP_DEBUGINFO_FILES),1) - ADD_SA_BINARIES/x86 += $(EXPORT_JRE_LIB_ARCH_DIR)/libsaproc.diz + ADD_SA_BINARIES/x86 += $(EXPORT_LIB_ARCH_DIR)/libsaproc.diz else ifeq ($(OS_VENDOR), Darwin) - ADD_SA_BINARIES/x86 += $(EXPORT_JRE_LIB_ARCH_DIR)/libsaproc.$(LIBRARY_SUFFIX).dSYM + ADD_SA_BINARIES/x86 += $(EXPORT_LIB_ARCH_DIR)/libsaproc.$(LIBRARY_SUFFIX).dSYM else - ADD_SA_BINARIES/x86 += $(EXPORT_JRE_LIB_ARCH_DIR)/libsaproc.debuginfo + ADD_SA_BINARIES/x86 += $(EXPORT_LIB_ARCH_DIR)/libsaproc.debuginfo endif endif endif -ADD_SA_BINARIES/sparc = $(EXPORT_JRE_LIB_ARCH_DIR)/libsaproc.$(LIBRARY_SUFFIX) \ +ADD_SA_BINARIES/sparc = $(EXPORT_LIB_ARCH_DIR)/libsaproc.$(LIBRARY_SUFFIX) \ $(EXPORT_LIB_DIR)/sa-jdi.jar -ADD_SA_BINARIES/universal = $(EXPORT_JRE_LIB_ARCH_DIR)/libsaproc.$(LIBRARY_SUFFIX) \ +ADD_SA_BINARIES/universal = $(EXPORT_LIB_ARCH_DIR)/libsaproc.$(LIBRARY_SUFFIX) \ $(EXPORT_LIB_DIR)/sa-jdi.jar ifeq ($(ENABLE_FULL_DEBUG_SYMBOLS),1) ifeq ($(ZIP_DEBUGINFO_FILES),1) - ADD_SA_BINARIES/universal += $(EXPORT_JRE_LIB_ARCH_DIR)/libsaproc.diz + ADD_SA_BINARIES/universal += $(EXPORT_LIB_ARCH_DIR)/libsaproc.diz else ifeq ($(OS_VENDOR), Darwin) - ADD_SA_BINARIES/universal += $(EXPORT_JRE_LIB_ARCH_DIR)/libsaproc.$(LIBRARY_SUFFIX).dSYM + ADD_SA_BINARIES/universal += $(EXPORT_LIB_ARCH_DIR)/libsaproc.$(LIBRARY_SUFFIX).dSYM else - ADD_SA_BINARIES/universal += $(EXPORT_JRE_LIB_ARCH_DIR)/libsaproc.debuginfo + ADD_SA_BINARIES/universal += $(EXPORT_LIB_ARCH_DIR)/libsaproc.debuginfo endif endif endif @@ -388,25 +388,25 @@ endif # Binaries to 'universalize' if built - UNIVERSAL_LIPO_LIST += $(EXPORT_JRE_LIB_DIR)/libjsig.$(LIBRARY_SUFFIX) - UNIVERSAL_LIPO_LIST += $(EXPORT_JRE_LIB_DIR)/libsaproc.$(LIBRARY_SUFFIX) - UNIVERSAL_LIPO_LIST += $(EXPORT_JRE_LIB_DIR)/server/libjvm.$(LIBRARY_SUFFIX) - UNIVERSAL_LIPO_LIST += 
$(EXPORT_JRE_LIB_DIR)/client/libjvm.$(LIBRARY_SUFFIX) + UNIVERSAL_LIPO_LIST += $(EXPORT_LIB_DIR)/libjsig.$(LIBRARY_SUFFIX) + UNIVERSAL_LIPO_LIST += $(EXPORT_LIB_DIR)/libsaproc.$(LIBRARY_SUFFIX) + UNIVERSAL_LIPO_LIST += $(EXPORT_LIB_DIR)/server/libjvm.$(LIBRARY_SUFFIX) + UNIVERSAL_LIPO_LIST += $(EXPORT_LIB_DIR)/client/libjvm.$(LIBRARY_SUFFIX) # Files to simply copy in place - UNIVERSAL_COPY_LIST += $(EXPORT_JRE_LIB_DIR)/server/Xusage.txt - UNIVERSAL_COPY_LIST += $(EXPORT_JRE_LIB_DIR)/client/Xusage.txt + UNIVERSAL_COPY_LIST += $(EXPORT_LIB_DIR)/server/Xusage.txt + UNIVERSAL_COPY_LIST += $(EXPORT_LIB_DIR)/client/Xusage.txt ifeq ($(ENABLE_FULL_DEBUG_SYMBOLS),1) ifeq ($(ZIP_DEBUGINFO_FILES),1) - UNIVERSAL_COPY_LIST += $(EXPORT_JRE_LIB_DIR)/server/libjvm.diz - UNIVERSAL_COPY_LIST += $(EXPORT_JRE_LIB_DIR)/client/libjvm.diz - UNIVERSAL_COPY_LIST += $(EXPORT_JRE_LIB_DIR)/libjsig.diz - UNIVERSAL_COPY_LIST += $(EXPORT_JRE_LIB_DIR)/libsaproc.diz + UNIVERSAL_COPY_LIST += $(EXPORT_LIB_DIR)/server/libjvm.diz + UNIVERSAL_COPY_LIST += $(EXPORT_LIB_DIR)/client/libjvm.diz + UNIVERSAL_COPY_LIST += $(EXPORT_LIB_DIR)/libjsig.diz + UNIVERSAL_COPY_LIST += $(EXPORT_LIB_DIR)/libsaproc.diz else - UNIVERSAL_COPY_LIST += $(EXPORT_JRE_LIB_DIR)/server/libjvm.$(LIBRARY_SUFFIX).dSYM - UNIVERSAL_COPY_LIST += $(EXPORT_JRE_LIB_DIR)/client/libjvm.$(LIBRARY_SUFFIX).dSYM - UNIVERSAL_COPY_LIST += $(EXPORT_JRE_LIB_DIR)/libjsig.$(LIBRARY_SUFFIX).dSYM - UNIVERSAL_COPY_LIST += $(EXPORT_JRE_LIB_DIR)/libsaproc.$(LIBRARY_SUFFIX).dSYM + UNIVERSAL_COPY_LIST += $(EXPORT_LIB_DIR)/server/libjvm.$(LIBRARY_SUFFIX).dSYM + UNIVERSAL_COPY_LIST += $(EXPORT_LIB_DIR)/client/libjvm.$(LIBRARY_SUFFIX).dSYM + UNIVERSAL_COPY_LIST += $(EXPORT_LIB_DIR)/libjsig.$(LIBRARY_SUFFIX).dSYM + UNIVERSAL_COPY_LIST += $(EXPORT_LIB_DIR)/libsaproc.$(LIBRARY_SUFFIX).dSYM endif endif
--- a/make/bsd/makefiles/gcc.make Thu Apr 30 17:20:25 2015 -0700 +++ b/make/bsd/makefiles/gcc.make Fri May 01 03:56:01 2015 -0700 @@ -313,22 +313,13 @@ # Work around some compiler bugs. ifeq ($(USE_CLANG), true) - # Clang 4.2 - ifeq ($(shell expr $(CC_VER_MAJOR) = 4 \& $(CC_VER_MINOR) = 2), 1) - OPT_CFLAGS/loopTransform.o += $(OPT_CFLAGS/NOOPT) - OPT_CFLAGS/unsafe.o += -O1 - # Clang 5.0 - else ifeq ($(shell expr $(CC_VER_MAJOR) = 5 \& $(CC_VER_MINOR) = 0), 1) + # Clang <= 6.1 + ifeq ($(shell expr \ + $(CC_VER_MAJOR) \< 6 \| \ + \( $(CC_VER_MAJOR) = 6 \& $(CC_VER_MINOR) \<= 1 \) \ + ), 1) OPT_CFLAGS/loopTransform.o += $(OPT_CFLAGS/NOOPT) OPT_CFLAGS/unsafe.o += -O1 - # Clang 5.1 - else ifeq ($(shell expr $(CC_VER_MAJOR) = 5 \& $(CC_VER_MINOR) = 1), 1) - OPT_CFLAGS/loopTransform.o += $(OPT_CFLAGS/NOOPT) - OPT_CFLAGS/unsafe.o += -O1 - # Clang 6.0 - else ifeq ($(shell expr $(CC_VER_MAJOR) = 6 \& $(CC_VER_MINOR) = 0), 1) - OPT_CFLAGS/loopTransform.o += $(OPT_CFLAGS/NOOPT) - OPT_CFLAGS/unsafe.o += -O1 else $(error "Update compiler workarounds for Clang $(CC_VER_MAJOR).$(CC_VER_MINOR)") endif @@ -336,7 +327,7 @@ # 6835796. Problem in GCC 4.3.0 with mulnode.o optimized compilation. ifeq ($(shell expr $(CC_VER_MAJOR) = 4 \& $(CC_VER_MINOR) = 3), 1) OPT_CFLAGS/mulnode.o += $(OPT_CFLAGS/NOOPT) - endif + endif endif # Flags for generating make dependency flags.
--- a/make/bsd/makefiles/mapfile-vers-darwin-debug Thu Apr 30 17:20:25 2015 -0700 +++ b/make/bsd/makefiles/mapfile-vers-darwin-debug Fri May 01 03:56:01 2015 -0700 @@ -105,6 +105,7 @@ _JVM_GetClassTypeAnnotations _JVM_GetDeclaredClasses _JVM_GetDeclaringClass + _JVM_GetSimpleBinaryName _JVM_GetEnclosingMethodInfo _JVM_GetFieldIxModifiers _JVM_GetFieldTypeAnnotations
--- a/make/bsd/makefiles/mapfile-vers-darwin-product Thu Apr 30 17:20:25 2015 -0700 +++ b/make/bsd/makefiles/mapfile-vers-darwin-product Fri May 01 03:56:01 2015 -0700 @@ -105,6 +105,7 @@ _JVM_GetClassTypeAnnotations _JVM_GetDeclaredClasses _JVM_GetDeclaringClass + _JVM_GetSimpleBinaryName _JVM_GetEnclosingMethodInfo _JVM_GetFieldIxModifiers _JVM_GetFieldTypeAnnotations
--- a/make/bsd/makefiles/mapfile-vers-debug Thu Apr 30 17:20:25 2015 -0700 +++ b/make/bsd/makefiles/mapfile-vers-debug Fri May 01 03:56:01 2015 -0700 @@ -19,7 +19,7 @@ # Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA # or visit www.oracle.com if you need additional information or have any # questions. -# +# # # Define public interface. @@ -107,6 +107,7 @@ JVM_GetClassTypeAnnotations; JVM_GetDeclaredClasses; JVM_GetDeclaringClass; + JVM_GetSimpleBinaryName; JVM_GetEnclosingMethodInfo; JVM_GetFieldIxModifiers; JVM_GetFieldTypeAnnotations;
--- a/make/bsd/makefiles/mapfile-vers-product Thu Apr 30 17:20:25 2015 -0700 +++ b/make/bsd/makefiles/mapfile-vers-product Fri May 01 03:56:01 2015 -0700 @@ -19,7 +19,7 @@ # Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA # or visit www.oracle.com if you need additional information or have any # questions. -# +# # # Define public interface. @@ -107,6 +107,7 @@ JVM_GetClassTypeAnnotations; JVM_GetDeclaredClasses; JVM_GetDeclaringClass; + JVM_GetSimpleBinaryName; JVM_GetEnclosingMethodInfo; JVM_GetFieldIxModifiers; JVM_GetFieldTypeAnnotations;
--- a/make/bsd/makefiles/universal.gmk Thu Apr 30 17:20:25 2015 -0700 +++ b/make/bsd/makefiles/universal.gmk Fri May 01 03:56:01 2015 -0700 @@ -54,12 +54,12 @@ # Consolidate architecture builds into a single Universal binary universalize: $(UNIVERSAL_LIPO_LIST) $(UNIVERSAL_COPY_LIST) - $(RM) -r $(EXPORT_PATH)/jre/lib/{i386,amd64} + $(RM) -r $(EXPORT_PATH)/lib/{i386,amd64} # Package built libraries in a universal binary $(UNIVERSAL_LIPO_LIST): - BUILT_LIPO_FILES="`find $(EXPORT_JRE_LIB_DIR)/{i386,amd64}/$(subst $(EXPORT_JRE_LIB_DIR)/,,$@) 2>/dev/null`" || test $$? = "1"; \ + BUILT_LIPO_FILES="`find $(EXPORT_LIB_DIR)/{i386,amd64}/$(subst $(EXPORT_LIB_DIR)/,,$@) 2>/dev/null`" || test $$? = "1"; \ if [ -n "$${BUILT_LIPO_FILES}" ]; then \ $(MKDIR) -p $(shell dirname $@); \ lipo -create -output $@ $${BUILT_LIPO_FILES}; \ @@ -70,7 +70,7 @@ # - copies directories; including empty dirs # - copies files, symlinks, other non-directory files $(UNIVERSAL_COPY_LIST): - BUILT_COPY_FILES="`find $(EXPORT_JRE_LIB_DIR)/{i386,amd64}/$(subst $(EXPORT_JRE_LIB_DIR)/,,$@) -prune 2>/dev/null`" || test $$? = "1"; \ + BUILT_COPY_FILES="`find $(EXPORT_LIB_DIR)/{i386,amd64}/$(subst $(EXPORT_LIB_DIR)/,,$@) -prune 2>/dev/null`" || test $$? = "1"; \ if [ -n "$${BUILT_COPY_FILES}" ]; then \ for i in $${BUILT_COPY_FILES}; do \ $(MKDIR) -p $(shell dirname $@); \ @@ -80,21 +80,21 @@ # Replace arch specific binaries with universal binaries -# Do not touch jre/lib/{client,server}/libjsig.$(LIBRARY_SUFFIX) +# Do not touch lib/{client,server}/libjsig.$(LIBRARY_SUFFIX) # That symbolic link belongs to the 'jdk' build. export_universal: - $(RM) -r $(EXPORT_PATH)/jre/lib/{i386,amd64} - $(RM) -r $(JDK_IMAGE_DIR)/jre/lib/{i386,amd64} + $(RM) -r $(EXPORT_PATH)/lib/{i386,amd64} + $(RM) -r $(JDK_IMAGE_DIR)/lib/{i386,amd64} ($(CD) $(EXPORT_PATH) && \ $(TAR) -cf - *) | \ ($(CD) $(JDK_IMAGE_DIR) && $(TAR) -xpf -) # Overlay universal binaries -# Do not touch jre/lib/{client,server}/libjsig.$(LIBRARY_SUFFIX) +# Do not touch lib/{client,server}/libjsig.$(LIBRARY_SUFFIX) # That symbolic link belongs to the 'jdk' build. copy_universal: - $(RM) -r $(JDK_IMAGE_DIR)$(COPY_SUBDIR)/jre/lib/{i386,amd64} + $(RM) -r $(JDK_IMAGE_DIR)$(COPY_SUBDIR)/lib/{i386,amd64} ($(CD) $(EXPORT_PATH)$(COPY_SUBDIR) && \ $(TAR) -cf - *) | \ ($(CD) $(JDK_IMAGE_DIR)$(COPY_SUBDIR) && $(TAR) -xpf -)
--- a/make/build.sh Thu Apr 30 17:20:25 2015 -0700 +++ b/make/build.sh Fri May 01 03:56:01 2015 -0700 @@ -40,7 +40,7 @@ exit 1 fi -if [ "${JAVA_HOME-}" = "" -o ! -d "${JAVA_HOME-}" -o ! -d ${JAVA_HOME-}/jre/lib/ ]; then +if [ "${JAVA_HOME-}" = "" -o ! -d "${JAVA_HOME-}" ]; then echo "JAVA_HOME needs to be set to a valid JDK path" echo "JAVA_HOME: ${JAVA_HOME-}" exit 1
--- a/make/defs.make Thu Apr 30 17:20:25 2015 -0700 +++ b/make/defs.make Fri May 01 03:56:01 2015 -0700 @@ -350,15 +350,13 @@ EXPORT_INCLUDE_DIR = $(EXPORT_PATH)/include EXPORT_DOCS_DIR = $(EXPORT_PATH)/docs EXPORT_LIB_DIR = $(EXPORT_PATH)/lib -EXPORT_JRE_DIR = $(EXPORT_PATH)/jre -EXPORT_JRE_BIN_DIR = $(EXPORT_JRE_DIR)/bin -EXPORT_JRE_LIB_DIR = $(EXPORT_JRE_DIR)/lib -EXPORT_JRE_LIB_ARCH_DIR = $(EXPORT_JRE_LIB_DIR)/$(LIBARCH) +EXPORT_BIN_DIR = $(EXPORT_PATH)/bin +EXPORT_LIB_ARCH_DIR = $(EXPORT_LIB_DIR)/$(LIBARCH) # non-universal macosx builds need to appear universal ifeq ($(OS_VENDOR), Darwin) ifneq ($(MACOSX_UNIVERSAL), true) - EXPORT_JRE_LIB_ARCH_DIR = $(EXPORT_JRE_LIB_DIR) + EXPORT_LIB_ARCH_DIR = $(EXPORT_LIB_DIR) endif endif @@ -370,4 +368,3 @@ EXPORT_LIST += $(EXPORT_INCLUDE_DIR)/jmm.h .PHONY: $(HS_ALT_MAKE)/defs.make -
--- a/make/linux/makefiles/defs.make Thu Apr 30 17:20:25 2015 -0700 +++ b/make/linux/makefiles/defs.make Fri May 01 03:56:01 2015 -0700 @@ -244,17 +244,17 @@ EXPORT_LIST += $(EXPORT_DOCS_DIR)/platform/jvmti/jvmti.html # client and server subdirectories have symbolic links to ../libjsig.so -EXPORT_LIST += $(EXPORT_JRE_LIB_ARCH_DIR)/libjsig.$(LIBRARY_SUFFIX) +EXPORT_LIST += $(EXPORT_LIB_ARCH_DIR)/libjsig.$(LIBRARY_SUFFIX) ifeq ($(ENABLE_FULL_DEBUG_SYMBOLS),1) ifeq ($(ZIP_DEBUGINFO_FILES),1) - EXPORT_LIST += $(EXPORT_JRE_LIB_ARCH_DIR)/libjsig.diz + EXPORT_LIST += $(EXPORT_LIB_ARCH_DIR)/libjsig.diz else - EXPORT_LIST += $(EXPORT_JRE_LIB_ARCH_DIR)/libjsig.debuginfo + EXPORT_LIST += $(EXPORT_LIB_ARCH_DIR)/libjsig.debuginfo endif endif -EXPORT_SERVER_DIR = $(EXPORT_JRE_LIB_ARCH_DIR)/server -EXPORT_CLIENT_DIR = $(EXPORT_JRE_LIB_ARCH_DIR)/client -EXPORT_MINIMAL_DIR = $(EXPORT_JRE_LIB_ARCH_DIR)/minimal +EXPORT_SERVER_DIR = $(EXPORT_LIB_ARCH_DIR)/server +EXPORT_CLIENT_DIR = $(EXPORT_LIB_ARCH_DIR)/client +EXPORT_MINIMAL_DIR = $(EXPORT_LIB_ARCH_DIR)/minimal ifeq ($(findstring true, $(JVM_VARIANT_SERVER) $(JVM_VARIANT_ZERO) $(JVM_VARIANT_ZEROSHARK) $(JVM_VARIANT_CORE)), true) EXPORT_LIST += $(EXPORT_SERVER_DIR)/Xusage.txt @@ -295,14 +295,14 @@ # Serviceability Binaries -ADD_SA_BINARIES/DEFAULT = $(EXPORT_JRE_LIB_ARCH_DIR)/libsaproc.$(LIBRARY_SUFFIX) \ +ADD_SA_BINARIES/DEFAULT = $(EXPORT_LIB_ARCH_DIR)/libsaproc.$(LIBRARY_SUFFIX) \ $(EXPORT_LIB_DIR)/sa-jdi.jar ifeq ($(ENABLE_FULL_DEBUG_SYMBOLS),1) ifeq ($(ZIP_DEBUGINFO_FILES),1) - ADD_SA_BINARIES/DEFAULT += $(EXPORT_JRE_LIB_ARCH_DIR)/libsaproc.diz + ADD_SA_BINARIES/DEFAULT += $(EXPORT_LIB_ARCH_DIR)/libsaproc.diz else - ADD_SA_BINARIES/DEFAULT += $(EXPORT_JRE_LIB_ARCH_DIR)/libsaproc.debuginfo + ADD_SA_BINARIES/DEFAULT += $(EXPORT_LIB_ARCH_DIR)/libsaproc.debuginfo endif endif
--- a/make/linux/makefiles/mapfile-vers-debug Thu Apr 30 17:20:25 2015 -0700 +++ b/make/linux/makefiles/mapfile-vers-debug Fri May 01 03:56:01 2015 -0700 @@ -19,7 +19,7 @@ # Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA # or visit www.oracle.com if you need additional information or have any # questions. -# +# # # Define public interface. @@ -107,6 +107,7 @@ JVM_GetClassTypeAnnotations; JVM_GetDeclaredClasses; JVM_GetDeclaringClass; + JVM_GetSimpleBinaryName; JVM_GetEnclosingMethodInfo; JVM_GetFieldIxModifiers; JVM_GetFieldTypeAnnotations;
--- a/make/linux/makefiles/mapfile-vers-product Thu Apr 30 17:20:25 2015 -0700 +++ b/make/linux/makefiles/mapfile-vers-product Fri May 01 03:56:01 2015 -0700 @@ -19,7 +19,7 @@ # Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA # or visit www.oracle.com if you need additional information or have any # questions. -# +# # # Define public interface. @@ -107,6 +107,7 @@ JVM_GetClassTypeAnnotations; JVM_GetDeclaredClasses; JVM_GetDeclaringClass; + JVM_GetSimpleBinaryName; JVM_GetEnclosingMethodInfo; JVM_GetFieldIxModifiers; JVM_GetFieldTypeAnnotations;
--- a/make/linux/makefiles/vm.make Thu Apr 30 17:20:25 2015 -0700 +++ b/make/linux/makefiles/vm.make Fri May 01 03:56:01 2015 -0700 @@ -127,7 +127,7 @@ # By default, link the *.o into the library, not the executable. LINK_INTO$(LINK_INTO) = LIBJVM -JDK_LIBDIR = $(JAVA_HOME)/jre/lib/$(LIBARCH) +JDK_LIBDIR = $(JAVA_HOME)/lib/$(LIBARCH) #---------------------------------------------------------------------- # jvm_db & dtrace
--- a/make/solaris/makefiles/defs.make Thu Apr 30 17:20:25 2015 -0700 +++ b/make/solaris/makefiles/defs.make Fri May 01 03:56:01 2015 -0700 @@ -224,17 +224,17 @@ EXPORT_LIST += $(EXPORT_DOCS_DIR)/platform/jvmti/jvmti.html # client and server subdirectories have symbolic links to ../libjsig.$(LIBRARY_SUFFIX) -EXPORT_LIST += $(EXPORT_JRE_LIB_ARCH_DIR)/libjsig.$(LIBRARY_SUFFIX) +EXPORT_LIST += $(EXPORT_LIB_ARCH_DIR)/libjsig.$(LIBRARY_SUFFIX) ifeq ($(ENABLE_FULL_DEBUG_SYMBOLS),1) ifeq ($(ZIP_DEBUGINFO_FILES),1) - EXPORT_LIST += $(EXPORT_JRE_LIB_ARCH_DIR)/libjsig.diz + EXPORT_LIST += $(EXPORT_LIB_ARCH_DIR)/libjsig.diz else - EXPORT_LIST += $(EXPORT_JRE_LIB_ARCH_DIR)/libjsig.debuginfo + EXPORT_LIST += $(EXPORT_LIB_ARCH_DIR)/libjsig.debuginfo endif endif -EXPORT_SERVER_DIR = $(EXPORT_JRE_LIB_ARCH_DIR)/server -EXPORT_CLIENT_DIR = $(EXPORT_JRE_LIB_ARCH_DIR)/client +EXPORT_SERVER_DIR = $(EXPORT_LIB_ARCH_DIR)/server +EXPORT_CLIENT_DIR = $(EXPORT_LIB_ARCH_DIR)/client ifeq ($(JVM_VARIANT_SERVER),true) EXPORT_LIST += $(EXPORT_SERVER_DIR)/Xusage.txt @@ -295,12 +295,12 @@ endif endif -EXPORT_LIST += $(EXPORT_JRE_LIB_ARCH_DIR)/libsaproc.$(LIBRARY_SUFFIX) +EXPORT_LIST += $(EXPORT_LIB_ARCH_DIR)/libsaproc.$(LIBRARY_SUFFIX) ifeq ($(ENABLE_FULL_DEBUG_SYMBOLS),1) ifeq ($(ZIP_DEBUGINFO_FILES),1) - EXPORT_LIST += $(EXPORT_JRE_LIB_ARCH_DIR)/libsaproc.diz + EXPORT_LIST += $(EXPORT_LIB_ARCH_DIR)/libsaproc.diz else - EXPORT_LIST += $(EXPORT_JRE_LIB_ARCH_DIR)/libsaproc.debuginfo + EXPORT_LIST += $(EXPORT_LIB_ARCH_DIR)/libsaproc.debuginfo endif endif EXPORT_LIST += $(EXPORT_LIB_DIR)/sa-jdi.jar
--- a/make/solaris/makefiles/dtrace.make Thu Apr 30 17:20:25 2015 -0700 +++ b/make/solaris/makefiles/dtrace.make Fri May 01 03:56:01 2015 -0700 @@ -130,8 +130,9 @@ $(XLIBJVM_DTRACE): $(DTRACE_SRCDIR)/$(JVM_DTRACE).c $(DTRACE_SRCDIR)/$(JVM_DTRACE).h $(LIBJVM_DTRACE_MAPFILE) @echo $(LOG_INFO) Making $@ $(QUIETLY) mkdir -p $(XLIBJVM_DIR) ; \ - $(CC) $(SYMFLAG) $(ARCHFLAG/$(ISA)) -D$(TYPE) -I. \ - $(SHARED_FLAG) $(LFLAGS_JVM_DTRACE) -o $@ $(DTRACE_SRCDIR)/$(JVM_DTRACE).c -lc -lthread -ldoor + $(CC) $(SYMFLAG) $(ARCHFLAG/$(ISA)) -D$(TYPE) -I. $(EXTRA_CFLAGS) \ + $(SHARED_FLAG) $(LFLAGS_JVM_DTRACE) -o $@ $(DTRACE_SRCDIR)/$(JVM_DTRACE).c \ + $(EXTRA_LDFLAGS) -lc -lthread -ldoor ifeq ($(ENABLE_FULL_DEBUG_SYMBOLS),1) $(QUIETLY) $(OBJCOPY) --only-keep-debug $@ $(XLIBJVM_DTRACE_DEBUGINFO) # Do this part in the $(XLIBJVM_DIR) subdir so $(XLIBJVM_DIR) is not @@ -216,8 +217,9 @@ $(LIBJVM_DTRACE): $(DTRACE_SRCDIR)/$(JVM_DTRACE).c $(XLIBJVM_DTRACE) $(DTRACE_SRCDIR)/$(JVM_DTRACE).h $(LIBJVM_DTRACE_MAPFILE) @echo $(LOG_INFO) Making $@ - $(QUIETLY) $(CC) $(SYMFLAG) $(ARCHFLAG) -D$(TYPE) -I. \ - $(SHARED_FLAG) $(LFLAGS_JVM_DTRACE) -o $@ $(DTRACE_SRCDIR)/$(JVM_DTRACE).c -lc -lthread -ldoor + $(QUIETLY) $(CC) $(SYMFLAG) $(ARCHFLAG) -D$(TYPE) -I. $(EXTRA_CFLAGS) \ + $(SHARED_FLAG) $(LFLAGS_JVM_DTRACE) -o $@ $(DTRACE_SRCDIR)/$(JVM_DTRACE).c \ + $(EXTRA_LDFLAGS) -lc -lthread -ldoor ifeq ($(ENABLE_FULL_DEBUG_SYMBOLS),1) $(QUIETLY) $(OBJCOPY) --only-keep-debug $@ $(LIBJVM_DTRACE_DEBUGINFO) $(QUIETLY) $(OBJCOPY) --add-gnu-debuglink=$(LIBJVM_DTRACE_DEBUGINFO) $@
--- a/make/solaris/makefiles/jsig.make Thu Apr 30 17:20:25 2015 -0700 +++ b/make/solaris/makefiles/jsig.make Fri May 01 03:56:01 2015 -0700 @@ -50,7 +50,9 @@ $(LIBJSIG): $(JSIGSRCDIR)/jsig.c $(LIBJSIG_MAPFILE) @echo $(LOG_INFO) Making signal interposition lib... $(QUIETLY) $(CC) $(SYMFLAG) $(ARCHFLAG) $(SHARED_FLAG) $(PICFLAG) \ - $(LFLAGS_JSIG) -o $@ $(JSIGSRCDIR)/jsig.c -ldl + $(EXTRA_CFLAGS) \ + $(LFLAGS_JSIG) $(EXTRA_LDFLAGS) \ + -o $@ $(JSIGSRCDIR)/jsig.c -ldl ifeq ($(ENABLE_FULL_DEBUG_SYMBOLS),1) $(QUIETLY) $(OBJCOPY) --only-keep-debug $@ $(LIBJSIG_DEBUGINFO) $(QUIETLY) $(OBJCOPY) --add-gnu-debuglink=$(LIBJSIG_DEBUGINFO) $@
--- a/make/solaris/makefiles/mapfile-vers Thu Apr 30 17:20:25 2015 -0700 +++ b/make/solaris/makefiles/mapfile-vers Fri May 01 03:56:01 2015 -0700 @@ -19,7 +19,7 @@ # Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA # or visit www.oracle.com if you need additional information or have any # questions. -# +# # # Define public interface. @@ -30,7 +30,7 @@ JNI_CreateJavaVM; JNI_GetCreatedJavaVMs; JNI_GetDefaultJavaVMInitArgs; - + # JVM JVM_ActiveProcessorCount; JVM_ArrayCopy; @@ -107,6 +107,7 @@ JVM_GetClassTypeAnnotations; JVM_GetDeclaredClasses; JVM_GetDeclaringClass; + JVM_GetSimpleBinaryName; JVM_GetEnclosingMethodInfo; JVM_GetFieldIxModifiers; JVM_GetFieldTypeAnnotations;
--- a/make/solaris/makefiles/product.make Thu Apr 30 17:20:25 2015 -0700 +++ b/make/solaris/makefiles/product.make Fri May 01 03:56:01 2015 -0700 @@ -37,6 +37,11 @@ OPT_CFLAGS/ciEnv.o = $(OPT_CFLAGS) -xinline=no%__1cFciEnvbFpost_compiled_method_load_event6MpnHnmethod__v_ endif +# Need extra inlining to get oop_ps_push_contents functions to perform well enough. +ifndef USE_GCC +OPT_CFLAGS/psPromotionManager.o = $(OPT_CFLAGS) -W2,-Ainline:inc=1000 +endif + # (OPT_CFLAGS/SLOWER is also available, to alter compilation of buggy files) ifeq ("${Platform_compiler}", "sparcWorks")
--- a/make/solaris/makefiles/saproc.make Thu Apr 30 17:20:25 2015 -0700 +++ b/make/solaris/makefiles/saproc.make Fri May 01 03:56:01 2015 -0700 @@ -89,6 +89,17 @@ # when actually building on Nevada-B158 or earlier: #SOLARIS_11_B159_OR_LATER=-DSOLARIS_11_B159_OR_LATER +$(SADISOBJ): $(SADISSRCFILES) + $(QUIETLY) $(CC) \ + $(SYMFLAG) $(ARCHFLAG) $(SHARED_FLAG) $(PICFLAG) \ + -I$(SASRCDIR) \ + -I$(GENERATED) \ + -I$(BOOT_JAVA_HOME)/include \ + -I$(BOOT_JAVA_HOME)/include/$(Platform_os_family) \ + $(SOLARIS_11_B159_OR_LATER) \ + $(EXTRA_CFLAGS) \ + $(SADISSRCFILES) \ + -c -o $(SADISOBJ) $(LIBSAPROC): $(SASRCFILES) $(SADISOBJ) $(SAMAPFILE) $(QUIETLY) if [ "$(BOOT_JAVA_HOME)" = "" ]; then \ @@ -103,23 +114,13 @@ -I$(BOOT_JAVA_HOME)/include \ -I$(BOOT_JAVA_HOME)/include/$(Platform_os_family) \ $(SOLARIS_11_B159_OR_LATER) \ + $(EXTRA_CXXFLAGS) $(EXTRA_LDFLAGS) \ + $(SADISOBJ) \ $(SASRCFILES) \ - $(SADISOBJ) \ $(SA_LFLAGS) \ -o $@ \ -ldl -ldemangle -lthread -lc -$(SADISOBJ): $(SADISSRCFILES) - $(QUIETLY) $(CC) \ - $(SYMFLAG) $(ARCHFLAG) $(SHARED_FLAG) $(PICFLAG) \ - -I$(SASRCDIR) \ - -I$(GENERATED) \ - -I$(BOOT_JAVA_HOME)/include \ - -I$(BOOT_JAVA_HOME)/include/$(Platform_os_family) \ - $(SOLARIS_11_B159_OR_LATER) \ - $(SADISSRCFILES) \ - -c -o $(SADISOBJ) - ifeq ($(ENABLE_FULL_DEBUG_SYMBOLS),1) $(QUIETLY) $(OBJCOPY) --only-keep-debug $@ $(LIBSAPROC_DEBUGINFO) $(QUIETLY) $(OBJCOPY) --add-gnu-debuglink=$(LIBSAPROC_DEBUGINFO) $@
--- a/make/solaris/makefiles/vm.make Thu Apr 30 17:20:25 2015 -0700 +++ b/make/solaris/makefiles/vm.make Fri May 01 03:56:01 2015 -0700 @@ -148,7 +148,7 @@ # By default, link the *.o into the library, not the executable. LINK_INTO$(LINK_INTO) = LIBJVM -JDK_LIBDIR = $(JAVA_HOME)/jre/lib/$(LIBARCH) +JDK_LIBDIR = $(JAVA_HOME)/lib/$(LIBARCH) #---------------------------------------------------------------------- # jvm_db & dtrace @@ -288,6 +288,8 @@ endif endif +LFLAGS_VM += $(EXTRA_LDFLAGS) + ifdef USE_GCC LINK_VM = $(LINK_LIB.CC) else
--- a/make/windows/makefiles/defs.make Thu Apr 30 17:20:25 2015 -0700 +++ b/make/windows/makefiles/defs.make Fri May 01 03:56:01 2015 -0700 @@ -249,8 +249,8 @@ endif endif -EXPORT_SERVER_DIR = $(EXPORT_JRE_BIN_DIR)/server -EXPORT_CLIENT_DIR = $(EXPORT_JRE_BIN_DIR)/client +EXPORT_SERVER_DIR = $(EXPORT_BIN_DIR)/server +EXPORT_CLIENT_DIR = $(EXPORT_BIN_DIR)/client ifeq ($(JVM_VARIANT_SERVER),true) EXPORT_LIST += $(EXPORT_SERVER_DIR)/Xusage.txt @@ -280,13 +280,13 @@ EXPORT_LIST += $(EXPORT_LIB_DIR)/jvm.lib ifeq ($(BUILD_WIN_SA), 1) - EXPORT_LIST += $(EXPORT_JRE_BIN_DIR)/sawindbg.$(LIBRARY_SUFFIX) + EXPORT_LIST += $(EXPORT_BIN_DIR)/sawindbg.$(LIBRARY_SUFFIX) ifeq ($(ENABLE_FULL_DEBUG_SYMBOLS),1) ifeq ($(ZIP_DEBUGINFO_FILES),1) - EXPORT_LIST += $(EXPORT_JRE_BIN_DIR)/sawindbg.diz + EXPORT_LIST += $(EXPORT_BIN_DIR)/sawindbg.diz else - EXPORT_LIST += $(EXPORT_JRE_BIN_DIR)/sawindbg.pdb - EXPORT_LIST += $(EXPORT_JRE_BIN_DIR)/sawindbg.map + EXPORT_LIST += $(EXPORT_BIN_DIR)/sawindbg.pdb + EXPORT_LIST += $(EXPORT_BIN_DIR)/sawindbg.map endif endif EXPORT_LIST += $(EXPORT_LIB_DIR)/sa-jdi.jar
--- a/make/windows/makefiles/sa.make Thu Apr 30 17:20:25 2015 -0700 +++ b/make/windows/makefiles/sa.make Fri May 01 03:56:01 2015 -0700 @@ -91,6 +91,9 @@ SAWINDBG=sawindbg.dll +# Resource file containing VERSIONINFO +SA_Res_Files=.\version.sares + checkAndBuildSA:: $(SAWINDBG) # These do not need to be optimized (don't run a lot of code) and it @@ -126,10 +129,13 @@ # Note that we do not keep sawindbj.obj around as it would then # get included in the dumpbin command in build_vm_def.sh +# Force resources to be rebuilt every time +$(SA_Res_Files): FORCE + # In VS2005 or VS2008 the link command creates a .manifest file that we want # to insert into the linked artifact so we do not need to track it separately. # Use ";#2" for .dll and ";#1" for .exe in the MT command below: -$(SAWINDBG): $(SASRCFILES) +$(SAWINDBG): $(SASRCFILES) $(SA_Res_Files) set INCLUDE=$(SA_INCLUDE)$(INCLUDE) $(CXX) @<< -I"$(BootStrapDir)/include" -I"$(BootStrapDir)/include/win32" @@ -138,7 +144,7 @@ -out:$*.obj << set LIB=$(SA_LIB)$(LIB) - $(LD) -out:$@ -DLL sawindbg.obj sadis.obj dbgeng.lib $(SA_LFLAGS) + $(LD) -out:$@ -DLL sawindbg.obj sadis.obj dbgeng.lib $(SA_LFLAGS) $(SA_Res_Files) !if "$(MT)" != "" $(MT) -manifest $(@F).manifest -outputresource:$(@F);#2 !endif @@ -150,6 +156,9 @@ !endif -@rm -f $*.obj +{$(COMMONSRC)\os\windows\vm}.rc.sares: + @$(RC) $(RC_FLAGS) /D "HS_FNAME=$(SAWINDBG)" /fo"$@" $< + cleanall : rm -rf $(GENERATED)/saclasses rm -rf $(GENERATED)/sa-jdi.jar
--- a/src/cpu/aarch64/vm/aarch64.ad Thu Apr 30 17:20:25 2015 -0700 +++ b/src/cpu/aarch64/vm/aarch64.ad Fri May 01 03:56:01 2015 -0700 @@ -2614,6 +2614,8 @@ case INDINDEXSCALEDI2L: case INDINDEXSCALEDOFFSETI2LN: case INDINDEXSCALEDI2LN: + case INDINDEXOFFSETI2L: + case INDINDEXOFFSETI2LN: scale = Address::sxtw(size); break; default: @@ -5060,6 +5062,20 @@ %} %} +operand indIndexOffsetI2L(iRegP reg, iRegI ireg, immLU12 off) +%{ + constraint(ALLOC_IN_RC(ptr_reg)); + match(AddP (AddP reg (ConvI2L ireg)) off); + op_cost(INSN_COST); + format %{ "$reg, $ireg, $off I2L" %} + interface(MEMORY_INTER) %{ + base($reg); + index($ireg); + scale(0x0); + disp($off); + %} +%} + operand indIndexScaledOffsetI2L(iRegP reg, iRegI ireg, immIScale scale, immLU12 off) %{ constraint(ALLOC_IN_RC(ptr_reg)); @@ -5120,7 +5136,7 @@ %{ constraint(ALLOC_IN_RC(ptr_reg)); match(AddP reg off); - op_cost(INSN_COST); + op_cost(0); format %{ "[$reg, $off]" %} interface(MEMORY_INTER) %{ base($reg); @@ -5190,6 +5206,21 @@ %} %} +operand indIndexOffsetI2LN(iRegN reg, iRegI ireg, immLU12 off) +%{ + predicate(Universe::narrow_oop_shift() == 0); + constraint(ALLOC_IN_RC(ptr_reg)); + match(AddP (AddP (DecodeN reg) (ConvI2L ireg)) off); + op_cost(INSN_COST); + format %{ "$reg, $ireg, $off I2L\t# narrow" %} + interface(MEMORY_INTER) %{ + base($reg); + index($ireg); + scale(0x0); + disp($off); + %} +%} + operand indIndexScaledOffsetI2LN(iRegN reg, iRegI ireg, immIScale scale, immLU12 off) %{ predicate(Universe::narrow_oop_shift() == 0); @@ -5452,8 +5483,8 @@ // memory is used to define read/write location for load/store // instruction defs. we can turn a memory op into an Address -opclass memory(indirect, indIndexScaledOffsetI, indIndexScaledOffsetL, indIndexScaledOffsetI2L, indIndexScaled, indIndexScaledI2L, indIndex, indOffI, indOffL, - indirectN, indIndexScaledOffsetIN, indIndexScaledOffsetLN, indIndexScaledOffsetI2LN, indIndexScaledN, indIndexScaledI2LN, indIndexN, indOffIN, indOffLN); +opclass memory(indirect, indIndexScaledOffsetI, indIndexScaledOffsetL, indIndexOffsetI2L, indIndexScaledOffsetI2L, indIndexScaled, indIndexScaledI2L, indIndex, indOffI, indOffL, + indirectN, indIndexScaledOffsetIN, indIndexScaledOffsetLN, indIndexOffsetI2LN, indIndexScaledOffsetI2LN, indIndexScaledN, indIndexScaledI2LN, indIndexN, indOffIN, indOffLN); // iRegIorL2I is used for src inputs in rules for 32 bit int (I) @@ -8346,7 +8377,7 @@ instruct addP_reg_reg_ext(iRegPNoSp dst, iRegP src1, iRegIorL2I src2) %{ match(Set dst (AddP src1 (ConvI2L src2))); - ins_cost(INSN_COST); + ins_cost(1.9 * INSN_COST); format %{ "add $dst, $src1, $src2, sxtw\t# ptr" %} ins_encode %{
--- a/src/cpu/aarch64/vm/assembler_aarch64.hpp Thu Apr 30 17:20:25 2015 -0700 +++ b/src/cpu/aarch64/vm/assembler_aarch64.hpp Fri May 01 03:56:01 2015 -0700 @@ -1,6 +1,6 @@ /* * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2014, 2015, Red Hat Inc. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -1469,7 +1469,7 @@ f(op, 31, 29); f(0b11010000, 28, 21); f(0b000000, 15, 10); - rf(Rm, 16), rf(Rn, 5), rf(Rd, 0); + zrf(Rm, 16), zrf(Rn, 5), zrf(Rd, 0); } #define INSN(NAME, op) \
--- a/src/cpu/aarch64/vm/globals_aarch64.hpp Thu Apr 30 17:20:25 2015 -0700 +++ b/src/cpu/aarch64/vm/globals_aarch64.hpp Fri May 01 03:56:01 2015 -0700 @@ -68,6 +68,8 @@ define_pd_global(bool, UseMembar, true); +define_pd_global(bool, PreserveFramePointer, false); + // GC Ergo Flags define_pd_global(uintx, CMSYoungGenPerWorker, 64*M); // default max size of CMS young gen, per GC worker thread
--- a/src/cpu/aarch64/vm/interp_masm_aarch64.hpp Thu Apr 30 17:20:25 2015 -0700 +++ b/src/cpu/aarch64/vm/interp_masm_aarch64.hpp Fri May 01 03:56:01 2015 -0700 @@ -1,6 +1,6 @@ /* * Copyright (c) 2003, 2011, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2014, 2015, Red Hat Inc. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -42,6 +42,8 @@ using MacroAssembler::call_VM_leaf_base; // Interpreter specific version of call_VM_base + using MacroAssembler::call_VM_leaf_base; + virtual void call_VM_leaf_base(address entry_point, int number_of_arguments);
--- a/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp Thu Apr 30 17:20:25 2015 -0700 +++ b/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp Fri May 01 03:56:01 2015 -0700 @@ -1,6 +1,6 @@ /* - * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, 2015, Red Hat Inc. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -2238,6 +2238,341 @@ } /** + * Helpers for multiply_to_len(). + */ +void MacroAssembler::add2_with_carry(Register final_dest_hi, Register dest_hi, Register dest_lo, + Register src1, Register src2) { + adds(dest_lo, dest_lo, src1); + adc(dest_hi, dest_hi, zr); + adds(dest_lo, dest_lo, src2); + adc(final_dest_hi, dest_hi, zr); +} + +// Generate an address from (r + r1 extend offset). "size" is the +// size of the operand. The result may be in rscratch2. +Address MacroAssembler::offsetted_address(Register r, Register r1, + Address::extend ext, int offset, int size) { + if (offset || (ext.shift() % size != 0)) { + lea(rscratch2, Address(r, r1, ext)); + return Address(rscratch2, offset); + } else { + return Address(r, r1, ext); + } +} + +/** + * Multiply 64 bit by 64 bit first loop. + */ +void MacroAssembler::multiply_64_x_64_loop(Register x, Register xstart, Register x_xstart, + Register y, Register y_idx, Register z, + Register carry, Register product, + Register idx, Register kdx) { + // + // jlong carry, x[], y[], z[]; + // for (int idx=ystart, kdx=ystart+1+xstart; idx >= 0; idx-, kdx--) { + // huge_128 product = y[idx] * x[xstart] + carry; + // z[kdx] = (jlong)product; + // carry = (jlong)(product >>> 64); + // } + // z[xstart] = carry; + // + + Label L_first_loop, L_first_loop_exit; + Label L_one_x, L_one_y, L_multiply; + + subsw(xstart, xstart, 1); + br(Assembler::MI, L_one_x); + + lea(rscratch1, Address(x, xstart, Address::lsl(LogBytesPerInt))); + ldr(x_xstart, Address(rscratch1)); + ror(x_xstart, x_xstart, 32); // convert big-endian to little-endian + + bind(L_first_loop); + subsw(idx, idx, 1); + br(Assembler::MI, L_first_loop_exit); + subsw(idx, idx, 1); + br(Assembler::MI, L_one_y); + lea(rscratch1, Address(y, idx, Address::uxtw(LogBytesPerInt))); + ldr(y_idx, Address(rscratch1)); + ror(y_idx, y_idx, 32); // convert big-endian to little-endian + bind(L_multiply); + + // AArch64 has a multiply-accumulate instruction that we can't use + // here because it has no way to process carries, so we have to use + // separate add and adc instructions. Bah. + umulh(rscratch1, x_xstart, y_idx); // x_xstart * y_idx -> rscratch1:product + mul(product, x_xstart, y_idx); + adds(product, product, carry); + adc(carry, rscratch1, zr); // x_xstart * y_idx + carry -> carry:product + + subw(kdx, kdx, 2); + ror(product, product, 32); // back to big-endian + str(product, offsetted_address(z, kdx, Address::uxtw(LogBytesPerInt), 0, BytesPerLong)); + + b(L_first_loop); + + bind(L_one_y); + ldrw(y_idx, Address(y, 0)); + b(L_multiply); + + bind(L_one_x); + ldrw(x_xstart, Address(x, 0)); + b(L_first_loop); + + bind(L_first_loop_exit); +} + +/** + * Multiply 128 bit by 128. Unrolled inner loop. 
+ * + */ +void MacroAssembler::multiply_128_x_128_loop(Register y, Register z, + Register carry, Register carry2, + Register idx, Register jdx, + Register yz_idx1, Register yz_idx2, + Register tmp, Register tmp3, Register tmp4, + Register tmp6, Register product_hi) { + + // jlong carry, x[], y[], z[]; + // int kdx = ystart+1; + // for (int idx=ystart-2; idx >= 0; idx -= 2) { // Third loop + // huge_128 tmp3 = (y[idx+1] * product_hi) + z[kdx+idx+1] + carry; + // jlong carry2 = (jlong)(tmp3 >>> 64); + // huge_128 tmp4 = (y[idx] * product_hi) + z[kdx+idx] + carry2; + // carry = (jlong)(tmp4 >>> 64); + // z[kdx+idx+1] = (jlong)tmp3; + // z[kdx+idx] = (jlong)tmp4; + // } + // idx += 2; + // if (idx > 0) { + // yz_idx1 = (y[idx] * product_hi) + z[kdx+idx] + carry; + // z[kdx+idx] = (jlong)yz_idx1; + // carry = (jlong)(yz_idx1 >>> 64); + // } + // + + Label L_third_loop, L_third_loop_exit, L_post_third_loop_done; + + lsrw(jdx, idx, 2); + + bind(L_third_loop); + + subsw(jdx, jdx, 1); + br(Assembler::MI, L_third_loop_exit); + subw(idx, idx, 4); + + lea(rscratch1, Address(y, idx, Address::uxtw(LogBytesPerInt))); + + ldp(yz_idx2, yz_idx1, Address(rscratch1, 0)); + + lea(tmp6, Address(z, idx, Address::uxtw(LogBytesPerInt))); + + ror(yz_idx1, yz_idx1, 32); // convert big-endian to little-endian + ror(yz_idx2, yz_idx2, 32); + + ldp(rscratch2, rscratch1, Address(tmp6, 0)); + + mul(tmp3, product_hi, yz_idx1); // yz_idx1 * product_hi -> tmp4:tmp3 + umulh(tmp4, product_hi, yz_idx1); + + ror(rscratch1, rscratch1, 32); // convert big-endian to little-endian + ror(rscratch2, rscratch2, 32); + + mul(tmp, product_hi, yz_idx2); // yz_idx2 * product_hi -> carry2:tmp + umulh(carry2, product_hi, yz_idx2); + + // propagate sum of both multiplications into carry:tmp4:tmp3 + adds(tmp3, tmp3, carry); + adc(tmp4, tmp4, zr); + adds(tmp3, tmp3, rscratch1); + adcs(tmp4, tmp4, tmp); + adc(carry, carry2, zr); + adds(tmp4, tmp4, rscratch2); + adc(carry, carry, zr); + + ror(tmp3, tmp3, 32); // convert little-endian to big-endian + ror(tmp4, tmp4, 32); + stp(tmp4, tmp3, Address(tmp6, 0)); + + b(L_third_loop); + bind (L_third_loop_exit); + + andw (idx, idx, 0x3); + cbz(idx, L_post_third_loop_done); + + Label L_check_1; + subsw(idx, idx, 2); + br(Assembler::MI, L_check_1); + + lea(rscratch1, Address(y, idx, Address::uxtw(LogBytesPerInt))); + ldr(yz_idx1, Address(rscratch1, 0)); + ror(yz_idx1, yz_idx1, 32); + mul(tmp3, product_hi, yz_idx1); // yz_idx1 * product_hi -> tmp4:tmp3 + umulh(tmp4, product_hi, yz_idx1); + lea(rscratch1, Address(z, idx, Address::uxtw(LogBytesPerInt))); + ldr(yz_idx2, Address(rscratch1, 0)); + ror(yz_idx2, yz_idx2, 32); + + add2_with_carry(carry, tmp4, tmp3, carry, yz_idx2); + + ror(tmp3, tmp3, 32); + str(tmp3, Address(rscratch1, 0)); + + bind (L_check_1); + + andw (idx, idx, 0x1); + subsw(idx, idx, 1); + br(Assembler::MI, L_post_third_loop_done); + ldrw(tmp4, Address(y, idx, Address::uxtw(LogBytesPerInt))); + mul(tmp3, tmp4, product_hi); // tmp4 * product_hi -> carry2:tmp3 + umulh(carry2, tmp4, product_hi); + ldrw(tmp4, Address(z, idx, Address::uxtw(LogBytesPerInt))); + + add2_with_carry(carry2, tmp3, tmp4, carry); + + strw(tmp3, Address(z, idx, Address::uxtw(LogBytesPerInt))); + extr(carry, carry2, tmp3, 32); + + bind(L_post_third_loop_done); +} + +/** + * Code for BigInteger::multiplyToLen() instrinsic. 
+ * + * r0: x + * r1: xlen + * r2: y + * r3: ylen + * r4: z + * r5: zlen + * r10: tmp1 + * r11: tmp2 + * r12: tmp3 + * r13: tmp4 + * r14: tmp5 + * r15: tmp6 + * r16: tmp7 + * + */ +void MacroAssembler::multiply_to_len(Register x, Register xlen, Register y, Register ylen, + Register z, Register zlen, + Register tmp1, Register tmp2, Register tmp3, Register tmp4, + Register tmp5, Register tmp6, Register product_hi) { + + assert_different_registers(x, xlen, y, ylen, z, zlen, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6); + + const Register idx = tmp1; + const Register kdx = tmp2; + const Register xstart = tmp3; + + const Register y_idx = tmp4; + const Register carry = tmp5; + const Register product = xlen; + const Register x_xstart = zlen; // reuse register + + // First Loop. + // + // final static long LONG_MASK = 0xffffffffL; + // int xstart = xlen - 1; + // int ystart = ylen - 1; + // long carry = 0; + // for (int idx=ystart, kdx=ystart+1+xstart; idx >= 0; idx-, kdx--) { + // long product = (y[idx] & LONG_MASK) * (x[xstart] & LONG_MASK) + carry; + // z[kdx] = (int)product; + // carry = product >>> 32; + // } + // z[xstart] = (int)carry; + // + + movw(idx, ylen); // idx = ylen; + movw(kdx, zlen); // kdx = xlen+ylen; + mov(carry, zr); // carry = 0; + + Label L_done; + + movw(xstart, xlen); + subsw(xstart, xstart, 1); + br(Assembler::MI, L_done); + + multiply_64_x_64_loop(x, xstart, x_xstart, y, y_idx, z, carry, product, idx, kdx); + + Label L_second_loop; + cbzw(kdx, L_second_loop); + + Label L_carry; + subw(kdx, kdx, 1); + cbzw(kdx, L_carry); + + strw(carry, Address(z, kdx, Address::uxtw(LogBytesPerInt))); + lsr(carry, carry, 32); + subw(kdx, kdx, 1); + + bind(L_carry); + strw(carry, Address(z, kdx, Address::uxtw(LogBytesPerInt))); + + // Second and third (nested) loops. + // + // for (int i = xstart-1; i >= 0; i--) { // Second loop + // carry = 0; + // for (int jdx=ystart, k=ystart+1+i; jdx >= 0; jdx--, k--) { // Third loop + // long product = (y[jdx] & LONG_MASK) * (x[i] & LONG_MASK) + + // (z[k] & LONG_MASK) + carry; + // z[k] = (int)product; + // carry = product >>> 32; + // } + // z[i] = (int)carry; + // } + // + // i = xlen, j = tmp1, k = tmp2, carry = tmp5, x[i] = product_hi + + const Register jdx = tmp1; + + bind(L_second_loop); + mov(carry, zr); // carry = 0; + movw(jdx, ylen); // j = ystart+1 + + subsw(xstart, xstart, 1); // i = xstart-1; + br(Assembler::MI, L_done); + + str(z, Address(pre(sp, -4 * wordSize))); + + Label L_last_x; + lea(z, offsetted_address(z, xstart, Address::uxtw(LogBytesPerInt), 4, BytesPerInt)); // z = z + k - j + subsw(xstart, xstart, 1); // i = xstart-1; + br(Assembler::MI, L_last_x); + + lea(rscratch1, Address(x, xstart, Address::uxtw(LogBytesPerInt))); + ldr(product_hi, Address(rscratch1)); + ror(product_hi, product_hi, 32); // convert big-endian to little-endian + + Label L_third_loop_prologue; + bind(L_third_loop_prologue); + + str(ylen, Address(sp, wordSize)); + stp(x, xstart, Address(sp, 2 * wordSize)); + multiply_128_x_128_loop(y, z, carry, x, jdx, ylen, product, + tmp2, x_xstart, tmp3, tmp4, tmp6, product_hi); + ldp(z, ylen, Address(post(sp, 2 * wordSize))); + ldp(x, xlen, Address(post(sp, 2 * wordSize))); // copy old xstart -> xlen + + addw(tmp3, xlen, 1); + strw(carry, Address(z, tmp3, Address::uxtw(LogBytesPerInt))); + subsw(tmp3, tmp3, 1); + br(Assembler::MI, L_done); + + lsr(carry, carry, 32); + strw(carry, Address(z, tmp3, Address::uxtw(LogBytesPerInt))); + b(L_second_loop); + + // Next infrequent code is moved outside loops. 
+ bind(L_last_x); + ldrw(product_hi, Address(x, 0)); + b(L_third_loop_prologue); + + bind(L_done); +} + +/** * Emits code to update CRC-32 with a byte value according to constants in table * * @param [in,out]crc Register containing the crc.
--- a/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp Thu Apr 30 17:20:25 2015 -0700 +++ b/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp Fri May 01 03:56:01 2015 -0700 @@ -1,6 +1,6 @@ /* * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2014, 2015, Red Hat Inc. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -167,9 +167,8 @@ // aliases defined in AARCH64 spec - template<class T> - inline void cmpw(Register Rd, T imm) { subsw(zr, Rd, imm); } + inline void cmpw(Register Rd, T imm) { subsw(zr, Rd, imm); } inline void cmp(Register Rd, unsigned imm) { subs(zr, Rd, imm); } inline void cmnw(Register Rd, unsigned imm) { addsw(zr, Rd, imm); } @@ -1121,9 +1120,34 @@ Register tmp1, Register tmp2, Register tmp3, Register tmp4, int int_cnt1, Register result); - +private: + void add2_with_carry(Register final_dest_hi, Register dest_hi, Register dest_lo, + Register src1, Register src2); + void add2_with_carry(Register dest_hi, Register dest_lo, Register src1, Register src2) { + add2_with_carry(dest_hi, dest_hi, dest_lo, src1, src2); + } + void multiply_64_x_64_loop(Register x, Register xstart, Register x_xstart, + Register y, Register y_idx, Register z, + Register carry, Register product, + Register idx, Register kdx); + void multiply_128_x_128_loop(Register y, Register z, + Register carry, Register carry2, + Register idx, Register jdx, + Register yz_idx1, Register yz_idx2, + Register tmp, Register tmp3, Register tmp4, + Register tmp7, Register product_hi); +public: + void multiply_to_len(Register x, Register xlen, Register y, Register ylen, Register z, + Register zlen, Register tmp1, Register tmp2, Register tmp3, + Register tmp4, Register tmp5, Register tmp6, Register tmp7); // ISB may be needed because of a safepoint void maybe_isb() { isb(); } + +private: + // Return the effective address r + (r1 << ext) + offset. + // Uses rscratch2. + Address offsetted_address(Register r, Register r1, Address::extend ext, + int offset, int size); }; // Used by aarch64.ad to control code generation
--- a/src/cpu/aarch64/vm/stubGenerator_aarch64.cpp Thu Apr 30 17:20:25 2015 -0700 +++ b/src/cpu/aarch64/vm/stubGenerator_aarch64.cpp Fri May 01 03:56:01 2015 -0700 @@ -1,6 +1,6 @@ /* * Copyright (c) 2003, 2015, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2014, 2015, Red Hat Inc. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -2356,8 +2356,45 @@ return start; } -#undef __ -#define __ masm-> + /** + * Arguments: + * + * Input: + * c_rarg0 - x address + * c_rarg1 - x length + * c_rarg2 - y address + * c_rarg3 - y length + * c_rarg4 - z address + * c_rarg5 - z length + */ + address generate_multiplyToLen() { + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", "multiplyToLen"); + + address start = __ pc(); + const Register x = r0; + const Register xlen = r1; + const Register y = r2; + const Register ylen = r3; + const Register z = r4; + const Register zlen = r5; + + const Register tmp1 = r10; + const Register tmp2 = r11; + const Register tmp3 = r12; + const Register tmp4 = r13; + const Register tmp5 = r14; + const Register tmp6 = r15; + const Register tmp7 = r16; + + BLOCK_COMMENT("Entry:"); + __ enter(); // required for proper stackwalking of RuntimeStub frame + __ multiply_to_len(x, xlen, y, ylen, z, zlen, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7); + __ leave(); // required for proper stackwalking of RuntimeStub frame + __ ret(lr); + + return start; + } // Continuation point for throwing of implicit exceptions that are // not handled in the current activation. Fabricates an exception @@ -2375,6 +2412,9 @@ // otherwise assume that stack unwinding will be initiated, so // caller saved registers were assumed volatile in the compiler. +#undef __ +#define __ masm-> + address generate_throw_exception(const char* name, address runtime_entry, Register arg1 = noreg, @@ -2518,6 +2558,10 @@ // arraycopy stubs used by compilers generate_arraycopy_stubs(); + if (UseMultiplyToLenIntrinsic) { + StubRoutines::_multiplyToLen = generate_multiplyToLen(); + } + #ifndef BUILTIN_SIM if (UseAESIntrinsics) { StubRoutines::_aescrypt_encryptBlock = generate_aescrypt_encryptBlock();
--- a/src/cpu/aarch64/vm/templateTable_aarch64.cpp Thu Apr 30 17:20:25 2015 -0700 +++ b/src/cpu/aarch64/vm/templateTable_aarch64.cpp Fri May 01 03:56:01 2015 -0700 @@ -2138,30 +2138,7 @@ __ br(Assembler::EQ, resolved); // resolve first time through - address entry; - switch (bytecode()) { - case Bytecodes::_getstatic: - case Bytecodes::_putstatic: - case Bytecodes::_getfield: - case Bytecodes::_putfield: - entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_get_put); - break; - case Bytecodes::_invokevirtual: - case Bytecodes::_invokespecial: - case Bytecodes::_invokestatic: - case Bytecodes::_invokeinterface: - entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_invoke); - break; - case Bytecodes::_invokehandle: - entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_invokehandle); - break; - case Bytecodes::_invokedynamic: - entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_invokedynamic); - break; - default: - fatal(err_msg("unexpected bytecode: %s", Bytecodes::name(bytecode()))); - break; - } + address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_from_cache); __ mov(temp, (int) bytecode()); __ call_VM(noreg, entry, temp);
--- a/src/cpu/aarch64/vm/vm_version_aarch64.cpp Thu Apr 30 17:20:25 2015 -0700 +++ b/src/cpu/aarch64/vm/vm_version_aarch64.cpp Fri May 01 03:56:01 2015 -0700 @@ -1,6 +1,6 @@ /* * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2015, Red Hat Inc. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -193,6 +193,15 @@ } } + // This machine allows unaligned memory accesses + if (FLAG_IS_DEFAULT(UseUnalignedAccesses)) { + FLAG_SET_DEFAULT(UseUnalignedAccesses, true); + } + + if (FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) { + UseMultiplyToLenIntrinsic = true; + } + #ifdef COMPILER2 if (FLAG_IS_DEFAULT(OptoScheduling)) { OptoScheduling = true;
--- a/src/cpu/ppc/vm/assembler_ppc.cpp Thu Apr 30 17:20:25 2015 -0700 +++ b/src/cpu/ppc/vm/assembler_ppc.cpp Fri May 01 03:56:01 2015 -0700 @@ -1,6 +1,6 @@ /* - * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. - * Copyright 2012, 2014 SAP AG. All rights reserved. + * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved. + * Copyright 2012, 2015 SAP AG. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -85,8 +85,7 @@ } // Low-level andi-one-instruction-macro. -void Assembler::andi(Register a, Register s, const int ui16) { - assert(is_uimm(ui16, 16), "must be 16-bit unsigned immediate"); +void Assembler::andi(Register a, Register s, const long ui16) { if (is_power_of_2_long(((jlong) ui16)+1)) { // pow2minus1 clrldi(a, s, 64-log2_long((((jlong) ui16)+1))); @@ -97,6 +96,7 @@ // negpow2 clrrdi(a, s, log2_long((jlong)-ui16)); } else { + assert(is_uimm(ui16, 16), "must be 16-bit unsigned immediate"); andi_(a, s, ui16); } } @@ -356,7 +356,6 @@ // 16 bit immediate offset. int Assembler::load_const_optimized(Register d, long x, Register tmp, bool return_simm16_rest) { // Avoid accidentally trying to use R0 for indexed addressing. - assert(d != R0, "R0 not allowed"); assert_different_registers(d, tmp); short xa, xb, xc, xd; // Four 16-bit chunks of const. @@ -370,6 +369,58 @@ return 0; } + int retval = 0; + if (return_simm16_rest) { + retval = xd; + x = rem << 16; + xd = 0; + } + + if (d == R0) { // Can't use addi. + if (is_simm(x, 32)) { // opt 2: simm32 + lis(d, x >> 16); + if (xd) ori(d, d, (unsigned short)xd); + } else { + // 64-bit value: x = xa xb xc xd + xa = (x >> 48) & 0xffff; + xb = (x >> 32) & 0xffff; + xc = (x >> 16) & 0xffff; + bool xa_loaded = (xb & 0x8000) ? (xa != -1) : (xa != 0); + if (tmp == noreg || (xc == 0 && xd == 0)) { + if (xa_loaded) { + lis(d, xa); + if (xb) { ori(d, d, (unsigned short)xb); } + } else { + li(d, xb); + } + sldi(d, d, 32); + if (xc) { oris(d, d, (unsigned short)xc); } + if (xd) { ori( d, d, (unsigned short)xd); } + } else { + // Exploit instruction level parallelism if we have a tmp register. + bool xc_loaded = (xd & 0x8000) ? (xc != -1) : (xc != 0); + if (xa_loaded) { + lis(tmp, xa); + } + if (xc_loaded) { + lis(d, xc); + } + if (xa_loaded) { + if (xb) { ori(tmp, tmp, (unsigned short)xb); } + } else { + li(tmp, xb); + } + if (xc_loaded) { + if (xd) { ori(d, d, (unsigned short)xd); } + } else { + li(d, xd); + } + insrdi(d, tmp, 32, 0); + } + } + return retval; + } + xc = rem & 0xFFFF; // Next 16-bit chunk. rem = (rem >> 16) + ((unsigned short)xc >> 15); // Compensation for sign extend. @@ -377,28 +428,27 @@ lis(d, xc); } else { // High 32 bits needed. - if (tmp != noreg) { // opt 3: We have a temp reg. + if (tmp != noreg && (int)x != 0) { // opt 3: We have a temp reg. // No carry propagation between xc and higher chunks here (use logical instructions). xa = (x >> 48) & 0xffff; xb = (x >> 32) & 0xffff; // No sign compensation, we use lis+ori or li to allow usage of R0. - bool load_xa = (xa != 0) || (xb < 0); + bool xa_loaded = (xb & 0x8000) ? (xa != -1) : (xa != 0); bool return_xd = false; - if (load_xa) { lis(tmp, xa); } + if (xa_loaded) { lis(tmp, xa); } if (xc) { lis(d, xc); } - if (load_xa) { + if (xa_loaded) { if (xb) { ori(tmp, tmp, (unsigned short)xb); } // No addi, we support tmp == R0. 
} else { - li(tmp, xb); // non-negative + li(tmp, xb); } if (xc) { - if (return_simm16_rest && xd >= 0) { return_xd = true; } // >= 0 to avoid carry propagation after insrdi/rldimi. - else if (xd) { addi(d, d, xd); } + if (xd) { addi(d, d, xd); } } else { li(d, xd); } insrdi(d, tmp, 32, 0); - return return_xd ? xd : 0; // non-negative + return retval; } xb = rem & 0xFFFF; // Next 16-bit chunk. @@ -417,11 +467,51 @@ if (xc) { addis(d, d, xc); } } - // opt 5: Return offset to be inserted into following instruction. - if (return_simm16_rest) return xd; + if (xd) { addi(d, d, xd); } + return retval; +} + +// We emit only one addition to s to optimize latency. +int Assembler::add_const_optimized(Register d, Register s, long x, Register tmp, bool return_simm16_rest) { + assert(s != R0 && s != tmp, "unsupported"); + long rem = x; - if (xd) { addi(d, d, xd); } - return 0; + // Case 1: Can use mr or addi. + short xd = rem & 0xFFFF; // Lowest 16-bit chunk. + rem = (rem >> 16) + ((unsigned short)xd >> 15); + if (rem == 0) { + if (xd == 0) { + if (d != s) { mr(d, s); } + return 0; + } + if (return_simm16_rest) { + return xd; + } + addi(d, s, xd); + return 0; + } + + // Case 2: Can use addis. + if (xd == 0) { + short xc = rem & 0xFFFF; // 2nd 16-bit chunk. + rem = (rem >> 16) + ((unsigned short)xd >> 15); + if (rem == 0) { + addis(d, s, xc); + return 0; + } + } + + // Other cases: load & add. + Register tmp1 = tmp, + tmp2 = noreg; + if ((d != tmp) && (d != s)) { + // Can use d. + tmp1 = d; + tmp2 = tmp; + } + int simm16_rest = load_const_optimized(tmp1, x, tmp2, return_simm16_rest); + add(d, tmp1, s); + return simm16_rest; } #ifndef PRODUCT
--- a/src/cpu/ppc/vm/assembler_ppc.hpp Thu Apr 30 17:20:25 2015 -0700 +++ b/src/cpu/ppc/vm/assembler_ppc.hpp Fri May 01 03:56:01 2015 -0700 @@ -224,10 +224,13 @@ ADDIS_OPCODE = (15u << OPCODE_SHIFT), ADDIC__OPCODE = (13u << OPCODE_SHIFT), ADDE_OPCODE = (31u << OPCODE_SHIFT | 138u << 1), + ADDME_OPCODE = (31u << OPCODE_SHIFT | 234u << 1), + ADDZE_OPCODE = (31u << OPCODE_SHIFT | 202u << 1), SUBF_OPCODE = (31u << OPCODE_SHIFT | 40u << 1), SUBFC_OPCODE = (31u << OPCODE_SHIFT | 8u << 1), SUBFE_OPCODE = (31u << OPCODE_SHIFT | 136u << 1), SUBFIC_OPCODE = (8u << OPCODE_SHIFT), + SUBFME_OPCODE = (31u << OPCODE_SHIFT | 232u << 1), SUBFZE_OPCODE = (31u << OPCODE_SHIFT | 200u << 1), DIVW_OPCODE = (31u << OPCODE_SHIFT | 491u << 1), MULLW_OPCODE = (31u << OPCODE_SHIFT | 235u << 1), @@ -657,6 +660,9 @@ SYNC_OPCODE = (31u << OPCODE_SHIFT | 598u << 1), EIEIO_OPCODE = (31u << OPCODE_SHIFT | 854u << 1), + // Wait instructions for polling. + WAIT_OPCODE = (31u << OPCODE_SHIFT | 62u << 1), + // Trap instructions TDI_OPCODE = (2u << OPCODE_SHIFT), TWI_OPCODE = (3u << OPCODE_SHIFT), @@ -666,8 +672,10 @@ // Atomics. LWARX_OPCODE = (31u << OPCODE_SHIFT | 20u << 1), LDARX_OPCODE = (31u << OPCODE_SHIFT | 84u << 1), + LQARX_OPCODE = (31u << OPCODE_SHIFT | 276u << 1), STWCX_OPCODE = (31u << OPCODE_SHIFT | 150u << 1), - STDCX_OPCODE = (31u << OPCODE_SHIFT | 214u << 1) + STDCX_OPCODE = (31u << OPCODE_SHIFT | 214u << 1), + STQCX_OPCODE = (31u << OPCODE_SHIFT | 182u << 1) }; @@ -1171,6 +1179,14 @@ inline void adde_( Register d, Register a, Register b); inline void subfe( Register d, Register a, Register b); inline void subfe_( Register d, Register a, Register b); + inline void addme( Register d, Register a); + inline void addme_( Register d, Register a); + inline void subfme( Register d, Register a); + inline void subfme_(Register d, Register a); + inline void addze( Register d, Register a); + inline void addze_( Register d, Register a); + inline void subfze( Register d, Register a); + inline void subfze_(Register d, Register a); inline void neg( Register d, Register a); inline void neg_( Register d, Register a); inline void mulli( Register d, Register a, int si16); @@ -1189,6 +1205,38 @@ inline void divw( Register d, Register a, Register b); inline void divw_( Register d, Register a, Register b); + // Fixed-Point Arithmetic Instructions with Overflow detection + inline void addo( Register d, Register a, Register b); + inline void addo_( Register d, Register a, Register b); + inline void subfo( Register d, Register a, Register b); + inline void subfo_( Register d, Register a, Register b); + inline void addco( Register d, Register a, Register b); + inline void addco_( Register d, Register a, Register b); + inline void subfco( Register d, Register a, Register b); + inline void subfco_( Register d, Register a, Register b); + inline void addeo( Register d, Register a, Register b); + inline void addeo_( Register d, Register a, Register b); + inline void subfeo( Register d, Register a, Register b); + inline void subfeo_( Register d, Register a, Register b); + inline void addmeo( Register d, Register a); + inline void addmeo_( Register d, Register a); + inline void subfmeo( Register d, Register a); + inline void subfmeo_(Register d, Register a); + inline void addzeo( Register d, Register a); + inline void addzeo_( Register d, Register a); + inline void subfzeo( Register d, Register a); + inline void subfzeo_(Register d, Register a); + inline void nego( Register d, Register a); + inline void nego_( Register d, Register a); + 
inline void mulldo( Register d, Register a, Register b); + inline void mulldo_( Register d, Register a, Register b); + inline void mullwo( Register d, Register a, Register b); + inline void mullwo_( Register d, Register a, Register b); + inline void divdo( Register d, Register a, Register b); + inline void divdo_( Register d, Register a, Register b); + inline void divwo( Register d, Register a, Register b); + inline void divwo_( Register d, Register a, Register b); + // extended mnemonics inline void li( Register d, int si16); inline void lis( Register d, int si16); @@ -1303,7 +1351,7 @@ inline void isel_0( Register d, ConditionRegister cr, Condition cc, Register b = noreg); // PPC 1, section 3.3.11, Fixed-Point Logical Instructions - void andi( Register a, Register s, int ui16); // optimized version + void andi( Register a, Register s, long ui16); // optimized version inline void andi_( Register a, Register s, int ui16); inline void andis_( Register a, Register s, int ui16); inline void ori( Register a, Register s, int ui16); @@ -1688,14 +1736,21 @@ inline void isync(); inline void elemental_membar(int e); // Elemental Memory Barriers (>=Power 8) + // Wait instructions for polling. Attention: May result in SIGILL. + inline void wait(); + inline void waitrsv(); // >=Power7 + // atomics inline void lwarx_unchecked(Register d, Register a, Register b, int eh1 = 0); inline void ldarx_unchecked(Register d, Register a, Register b, int eh1 = 0); + inline void lqarx_unchecked(Register d, Register a, Register b, int eh1 = 0); inline bool lxarx_hint_exclusive_access(); inline void lwarx( Register d, Register a, Register b, bool hint_exclusive_access = false); inline void ldarx( Register d, Register a, Register b, bool hint_exclusive_access = false); + inline void lqarx( Register d, Register a, Register b, bool hint_exclusive_access = false); inline void stwcx_( Register s, Register a, Register b); inline void stdcx_( Register s, Register a, Register b); + inline void stqcx_( Register s, Register a, Register b); // Instructions for adjusting thread priority for simultaneous // multithreading (SMT) on Power5. @@ -2054,10 +2109,13 @@ // Atomics: use ra0mem to disallow R0 as base. inline void lwarx_unchecked(Register d, Register b, int eh1); inline void ldarx_unchecked(Register d, Register b, int eh1); + inline void lqarx_unchecked(Register d, Register b, int eh1); inline void lwarx( Register d, Register b, bool hint_exclusive_access); inline void ldarx( Register d, Register b, bool hint_exclusive_access); + inline void lqarx( Register d, Register b, bool hint_exclusive_access); inline void stwcx_(Register s, Register b); inline void stdcx_(Register s, Register b); + inline void stqcx_(Register s, Register b); inline void lfs( FloatRegister d, int si16); inline void lfsx( FloatRegister d, Register b); inline void lfd( FloatRegister d, int si16); @@ -2120,6 +2178,20 @@ return load_const_optimized(d, (long)(unsigned long)a, tmp, return_simm16_rest); } + // If return_simm16_rest, the return value needs to get added afterwards. + int add_const_optimized(Register d, Register s, long x, Register tmp = R0, bool return_simm16_rest = false); + inline int add_const_optimized(Register d, Register s, void* a, Register tmp = R0, bool return_simm16_rest = false) { + return add_const_optimized(d, s, (long)(unsigned long)a, tmp, return_simm16_rest); + } + + // If return_simm16_rest, the return value needs to get added afterwards. 
+ inline int sub_const_optimized(Register d, Register s, long x, Register tmp = R0, bool return_simm16_rest = false) { + return add_const_optimized(d, s, -x, tmp, return_simm16_rest); + } + inline int sub_const_optimized(Register d, Register s, void* a, Register tmp = R0, bool return_simm16_rest = false) { + return sub_const_optimized(d, s, (long)(unsigned long)a, tmp, return_simm16_rest); + } + // Creation Assembler(CodeBuffer* code) : AbstractAssembler(code) { #ifdef CHECK_DELAY
--- a/src/cpu/ppc/vm/assembler_ppc.inline.hpp Thu Apr 30 17:20:25 2015 -0700 +++ b/src/cpu/ppc/vm/assembler_ppc.inline.hpp Fri May 01 03:56:01 2015 -0700 @@ -100,6 +100,14 @@ inline void Assembler::adde_( Register d, Register a, Register b) { emit_int32(ADDE_OPCODE | rt(d) | ra(a) | rb(b) | oe(0) | rc(1)); } inline void Assembler::subfe( Register d, Register a, Register b) { emit_int32(SUBFE_OPCODE | rt(d) | ra(a) | rb(b) | oe(0) | rc(0)); } inline void Assembler::subfe_( Register d, Register a, Register b) { emit_int32(SUBFE_OPCODE | rt(d) | ra(a) | rb(b) | oe(0) | rc(1)); } +inline void Assembler::addme( Register d, Register a) { emit_int32(ADDME_OPCODE | rt(d) | ra(a) | oe(0) | rc(0)); } +inline void Assembler::addme_( Register d, Register a) { emit_int32(ADDME_OPCODE | rt(d) | ra(a) | oe(0) | rc(1)); } +inline void Assembler::subfme( Register d, Register a) { emit_int32(SUBFME_OPCODE | rt(d) | ra(a) | oe(0) | rc(0)); } +inline void Assembler::subfme_(Register d, Register a) { emit_int32(SUBFME_OPCODE | rt(d) | ra(a) | oe(0) | rc(1)); } +inline void Assembler::addze( Register d, Register a) { emit_int32(ADDZE_OPCODE | rt(d) | ra(a) | oe(0) | rc(0)); } +inline void Assembler::addze_( Register d, Register a) { emit_int32(ADDZE_OPCODE | rt(d) | ra(a) | oe(0) | rc(1)); } +inline void Assembler::subfze( Register d, Register a) { emit_int32(SUBFZE_OPCODE | rt(d) | ra(a) | oe(0) | rc(0)); } +inline void Assembler::subfze_(Register d, Register a) { emit_int32(SUBFZE_OPCODE | rt(d) | ra(a) | oe(0) | rc(1)); } inline void Assembler::neg( Register d, Register a) { emit_int32(NEG_OPCODE | rt(d) | ra(a) | oe(0) | rc(0)); } inline void Assembler::neg_( Register d, Register a) { emit_int32(NEG_OPCODE | rt(d) | ra(a) | oe(0) | rc(1)); } inline void Assembler::mulli( Register d, Register a, int si16) { emit_int32(MULLI_OPCODE | rt(d) | ra(a) | simm(si16, 16)); } @@ -118,6 +126,38 @@ inline void Assembler::divw( Register d, Register a, Register b) { emit_int32(DIVW_OPCODE | rt(d) | ra(a) | rb(b) | oe(0) | rc(0)); } inline void Assembler::divw_( Register d, Register a, Register b) { emit_int32(DIVW_OPCODE | rt(d) | ra(a) | rb(b) | oe(0) | rc(1)); } +// Fixed-Point Arithmetic Instructions with Overflow detection +inline void Assembler::addo( Register d, Register a, Register b) { emit_int32(ADD_OPCODE | rt(d) | ra(a) | rb(b) | oe(1) | rc(0)); } +inline void Assembler::addo_( Register d, Register a, Register b) { emit_int32(ADD_OPCODE | rt(d) | ra(a) | rb(b) | oe(1) | rc(1)); } +inline void Assembler::subfo( Register d, Register a, Register b) { emit_int32(SUBF_OPCODE | rt(d) | ra(a) | rb(b) | oe(1) | rc(0)); } +inline void Assembler::subfo_( Register d, Register a, Register b) { emit_int32(SUBF_OPCODE | rt(d) | ra(a) | rb(b) | oe(1) | rc(1)); } +inline void Assembler::addco( Register d, Register a, Register b) { emit_int32(ADDC_OPCODE | rt(d) | ra(a) | rb(b) | oe(1) | rc(0)); } +inline void Assembler::addco_( Register d, Register a, Register b) { emit_int32(ADDC_OPCODE | rt(d) | ra(a) | rb(b) | oe(1) | rc(1)); } +inline void Assembler::subfco( Register d, Register a, Register b) { emit_int32(SUBFC_OPCODE | rt(d) | ra(a) | rb(b) | oe(1) | rc(0)); } +inline void Assembler::subfco_( Register d, Register a, Register b) { emit_int32(SUBFC_OPCODE | rt(d) | ra(a) | rb(b) | oe(1) | rc(1)); } +inline void Assembler::addeo( Register d, Register a, Register b) { emit_int32(ADDE_OPCODE | rt(d) | ra(a) | rb(b) | oe(1) | rc(0)); } +inline void Assembler::addeo_( Register d, Register a, Register b) { 
emit_int32(ADDE_OPCODE | rt(d) | ra(a) | rb(b) | oe(1) | rc(1)); } +inline void Assembler::subfeo( Register d, Register a, Register b) { emit_int32(SUBFE_OPCODE | rt(d) | ra(a) | rb(b) | oe(1) | rc(0)); } +inline void Assembler::subfeo_( Register d, Register a, Register b) { emit_int32(SUBFE_OPCODE | rt(d) | ra(a) | rb(b) | oe(1) | rc(1)); } +inline void Assembler::addmeo( Register d, Register a) { emit_int32(ADDME_OPCODE | rt(d) | ra(a) | oe(1) | rc(0)); } +inline void Assembler::addmeo_( Register d, Register a) { emit_int32(ADDME_OPCODE | rt(d) | ra(a) | oe(1) | rc(1)); } +inline void Assembler::subfmeo( Register d, Register a) { emit_int32(SUBFME_OPCODE | rt(d) | ra(a) | oe(1) | rc(0)); } +inline void Assembler::subfmeo_(Register d, Register a) { emit_int32(SUBFME_OPCODE | rt(d) | ra(a) | oe(1) | rc(1)); } +inline void Assembler::addzeo( Register d, Register a) { emit_int32(ADDZE_OPCODE | rt(d) | ra(a) | oe(1) | rc(0)); } +inline void Assembler::addzeo_( Register d, Register a) { emit_int32(ADDZE_OPCODE | rt(d) | ra(a) | oe(1) | rc(1)); } +inline void Assembler::subfzeo( Register d, Register a) { emit_int32(SUBFZE_OPCODE | rt(d) | ra(a) | oe(1) | rc(0)); } +inline void Assembler::subfzeo_(Register d, Register a) { emit_int32(SUBFZE_OPCODE | rt(d) | ra(a) | oe(1) | rc(1)); } +inline void Assembler::nego( Register d, Register a) { emit_int32(NEG_OPCODE | rt(d) | ra(a) | oe(1) | rc(0)); } +inline void Assembler::nego_( Register d, Register a) { emit_int32(NEG_OPCODE | rt(d) | ra(a) | oe(1) | rc(1)); } +inline void Assembler::mulldo( Register d, Register a, Register b) { emit_int32(MULLD_OPCODE | rt(d) | ra(a) | rb(b) | oe(1) | rc(0)); } +inline void Assembler::mulldo_( Register d, Register a, Register b) { emit_int32(MULLD_OPCODE | rt(d) | ra(a) | rb(b) | oe(1) | rc(1)); } +inline void Assembler::mullwo( Register d, Register a, Register b) { emit_int32(MULLW_OPCODE | rt(d) | ra(a) | rb(b) | oe(1) | rc(0)); } +inline void Assembler::mullwo_( Register d, Register a, Register b) { emit_int32(MULLW_OPCODE | rt(d) | ra(a) | rb(b) | oe(1) | rc(1)); } +inline void Assembler::divdo( Register d, Register a, Register b) { emit_int32(DIVD_OPCODE | rt(d) | ra(a) | rb(b) | oe(1) | rc(0)); } +inline void Assembler::divdo_( Register d, Register a, Register b) { emit_int32(DIVD_OPCODE | rt(d) | ra(a) | rb(b) | oe(1) | rc(1)); } +inline void Assembler::divwo( Register d, Register a, Register b) { emit_int32(DIVW_OPCODE | rt(d) | ra(a) | rb(b) | oe(1) | rc(0)); } +inline void Assembler::divwo_( Register d, Register a, Register b) { emit_int32(DIVW_OPCODE | rt(d) | ra(a) | rb(b) | oe(1) | rc(1)); } + // extended mnemonics inline void Assembler::li( Register d, int si16) { Assembler::addi_r0ok( d, R0, si16); } inline void Assembler::lis( Register d, int si16) { Assembler::addis_r0ok(d, R0, si16); } @@ -540,15 +580,22 @@ inline void Assembler::isync() { emit_int32( ISYNC_OPCODE); } inline void Assembler::elemental_membar(int e) { assert(0 < e && e < 16, "invalid encoding"); emit_int32( SYNC_OPCODE | e1215(e)); } +// Wait instructions for polling. +inline void Assembler::wait() { emit_int32( WAIT_OPCODE); } +inline void Assembler::waitrsv() { emit_int32( WAIT_OPCODE | 1<<(31-10)); } // WC=0b01 >=Power7 + // atomics // Use ra0mem to disallow R0 as base. 
inline void Assembler::lwarx_unchecked(Register d, Register a, Register b, int eh1) { emit_int32( LWARX_OPCODE | rt(d) | ra0mem(a) | rb(b) | eh(eh1)); } inline void Assembler::ldarx_unchecked(Register d, Register a, Register b, int eh1) { emit_int32( LDARX_OPCODE | rt(d) | ra0mem(a) | rb(b) | eh(eh1)); } +inline void Assembler::lqarx_unchecked(Register d, Register a, Register b, int eh1) { emit_int32( LQARX_OPCODE | rt(d) | ra0mem(a) | rb(b) | eh(eh1)); } inline bool Assembler::lxarx_hint_exclusive_access() { return VM_Version::has_lxarxeh(); } inline void Assembler::lwarx( Register d, Register a, Register b, bool hint_exclusive_access) { lwarx_unchecked(d, a, b, (hint_exclusive_access && lxarx_hint_exclusive_access() && UseExtendedLoadAndReserveInstructionsPPC64) ? 1 : 0); } inline void Assembler::ldarx( Register d, Register a, Register b, bool hint_exclusive_access) { ldarx_unchecked(d, a, b, (hint_exclusive_access && lxarx_hint_exclusive_access() && UseExtendedLoadAndReserveInstructionsPPC64) ? 1 : 0); } +inline void Assembler::lqarx( Register d, Register a, Register b, bool hint_exclusive_access) { lqarx_unchecked(d, a, b, (hint_exclusive_access && lxarx_hint_exclusive_access() && UseExtendedLoadAndReserveInstructionsPPC64) ? 1 : 0); } inline void Assembler::stwcx_(Register s, Register a, Register b) { emit_int32( STWCX_OPCODE | rs(s) | ra0mem(a) | rb(b) | rc(1)); } inline void Assembler::stdcx_(Register s, Register a, Register b) { emit_int32( STDCX_OPCODE | rs(s) | ra0mem(a) | rb(b) | rc(1)); } +inline void Assembler::stqcx_(Register s, Register a, Register b) { emit_int32( STQCX_OPCODE | rs(s) | ra0mem(a) | rb(b) | rc(1)); } // Instructions for adjusting thread priority // for simultaneous multithreading (SMT) on POWER5. @@ -873,10 +920,13 @@ // ra0 version inline void Assembler::lwarx_unchecked(Register d, Register b, int eh1) { emit_int32( LWARX_OPCODE | rt(d) | rb(b) | eh(eh1)); } inline void Assembler::ldarx_unchecked(Register d, Register b, int eh1) { emit_int32( LDARX_OPCODE | rt(d) | rb(b) | eh(eh1)); } +inline void Assembler::lqarx_unchecked(Register d, Register b, int eh1) { emit_int32( LQARX_OPCODE | rt(d) | rb(b) | eh(eh1)); } inline void Assembler::lwarx( Register d, Register b, bool hint_exclusive_access){ lwarx_unchecked(d, b, (hint_exclusive_access && lxarx_hint_exclusive_access() && UseExtendedLoadAndReserveInstructionsPPC64) ? 1 : 0); } inline void Assembler::ldarx( Register d, Register b, bool hint_exclusive_access){ ldarx_unchecked(d, b, (hint_exclusive_access && lxarx_hint_exclusive_access() && UseExtendedLoadAndReserveInstructionsPPC64) ? 1 : 0); } +inline void Assembler::lqarx( Register d, Register b, bool hint_exclusive_access){ lqarx_unchecked(d, b, (hint_exclusive_access && lxarx_hint_exclusive_access() && UseExtendedLoadAndReserveInstructionsPPC64) ? 1 : 0); } inline void Assembler::stwcx_(Register s, Register b) { emit_int32( STWCX_OPCODE | rs(s) | rb(b) | rc(1)); } inline void Assembler::stdcx_(Register s, Register b) { emit_int32( STDCX_OPCODE | rs(s) | rb(b) | rc(1)); } +inline void Assembler::stqcx_(Register s, Register b) { emit_int32( STQCX_OPCODE | rs(s) | rb(b) | rc(1)); } // ra0 version inline void Assembler::lfs( FloatRegister d, int si16) { emit_int32( LFS_OPCODE | frt(d) | simm(si16,16)); }
--- a/src/cpu/ppc/vm/c2_globals_ppc.hpp Thu Apr 30 17:20:25 2015 -0700 +++ b/src/cpu/ppc/vm/c2_globals_ppc.hpp Fri May 01 03:56:01 2015 -0700 @@ -1,6 +1,6 @@ /* * Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved. - * Copyright 2012, 2014 SAP AG. All rights reserved. + * Copyright 2012, 2015 SAP AG. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -47,7 +47,7 @@ define_pd_global(intx, FLOATPRESSURE, 28); define_pd_global(intx, FreqInlineSize, 175); define_pd_global(intx, MinJumpTableSize, 10); -define_pd_global(intx, INTPRESSURE, 25); +define_pd_global(intx, INTPRESSURE, 26); define_pd_global(intx, InteriorEntryAlignment, 16); define_pd_global(size_t, NewSizeThreadIncrease, ScaleForWordSize(4*K)); define_pd_global(intx, RegisterCostAreaRatio, 16000);
--- a/src/cpu/ppc/vm/globals_ppc.hpp Thu Apr 30 17:20:25 2015 -0700 +++ b/src/cpu/ppc/vm/globals_ppc.hpp Fri May 01 03:56:01 2015 -0700 @@ -1,6 +1,6 @@ /* * Copyright (c) 2002, 2015, Oracle and/or its affiliates. All rights reserved. - * Copyright 2012, 2013 SAP AG. All rights reserved. + * Copyright 2012, 2015 SAP AG. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -55,10 +55,12 @@ define_pd_global(bool, UseMembar, false); +define_pd_global(bool, PreserveFramePointer, false); + // GC Ergo Flags define_pd_global(size_t, CMSYoungGenPerWorker, 16*M); // Default max size of CMS young gen, per GC worker thread. -define_pd_global(uintx, TypeProfileLevel, 0); +define_pd_global(uintx, TypeProfileLevel, 111); // Platform dependent flag handling: flags only defined on this platform. #define ARCH_FLAGS(develop, product, diagnostic, experimental, notproduct) \ @@ -71,14 +73,26 @@ \ product(uintx, PowerArchitecturePPC64, 0, \ "CPU Version: x for PowerX. Currently recognizes Power5 to " \ - "Power7. Default is 0. CPUs newer than Power7 will be " \ - "recognized as Power7.") \ + "Power8. Default is 0. Newer CPUs will be recognized as Power8.") \ \ /* Reoptimize code-sequences of calls at runtime, e.g. replace an */ \ /* indirect call by a direct call. */ \ product(bool, ReoptimizeCallSequences, true, \ "Reoptimize code-sequences of calls at runtime.") \ \ + /* Power 8: Configure Data Stream Control Register. */ \ + product(uint64_t,DSCR_PPC64, (uintx)-1, \ + "Power8 or later: Specify encoded value for Data Stream Control " \ + "Register") \ + product(uint64_t,DSCR_DPFD_PPC64, 8, \ + "Power8 or later: DPFD (default prefetch depth) value of the " \ + "Data Stream Control Register." \ + " 0: hardware default, 1: none, 2-7: min-max, 8: don't touch") \ + product(uint64_t,DSCR_URG_PPC64, 8, \ + "Power8 or later: URG (depth attainment urgency) value of the " \ + "Data Stream Control Register." \ + " 0: hardware default, 1: none, 2-7: min-max, 8: don't touch") \ + \ product(bool, UseLoadInstructionsForStackBangingPPC64, false, \ "Use load instructions for stack banging.") \ \ @@ -121,6 +135,41 @@ \ product(bool, ZapMemory, false, "Write 0x0101... to empty memory." 
\ " Use this to ease debugging.") \ - + \ + /* Use Restricted Transactional Memory for lock eliding */ \ + product(bool, UseRTMLocking, false, \ + "Enable RTM lock eliding for inflated locks in compiled code") \ + \ + experimental(bool, UseRTMForStackLocks, false, \ + "Enable RTM lock eliding for stack locks in compiled code") \ + \ + product(bool, UseRTMDeopt, false, \ + "Perform deopt and recompilation based on RTM abort ratio") \ + \ + product(uintx, RTMRetryCount, 5, \ + "Number of RTM retries on lock abort or busy") \ + \ + experimental(intx, RTMSpinLoopCount, 100, \ + "Spin count for lock to become free before RTM retry") \ + \ + experimental(intx, RTMAbortThreshold, 1000, \ + "Calculate abort ratio after this number of aborts") \ + \ + experimental(intx, RTMLockingThreshold, 10000, \ + "Lock count at which to do RTM lock eliding without " \ + "abort ratio calculation") \ + \ + experimental(intx, RTMAbortRatio, 50, \ + "Lock abort ratio at which to stop use RTM lock eliding") \ + \ + experimental(intx, RTMTotalCountIncrRate, 64, \ + "Increment total RTM attempted lock count once every n times") \ + \ + experimental(intx, RTMLockingCalculationDelay, 0, \ + "Number of milliseconds to wait before start calculating aborts " \ + "for RTM locking") \ + \ + experimental(bool, UseRTMXendForLockBusy, true, \ + "Use RTM Xend instead of Xabort when lock busy") \ #endif // CPU_PPC_VM_GLOBALS_PPC_HPP
--- a/src/cpu/ppc/vm/interp_masm_ppc_64.cpp Thu Apr 30 17:20:25 2015 -0700 +++ b/src/cpu/ppc/vm/interp_masm_ppc_64.cpp Fri May 01 03:56:01 2015 -0700 @@ -446,7 +446,7 @@ } // Load object from cpool->resolved_references(index). -void InterpreterMacroAssembler::load_resolved_reference_at_index(Register result, Register index) { +void InterpreterMacroAssembler::load_resolved_reference_at_index(Register result, Register index, Label *is_null) { assert_different_registers(result, index); get_constant_pool(result); @@ -469,7 +469,7 @@ #endif // Add in the index. add(result, tmp, result); - load_heap_oop(result, arrayOopDesc::base_offset_in_bytes(T_OBJECT), result); + load_heap_oop(result, arrayOopDesc::base_offset_in_bytes(T_OBJECT), result, is_null); } // Generate a subtype check: branch to ok_is_subtype if sub_klass is @@ -876,7 +876,6 @@ // If condition is true we are done and hence we can store 0 in the displaced // header indicating it is a recursive lock. bne(CCR0, slow_case); - release(); std(R0/*==0!*/, BasicObjectLock::lock_offset_in_bytes() + BasicLock::displaced_header_offset_in_bytes(), monitor); b(done); @@ -1861,7 +1860,7 @@ const Register mdp = tmp1; add(mdp, tmp1, R28_mdx); - // Pffset of the current profile entry to update. + // Offset of the current profile entry to update. const Register entry_offset = tmp2; // entry_offset = array len in number of cells ld(entry_offset, in_bytes(ArrayData::array_len_offset()), mdp);
--- a/src/cpu/ppc/vm/interp_masm_ppc_64.hpp Thu Apr 30 17:20:25 2015 -0700 +++ b/src/cpu/ppc/vm/interp_masm_ppc_64.hpp Fri May 01 03:56:01 2015 -0700 @@ -1,6 +1,6 @@ /* - * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved. - * Copyright 2012, 2014 SAP AG. All rights reserved. + * Copyright (c) 2002, 2015, Oracle and/or its affiliates. All rights reserved. + * Copyright 2012, 2015 SAP AG. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -85,7 +85,7 @@ Register tmp1, Register tmp2, Register tmp3, Label &ok_is_subtype); // Load object from cpool->resolved_references(index). - void load_resolved_reference_at_index(Register result, Register index); + void load_resolved_reference_at_index(Register result, Register index, Label *is_null = NULL); void generate_stack_overflow_check_with_compare_and_throw(Register Rmem_frame_size, Register Rscratch1); void load_receiver(Register Rparam_count, Register Rrecv_dst); @@ -137,7 +137,6 @@ void field_offset_at(int n, Register tmp, Register dest, Register base); int field_offset_at(Register object, address bcp, int offset); void fast_iaaccess(int n, address bcp); - void fast_iagetfield(address bcp); void fast_iaputfield(address bcp, bool do_store_check); void index_check(Register array, Register index, int index_shift, Register tmp, Register res);
--- a/src/cpu/ppc/vm/interpreter_ppc.cpp Thu Apr 30 17:20:25 2015 -0700 +++ b/src/cpu/ppc/vm/interpreter_ppc.cpp Fri May 01 03:56:01 2015 -0700 @@ -1,6 +1,6 @@ /* - * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. - * Copyright 2012, 2014 SAP AG. All rights reserved. + * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved. + * Copyright 2012, 2015 SAP AG. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -427,7 +427,6 @@ return entry; } - // Call an accessor method (assuming it is resolved, otherwise drop into // vanilla (slow path) entry. address InterpreterGenerator::generate_jump_to_normal_entry(void) { @@ -473,7 +472,8 @@ // This is not a leaf but we have a JavaFrameAnchor now and we will // check (create) exceptions afterward so this is ok. - __ call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_AbstractMethodError)); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_AbstractMethodError), + R16_thread); // Pop the C frame and restore LR. __ pop_frame();
--- a/src/cpu/ppc/vm/interpreter_ppc.hpp Thu Apr 30 17:20:25 2015 -0700 +++ b/src/cpu/ppc/vm/interpreter_ppc.hpp Fri May 01 03:56:01 2015 -0700 @@ -1,6 +1,6 @@ /* - * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved. - * Copyright 2012, 2014 SAP AG. All rights reserved. + * Copyright (c) 2002, 2015, Oracle and/or its affiliates. All rights reserved. + * Copyright 2012, 2015 SAP AG. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -47,4 +47,4 @@ } #endif -#endif // CPU_PPC_VM_INTERPRETER_PPC_PP +#endif // CPU_PPC_VM_INTERPRETER_PPC_HPP
--- a/src/cpu/ppc/vm/macroAssembler_ppc.cpp Thu Apr 30 17:20:25 2015 -0700 +++ b/src/cpu/ppc/vm/macroAssembler_ppc.cpp Fri May 01 03:56:01 2015 -0700 @@ -1,6 +1,6 @@ /* * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved. - * Copyright 2012, 2014 SAP AG. All rights reserved. + * Copyright 2012, 2015 SAP AG. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -1455,7 +1455,7 @@ // Several special cases exist to avoid that unnecessary information is generated. // void MacroAssembler::cmpxchgd(ConditionRegister flag, - Register dest_current_value, Register compare_value, Register exchange_value, + Register dest_current_value, RegisterOrConstant compare_value, Register exchange_value, Register addr_base, int semantics, bool cmpxchgx_hint, Register int_flag_success, Label* failed_ext, bool contention_hint) { Label retry; @@ -1465,7 +1465,7 @@ // Save one branch if result is returned via register and result register is different from the other ones. bool use_result_reg = (int_flag_success!=noreg); - bool preset_result_reg = (int_flag_success!=dest_current_value && int_flag_success!=compare_value && + bool preset_result_reg = (int_flag_success!=dest_current_value && int_flag_success!=compare_value.register_or_noreg() && int_flag_success!=exchange_value && int_flag_success!=addr_base); assert(int_flag_success == noreg || failed_ext == NULL, "cannot have both"); @@ -1481,7 +1481,7 @@ // Add simple guard in order to reduce risk of starving under high contention (recommended by IBM). if (contention_hint) { // Don't try to reserve if cmp fails. ld(dest_current_value, 0, addr_base); - cmpd(flag, dest_current_value, compare_value); + cmpd(flag, compare_value, dest_current_value); bne(flag, failed); } @@ -1489,7 +1489,7 @@ bind(retry); ldarx(dest_current_value, addr_base, cmpxchgx_hint); - cmpd(flag, dest_current_value, compare_value); + cmpd(flag, compare_value, dest_current_value); if (UseStaticBranchPredictionInCompareAndSwapPPC64) { bne_predict_not_taken(flag, failed); } else { @@ -1873,7 +1873,6 @@ assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0"); // CmpxchgX sets cr_reg to cmpX(temp2_reg, mark_reg). - fence(); // TODO: replace by MacroAssembler::MemBarRel | MacroAssembler::MemBarAcq ? cmpxchgd(/*flag=*/cr_reg, /*current_value=*/temp2_reg, /*compare_value=*/mark_reg, /*exchange_value=*/temp_reg, /*where=*/obj_reg, @@ -1909,7 +1908,6 @@ assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0"); // CmpxchgX sets cr_reg to cmpX(temp2_reg, mark_reg). - fence(); // TODO: replace by MacroAssembler::MemBarRel | MacroAssembler::MemBarAcq ? cmpxchgd(/*flag=*/cr_reg, /*current_value=*/temp2_reg, /*compare_value=*/mark_reg, /*exchange_value=*/temp_reg, /*where=*/obj_reg, @@ -1946,7 +1944,6 @@ assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0"); // CmpxchgX sets cr_reg to cmpX(temp2_reg, mark_reg). - fence(); // TODO: replace by MacroAssembler::MemBarRel | MacroAssembler::MemBarAcq ? cmpxchgd(/*flag=*/cr_reg, /*current_value=*/temp2_reg, /*compare_value=*/mark_reg, /*exchange_value=*/temp_reg, /*where=*/obj_reg, @@ -1987,9 +1984,371 @@ beq(cr_reg, done); } +// TM on PPC64. 
+void MacroAssembler::atomic_inc_ptr(Register addr, Register result, int simm16) { + Label retry; + bind(retry); + ldarx(result, addr, /*hint*/ false); + addi(result, result, simm16); + stdcx_(result, addr); + if (UseStaticBranchPredictionInCompareAndSwapPPC64) { + bne_predict_not_taken(CCR0, retry); // stXcx_ sets CCR0 + } else { + bne( CCR0, retry); // stXcx_ sets CCR0 + } +} + +void MacroAssembler::atomic_ori_int(Register addr, Register result, int uimm16) { + Label retry; + bind(retry); + lwarx(result, addr, /*hint*/ false); + ori(result, result, uimm16); + stwcx_(result, addr); + if (UseStaticBranchPredictionInCompareAndSwapPPC64) { + bne_predict_not_taken(CCR0, retry); // stXcx_ sets CCR0 + } else { + bne( CCR0, retry); // stXcx_ sets CCR0 + } +} + +#if INCLUDE_RTM_OPT + +// Update rtm_counters based on abort status +// input: abort_status +// rtm_counters (RTMLockingCounters*) +void MacroAssembler::rtm_counters_update(Register abort_status, Register rtm_counters_Reg) { + // Mapping to keep PreciseRTMLockingStatistics similar to x86. + // x86 ppc (! means inverted, ? means not the same) + // 0 31 Set if abort caused by XABORT instruction. + // 1 ! 7 If set, the transaction may succeed on a retry. This bit is always clear if bit 0 is set. + // 2 13 Set if another logical processor conflicted with a memory address that was part of the transaction that aborted. + // 3 10 Set if an internal buffer overflowed. + // 4 ?12 Set if a debug breakpoint was hit. + // 5 ?32 Set if an abort occurred during execution of a nested transaction. + const int tm_failure_bit[] = {Assembler::tm_tabort, // Note: Seems like signal handler sets this, too. + Assembler::tm_failure_persistent, // inverted: transient + Assembler::tm_trans_cf, + Assembler::tm_footprint_of, + Assembler::tm_non_trans_cf, + Assembler::tm_suspended}; + const bool tm_failure_inv[] = {false, true, false, false, false, false}; + assert(sizeof(tm_failure_bit)/sizeof(int) == RTMLockingCounters::ABORT_STATUS_LIMIT, "adapt mapping!"); + + const Register addr_Reg = R0; + // Keep track of offset to where rtm_counters_Reg had pointed to. 
+ int counters_offs = RTMLockingCounters::abort_count_offset(); + addi(addr_Reg, rtm_counters_Reg, counters_offs); + const Register temp_Reg = rtm_counters_Reg; + + //atomic_inc_ptr(addr_Reg, temp_Reg); We don't increment atomically + ldx(temp_Reg, addr_Reg); + addi(temp_Reg, temp_Reg, 1); + stdx(temp_Reg, addr_Reg); + + if (PrintPreciseRTMLockingStatistics) { + int counters_offs_delta = RTMLockingCounters::abortX_count_offset() - counters_offs; + + //mftexasr(abort_status); done by caller + for (int i = 0; i < RTMLockingCounters::ABORT_STATUS_LIMIT; i++) { + counters_offs += counters_offs_delta; + li(temp_Reg, counters_offs_delta); // can't use addi with R0 + add(addr_Reg, addr_Reg, temp_Reg); // point to next counter + counters_offs_delta = sizeof(uintx); + + Label check_abort; + rldicr_(temp_Reg, abort_status, tm_failure_bit[i], 0); + if (tm_failure_inv[i]) { + bne(CCR0, check_abort); + } else { + beq(CCR0, check_abort); + } + //atomic_inc_ptr(addr_Reg, temp_Reg); We don't increment atomically + ldx(temp_Reg, addr_Reg); + addi(temp_Reg, temp_Reg, 1); + stdx(temp_Reg, addr_Reg); + bind(check_abort); + } + } + li(temp_Reg, -counters_offs); // can't use addi with R0 + add(rtm_counters_Reg, addr_Reg, temp_Reg); // restore +} + +// Branch if (random & (count-1) != 0), count is 2^n +// tmp and CR0 are killed +void MacroAssembler::branch_on_random_using_tb(Register tmp, int count, Label& brLabel) { + mftb(tmp); + andi_(tmp, tmp, count-1); + bne(CCR0, brLabel); +} + +// Perform abort ratio calculation, set no_rtm bit if high ratio. +// input: rtm_counters_Reg (RTMLockingCounters* address) - KILLED +void MacroAssembler::rtm_abort_ratio_calculation(Register rtm_counters_Reg, + RTMLockingCounters* rtm_counters, + Metadata* method_data) { + Label L_done, L_check_always_rtm1, L_check_always_rtm2; + + if (RTMLockingCalculationDelay > 0) { + // Delay calculation. + ld(rtm_counters_Reg, (RegisterOrConstant)(intptr_t)RTMLockingCounters::rtm_calculation_flag_addr()); + cmpdi(CCR0, rtm_counters_Reg, 0); + beq(CCR0, L_done); + load_const_optimized(rtm_counters_Reg, (address)rtm_counters, R0); // reload + } + // Abort ratio calculation only if abort_count > RTMAbortThreshold. + // Aborted transactions = abort_count * 100 + // All transactions = total_count * RTMTotalCountIncrRate + // Set no_rtm bit if (Aborted transactions >= All transactions * RTMAbortRatio) + ld(R0, RTMLockingCounters::abort_count_offset(), rtm_counters_Reg); + cmpdi(CCR0, R0, RTMAbortThreshold); + blt(CCR0, L_check_always_rtm2); + mulli(R0, R0, 100); + + const Register tmpReg = rtm_counters_Reg; + ld(tmpReg, RTMLockingCounters::total_count_offset(), rtm_counters_Reg); + mulli(tmpReg, tmpReg, RTMTotalCountIncrRate); + mulli(tmpReg, tmpReg, RTMAbortRatio); + cmpd(CCR0, R0, tmpReg); + blt(CCR0, L_check_always_rtm1); // jump to reload + if (method_data != NULL) { + // Set rtm_state to "no rtm" in MDO. + // Not using a metadata relocation. Method and Class Loader are kept alive anyway. + // (See nmethod::metadata_do and CodeBuffer::finalize_oop_references.) 
+ load_const(R0, (address)method_data + MethodData::rtm_state_offset_in_bytes(), tmpReg); + atomic_ori_int(R0, tmpReg, NoRTM); + } + b(L_done); + + bind(L_check_always_rtm1); + load_const_optimized(rtm_counters_Reg, (address)rtm_counters, R0); // reload + bind(L_check_always_rtm2); + ld(tmpReg, RTMLockingCounters::total_count_offset(), rtm_counters_Reg); + cmpdi(CCR0, tmpReg, RTMLockingThreshold / RTMTotalCountIncrRate); + blt(CCR0, L_done); + if (method_data != NULL) { + // Set rtm_state to "always rtm" in MDO. + // Not using a metadata relocation. See above. + load_const(R0, (address)method_data + MethodData::rtm_state_offset_in_bytes(), tmpReg); + atomic_ori_int(R0, tmpReg, UseRTM); + } + bind(L_done); +} + +// Update counters and perform abort ratio calculation. +// input: abort_status_Reg +void MacroAssembler::rtm_profiling(Register abort_status_Reg, Register temp_Reg, + RTMLockingCounters* rtm_counters, + Metadata* method_data, + bool profile_rtm) { + + assert(rtm_counters != NULL, "should not be NULL when profiling RTM"); + // Update rtm counters based on state at abort. + // Reads abort_status_Reg, updates flags. + assert_different_registers(abort_status_Reg, temp_Reg); + load_const_optimized(temp_Reg, (address)rtm_counters, R0); + rtm_counters_update(abort_status_Reg, temp_Reg); + if (profile_rtm) { + assert(rtm_counters != NULL, "should not be NULL when profiling RTM"); + rtm_abort_ratio_calculation(temp_Reg, rtm_counters, method_data); + } +} + +// Retry on abort if abort's status indicates non-persistent failure. +// inputs: retry_count_Reg +// : abort_status_Reg +// output: retry_count_Reg decremented by 1 +void MacroAssembler::rtm_retry_lock_on_abort(Register retry_count_Reg, Register abort_status_Reg, + Label& retryLabel, Label* checkRetry) { + Label doneRetry; + rldicr_(R0, abort_status_Reg, tm_failure_persistent, 0); + bne(CCR0, doneRetry); + if (checkRetry) { bind(*checkRetry); } + addic_(retry_count_Reg, retry_count_Reg, -1); + blt(CCR0, doneRetry); + smt_yield(); // Can't use wait(). No permission (SIGILL). + b(retryLabel); + bind(doneRetry); +} + +// Spin and retry if lock is busy. +// inputs: box_Reg (monitor address) +// : retry_count_Reg +// output: retry_count_Reg decremented by 1 +// CTR is killed +void MacroAssembler::rtm_retry_lock_on_busy(Register retry_count_Reg, Register owner_addr_Reg, Label& retryLabel) { + Label SpinLoop, doneRetry; + addic_(retry_count_Reg, retry_count_Reg, -1); + blt(CCR0, doneRetry); + li(R0, RTMSpinLoopCount); + mtctr(R0); + + bind(SpinLoop); + smt_yield(); // Can't use waitrsv(). No permission (SIGILL). + bdz(retryLabel); + ld(R0, 0, owner_addr_Reg); + cmpdi(CCR0, R0, 0); + bne(CCR0, SpinLoop); + b(retryLabel); + + bind(doneRetry); +} + +// Use RTM for normal stack locks. 
+// Input: objReg (object to lock) +void MacroAssembler::rtm_stack_locking(ConditionRegister flag, + Register obj, Register mark_word, Register tmp, + Register retry_on_abort_count_Reg, + RTMLockingCounters* stack_rtm_counters, + Metadata* method_data, bool profile_rtm, + Label& DONE_LABEL, Label& IsInflated) { + assert(UseRTMForStackLocks, "why call this otherwise?"); + assert(!UseBiasedLocking, "Biased locking is not supported with RTM locking"); + Label L_rtm_retry, L_decrement_retry, L_on_abort; + + if (RTMRetryCount > 0) { + load_const_optimized(retry_on_abort_count_Reg, RTMRetryCount); // Retry on abort + bind(L_rtm_retry); + } + andi_(R0, mark_word, markOopDesc::monitor_value); // inflated vs stack-locked|neutral|biased + bne(CCR0, IsInflated); + + if (PrintPreciseRTMLockingStatistics || profile_rtm) { + Label L_noincrement; + if (RTMTotalCountIncrRate > 1) { + branch_on_random_using_tb(tmp, (int)RTMTotalCountIncrRate, L_noincrement); + } + assert(stack_rtm_counters != NULL, "should not be NULL when profiling RTM"); + load_const_optimized(tmp, (address)stack_rtm_counters->total_count_addr(), R0); + //atomic_inc_ptr(tmp, /*temp, will be reloaded*/mark_word); We don't increment atomically + ldx(mark_word, tmp); + addi(mark_word, mark_word, 1); + stdx(mark_word, tmp); + bind(L_noincrement); + } + tbegin_(); + beq(CCR0, L_on_abort); + ld(mark_word, oopDesc::mark_offset_in_bytes(), obj); // Reload in transaction, conflicts need to be tracked. + andi(R0, mark_word, markOopDesc::biased_lock_mask_in_place); // look at 3 lock bits + cmpwi(flag, R0, markOopDesc::unlocked_value); // bits = 001 unlocked + beq(flag, DONE_LABEL); // all done if unlocked + + if (UseRTMXendForLockBusy) { + tend_(); + b(L_decrement_retry); + } else { + tabort_(); + } + bind(L_on_abort); + const Register abort_status_Reg = tmp; + mftexasr(abort_status_Reg); + if (PrintPreciseRTMLockingStatistics || profile_rtm) { + rtm_profiling(abort_status_Reg, /*temp*/mark_word, stack_rtm_counters, method_data, profile_rtm); + } + ld(mark_word, oopDesc::mark_offset_in_bytes(), obj); // reload + if (RTMRetryCount > 0) { + // Retry on lock abort if abort status is not permanent. + rtm_retry_lock_on_abort(retry_on_abort_count_Reg, abort_status_Reg, L_rtm_retry, &L_decrement_retry); + } else { + bind(L_decrement_retry); + } +} + +// Use RTM for inflating locks +// inputs: obj (object to lock) +// mark_word (current header - KILLED) +// boxReg (on-stack box address (displaced header location) - KILLED) +void MacroAssembler::rtm_inflated_locking(ConditionRegister flag, + Register obj, Register mark_word, Register boxReg, + Register retry_on_busy_count_Reg, Register retry_on_abort_count_Reg, + RTMLockingCounters* rtm_counters, + Metadata* method_data, bool profile_rtm, + Label& DONE_LABEL) { + assert(UseRTMLocking, "why call this otherwise?"); + Label L_rtm_retry, L_decrement_retry, L_on_abort; + // Clean monitor_value bit to get valid pointer. + int owner_offset = ObjectMonitor::owner_offset_in_bytes() - markOopDesc::monitor_value; + + // Store non-null, using boxReg instead of (intptr_t)markOopDesc::unused_mark(). + std(boxReg, BasicLock::displaced_header_offset_in_bytes(), boxReg); + const Register tmpReg = boxReg; + const Register owner_addr_Reg = mark_word; + addi(owner_addr_Reg, mark_word, owner_offset); + + if (RTMRetryCount > 0) { + load_const_optimized(retry_on_busy_count_Reg, RTMRetryCount); // Retry on lock busy. + load_const_optimized(retry_on_abort_count_Reg, RTMRetryCount); // Retry on abort. 
+ bind(L_rtm_retry); + } + if (PrintPreciseRTMLockingStatistics || profile_rtm) { + Label L_noincrement; + if (RTMTotalCountIncrRate > 1) { + branch_on_random_using_tb(R0, (int)RTMTotalCountIncrRate, L_noincrement); + } + assert(rtm_counters != NULL, "should not be NULL when profiling RTM"); + load_const(R0, (address)rtm_counters->total_count_addr(), tmpReg); + //atomic_inc_ptr(R0, tmpReg); We don't increment atomically + ldx(tmpReg, R0); + addi(tmpReg, tmpReg, 1); + stdx(tmpReg, R0); + bind(L_noincrement); + } + tbegin_(); + beq(CCR0, L_on_abort); + // We don't reload mark word. Will only be reset at safepoint. + ld(R0, 0, owner_addr_Reg); // Load in transaction, conflicts need to be tracked. + cmpdi(flag, R0, 0); + beq(flag, DONE_LABEL); + + if (UseRTMXendForLockBusy) { + tend_(); + b(L_decrement_retry); + } else { + tabort_(); + } + bind(L_on_abort); + const Register abort_status_Reg = tmpReg; + mftexasr(abort_status_Reg); + if (PrintPreciseRTMLockingStatistics || profile_rtm) { + rtm_profiling(abort_status_Reg, /*temp*/ owner_addr_Reg, rtm_counters, method_data, profile_rtm); + // Restore owner_addr_Reg + ld(mark_word, oopDesc::mark_offset_in_bytes(), obj); +#ifdef ASSERT + andi_(R0, mark_word, markOopDesc::monitor_value); + asm_assert_ne("must be inflated", 0xa754); // Deflating only allowed at safepoint. +#endif + addi(owner_addr_Reg, mark_word, owner_offset); + } + if (RTMRetryCount > 0) { + // Retry on lock abort if abort status is not permanent. + rtm_retry_lock_on_abort(retry_on_abort_count_Reg, abort_status_Reg, L_rtm_retry); + } + + // Appears unlocked - try to swing _owner from null to non-null. + cmpxchgd(flag, /*current val*/ R0, (intptr_t)0, /*new val*/ R16_thread, owner_addr_Reg, + MacroAssembler::MemBarRel | MacroAssembler::MemBarAcq, + MacroAssembler::cmpxchgx_hint_acquire_lock(), noreg, &L_decrement_retry, true); + + if (RTMRetryCount > 0) { + // success done else retry + b(DONE_LABEL); + bind(L_decrement_retry); + // Spin and retry if lock is busy. + rtm_retry_lock_on_busy(retry_on_busy_count_Reg, owner_addr_Reg, L_rtm_retry); + } else { + bind(L_decrement_retry); + } +} + +#endif // INCLUDE_RTM_OPT + // "The box" is the space on the stack where we copy the object mark. void MacroAssembler::compiler_fast_lock_object(ConditionRegister flag, Register oop, Register box, - Register temp, Register displaced_header, Register current_header) { + Register temp, Register displaced_header, Register current_header, + bool try_bias, + RTMLockingCounters* rtm_counters, + RTMLockingCounters* stack_rtm_counters, + Metadata* method_data, + bool use_rtm, bool profile_rtm) { assert_different_registers(oop, box, temp, displaced_header, current_header); assert(flag != CCR0, "bad condition register"); Label cont; @@ -2006,10 +2365,18 @@ return; } - if (UseBiasedLocking) { + if (try_bias) { biased_locking_enter(flag, oop, displaced_header, temp, current_header, cont); } +#if INCLUDE_RTM_OPT + if (UseRTMForStackLocks && use_rtm) { + rtm_stack_locking(flag, oop, displaced_header, temp, /*temp*/ current_header, + stack_rtm_counters, method_data, profile_rtm, + cont, object_has_monitor); + } +#endif // INCLUDE_RTM_OPT + // Handle existing monitor. if ((EmitSync & 0x02) == 0) { // The object has an existing monitor iff (mark & monitor_value) != 0. @@ -2066,14 +2433,22 @@ bind(object_has_monitor); // The object's monitor m is unlocked iff m->owner == NULL, // otherwise m->owner may contain a thread or a stack address. 
- // + +#if INCLUDE_RTM_OPT + // Use the same RTM locking code in 32- and 64-bit VM. + if (use_rtm) { + rtm_inflated_locking(flag, oop, displaced_header, box, temp, /*temp*/ current_header, + rtm_counters, method_data, profile_rtm, cont); + } else { +#endif // INCLUDE_RTM_OPT + // Try to CAS m->owner from NULL to current thread. addi(temp, displaced_header, ObjectMonitor::owner_offset_in_bytes()-markOopDesc::monitor_value); li(displaced_header, 0); // CmpxchgX sets flag to cmpX(current, displaced). cmpxchgd(/*flag=*/flag, /*current_value=*/current_header, - /*compare_value=*/displaced_header, + /*compare_value=*/(intptr_t)0, /*exchange_value=*/R16_thread, /*where=*/temp, MacroAssembler::MemBarRel | MacroAssembler::MemBarAcq, @@ -2095,6 +2470,10 @@ //asm_assert_mem4_isnot_zero(ObjectMonitor::OwnerIsThread_offset_in_bytes(), temp, // "monitor->OwnerIsThread shouldn't be 0", -1); # endif + +#if INCLUDE_RTM_OPT + } // use_rtm() +#endif } bind(cont); @@ -2103,7 +2482,8 @@ } void MacroAssembler::compiler_fast_unlock_object(ConditionRegister flag, Register oop, Register box, - Register temp, Register displaced_header, Register current_header) { + Register temp, Register displaced_header, Register current_header, + bool try_bias, bool use_rtm) { assert_different_registers(oop, box, temp, displaced_header, current_header); assert(flag != CCR0, "bad condition register"); Label cont; @@ -2115,10 +2495,24 @@ return; } - if (UseBiasedLocking) { + if (try_bias) { biased_locking_exit(flag, oop, current_header, cont); } +#if INCLUDE_RTM_OPT + if (UseRTMForStackLocks && use_rtm) { + assert(!UseBiasedLocking, "Biased locking is not supported with RTM locking"); + Label L_regular_unlock; + ld(current_header, oopDesc::mark_offset_in_bytes(), oop); // fetch markword + andi(R0, current_header, markOopDesc::biased_lock_mask_in_place); // look at 3 lock bits + cmpwi(flag, R0, markOopDesc::unlocked_value); // bits = 001 unlocked + bne(flag, L_regular_unlock); // else RegularLock + tend_(); // otherwise end... + b(cont); // ... and we're done + bind(L_regular_unlock); + } +#endif + // Find the lock address and load the displaced header from the stack. ld(displaced_header, BasicLock::displaced_header_offset_in_bytes(), box); @@ -2129,13 +2523,12 @@ // Handle existing monitor. if ((EmitSync & 0x02) == 0) { // The object has an existing monitor iff (mark & monitor_value) != 0. + RTM_OPT_ONLY( if (!(UseRTMForStackLocks && use_rtm)) ) // skip load if already done ld(current_header, oopDesc::mark_offset_in_bytes(), oop); - andi(temp, current_header, markOopDesc::monitor_value); - cmpdi(flag, temp, 0); - bne(flag, object_has_monitor); + andi_(R0, current_header, markOopDesc::monitor_value); + bne(CCR0, object_has_monitor); } - // Check if it is still a light weight lock, this is is true if we see // the stack address of the basicLock in the markOop of the object. // Cmpxchg sets flag to cmpd(current_header, box). @@ -2158,6 +2551,20 @@ bind(object_has_monitor); addi(current_header, current_header, -markOopDesc::monitor_value); // monitor ld(temp, ObjectMonitor::owner_offset_in_bytes(), current_header); + + // It's inflated. +#if INCLUDE_RTM_OPT + if (use_rtm) { + Label L_regular_inflated_unlock; + // Clean monitor_value bit to get valid pointer + cmpdi(flag, temp, 0); + bne(flag, L_regular_inflated_unlock); + tend_(); + b(cont); + bind(L_regular_inflated_unlock); + } +#endif + ld(displaced_header, ObjectMonitor::recursions_offset_in_bytes(), current_header); xorr(temp, R16_thread, temp); // Will be 0 if we are the owner. 
orr(temp, temp, displaced_header); // Will be 0 if there are 0 recursions. @@ -2441,6 +2848,8 @@ // oop_result // R16_thread->in_bytes(JavaThread::vm_result_offset()) + verify_thread(); + ld(oop_result, in_bytes(JavaThread::vm_result_offset()), R16_thread); li(R0, 0); std(R0, in_bytes(JavaThread::vm_result_offset()), R16_thread); @@ -2462,26 +2871,24 @@ std(R0, in_bytes(JavaThread::vm_result_2_offset()), R16_thread); } - -void MacroAssembler::encode_klass_not_null(Register dst, Register src) { +Register MacroAssembler::encode_klass_not_null(Register dst, Register src) { Register current = (src != noreg) ? src : dst; // Klass is in dst if no src provided. if (Universe::narrow_klass_base() != 0) { // Use dst as temp if it is free. - load_const(R0, Universe::narrow_klass_base(), (dst != current && dst != R0) ? dst : noreg); - sub(dst, current, R0); + sub_const_optimized(dst, current, Universe::narrow_klass_base(), R0); current = dst; } if (Universe::narrow_klass_shift() != 0) { srdi(dst, current, Universe::narrow_klass_shift()); current = dst; } - mr_if_needed(dst, current); // Move may be required. + return current; } void MacroAssembler::store_klass(Register dst_oop, Register klass, Register ck) { if (UseCompressedClassPointers) { - encode_klass_not_null(ck, klass); - stw(ck, oopDesc::klass_offset_in_bytes(), dst_oop); + Register compressedKlass = encode_klass_not_null(ck, klass); + stw(compressedKlass, oopDesc::klass_offset_in_bytes(), dst_oop); } else { std(klass, oopDesc::klass_offset_in_bytes(), dst_oop); } @@ -2514,8 +2921,7 @@ sldi(shifted_src, src, Universe::narrow_klass_shift()); } if (Universe::narrow_klass_base() != 0) { - load_const(R0, Universe::narrow_klass_base()); - add(dst, shifted_src, R0); + add_const_optimized(dst, shifted_src, Universe::narrow_klass_base(), R0); } }
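The tail of this file's diff changes encode_klass_not_null() to return the register that actually holds the encoded value, so store_klass() can store from wherever the result ended up instead of forcing a final move (the old mr_if_needed is gone). A standalone C++ sketch of that pattern and of the narrow-klass arithmetic follows; it is not HotSpot code, and the base and shift values are hypothetical.

#include <cstdint>
#include <cstdio>

// narrow = (klass - narrow_klass_base) >> narrow_klass_shift.
// Returning "where the result is" mirrors the new contract: the caller stores
// from the returned location, so no extra register move is needed.
struct Encoded { uint32_t value; const char* held_in; };

Encoded encode_klass_not_null(uintptr_t klass, uintptr_t base, int shift) {
  uintptr_t v = klass;
  const char* reg = "src";              // value starts out in the source register
  if (base != 0)  { v -= base; reg = "dst"; }
  if (shift != 0) { v >>= shift; reg = "dst"; }
  return Encoded{ (uint32_t)v, reg };   // caller stores from this register
}

int main() {
  Encoded e = encode_klass_not_null(0x800012340ULL, 0x800000000ULL, 3);
  printf("narrow=0x%x in %s\n", e.value, e.held_in);   // narrow=0x2468 in dst
  return 0;
}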
--- a/src/cpu/ppc/vm/macroAssembler_ppc.hpp Thu Apr 30 17:20:25 2015 -0700 +++ b/src/cpu/ppc/vm/macroAssembler_ppc.hpp Fri May 01 03:56:01 2015 -0700 @@ -1,6 +1,6 @@ /* - * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved. - * Copyright 2012, 2014 SAP AG. All rights reserved. + * Copyright (c) 2002, 2015, Oracle and/or its affiliates. All rights reserved. + * Copyright 2012, 2015 SAP AG. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -27,6 +27,7 @@ #define CPU_PPC_VM_MACROASSEMBLER_PPC_HPP #include "asm/assembler.hpp" +#include "runtime/rtmLocking.hpp" #include "utilities/macros.hpp" // MacroAssembler extends Assembler by a few frequently used macros. @@ -432,8 +433,8 @@ int semantics, bool cmpxchgx_hint = false, Register int_flag_success = noreg, bool contention_hint = false); void cmpxchgd(ConditionRegister flag, - Register dest_current_value, Register compare_value, Register exchange_value, Register addr_base, - int semantics, bool cmpxchgx_hint = false, + Register dest_current_value, RegisterOrConstant compare_value, Register exchange_value, + Register addr_base, int semantics, bool cmpxchgx_hint = false, Register int_flag_success = noreg, Label* failed = NULL, bool contention_hint = false); // interface method calling @@ -506,8 +507,42 @@ // biased locking exit case failed. void biased_locking_exit(ConditionRegister cr_reg, Register mark_addr, Register temp_reg, Label& done); - void compiler_fast_lock_object( ConditionRegister flag, Register oop, Register box, Register tmp1, Register tmp2, Register tmp3); - void compiler_fast_unlock_object(ConditionRegister flag, Register oop, Register box, Register tmp1, Register tmp2, Register tmp3); + void atomic_inc_ptr(Register addr, Register result, int simm16 = 1); + void atomic_ori_int(Register addr, Register result, int uimm16); + +#if INCLUDE_RTM_OPT + void rtm_counters_update(Register abort_status, Register rtm_counters); + void branch_on_random_using_tb(Register tmp, int count, Label& brLabel); + void rtm_abort_ratio_calculation(Register rtm_counters_reg, RTMLockingCounters* rtm_counters, + Metadata* method_data); + void rtm_profiling(Register abort_status_Reg, Register temp_Reg, + RTMLockingCounters* rtm_counters, Metadata* method_data, bool profile_rtm); + void rtm_retry_lock_on_abort(Register retry_count, Register abort_status, + Label& retryLabel, Label* checkRetry = NULL); + void rtm_retry_lock_on_busy(Register retry_count, Register owner_addr, Label& retryLabel); + void rtm_stack_locking(ConditionRegister flag, Register obj, Register mark_word, Register tmp, + Register retry_on_abort_count, + RTMLockingCounters* stack_rtm_counters, + Metadata* method_data, bool profile_rtm, + Label& DONE_LABEL, Label& IsInflated); + void rtm_inflated_locking(ConditionRegister flag, Register obj, Register mark_word, Register box, + Register retry_on_busy_count, Register retry_on_abort_count, + RTMLockingCounters* rtm_counters, + Metadata* method_data, bool profile_rtm, + Label& DONE_LABEL); +#endif + + void compiler_fast_lock_object(ConditionRegister flag, Register oop, Register box, + Register tmp1, Register tmp2, Register tmp3, + bool try_bias = UseBiasedLocking, + RTMLockingCounters* rtm_counters = NULL, + RTMLockingCounters* stack_rtm_counters = NULL, + Metadata* method_data = NULL, + bool use_rtm = false, bool profile_rtm = false); + + void compiler_fast_unlock_object(ConditionRegister flag, Register oop, 
Register box, + Register tmp1, Register tmp2, Register tmp3, + bool try_bias = UseBiasedLocking, bool use_rtm = false); // Support for serializing memory accesses between threads void serialize_memory(Register thread, Register tmp1, Register tmp2); @@ -576,7 +611,7 @@ Register tmp = noreg); // Null allowed. - inline void load_heap_oop(Register d, RegisterOrConstant offs, Register s1 = noreg); + inline void load_heap_oop(Register d, RegisterOrConstant offs, Register s1 = noreg, Label *is_null = NULL); // Encode/decode heap oop. Oop may not be null, else en/decoding goes wrong. // src == d allowed. @@ -593,7 +628,7 @@ void store_klass_gap(Register dst_oop, Register val = noreg); // Will store 0 if val not specified. static int instr_size_for_decode_klass_not_null(); void decode_klass_not_null(Register dst, Register src = noreg); - void encode_klass_not_null(Register dst, Register src = noreg); + Register encode_klass_not_null(Register dst, Register src = noreg); // Load common heap base into register. void reinit_heapbase(Register d, Register tmp = noreg);
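The rtm_retry_lock_on_abort()/rtm_retry_lock_on_busy() helpers declared above implement a bounded retry policy around the transactional fast path. A rough standalone model is sketched below in plain C++, assuming the TEXASR "failure persistent" indication can be treated as a single boolean; it is not HotSpot code.

#include <atomic>
#include <cstdint>
#include <cstdio>

// Retry a failed transaction only while the abort looks transient and retries
// remain; spin a bounded number of times while the monitor owner is non-zero.
struct AbortStatus { bool persistent; };   // models the TEXASR "failure persistent" bit

bool retry_after_abort(AbortStatus s, int& retries_left) {
  if (s.persistent) return false;          // permanent failure: fall back to the regular lock
  return retries_left-- > 0;               // transient: retry until the budget runs out
}

bool spin_until_unlocked(const std::atomic<intptr_t>& owner, int spin_count) {
  for (int i = 0; i < spin_count; i++) {   // RTMSpinLoopCount in the patch
    if (owner.load(std::memory_order_relaxed) == 0) return true;
  }
  return false;                            // still busy after the spin budget
}

int main() {
  std::atomic<intptr_t> owner(0);
  int retries = 5;                         // RTMRetryCount default in the patch
  printf("%d %d\n", retry_after_abort(AbortStatus{false}, retries),
         spin_until_unlocked(owner, 100));
  return 0;
}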
--- a/src/cpu/ppc/vm/macroAssembler_ppc.inline.hpp Thu Apr 30 17:20:25 2015 -0700 +++ b/src/cpu/ppc/vm/macroAssembler_ppc.inline.hpp Fri May 01 03:56:01 2015 -0700 @@ -1,6 +1,6 @@ /* - * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved. - * Copyright 2012, 2014 SAP AG. All rights reserved. + * Copyright (c) 2002, 2015, Oracle and/or its affiliates. All rights reserved. + * Copyright 2012, 2015 SAP AG. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -333,19 +333,29 @@ } } -inline void MacroAssembler::load_heap_oop(Register d, RegisterOrConstant offs, Register s1) { +inline void MacroAssembler::load_heap_oop(Register d, RegisterOrConstant offs, Register s1, Label *is_null) { if (UseCompressedOops) { lwz(d, offs, s1); - decode_heap_oop(d); + if (is_null != NULL) { + cmpwi(CCR0, d, 0); + beq(CCR0, *is_null); + decode_heap_oop_not_null(d); + } else { + decode_heap_oop(d); + } } else { ld(d, offs, s1); + if (is_null != NULL) { + cmpdi(CCR0, d, 0); + beq(CCR0, *is_null); + } } } inline Register MacroAssembler::encode_heap_oop_not_null(Register d, Register src) { Register current = (src != noreg) ? src : d; // Oop to be compressed is in d if no src provided. if (Universe::narrow_oop_base_overlaps()) { - sub(d, current, R30); + sub_const_optimized(d, current, Universe::narrow_oop_base(), R0); current = d; } if (Universe::narrow_oop_shift() != 0) { @@ -358,7 +368,7 @@ inline Register MacroAssembler::decode_heap_oop_not_null(Register d, Register src) { if (Universe::narrow_oop_base_disjoint() && src != noreg && src != d && Universe::narrow_oop_shift() != 0) { - mr(d, R30); + load_const_optimized(d, Universe::narrow_oop_base(), R0); rldimi(d, src, Universe::narrow_oop_shift(), 32-Universe::narrow_oop_shift()); return d; } @@ -369,7 +379,7 @@ current = d; } if (Universe::narrow_oop_base() != NULL) { - add(d, current, R30); + add_const_optimized(d, current, Universe::narrow_oop_base(), R0); current = d; } return current; // Decoded oop is in this register. @@ -377,11 +387,19 @@ inline void MacroAssembler::decode_heap_oop(Register d) { Label isNull; + bool use_isel = false; if (Universe::narrow_oop_base() != NULL) { cmpwi(CCR0, d, 0); - beq(CCR0, isNull); + if (VM_Version::has_isel()) { + use_isel = true; + } else { + beq(CCR0, isNull); + } } decode_heap_oop_not_null(d); + if (use_isel) { + isel_0(d, CCR0, Assembler::equal); + } bind(isNull); }
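The load_heap_oop() change above lets callers pass an is_null label so a zero narrow oop branches out before any decoding, and decode_heap_oop() now prefers an isel over a branch on CPUs that have it. A minimal standalone model of the decode path is sketched below in plain C++; it is not HotSpot code, and the base and shift values are hypothetical.

#include <cstdint>
#include <cstdio>

// A zero narrow oop must stay NULL; anything else becomes base + (narrow << shift).
// The is_null out-parameter stands in for the new label argument of load_heap_oop():
// callers that care can branch on "was null" and skip the decode entirely.
uintptr_t decode_heap_oop(uint32_t narrow, uintptr_t base, int shift, bool* is_null) {
  if (narrow == 0) {                   // the cmpwi/beq (or isel) in the patch
    if (is_null) *is_null = true;
    return 0;
  }
  if (is_null) *is_null = false;
  return base + ((uintptr_t)narrow << shift);   // decode_heap_oop_not_null()
}

int main() {
  bool was_null = false;
  uintptr_t oop = decode_heap_oop(0x1234, 0x700000000ULL, 3, &was_null);
  printf("%#lx null=%d\n", (unsigned long)oop, was_null);   // 0x7000091a0 null=0
  return 0;
}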
--- a/src/cpu/ppc/vm/methodHandles_ppc.cpp Thu Apr 30 17:20:25 2015 -0700 +++ b/src/cpu/ppc/vm/methodHandles_ppc.cpp Fri May 01 03:56:01 2015 -0700 @@ -466,7 +466,7 @@ strstr(adaptername, "linkTo") == NULL); // static linkers don't have MH const char* mh_reg_name = has_mh ? "R23_method_handle" : "G23"; tty->print_cr("MH %s %s="INTPTR_FORMAT " sp=" INTPTR_FORMAT, - adaptername, mh_reg_name, (intptr_t) mh, entry_sp); + adaptername, mh_reg_name, p2i(mh), p2i(entry_sp)); if (Verbose) { tty->print_cr("Registers:");
--- a/src/cpu/ppc/vm/methodHandles_ppc.hpp Thu Apr 30 17:20:25 2015 -0700 +++ b/src/cpu/ppc/vm/methodHandles_ppc.hpp Fri May 01 03:56:01 2015 -0700 @@ -1,6 +1,6 @@ /* - * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved. - * Copyright 2012, 2013 SAP AG. All rights reserved. + * Copyright (c) 2002, 2015, Oracle and/or its affiliates. All rights reserved. + * Copyright 2012, 2015 SAP AG. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -27,9 +27,6 @@ // These definitions are inlined into class MethodHandles. // Adapters -//static unsigned int adapter_code_size() { -// return 32*K DEBUG_ONLY(+ 16*K) + (TraceMethodHandles ? 16*K : 0) + (VerifyMethodHandles ? 32*K : 0); -//} enum /* platform_dependent_constants */ { adapter_code_size = NOT_LP64(16000 DEBUG_ONLY(+ 25000)) LP64_ONLY(32000 DEBUG_ONLY(+ 150000)) }; @@ -45,7 +42,9 @@ static void verify_method_handle(MacroAssembler* _masm, Register mh_reg, Register temp_reg, Register temp2_reg) { - Unimplemented(); + verify_klass(_masm, mh_reg, SystemDictionary::WK_KLASS_ENUM_NAME(java_lang_invoke_MethodHandle), + temp_reg, temp2_reg, + "reference is a MH"); } static void verify_ref_kind(MacroAssembler* _masm, int ref_kind, Register member_reg, Register temp) NOT_DEBUG_RETURN;
--- a/src/cpu/ppc/vm/ppc.ad Thu Apr 30 17:20:25 2015 -0700 +++ b/src/cpu/ppc/vm/ppc.ad Fri May 01 03:56:01 2015 -0700 @@ -447,8 +447,8 @@ R26, R27, R28, -/*R29*/ // global TOC -/*R30*/ // Narrow Oop Base +/*R29,*/ // global TOC + R30, R31 ); @@ -484,58 +484,11 @@ R26, R27, R28, -/*R29*/ -/*R30*/ // Narrow Oop Base +/*R29,*/ + R30, R31 ); -// Complement-required-in-pipeline operands for narrow oops. -reg_class bits32_reg_ro_not_complement ( -/*R0*/ // R0 - R1, // SP - R2, // TOC - R3, - R4, - R5, - R6, - R7, - R8, - R9, - R10, - R11, - R12, -/*R13,*/ // system thread id - R14, - R15, - R16, // R16_thread - R17, - R18, - R19, - R20, - R21, - R22, -/*R23, - R24, - R25, - R26, - R27, - R28,*/ -/*R29,*/ // TODO: let allocator handle TOC!! -/*R30,*/ - R31 -); - -// Complement-required-in-pipeline operands for narrow oops. -// See 64-bit declaration. -reg_class bits32_reg_ro_complement ( - R23, - R24, - R25, - R26, - R27, - R28 -); - reg_class rscratch1_bits32_reg(R11); reg_class rscratch2_bits32_reg(R12); reg_class rarg1_bits32_reg(R3); @@ -591,8 +544,8 @@ R26_H, R26, R27_H, R27, R28_H, R28, -/*R29_H, R29*/ -/*R30_H, R30*/ +/*R29_H, R29,*/ + R30_H, R30, R31_H, R31 ); @@ -629,8 +582,8 @@ R26_H, R26, R27_H, R27, R28_H, R28, -/*R29_H, R29*/ -/*R30_H, R30*/ +/*R29_H, R29,*/ + R30_H, R30, R31_H, R31 ); @@ -667,8 +620,8 @@ R26_H, R26, R27_H, R27, R28_H, R28, -/*R29_H, R29*/ -/*R30_H, R30*/ +/*R29_H, R29,*/ + R30_H, R30, R31_H, R31 ); @@ -704,64 +657,11 @@ R26_H, R26, R27_H, R27, R28_H, R28, -/*R29_H, R29*/ // TODO: let allocator handle TOC!! -/*R30_H, R30,*/ +/*R29_H, R29,*/ // TODO: let allocator handle TOC!! + R30_H, R30, R31_H, R31 ); -// Complement-required-in-pipeline operands. -reg_class bits64_reg_ro_not_complement ( -/*R0_H, R0*/ // R0 - R1_H, R1, // SP - R2_H, R2, // TOC - R3_H, R3, - R4_H, R4, - R5_H, R5, - R6_H, R6, - R7_H, R7, - R8_H, R8, - R9_H, R9, - R10_H, R10, - R11_H, R11, - R12_H, R12, -/*R13_H, R13*/ // system thread id - R14_H, R14, - R15_H, R15, - R16_H, R16, // R16_thread - R17_H, R17, - R18_H, R18, - R19_H, R19, - R20_H, R20, - R21_H, R21, - R22_H, R22, -/*R23_H, R23, - R24_H, R24, - R25_H, R25, - R26_H, R26, - R27_H, R27, - R28_H, R28,*/ -/*R29_H, R29*/ // TODO: let allocator handle TOC!! -/*R30_H, R30,*/ - R31_H, R31 -); - -// Complement-required-in-pipeline operands. -// This register mask is used for the trap instructions that implement -// the null checks on AIX. The trap instruction first computes the -// complement of the value it shall trap on. Because of this, the -// instruction can not be scheduled in the same cycle as an other -// instruction reading the normal value of the same register. So we -// force the value to check into 'bits64_reg_ro_not_complement' -// and then copy it to 'bits64_reg_ro_complement' for the trap. -reg_class bits64_reg_ro_complement ( - R23_H, R23, - R24_H, R24, - R25_H, R25, - R26_H, R26, - R27_H, R27, - R28_H, R28 -); - // ---------------------------- // Special Class for Condition Code Flags Register @@ -777,6 +677,17 @@ CCR7 ); +reg_class int_flags_ro( + CCR0, + CCR1, + CCR2, + CCR3, + CCR4, + CCR5, + CCR6, + CCR7 +); + reg_class int_flags_CR0(CCR0); reg_class int_flags_CR1(CCR1); reg_class int_flags_CR6(CCR6); @@ -2876,7 +2787,7 @@ // Use release_store for card-marking to ensure that previous // oop-stores are visible before the card-mark change. 
- enc_class enc_cms_card_mark(memory mem, iRegLdst releaseFieldAddr) %{ + enc_class enc_cms_card_mark(memory mem, iRegLdst releaseFieldAddr, flagsReg crx) %{ // TODO: PPC port $archOpcode(ppc64Opcode_compound); // FIXME: Implement this as a cmove and use a fixed condition code // register which is written on every transition to compiled code, @@ -2897,8 +2808,8 @@ // Check CMSCollectorCardTableModRefBSExt::_requires_release and do the // StoreStore barrier conditionally. __ lwz(R0, 0, $releaseFieldAddr$$Register); - __ cmpwi(CCR0, R0, 0); - __ beq_predict_taken(CCR0, skip_storestore); + __ cmpwi($crx$$CondRegister, R0, 0); + __ beq_predict_taken($crx$$CondRegister, skip_storestore); #endif __ li(R0, 0); __ membar(Assembler::StoreStore); @@ -3108,7 +3019,7 @@ nodes->push(n2); %} - enc_class enc_cmove_reg(iRegIdst dst, flagsReg crx, iRegIsrc src, cmpOp cmp) %{ + enc_class enc_cmove_reg(iRegIdst dst, flagsRegSrc crx, iRegIsrc src, cmpOp cmp) %{ // TODO: PPC port $archOpcode(ppc64Opcode_cmove); MacroAssembler _masm(&cbuf); @@ -3123,7 +3034,7 @@ __ bind(done); %} - enc_class enc_cmove_imm(iRegIdst dst, flagsReg crx, immI16 src, cmpOp cmp) %{ + enc_class enc_cmove_imm(iRegIdst dst, flagsRegSrc crx, immI16 src, cmpOp cmp) %{ // TODO: PPC port $archOpcode(ppc64Opcode_cmove); MacroAssembler _masm(&cbuf); @@ -3269,7 +3180,7 @@ __ bind(done); %} - enc_class enc_cmove_bso_stackSlotL(iRegLdst dst, flagsReg crx, stackSlotL mem ) %{ + enc_class enc_cmove_bso_stackSlotL(iRegLdst dst, flagsRegSrc crx, stackSlotL mem ) %{ // TODO: PPC port $archOpcode(ppc64Opcode_cmove); MacroAssembler _masm(&cbuf); @@ -3281,7 +3192,7 @@ __ bind(done); %} - enc_class enc_bc(flagsReg crx, cmpOp cmp, Label lbl) %{ + enc_class enc_bc(flagsRegSrc crx, cmpOp cmp, Label lbl) %{ // TODO: PPC port $archOpcode(ppc64Opcode_bc); MacroAssembler _masm(&cbuf); @@ -3309,7 +3220,7 @@ l); %} - enc_class enc_bc_far(flagsReg crx, cmpOp cmp, Label lbl) %{ + enc_class enc_bc_far(flagsRegSrc crx, cmpOp cmp, Label lbl) %{ // The scheduler doesn't know about branch shortening, so we set the opcode // to ppc64Opcode_bc in order to hide this detail from the scheduler. // TODO: PPC port $archOpcode(ppc64Opcode_bc); @@ -3341,7 +3252,7 @@ %} // Branch used with Power6 scheduling (can be shortened without changing the node). - enc_class enc_bc_short_far(flagsReg crx, cmpOp cmp, Label lbl) %{ + enc_class enc_bc_short_far(flagsRegSrc crx, cmpOp cmp, Label lbl) %{ // The scheduler doesn't know about branch shortening, so we set the opcode // to ppc64Opcode_bc in order to hide this detail from the scheduler. 
// TODO: PPC port $archOpcode(ppc64Opcode_bc); @@ -4700,6 +4611,15 @@ interface(REG_INTER); %} +operand flagsRegSrc() %{ + constraint(ALLOC_IN_RC(int_flags_ro)); + match(RegFlags); + match(flagsReg); + match(flagsRegCR0); + format %{ %} + interface(REG_INTER); +%} + // Condition Code Flag Register CR0 operand flagsRegCR0() %{ constraint(ALLOC_IN_RC(int_flags_CR0)); @@ -4783,6 +4703,13 @@ predicate(false /* TODO: PPC port MatchDecodeNodes*/); constraint(ALLOC_IN_RC(bits32_reg_ro)); match(DecodeN reg); + format %{ "$reg" %} + interface(REG_INTER) +%} + +operand iRegN2P_klass(iRegNsrc reg) %{ + predicate(Universe::narrow_klass_base() == NULL && Universe::narrow_klass_shift() == 0); + constraint(ALLOC_IN_RC(bits32_reg_ro)); match(DecodeNKlass reg); format %{ "$reg" %} interface(REG_INTER) @@ -4839,6 +4766,19 @@ predicate(false /* TODO: PPC port MatchDecodeNodes*/); constraint(ALLOC_IN_RC(bits64_reg_ro)); match(DecodeN reg); + op_cost(100); + format %{ "[$reg]" %} + interface(MEMORY_INTER) %{ + base($reg); + index(0x0); + scale(0x0); + disp(0x0); + %} +%} + +operand indirectNarrow_klass(iRegNsrc reg) %{ + predicate(Universe::narrow_klass_base() == NULL && Universe::narrow_klass_shift() == 0); + constraint(ALLOC_IN_RC(bits64_reg_ro)); match(DecodeNKlass reg); op_cost(100); format %{ "[$reg]" %} @@ -4855,6 +4795,19 @@ predicate(false /* TODO: PPC port MatchDecodeNodes*/); constraint(ALLOC_IN_RC(bits64_reg_ro)); match(AddP (DecodeN reg) offset); + op_cost(100); + format %{ "[$reg + $offset]" %} + interface(MEMORY_INTER) %{ + base($reg); + index(0x0); + scale(0x0); + disp($offset); + %} +%} + +operand indOffset16Narrow_klass(iRegNsrc reg, immL16 offset) %{ + predicate(Universe::narrow_klass_base() == NULL && Universe::narrow_klass_shift() == 0); + constraint(ALLOC_IN_RC(bits64_reg_ro)); match(AddP (DecodeNKlass reg) offset); op_cost(100); format %{ "[$reg + $offset]" %} @@ -4871,6 +4824,19 @@ predicate(false /* TODO: PPC port MatchDecodeNodes*/); constraint(ALLOC_IN_RC(bits64_reg_ro)); match(AddP (DecodeN reg) offset); + op_cost(100); + format %{ "[$reg + $offset]" %} + interface(MEMORY_INTER) %{ + base($reg); + index(0x0); + scale(0x0); + disp($offset); + %} +%} + +operand indOffset16NarrowAlg4_klass(iRegNsrc reg, immL16Alg4 offset) %{ + predicate(Universe::narrow_klass_base() == NULL && Universe::narrow_klass_shift() == 0); + constraint(ALLOC_IN_RC(bits64_reg_ro)); match(AddP (DecodeNKlass reg) offset); op_cost(100); format %{ "[$reg + $offset]" %} @@ -4998,9 +4964,9 @@ // encoding and format. The classic case of this is memory operands. // Indirect is not included since its use is limited to Compare & Swap. -opclass memory(indirect, indOffset16 /*, indIndex, tlsReference*/, indirectNarrow, indOffset16Narrow); +opclass memory(indirect, indOffset16 /*, indIndex, tlsReference*/, indirectNarrow, indirectNarrow_klass, indOffset16Narrow, indOffset16Narrow_klass); // Memory operand where offsets are 4-aligned. Required for ld, std. -opclass memoryAlg4(indirect, indOffset16Alg4, indirectNarrow, indOffset16NarrowAlg4); +opclass memoryAlg4(indirect, indOffset16Alg4, indirectNarrow, indOffset16NarrowAlg4, indOffset16NarrowAlg4_klass); opclass indirectMemory(indirect, indirectNarrow); // Special opclass for I and ConvL2I. @@ -5009,7 +4975,7 @@ // Operand classes to match encode and decode. iRegN_P2N is only used // for storeN. I have never seen an encode node elsewhere. 
opclass iRegN_P2N(iRegNsrc, iRegP2N); -opclass iRegP_N2P(iRegPsrc, iRegN2P); +opclass iRegP_N2P(iRegPsrc, iRegN2P, iRegN2P_klass); //----------PIPELINE----------------------------------------------------------- @@ -5593,6 +5559,19 @@ ins_pipe(pipe_class_memory); %} +instruct loadN2P_klass_unscaled(iRegPdst dst, memory mem) %{ + match(Set dst (DecodeNKlass (LoadNKlass mem))); + // SAPJVM GL 2014-05-21 Differs. + predicate(Universe::narrow_klass_base() == NULL && Universe::narrow_klass_shift() == 0 && + _kids[0]->_leaf->as_Load()->is_unordered()); + ins_cost(MEMORY_REF_COST); + + format %{ "LWZ $dst, $mem \t// DecodeN (unscaled)" %} + size(4); + ins_encode( enc_lwz(dst, mem) ); + ins_pipe(pipe_class_memory); +%} + // Load Pointer instruct loadP(iRegPdst dst, memoryAlg4 mem) %{ match(Set dst (LoadP mem)); @@ -5669,8 +5648,9 @@ %} // Load Float acquire. -instruct loadF_ac(regF dst, memory mem) %{ +instruct loadF_ac(regF dst, memory mem, flagsRegCR0 cr0) %{ match(Set dst (LoadF mem)); + effect(TEMP cr0); ins_cost(3*MEMORY_REF_COST); format %{ "LFS $dst, $mem \t// acquire\n\t" @@ -5705,8 +5685,9 @@ %} // Load Double - aligned acquire. -instruct loadD_ac(regD dst, memory mem) %{ +instruct loadD_ac(regD dst, memory mem, flagsRegCR0 cr0) %{ match(Set dst (LoadD mem)); + effect(TEMP cr0); ins_cost(3*MEMORY_REF_COST); format %{ "LFD $dst, $mem \t// acquire\n\t" @@ -6034,11 +6015,10 @@ instruct loadBase(iRegLdst dst) %{ effect(DEF dst); - format %{ "MR $dst, r30_heapbase" %} - size(4); - ins_encode %{ - // TODO: PPC port $archOpcode(ppc64Opcode_or); - __ mr($dst$$Register, R30); + format %{ "LoadConst $dst, heapbase" %} + ins_encode %{ + // TODO: PPC port $archOpcode(ppc64Opcode_compound); + __ load_const_optimized($dst$$Register, Universe::narrow_oop_base(), R0); %} ins_pipe(pipe_class_default); %} @@ -6114,7 +6094,7 @@ effect(TEMP src2); ins_cost(DEFAULT_COST); - format %{ "ORI $dst, $src1, $src2 \t// narrow klass lo" %} + format %{ "ORI $dst, $src1, $src2 \t// narrow klass lo" %} size(4); ins_encode %{ // TODO: PPC port $archOpcode(ppc64Opcode_ori); @@ -6563,8 +6543,9 @@ // do a releasing store. For this, it gets the address of // CMSCollectorCardTableModRefBSExt::_requires_release as input. // (Using releaseFieldAddr in the match rule is a hack.) 
-instruct storeCM_CMS(memory mem, iRegLdst releaseFieldAddr) %{ +instruct storeCM_CMS(memory mem, iRegLdst releaseFieldAddr, flagsReg crx) %{ match(Set mem (StoreCM mem releaseFieldAddr)); + effect(TEMP crx); predicate(false); ins_cost(MEMORY_REF_COST); @@ -6572,7 +6553,7 @@ ins_cannot_rematerialize(true); format %{ "STB #0, $mem \t// CMS card-mark byte (must be 0!), checking requires_release in [$releaseFieldAddr]" %} - ins_encode( enc_cms_card_mark(mem, releaseFieldAddr) ); + ins_encode( enc_cms_card_mark(mem, releaseFieldAddr, crx) ); ins_pipe(pipe_class_memory); %} @@ -6589,8 +6570,9 @@ expand %{ immL baseImm %{ 0 /* TODO: PPC port (jlong)CMSCollectorCardTableModRefBSExt::requires_release_address() */ %} iRegLdst releaseFieldAddress; + flagsReg crx; loadConL_Ex(releaseFieldAddress, baseImm); - storeCM_CMS(mem, releaseFieldAddress); + storeCM_CMS(mem, releaseFieldAddress, crx); %} %} @@ -6639,39 +6621,34 @@ predicate(false); format %{ "SUB $dst, $src, oop_base \t// encode" %} - size(4); - ins_encode %{ - // TODO: PPC port $archOpcode(ppc64Opcode_subf); - __ subf($dst$$Register, R30, $src$$Register); + ins_encode %{ + // TODO: PPC port $archOpcode(ppc64Opcode_compound); + __ sub_const_optimized($dst$$Register, $src$$Register, Universe::narrow_oop_base(), R0); %} ins_pipe(pipe_class_default); %} // Conditional sub base. -instruct cond_sub_base(iRegNdst dst, flagsReg crx, iRegPsrc src1) %{ +instruct cond_sub_base(iRegNdst dst, flagsRegSrc crx, iRegPsrc src1) %{ // The match rule is needed to make it a 'MachTypeNode'! match(Set dst (EncodeP (Binary crx src1))); predicate(false); - ins_variable_size_depending_on_alignment(true); - format %{ "BEQ $crx, done\n\t" - "SUB $dst, $src1, R30 \t// encode: subtract base if != NULL\n" + "SUB $dst, $src1, heapbase \t// encode: subtract base if != NULL\n" "done:" %} - size(false /* TODO: PPC PORT (InsertEndGroupPPC64 && Compile::current()->do_hb_scheduling())*/ ? 12 : 8); - ins_encode %{ - // TODO: PPC port $archOpcode(ppc64Opcode_cmove); + ins_encode %{ + // TODO: PPC port $archOpcode(ppc64Opcode_compound); Label done; __ beq($crx$$CondRegister, done); - __ subf($dst$$Register, R30, $src1$$Register); - // TODO PPC port __ endgroup_if_needed(_size == 12); + __ sub_const_optimized($dst$$Register, $src1$$Register, Universe::narrow_oop_base(), R0); __ bind(done); %} ins_pipe(pipe_class_default); %} // Power 7 can use isel instruction -instruct cond_set_0_oop(iRegNdst dst, flagsReg crx, iRegPsrc src1) %{ +instruct cond_set_0_oop(iRegNdst dst, flagsRegSrc crx, iRegPsrc src1) %{ // The match rule is needed to make it a 'MachTypeNode'! match(Set dst (EncodeP (Binary crx src1))); predicate(false); @@ -6777,42 +6754,37 @@ match(Set dst (DecodeN src)); predicate(false); - format %{ "ADD $dst, $src, R30 \t// DecodeN, add oop base" %} - size(4); - ins_encode %{ - // TODO: PPC port $archOpcode(ppc64Opcode_add); - __ add($dst$$Register, $src$$Register, R30); + format %{ "ADD $dst, $src, heapbase \t// DecodeN, add oop base" %} + ins_encode %{ + // TODO: PPC port $archOpcode(ppc64Opcode_compound); + __ add_const_optimized($dst$$Register, $src$$Register, Universe::narrow_oop_base(), R0); %} ins_pipe(pipe_class_default); %} // conditianal add base for expand -instruct cond_add_base(iRegPdst dst, flagsReg crx, iRegPsrc src1) %{ +instruct cond_add_base(iRegPdst dst, flagsRegSrc crx, iRegPsrc src) %{ // The match rule is needed to make it a 'MachTypeNode'! 
// NOTICE that the rule is nonsense - we just have to make sure that: // - _matrule->_rChild->_opType == "DecodeN" (see InstructForm::captures_bottom_type() in formssel.cpp) // - we have to match 'crx' to avoid an "illegal USE of non-input: flagsReg crx" error in ADLC. - match(Set dst (DecodeN (Binary crx src1))); + match(Set dst (DecodeN (Binary crx src))); predicate(false); - ins_variable_size_depending_on_alignment(true); - format %{ "BEQ $crx, done\n\t" - "ADD $dst, $src1, R30 \t// DecodeN: add oop base if $src1 != NULL\n" + "ADD $dst, $src, heapbase \t// DecodeN: add oop base if $src != NULL\n" "done:" %} - size(false /* TODO: PPC PORT (InsertEndGroupPPC64 && Compile::current()->do_hb_scheduling()) */? 12 : 8); - ins_encode %{ - // TODO: PPC port $archOpcode(ppc64Opcode_cmove); + ins_encode %{ + // TODO: PPC port $archOpcode(ppc64Opcode_compound); Label done; __ beq($crx$$CondRegister, done); - __ add($dst$$Register, $src1$$Register, R30); - // TODO PPC port __ endgroup_if_needed(_size == 12); + __ add_const_optimized($dst$$Register, $src$$Register, Universe::narrow_oop_base(), R0); __ bind(done); %} ins_pipe(pipe_class_default); %} -instruct cond_set_0_ptr(iRegPdst dst, flagsReg crx, iRegPsrc src1) %{ +instruct cond_set_0_ptr(iRegPdst dst, flagsRegSrc crx, iRegPsrc src1) %{ // The match rule is needed to make it a 'MachTypeNode'! // NOTICE that the rule is nonsense - we just have to make sure that: // - _matrule->_rChild->_opType == "DecodeN" (see InstructForm::captures_bottom_type() in formssel.cpp) @@ -6888,7 +6860,7 @@ Universe::narrow_oop_base_disjoint()); ins_cost(DEFAULT_COST); - format %{ "MOV $dst, R30 \t\n" + format %{ "MOV $dst, heapbase \t\n" "RLDIMI $dst, $src, shift, 32-shift \t// decode with disjoint base" %} postalloc_expand %{ loadBaseNode *n1 = new loadBaseNode(); @@ -6946,7 +6918,7 @@ assert(ra_->is_oop(this) == true, "A decodeN node must produce an oop!"); ra_->set_oop(n_cond_set, true); - + ra_->set_pair(n1->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); ra_->set_pair(n_compare->_idx, ra_->get_reg_second(n_crx), ra_->get_reg_first(n_crx)); ra_->set_pair(n2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); @@ -7303,7 +7275,7 @@ //----------Conditional Move--------------------------------------------------- // Cmove using isel. -instruct cmovI_reg_isel(cmpOp cmp, flagsReg crx, iRegIdst dst, iRegIsrc src) %{ +instruct cmovI_reg_isel(cmpOp cmp, flagsRegSrc crx, iRegIdst dst, iRegIsrc src) %{ match(Set dst (CMoveI (Binary cmp crx) (Binary dst src))); predicate(VM_Version::has_isel()); ins_cost(DEFAULT_COST); @@ -7321,7 +7293,7 @@ ins_pipe(pipe_class_default); %} -instruct cmovI_reg(cmpOp cmp, flagsReg crx, iRegIdst dst, iRegIsrc src) %{ +instruct cmovI_reg(cmpOp cmp, flagsRegSrc crx, iRegIdst dst, iRegIsrc src) %{ match(Set dst (CMoveI (Binary cmp crx) (Binary dst src))); predicate(!VM_Version::has_isel()); ins_cost(DEFAULT_COST+BRANCH_COST); @@ -7335,7 +7307,7 @@ ins_pipe(pipe_class_default); %} -instruct cmovI_imm(cmpOp cmp, flagsReg crx, iRegIdst dst, immI16 src) %{ +instruct cmovI_imm(cmpOp cmp, flagsRegSrc crx, iRegIdst dst, immI16 src) %{ match(Set dst (CMoveI (Binary cmp crx) (Binary dst src))); ins_cost(DEFAULT_COST+BRANCH_COST); @@ -7349,7 +7321,7 @@ %} // Cmove using isel. 
-instruct cmovL_reg_isel(cmpOp cmp, flagsReg crx, iRegLdst dst, iRegLsrc src) %{ +instruct cmovL_reg_isel(cmpOp cmp, flagsRegSrc crx, iRegLdst dst, iRegLsrc src) %{ match(Set dst (CMoveL (Binary cmp crx) (Binary dst src))); predicate(VM_Version::has_isel()); ins_cost(DEFAULT_COST); @@ -7367,7 +7339,7 @@ ins_pipe(pipe_class_default); %} -instruct cmovL_reg(cmpOp cmp, flagsReg crx, iRegLdst dst, iRegLsrc src) %{ +instruct cmovL_reg(cmpOp cmp, flagsRegSrc crx, iRegLdst dst, iRegLsrc src) %{ match(Set dst (CMoveL (Binary cmp crx) (Binary dst src))); predicate(!VM_Version::has_isel()); ins_cost(DEFAULT_COST+BRANCH_COST); @@ -7381,7 +7353,7 @@ ins_pipe(pipe_class_default); %} -instruct cmovL_imm(cmpOp cmp, flagsReg crx, iRegLdst dst, immL16 src) %{ +instruct cmovL_imm(cmpOp cmp, flagsRegSrc crx, iRegLdst dst, immL16 src) %{ match(Set dst (CMoveL (Binary cmp crx) (Binary dst src))); ins_cost(DEFAULT_COST+BRANCH_COST); @@ -7395,7 +7367,7 @@ %} // Cmove using isel. -instruct cmovN_reg_isel(cmpOp cmp, flagsReg crx, iRegNdst dst, iRegNsrc src) %{ +instruct cmovN_reg_isel(cmpOp cmp, flagsRegSrc crx, iRegNdst dst, iRegNsrc src) %{ match(Set dst (CMoveN (Binary cmp crx) (Binary dst src))); predicate(VM_Version::has_isel()); ins_cost(DEFAULT_COST); @@ -7414,7 +7386,7 @@ %} // Conditional move for RegN. Only cmov(reg, reg). -instruct cmovN_reg(cmpOp cmp, flagsReg crx, iRegNdst dst, iRegNsrc src) %{ +instruct cmovN_reg(cmpOp cmp, flagsRegSrc crx, iRegNdst dst, iRegNsrc src) %{ match(Set dst (CMoveN (Binary cmp crx) (Binary dst src))); predicate(!VM_Version::has_isel()); ins_cost(DEFAULT_COST+BRANCH_COST); @@ -7428,7 +7400,7 @@ ins_pipe(pipe_class_default); %} -instruct cmovN_imm(cmpOp cmp, flagsReg crx, iRegNdst dst, immN_0 src) %{ +instruct cmovN_imm(cmpOp cmp, flagsRegSrc crx, iRegNdst dst, immN_0 src) %{ match(Set dst (CMoveN (Binary cmp crx) (Binary dst src))); ins_cost(DEFAULT_COST+BRANCH_COST); @@ -7442,7 +7414,7 @@ %} // Cmove using isel. 
-instruct cmovP_reg_isel(cmpOp cmp, flagsReg crx, iRegPdst dst, iRegPsrc src) %{ +instruct cmovP_reg_isel(cmpOp cmp, flagsRegSrc crx, iRegPdst dst, iRegPsrc src) %{ match(Set dst (CMoveP (Binary cmp crx) (Binary dst src))); predicate(VM_Version::has_isel()); ins_cost(DEFAULT_COST); @@ -7460,7 +7432,7 @@ ins_pipe(pipe_class_default); %} -instruct cmovP_reg(cmpOp cmp, flagsReg crx, iRegPdst dst, iRegP_N2P src) %{ +instruct cmovP_reg(cmpOp cmp, flagsRegSrc crx, iRegPdst dst, iRegP_N2P src) %{ match(Set dst (CMoveP (Binary cmp crx) (Binary dst src))); predicate(!VM_Version::has_isel()); ins_cost(DEFAULT_COST+BRANCH_COST); @@ -7474,7 +7446,7 @@ ins_pipe(pipe_class_default); %} -instruct cmovP_imm(cmpOp cmp, flagsReg crx, iRegPdst dst, immP_0 src) %{ +instruct cmovP_imm(cmpOp cmp, flagsRegSrc crx, iRegPdst dst, immP_0 src) %{ match(Set dst (CMoveP (Binary cmp crx) (Binary dst src))); ins_cost(DEFAULT_COST+BRANCH_COST); @@ -7487,7 +7459,7 @@ ins_pipe(pipe_class_default); %} -instruct cmovF_reg(cmpOp cmp, flagsReg crx, regF dst, regF src) %{ +instruct cmovF_reg(cmpOp cmp, flagsRegSrc crx, regF dst, regF src) %{ match(Set dst (CMoveF (Binary cmp crx) (Binary dst src))); ins_cost(DEFAULT_COST+BRANCH_COST); @@ -7509,7 +7481,7 @@ ins_pipe(pipe_class_default); %} -instruct cmovD_reg(cmpOp cmp, flagsReg crx, regD dst, regD src) %{ +instruct cmovD_reg(cmpOp cmp, flagsRegSrc crx, regD dst, regD src) %{ match(Set dst (CMoveD (Binary cmp crx) (Binary dst src))); ins_cost(DEFAULT_COST+BRANCH_COST); @@ -7542,8 +7514,9 @@ // Mem_ptr must be a memory operand, else this node does not get // Flag_needs_anti_dependence_check set by adlc. If this is not set this node // can be rematerialized which leads to errors. -instruct storeLConditional_regP_regL_regL(flagsReg crx, indirect mem_ptr, iRegLsrc oldVal, iRegLsrc newVal) %{ +instruct storeLConditional_regP_regL_regL(flagsReg crx, indirect mem_ptr, iRegLsrc oldVal, iRegLsrc newVal, flagsRegCR0 cr0) %{ match(Set crx (StoreLConditional mem_ptr (Binary oldVal newVal))); + effect(TEMP cr0); format %{ "CMPXCHGD if ($crx = ($oldVal == *$mem_ptr)) *mem_ptr = $newVal; as bool" %} ins_encode %{ // TODO: PPC port $archOpcode(ppc64Opcode_compound); @@ -7560,16 +7533,16 @@ // Mem_ptr must be a memory operand, else this node does not get // Flag_needs_anti_dependence_check set by adlc. If this is not set this node // can be rematerialized which leads to errors. -instruct storePConditional_regP_regP_regP(flagsReg crx, indirect mem_ptr, iRegPsrc oldVal, iRegPsrc newVal) %{ - match(Set crx (StorePConditional mem_ptr (Binary oldVal newVal))); - format %{ "CMPXCHGD if ($crx = ($oldVal == *$mem_ptr)) *mem_ptr = $newVal; as bool" %} - ins_encode %{ - // TODO: PPC port $archOpcode(ppc64Opcode_compound); - __ cmpxchgd($crx$$CondRegister, R0, $oldVal$$Register, $newVal$$Register, $mem_ptr$$Register, - MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(), - noreg, NULL, true); - %} - ins_pipe(pipe_class_default); +instruct storePConditional_regP_regP_regP(flagsRegCR0 cr0, indirect mem_ptr, iRegPsrc oldVal, iRegPsrc newVal) %{ + match(Set cr0 (StorePConditional mem_ptr (Binary oldVal newVal))); + ins_cost(2*MEMORY_REF_COST); + + format %{ "STDCX_ if ($cr0 = ($oldVal == *$mem_ptr)) *mem_ptr = $newVal; as bool" %} + ins_encode %{ + // TODO: PPC port $archOpcode(ppc64Opcode_stdcx_); + __ stdcx_($newVal$$Register, $mem_ptr$$Register); + %} + ins_pipe(pipe_class_memory); %} // Implement LoadPLocked. 
Must be ordered against changes of the memory location @@ -7577,13 +7550,14 @@ // Don't know whether this is ever used. instruct loadPLocked(iRegPdst dst, memory mem) %{ match(Set dst (LoadPLocked mem)); - ins_cost(MEMORY_REF_COST); - - format %{ "LD $dst, $mem \t// loadPLocked\n\t" - "TWI $dst\n\t" - "ISYNC" %} - size(12); - ins_encode( enc_ld_ac(dst, mem) ); + ins_cost(2*MEMORY_REF_COST); + + format %{ "LDARX $dst, $mem \t// loadPLocked\n\t" %} + size(4); + ins_encode %{ + // TODO: PPC port $archOpcode(ppc64Opcode_ldarx); + __ ldarx($dst$$Register, $mem$$Register, MacroAssembler::cmpxchgx_hint_atomic_update()); + %} ins_pipe(pipe_class_memory); %} @@ -7593,8 +7567,9 @@ // (CompareAndSwap ...)" or "If (CmpI (CompareAndSwap ..))" cannot be // matched. -instruct compareAndSwapI_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src1, iRegIsrc src2) %{ +instruct compareAndSwapI_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src1, iRegIsrc src2, flagsRegCR0 cr0) %{ match(Set res (CompareAndSwapI mem_ptr (Binary src1 src2))); + effect(TEMP cr0); format %{ "CMPXCHGW $res, $mem_ptr, $src1, $src2; as bool" %} // Variable size: instruction count smaller if regs are disjoint. ins_encode %{ @@ -7607,8 +7582,9 @@ ins_pipe(pipe_class_default); %} -instruct compareAndSwapN_regP_regN_regN(iRegIdst res, iRegPdst mem_ptr, iRegNsrc src1, iRegNsrc src2) %{ +instruct compareAndSwapN_regP_regN_regN(iRegIdst res, iRegPdst mem_ptr, iRegNsrc src1, iRegNsrc src2, flagsRegCR0 cr0) %{ match(Set res (CompareAndSwapN mem_ptr (Binary src1 src2))); + effect(TEMP cr0); format %{ "CMPXCHGW $res, $mem_ptr, $src1, $src2; as bool" %} // Variable size: instruction count smaller if regs are disjoint. ins_encode %{ @@ -7621,8 +7597,9 @@ ins_pipe(pipe_class_default); %} -instruct compareAndSwapL_regP_regL_regL(iRegIdst res, iRegPdst mem_ptr, iRegLsrc src1, iRegLsrc src2) %{ +instruct compareAndSwapL_regP_regL_regL(iRegIdst res, iRegPdst mem_ptr, iRegLsrc src1, iRegLsrc src2, flagsRegCR0 cr0) %{ match(Set res (CompareAndSwapL mem_ptr (Binary src1 src2))); + effect(TEMP cr0); format %{ "CMPXCHGD $res, $mem_ptr, $src1, $src2; as bool" %} // Variable size: instruction count smaller if regs are disjoint. ins_encode %{ @@ -7635,8 +7612,9 @@ ins_pipe(pipe_class_default); %} -instruct compareAndSwapP_regP_regP_regP(iRegIdst res, iRegPdst mem_ptr, iRegPsrc src1, iRegPsrc src2) %{ +instruct compareAndSwapP_regP_regP_regP(iRegIdst res, iRegPdst mem_ptr, iRegPsrc src1, iRegPsrc src2, flagsRegCR0 cr0) %{ match(Set res (CompareAndSwapP mem_ptr (Binary src1 src2))); + effect(TEMP cr0); format %{ "CMPXCHGD $res, $mem_ptr, $src1, $src2; as bool; ptr" %} // Variable size: instruction count smaller if regs are disjoint. ins_encode %{ @@ -7649,48 +7627,54 @@ ins_pipe(pipe_class_default); %} -instruct getAndAddI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src) %{ +instruct getAndAddI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src, flagsRegCR0 cr0) %{ match(Set res (GetAndAddI mem_ptr src)); + effect(TEMP cr0); format %{ "GetAndAddI $res, $mem_ptr, $src" %} // Variable size: instruction count smaller if regs are disjoint. ins_encode( enc_GetAndAddI(res, mem_ptr, src) ); ins_pipe(pipe_class_default); %} -instruct getAndAddL(iRegLdst res, iRegPdst mem_ptr, iRegLsrc src) %{ +instruct getAndAddL(iRegLdst res, iRegPdst mem_ptr, iRegLsrc src, flagsRegCR0 cr0) %{ match(Set res (GetAndAddL mem_ptr src)); + effect(TEMP cr0); format %{ "GetAndAddL $res, $mem_ptr, $src" %} // Variable size: instruction count smaller if regs are disjoint. 
ins_encode( enc_GetAndAddL(res, mem_ptr, src) ); ins_pipe(pipe_class_default); %} -instruct getAndSetI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src) %{ +instruct getAndSetI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src, flagsRegCR0 cr0) %{ match(Set res (GetAndSetI mem_ptr src)); + effect(TEMP cr0); format %{ "GetAndSetI $res, $mem_ptr, $src" %} // Variable size: instruction count smaller if regs are disjoint. ins_encode( enc_GetAndSetI(res, mem_ptr, src) ); ins_pipe(pipe_class_default); %} -instruct getAndSetL(iRegLdst res, iRegPdst mem_ptr, iRegLsrc src) %{ +instruct getAndSetL(iRegLdst res, iRegPdst mem_ptr, iRegLsrc src, flagsRegCR0 cr0) %{ match(Set res (GetAndSetL mem_ptr src)); + effect(TEMP cr0); format %{ "GetAndSetL $res, $mem_ptr, $src" %} // Variable size: instruction count smaller if regs are disjoint. ins_encode( enc_GetAndSetL(res, mem_ptr, src) ); ins_pipe(pipe_class_default); %} -instruct getAndSetP(iRegPdst res, iRegPdst mem_ptr, iRegPsrc src) %{ +instruct getAndSetP(iRegPdst res, iRegPdst mem_ptr, iRegPsrc src, flagsRegCR0 cr0) %{ match(Set res (GetAndSetP mem_ptr src)); + effect(TEMP cr0); format %{ "GetAndSetP $res, $mem_ptr, $src" %} // Variable size: instruction count smaller if regs are disjoint. ins_encode( enc_GetAndSetL(res, mem_ptr, src) ); ins_pipe(pipe_class_default); %} -instruct getAndSetN(iRegNdst res, iRegPdst mem_ptr, iRegNsrc src) %{ +instruct getAndSetN(iRegNdst res, iRegPdst mem_ptr, iRegNsrc src, flagsRegCR0 cr0) %{ match(Set res (GetAndSetN mem_ptr src)); + effect(TEMP cr0); format %{ "GetAndSetN $res, $mem_ptr, $src" %} // Variable size: instruction count smaller if regs are disjoint. ins_encode( enc_GetAndSetI(res, mem_ptr, src) ); @@ -7898,18 +7882,8 @@ %} // Immediate Subtraction -// The compiler converts "x-c0" into "x+ -c0" (see SubINode::Ideal), -// so this rule seems to be unused. -instruct subI_reg_imm16(iRegIdst dst, iRegIsrc src1, immI16 src2) %{ - match(Set dst (SubI src1 src2)); - format %{ "SUBI $dst, $src1, $src2" %} - size(4); - ins_encode %{ - // TODO: PPC port $archOpcode(ppc64Opcode_addi); - __ addi($dst$$Register, $src1$$Register, ($src2$$constant) * (-1)); - %} - ins_pipe(pipe_class_default); -%} +// Immediate Subtraction: The compiler converts "x-c0" into "x+ -c0" (see SubLNode::Ideal), +// Don't try to use addi with - $src2$$constant since it can overflow when $src2$$constant == minI16. // SubI from constant (using subfic). instruct subI_imm16_reg(iRegIdst dst, immI16 src1, iRegIsrc src2) %{ @@ -7989,22 +7963,6 @@ ins_pipe(pipe_class_default); %} -// Immediate Subtraction -// The compiler converts "x-c0" into "x+ -c0" (see SubLNode::Ideal), -// so this rule seems to be unused. -// No constant pool entries required. -instruct subL_reg_imm16(iRegLdst dst, iRegLsrc src1, immL16 src2) %{ - match(Set dst (SubL src1 src2)); - - format %{ "SUBI $dst, $src1, $src2 \t// long" %} - size(4); - ins_encode %{ - // TODO: PPC port $archOpcode(ppc64Opcode_addi); - __ addi($dst$$Register, $src1$$Register, ($src2$$constant) * (-1)); - %} - ins_pipe(pipe_class_default); -%} - // Turn the sign-bit of a long into a 64-bit mask, 0x0...0 for // positive longs and 0xF...F for negative ones. 
instruct signmask64I_regL(iRegIdst dst, iRegLsrc src) %{ @@ -8165,7 +8123,7 @@ ins_pipe(pipe_class_default); %} -instruct cmovI_bne_negI_reg(iRegIdst dst, flagsReg crx, iRegIsrc src1) %{ +instruct cmovI_bne_negI_reg(iRegIdst dst, flagsRegSrc crx, iRegIsrc src1) %{ effect(USE_DEF dst, USE src1, USE crx); predicate(false); @@ -8228,7 +8186,7 @@ ins_pipe(pipe_class_default); %} -instruct cmovL_bne_negL_reg(iRegLdst dst, flagsReg crx, iRegLsrc src1) %{ +instruct cmovL_bne_negL_reg(iRegLdst dst, flagsRegSrc crx, iRegLsrc src1) %{ effect(USE_DEF dst, USE src1, USE crx); predicate(false); @@ -8281,7 +8239,7 @@ %} // Long Remainder with registers -instruct modL_reg_reg_Ex(iRegLdst dst, iRegLsrc src1, iRegLsrc src2, flagsRegCR0 cr0) %{ +instruct modL_reg_reg_Ex(iRegLdst dst, iRegLsrc src1, iRegLsrc src2) %{ match(Set dst (ModL src1 src2)); ins_cost(10*DEFAULT_COST); @@ -9011,7 +8969,6 @@ instruct andL_reg_uimm16(iRegLdst dst, iRegLsrc src1, uimmL16 src2, flagsRegCR0 cr0) %{ match(Set dst (AndL src1 src2)); effect(KILL cr0); - ins_cost(DEFAULT_COST); format %{ "ANDI $dst, $src1, $src2 \t// long" %} size(4); @@ -9803,7 +9760,7 @@ ins_pipe(pipe_class_default); %} -instruct cmovI_bso_stackSlotL(iRegIdst dst, flagsReg crx, stackSlotL src) %{ +instruct cmovI_bso_stackSlotL(iRegIdst dst, flagsRegSrc crx, stackSlotL src) %{ // no match-rule, false predicate effect(DEF dst, USE crx, USE src); predicate(false); @@ -9817,7 +9774,7 @@ ins_pipe(pipe_class_default); %} -instruct cmovI_bso_stackSlotL_conLvalue0_Ex(iRegIdst dst, flagsReg crx, stackSlotL mem) %{ +instruct cmovI_bso_stackSlotL_conLvalue0_Ex(iRegIdst dst, flagsRegSrc crx, stackSlotL mem) %{ // no match-rule, false predicate effect(DEF dst, USE crx, USE mem); predicate(false); @@ -9972,7 +9929,7 @@ ins_pipe(pipe_class_default); %} -instruct cmovL_bso_stackSlotL(iRegLdst dst, flagsReg crx, stackSlotL src) %{ +instruct cmovL_bso_stackSlotL(iRegLdst dst, flagsRegSrc crx, stackSlotL src) %{ // no match-rule, false predicate effect(DEF dst, USE crx, USE src); predicate(false); @@ -9986,7 +9943,7 @@ ins_pipe(pipe_class_default); %} -instruct cmovL_bso_stackSlotL_conLvalue0_Ex(iRegLdst dst, flagsReg crx, stackSlotL mem) %{ +instruct cmovL_bso_stackSlotL_conLvalue0_Ex(iRegLdst dst, flagsRegSrc crx, stackSlotL mem) %{ // no match-rule, false predicate effect(DEF dst, USE crx, USE mem); predicate(false); @@ -10255,7 +10212,6 @@ size(4); ins_encode %{ // TODO: PPC port $archOpcode(ppc64Opcode_andi_); - // FIXME: avoid andi_ ? __ andi_(R0, $src1$$Register, $src2$$constant); %} ins_pipe(pipe_class_compare); @@ -10302,13 +10258,12 @@ size(4); ins_encode %{ // TODO: PPC port $archOpcode(ppc64Opcode_andi_); - // FIXME: avoid andi_ ? __ andi_(R0, $src1$$Register, $src2$$constant); %} ins_pipe(pipe_class_compare); %} -instruct cmovI_conIvalueMinus1_conIvalue1(iRegIdst dst, flagsReg crx) %{ +instruct cmovI_conIvalueMinus1_conIvalue1(iRegIdst dst, flagsRegSrc crx) %{ // no match-rule, false predicate effect(DEF dst, USE crx); predicate(false); @@ -10332,7 +10287,7 @@ ins_pipe(pipe_class_compare); %} -instruct cmovI_conIvalueMinus1_conIvalue0_conIvalue1_Ex(iRegIdst dst, flagsReg crx) %{ +instruct cmovI_conIvalueMinus1_conIvalue0_conIvalue1_Ex(iRegIdst dst, flagsRegSrc crx) %{ // no match-rule, false predicate effect(DEF dst, USE crx); predicate(false); @@ -10622,8 +10577,9 @@ //----------Float Compares---------------------------------------------------- instruct cmpFUnordered_reg_reg(flagsReg crx, regF src1, regF src2) %{ + // Needs matchrule, see cmpDUnordered. 
+ match(Set crx (CmpF src1 src2)); // no match-rule, false predicate - effect(DEF crx, USE src1, USE src2); predicate(false); format %{ "cmpFUrd $crx, $src1, $src2" %} @@ -10731,8 +10687,14 @@ %} instruct cmpDUnordered_reg_reg(flagsReg crx, regD src1, regD src2) %{ - // no match-rule, false predicate - effect(DEF crx, USE src1, USE src2); + // Needs matchrule so that ideal opcode is Cmp. This causes that gcm places the + // node right before the conditional move using it. + // In jck test api/java_awt/geom/QuadCurve2DFloat/index.html#SetCurveTesttestCase7, + // compilation of java.awt.geom.RectangularShape::getBounds()Ljava/awt/Rectangle + // crashed in register allocation where the flags Reg between cmpDUnoredered and a + // conditional move was supposed to be spilled. + match(Set crx (CmpD src1 src2)); + // False predicate, shall not be matched. predicate(false); format %{ "cmpFUrd $crx, $src1, $src2" %} @@ -10830,7 +10792,7 @@ %} // Conditional Near Branch -instruct branchCon(cmpOp cmp, flagsReg crx, label lbl) %{ +instruct branchCon(cmpOp cmp, flagsRegSrc crx, label lbl) %{ // Same match rule as `branchConFar'. match(If cmp crx); effect(USE lbl); @@ -10853,7 +10815,7 @@ // expensive. // // Conditional Far Branch -instruct branchConFar(cmpOp cmp, flagsReg crx, label lbl) %{ +instruct branchConFar(cmpOp cmp, flagsRegSrc crx, label lbl) %{ // Same match rule as `branchCon'. match(If cmp crx); effect(USE crx, USE lbl); @@ -10871,7 +10833,7 @@ %} // Conditional Branch used with Power6 scheduler (can be far or short). -instruct branchConSched(cmpOp cmp, flagsReg crx, label lbl) %{ +instruct branchConSched(cmpOp cmp, flagsRegSrc crx, label lbl) %{ // Same match rule as `branchCon'. match(If cmp crx); effect(USE crx, USE lbl); @@ -10890,7 +10852,7 @@ ins_pipe(pipe_class_default); %} -instruct branchLoopEnd(cmpOp cmp, flagsReg crx, label labl) %{ +instruct branchLoopEnd(cmpOp cmp, flagsRegSrc crx, label labl) %{ match(CountedLoopEnd cmp crx); effect(USE labl); ins_cost(BRANCH_COST); @@ -10904,7 +10866,7 @@ ins_pipe(pipe_class_default); %} -instruct branchLoopEndFar(cmpOp cmp, flagsReg crx, label labl) %{ +instruct branchLoopEndFar(cmpOp cmp, flagsRegSrc crx, label labl) %{ match(CountedLoopEnd cmp crx); effect(USE labl); predicate(!false /* TODO: PPC port HB_Schedule */); @@ -10920,7 +10882,7 @@ %} // Conditional Branch used with Power6 scheduler (can be far or short). -instruct branchLoopEndSched(cmpOp cmp, flagsReg crx, label labl) %{ +instruct branchLoopEndSched(cmpOp cmp, flagsRegSrc crx, label labl) %{ match(CountedLoopEnd cmp crx); effect(USE labl); predicate(false /* TODO: PPC port HB_Schedule */); @@ -10969,13 +10931,36 @@ instruct cmpFastLock(flagsReg crx, iRegPdst oop, iRegPdst box, iRegPdst tmp1, iRegPdst tmp2, iRegPdst tmp3) %{ match(Set crx (FastLock oop box)); effect(TEMP tmp1, TEMP tmp2, TEMP tmp3); - // TODO PPC port predicate(!UseNewFastLockPPC64 || UseBiasedLocking); + predicate(/*(!UseNewFastLockPPC64 || UseBiasedLocking) &&*/ !Compile::current()->use_rtm()); format %{ "FASTLOCK $oop, $box, $tmp1, $tmp2, $tmp3" %} ins_encode %{ // TODO: PPC port $archOpcode(ppc64Opcode_compound); __ compiler_fast_lock_object($crx$$CondRegister, $oop$$Register, $box$$Register, - $tmp3$$Register, $tmp1$$Register, $tmp2$$Register); + $tmp3$$Register, $tmp1$$Register, $tmp2$$Register, + UseBiasedLocking && !UseOptoBiasInlining); // SAPJVM MD 2014-11-06 UseOptoBiasInlining + // If locking was successfull, crx should indicate 'EQ'. 
+ // The compiler generates a branch to the runtime call to + // _complete_monitor_locking_Java for the case where crx is 'NE'. + %} + ins_pipe(pipe_class_compare); +%} + +// Separate version for TM. Use bound register for box to enable USE_KILL. +instruct cmpFastLock_tm(flagsReg crx, iRegPdst oop, rarg2RegP box, iRegPdst tmp1, iRegPdst tmp2, iRegPdst tmp3) %{ + match(Set crx (FastLock oop box)); + effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, USE_KILL box); + predicate(Compile::current()->use_rtm()); + + format %{ "FASTLOCK $oop, $box, $tmp1, $tmp2, $tmp3 (TM)" %} + ins_encode %{ + // TODO: PPC port $archOpcode(ppc64Opcode_compound); + __ compiler_fast_lock_object($crx$$CondRegister, $oop$$Register, $box$$Register, + $tmp3$$Register, $tmp1$$Register, $tmp2$$Register, + /*Biased Locking*/ false, + _rtm_counters, _stack_rtm_counters, + ((Method*)(ra_->C->method()->constant_encoding()))->method_data(), + /*TM*/ true, ra_->C->profile_rtm()); // If locking was successfull, crx should indicate 'EQ'. // The compiler generates a branch to the runtime call to // _complete_monitor_locking_Java for the case where crx is 'NE'. @@ -10986,12 +10971,33 @@ instruct cmpFastUnlock(flagsReg crx, iRegPdst oop, iRegPdst box, iRegPdst tmp1, iRegPdst tmp2, iRegPdst tmp3) %{ match(Set crx (FastUnlock oop box)); effect(TEMP tmp1, TEMP tmp2, TEMP tmp3); + predicate(!Compile::current()->use_rtm()); format %{ "FASTUNLOCK $oop, $box, $tmp1, $tmp2" %} ins_encode %{ // TODO: PPC port $archOpcode(ppc64Opcode_compound); __ compiler_fast_unlock_object($crx$$CondRegister, $oop$$Register, $box$$Register, - $tmp3$$Register, $tmp1$$Register, $tmp2$$Register); + $tmp3$$Register, $tmp1$$Register, $tmp2$$Register, + UseBiasedLocking && !UseOptoBiasInlining, + false); + // If unlocking was successfull, crx should indicate 'EQ'. + // The compiler generates a branch to the runtime call to + // _complete_monitor_unlocking_Java for the case where crx is 'NE'. + %} + ins_pipe(pipe_class_compare); +%} + +instruct cmpFastUnlock_tm(flagsReg crx, iRegPdst oop, iRegPdst box, iRegPdst tmp1, iRegPdst tmp2, iRegPdst tmp3) %{ + match(Set crx (FastUnlock oop box)); + effect(TEMP tmp1, TEMP tmp2, TEMP tmp3); + predicate(Compile::current()->use_rtm()); + + format %{ "FASTUNLOCK $oop, $box, $tmp1, $tmp2 (TM)" %} + ins_encode %{ + // TODO: PPC port $archOpcode(ppc64Opcode_compound); + __ compiler_fast_unlock_object($crx$$CondRegister, $oop$$Register, $box$$Register, + $tmp3$$Register, $tmp1$$Register, $tmp2$$Register, + /*Biased Locking*/ false, /*TM*/ true); // If unlocking was successfull, crx should indicate 'EQ'. // The compiler generates a branch to the runtime call to // _complete_monitor_unlocking_Java for the case where crx is 'NE'. @@ -11658,6 +11664,66 @@ ins_pipe(pipe_class_default); %} + +//----------Overflow Math Instructions----------------------------------------- + +// Note that we have to make sure that XER.SO is reset before using overflow instructions. +// Simple Overflow operations can be matched by very few instructions (e.g. addExact: xor, and_, bc). +// Seems like only Long intrinsincs have an advantage. (The only expensive one is OverflowMulL.) 
+ +instruct overflowAddL_reg_reg(flagsRegCR0 cr0, iRegLsrc op1, iRegLsrc op2) %{ + match(Set cr0 (OverflowAddL op1 op2)); + + format %{ "add_ $op1, $op2\t# overflow check long" %} + ins_encode %{ + // TODO: PPC port $archOpcode(ppc64Opcode_compound); + __ li(R0, 0); + __ mtxer(R0); // clear XER.SO + __ addo_(R0, $op1$$Register, $op2$$Register); + %} + ins_pipe(pipe_class_default); +%} + +instruct overflowSubL_reg_reg(flagsRegCR0 cr0, iRegLsrc op1, iRegLsrc op2) %{ + match(Set cr0 (OverflowSubL op1 op2)); + + format %{ "subfo_ R0, $op2, $op1\t# overflow check long" %} + ins_encode %{ + // TODO: PPC port $archOpcode(ppc64Opcode_compound); + __ li(R0, 0); + __ mtxer(R0); // clear XER.SO + __ subfo_(R0, $op2$$Register, $op1$$Register); + %} + ins_pipe(pipe_class_default); +%} + +instruct overflowNegL_reg(flagsRegCR0 cr0, immL_0 zero, iRegLsrc op2) %{ + match(Set cr0 (OverflowSubL zero op2)); + + format %{ "nego_ R0, $op2\t# overflow check long" %} + ins_encode %{ + // TODO: PPC port $archOpcode(ppc64Opcode_compound); + __ li(R0, 0); + __ mtxer(R0); // clear XER.SO + __ nego_(R0, $op2$$Register); + %} + ins_pipe(pipe_class_default); +%} + +instruct overflowMulL_reg_reg(flagsRegCR0 cr0, iRegLsrc op1, iRegLsrc op2) %{ + match(Set cr0 (OverflowMulL op1 op2)); + + format %{ "mulldo_ R0, $op1, $op2\t# overflow check long" %} + ins_encode %{ + // TODO: PPC port $archOpcode(ppc64Opcode_compound); + __ li(R0, 0); + __ mtxer(R0); // clear XER.SO + __ mulldo_(R0, $op1$$Register, $op2$$Register); + %} + ins_pipe(pipe_class_default); +%} + + // ============================================================================ // Safepoint Instruction
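Editor's note on the Overflow* instructs above: they first clear the sticky XER.SO bit (li/mtxer of zero) and then issue the overflow-enabled record forms (addo_, subfo_, nego_, mulldo_) into R0, so only CR0/XER carry the result the ideal Overflow nodes consume. As a reference point, here is a minimal C++ sketch of the semantics these nodes implement, using only the GCC/Clang overflow builtins; it illustrates the check, it is not the VM's intrinsic code.

    // Hedged sketch: what OverflowAddL / OverflowSubL / OverflowMulL compute,
    // expressed with the type-generic overflow builtins.
    static inline bool overflow_add_l(long long a, long long b) {
      long long r;
      return __builtin_add_overflow(a, b, &r);   // true when a + b wraps
    }

    static inline bool overflow_sub_l(long long a, long long b) {
      long long r;
      return __builtin_sub_overflow(a, b, &r);   // negation is covered as 0 - b
    }

    static inline bool overflow_mul_l(long long a, long long b) {
      long long r;
      return __builtin_mul_overflow(a, b, &r);   // the expensive case noted in the comment above
    }

The reason XER.SO must be cleared first is that it is a summary (sticky) bit: without the mtxer it could still report an overflow from an earlier instruction.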
--- a/src/cpu/ppc/vm/register_definitions_ppc.cpp Thu Apr 30 17:20:25 2015 -0700 +++ b/src/cpu/ppc/vm/register_definitions_ppc.cpp Fri May 01 03:56:01 2015 -0700 @@ -1,6 +1,6 @@ /* - * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. - * Copyright 2012, 2013 SAP AG. All rights reserved. + * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved. + * Copyright 2012, 2015 SAP AG. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -23,19 +23,10 @@ * */ -// make sure the defines don't screw up the declarations later on in this file +// Make sure the defines don't screw up the declarations later on in this file. #define DONT_USE_REGISTER_DEFINES -#include "precompiled.hpp" -#include "asm/macroAssembler.hpp" #include "asm/register.hpp" -#include "register_ppc.hpp" -#ifdef TARGET_ARCH_MODEL_ppc_32 -# include "interp_masm_ppc_32.hpp" -#endif -#ifdef TARGET_ARCH_MODEL_ppc_64 -# include "interp_masm_ppc_64.hpp" -#endif REGISTER_DEFINITION(Register, noreg);
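Editor's note: register_definitions_ppc.cpp now includes only asm/register.hpp and compiles with DONT_USE_REGISTER_DEFINES, so the register names become real out-of-line constants in this one translation unit instead of preprocessor defines. A self-contained sketch of that declare-vs-define idiom follows; the types and names are simplified stand-ins, not the actual HotSpot headers.

    #include <cstdio>

    typedef int RegisterValue;                       // stand-in for the real Register type

    #ifndef DONT_USE_REGISTER_DEFINES
      // Mode used by most translation units: the name is just a macro.
      #define noreg_demo ((RegisterValue)-1)
    #else
      // Mode used by a register_definitions_*.cpp file: one real, linkable constant.
      extern const RegisterValue noreg_demo;
      const RegisterValue noreg_demo = (RegisterValue)-1;
    #endif

    int main() {
      std::printf("noreg_demo = %d\n", (int)noreg_demo);
      return 0;
    }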
--- a/src/cpu/ppc/vm/relocInfo_ppc.cpp Thu Apr 30 17:20:25 2015 -0700 +++ b/src/cpu/ppc/vm/relocInfo_ppc.cpp Fri May 01 03:56:01 2015 -0700 @@ -1,6 +1,6 @@ /* - * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. - * Copyright 2012, 2013 SAP AG. All rights reserved. + * Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved. + * Copyright 2012, 2015 SAP AG. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -25,14 +25,12 @@ #include "precompiled.hpp" #include "asm/assembler.inline.hpp" -#include "assembler_ppc.inline.hpp" #include "code/relocInfo.hpp" #include "nativeInst_ppc.hpp" #include "oops/oop.inline.hpp" #include "runtime/safepoint.hpp" void Relocation::pd_set_data_value(address x, intptr_t o, bool verify_only) { - bool copy_back_to_oop_pool = true; // TODO: PPC port // The following comment is from the declaration of DataRelocation: // // "The "o" (displacement) argument is relevant only to split relocations
--- a/src/cpu/ppc/vm/sharedRuntime_ppc.cpp Thu Apr 30 17:20:25 2015 -0700 +++ b/src/cpu/ppc/vm/sharedRuntime_ppc.cpp Fri May 01 03:56:01 2015 -0700 @@ -1,6 +1,6 @@ /* - * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. - * Copyright 2012, 2014 SAP AG. All rights reserved. + * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved. + * Copyright 2012, 2015 SAP AG. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -28,6 +28,7 @@ #include "code/debugInfoRec.hpp" #include "code/icBuffer.hpp" #include "code/vtableStubs.hpp" +#include "frame_ppc.hpp" #include "interpreter/interpreter.hpp" #include "interpreter/interp_masm.hpp" #include "oops/compiledICHolder.hpp" @@ -194,8 +195,8 @@ RegisterSaver_LiveIntReg( R27 ), RegisterSaver_LiveIntReg( R28 ), RegisterSaver_LiveIntReg( R29 ), - RegisterSaver_LiveIntReg( R31 ), - RegisterSaver_LiveIntReg( R30 ), // r30 must be the last register + RegisterSaver_LiveIntReg( R30 ), + RegisterSaver_LiveIntReg( R31 ), // must be the last register (see save/restore functions below) }; OopMap* RegisterSaver::push_frame_reg_args_and_save_live_registers(MacroAssembler* masm, @@ -229,29 +230,30 @@ BLOCK_COMMENT("push_frame_reg_args_and_save_live_registers {"); - // Save r30 in the last slot of the not yet pushed frame so that we + // Save r31 in the last slot of the not yet pushed frame so that we // can use it as scratch reg. - __ std(R30, -reg_size, R1_SP); + __ std(R31, -reg_size, R1_SP); assert(-reg_size == register_save_offset - frame_size_in_bytes + ((regstosave_num-1)*reg_size), "consistency check"); // save the flags // Do the save_LR_CR by hand and adjust the return pc if requested. - __ mfcr(R30); - __ std(R30, _abi(cr), R1_SP); + __ mfcr(R31); + __ std(R31, _abi(cr), R1_SP); switch (return_pc_location) { - case return_pc_is_lr: __ mflr(R30); break; - case return_pc_is_r4: __ mr(R30, R4); break; + case return_pc_is_lr: __ mflr(R31); break; + case return_pc_is_r4: __ mr(R31, R4); break; case return_pc_is_thread_saved_exception_pc: - __ ld(R30, thread_(saved_exception_pc)); break; + __ ld(R31, thread_(saved_exception_pc)); break; default: ShouldNotReachHere(); } - if (return_pc_adjustment != 0) - __ addi(R30, R30, return_pc_adjustment); - __ std(R30, _abi(lr), R1_SP); + if (return_pc_adjustment != 0) { + __ addi(R31, R31, return_pc_adjustment); + } + __ std(R31, _abi(lr), R1_SP); // push a new frame - __ push_frame(frame_size_in_bytes, R30); + __ push_frame(frame_size_in_bytes, R31); // save all registers (ints and floats) offset = register_save_offset; @@ -261,7 +263,7 @@ switch (reg_type) { case RegisterSaver::int_reg: { - if (reg_num != 30) { // We spilled R30 right at the beginning. + if (reg_num != 31) { // We spilled R31 right at the beginning. __ std(as_Register(reg_num), offset, R1_SP); } break; @@ -272,8 +274,8 @@ } case RegisterSaver::special_reg: { if (reg_num == SR_CTR_SpecialRegisterEnumValue) { - __ mfctr(R30); - __ std(R30, offset, R1_SP); + __ mfctr(R31); + __ std(R31, offset, R1_SP); } else { Unimplemented(); } @@ -321,7 +323,7 @@ switch (reg_type) { case RegisterSaver::int_reg: { - if (reg_num != 30) // R30 restored at the end, it's the tmp reg! + if (reg_num != 31) // R31 restored at the end, it's the tmp reg! 
__ ld(as_Register(reg_num), offset, R1_SP); break; } @@ -332,8 +334,8 @@ case RegisterSaver::special_reg: { if (reg_num == SR_CTR_SpecialRegisterEnumValue) { if (restore_ctr) { // Nothing to do here if ctr already contains the next address. - __ ld(R30, offset, R1_SP); - __ mtctr(R30); + __ ld(R31, offset, R1_SP); + __ mtctr(R31); } } else { Unimplemented(); @@ -350,10 +352,10 @@ __ pop_frame(); // restore the flags - __ restore_LR_CR(R30); + __ restore_LR_CR(R31); // restore scratch register's value - __ ld(R30, -reg_size, R1_SP); + __ ld(R31, -reg_size, R1_SP); BLOCK_COMMENT("} restore_live_registers_and_pop_frame"); } @@ -2021,6 +2023,8 @@ __ push_frame(frame_size_in_bytes, r_temp_1); // Push the c2n adapter's frame. frame_done_pc = (intptr_t)__ pc(); + __ verify_thread(); + // Native nmethod wrappers never take possesion of the oop arguments. // So the caller will gc the arguments. // The only thing we need an oopMap for is if the call is static. @@ -2594,7 +2598,7 @@ } uint SharedRuntime::out_preserve_stack_slots() { -#ifdef COMPILER2 +#if defined(COMPILER1) || defined(COMPILER2) return frame::jit_out_preserve_size / VMRegImpl::stack_slot_size; #else return 0; @@ -2868,11 +2872,6 @@ __ std(R0, in_bytes(JavaThread::exception_oop_offset()), R16_thread); __ BIND(skip_restore_excp); - // reload narrro_oop_base - if (UseCompressedOops && Universe::narrow_oop_base() != 0) { - __ load_const_optimized(R30, Universe::narrow_oop_base()); - } - __ pop_frame(); // stack: (deoptee, optional i2c, caller of deoptee, ...).
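Editor's note: the register-saver hunk above swaps the scratch register from R30 to R31 and drops the reload of the narrow-oop base after pop_frame; together with the ppc.ad hunks that now use sub_const_optimized/add_const_optimized against Universe::narrow_oop_base(), R30 no longer has to be re-materialized around these frames. For orientation, a hedged C++ sketch of the arithmetic the EncodeP/DecodeN nodes perform under the usual compressed-oops scheme; the base and shift values below are illustrative stand-ins only.

    #include <cstdint>

    static const uintptr_t kHeapBase = 0x0000000800000000ULL;  // stand-in for narrow_oop_base()
    static const int       kShift    = 3;                      // stand-in for narrow_oop_shift()

    static inline uint32_t encode_oop(uintptr_t p) {
      if (p == 0) return 0;                            // NULL stays NULL (the beq-skip path in cond_sub_base)
      return (uint32_t)((p - kHeapBase) >> kShift);    // EncodeP: subtract base, then shift
    }

    static inline uintptr_t decode_oop(uint32_t n) {
      if (n == 0) return 0;                            // cond_add_base likewise skips the add for NULL
      return kHeapBase + ((uintptr_t)n << kShift);     // DecodeN: shift, then add base
    }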
--- a/src/cpu/ppc/vm/stubGenerator_ppc.cpp Thu Apr 30 17:20:25 2015 -0700 +++ b/src/cpu/ppc/vm/stubGenerator_ppc.cpp Fri May 01 03:56:01 2015 -0700 @@ -261,9 +261,6 @@ // global toc register __ load_const(R29, MacroAssembler::global_toc(), R11_scratch1); - // Load narrow oop base. - __ reinit_heapbase(R30, R11_scratch1); - // Remember the senderSP so we interpreter can pop c2i arguments off of the stack // when called via a c2i. @@ -418,6 +415,23 @@ // or native call stub. The pending exception in Thread is // converted into a Java-level exception. // + // Read: + // + // LR: The pc the runtime library callee wants to return to. + // Since the exception occurred in the callee, the return pc + // from the point of view of Java is the exception pc. + // thread: Needed for method handles. + // + // Invalidate: + // + // volatile registers (except below). + // + // Update: + // + // R4_ARG2: exception + // + // (LR is unchanged and is live out). + // address generate_forward_exception() { StubCodeMark mark(this, "StubRoutines", "forward_exception"); address start = __ pc(); @@ -1256,9 +1270,9 @@ Register tmp3 = R8_ARG6; #if defined(ABI_ELFv2) - address nooverlap_target = aligned ? - StubRoutines::arrayof_jbyte_disjoint_arraycopy() : - StubRoutines::jbyte_disjoint_arraycopy(); + address nooverlap_target = aligned ? + StubRoutines::arrayof_jbyte_disjoint_arraycopy() : + StubRoutines::jbyte_disjoint_arraycopy(); #else address nooverlap_target = aligned ? ((FunctionDescriptor*)StubRoutines::arrayof_jbyte_disjoint_arraycopy())->entry() :
--- a/src/cpu/ppc/vm/templateInterpreter_ppc.cpp Thu Apr 30 17:20:25 2015 -0700 +++ b/src/cpu/ppc/vm/templateInterpreter_ppc.cpp Fri May 01 03:56:01 2015 -0700 @@ -1,6 +1,6 @@ /* - * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved. - * Copyright 2013, 2014 SAP AG. All rights reserved. + * Copyright (c) 2014, 2015, Oracle and/or its affiliates. All rights reserved. + * Copyright 2013, 2015 SAP AG. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -264,11 +264,11 @@ __ cmpdi(CCR0, Rmdo, 0); __ beq(CCR0, no_mdo); - // Increment invocation counter in the MDO. - const int mdo_ic_offs = in_bytes(MethodData::invocation_counter_offset()) + in_bytes(InvocationCounter::counter_offset()); - __ lwz(Rscratch2, mdo_ic_offs, Rmdo); + // Increment backedge counter in the MDO. + const int mdo_bc_offs = in_bytes(MethodData::backedge_counter_offset()) + in_bytes(InvocationCounter::counter_offset()); + __ lwz(Rscratch2, mdo_bc_offs, Rmdo); __ addi(Rscratch2, Rscratch2, increment); - __ stw(Rscratch2, mdo_ic_offs, Rmdo); + __ stw(Rscratch2, mdo_bc_offs, Rmdo); __ load_const_optimized(Rscratch1, mask, R0); __ and_(Rscratch1, Rscratch2, Rscratch1); __ bne(CCR0, done); @@ -276,12 +276,12 @@ } // Increment counter in MethodCounters*. - const int mo_ic_offs = in_bytes(MethodCounters::invocation_counter_offset()) + in_bytes(InvocationCounter::counter_offset()); + const int mo_bc_offs = in_bytes(MethodCounters::backedge_counter_offset()) + in_bytes(InvocationCounter::counter_offset()); __ bind(no_mdo); __ get_method_counters(R19_method, R3_counters, done); - __ lwz(Rscratch2, mo_ic_offs, R3_counters); + __ lwz(Rscratch2, mo_bc_offs, R3_counters); __ addi(Rscratch2, Rscratch2, increment); - __ stw(Rscratch2, mo_ic_offs, R3_counters); + __ stw(Rscratch2, mo_bc_offs, R3_counters); __ load_const_optimized(Rscratch1, mask, R0); __ and_(Rscratch1, Rscratch2, Rscratch1); __ beq(CCR0, *overflow); @@ -611,12 +611,7 @@ // For others we can use a normal (native) entry. inline bool math_entry_available(AbstractInterpreter::MethodKind kind) { - // Provide math entry with debugging on demand. - // Note: Debugging changes which code will get executed: - // Debugging or disabled InlineIntrinsics: java method will get interpreted and performs a native call. - // Not debugging and enabled InlineIntrinics: processor instruction will get used. - // Result might differ slightly due to rounding etc. - if (!InlineIntrinsics && (!FLAG_IS_ERGO(InlineIntrinsics))) return false; // Generate a vanilla entry. + if (!InlineIntrinsics) return false; return ((kind==Interpreter::java_lang_math_sqrt && VM_Version::has_fsqrt()) || (kind==Interpreter::java_lang_math_abs)); @@ -628,15 +623,8 @@ return Interpreter::entry_for_kind(Interpreter::zerolocals); } - Label Lslow_path; - const Register Rjvmti_mode = R11_scratch1; address entry = __ pc(); - // Provide math entry with debugging on demand. - __ lwz(Rjvmti_mode, thread_(interp_only_mode)); - __ cmpwi(CCR0, Rjvmti_mode, 0); - __ bne(CCR0, Lslow_path); // jvmti_mode!=0 - __ lfd(F1_RET, Interpreter::stackElementSize, R15_esp); // Pop c2i arguments (if any) off when we return. @@ -659,9 +647,6 @@ // And we're done. __ blr(); - // Provide slow path for JVMTI case. - __ bind(Lslow_path); - __ branch_to_entry(Interpreter::entry_for_kind(Interpreter::zerolocals), R12_scratch2); __ flush(); return entry;
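Editor's note: the interpreter hunk above redirects the profile bump from the invocation counter to the backedge counter, but the generated sequence stays the same load/add/store followed by a mask test. A compact C++ sketch of that check; counter, increment and mask are stand-ins for the MDO / MethodCounters fields and the ergonomics-derived constants, not the interpreter's actual C++.

    #include <cstdint>

    // Hedged sketch of the lwz / addi / stw / and_ / beq(*overflow) sequence above.
    static inline bool bump_and_check(uint32_t* counter, uint32_t increment, uint32_t mask) {
      uint32_t v = *counter + increment;   // lwz + addi
      *counter = v;                        // stw
      return (v & mask) == 0;              // and_ ...; a zero masked value takes the overflow path
    }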
--- a/src/cpu/ppc/vm/templateInterpreter_ppc.hpp Thu Apr 30 17:20:25 2015 -0700 +++ b/src/cpu/ppc/vm/templateInterpreter_ppc.hpp Fri May 01 03:56:01 2015 -0700 @@ -1,6 +1,6 @@ /* - * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved. - * Copyright 2013, 2014 SAP AG. All rights reserved. + * Copyright (c) 2014, 2015, Oracle and/or its affiliates. All rights reserved. + * Copyright 2013, 2015 SAP AG. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -34,7 +34,7 @@ // Run with +PrintInterpreter to get the VM to print out the size. // Max size with JVMTI - const static int InterpreterCodeSize = 210*K; + const static int InterpreterCodeSize = 230*K; #endif // CPU_PPC_VM_TEMPLATEINTERPRETER_PPC_HPP
--- a/src/cpu/ppc/vm/templateTable_ppc_64.cpp Thu Apr 30 17:20:25 2015 -0700 +++ b/src/cpu/ppc/vm/templateTable_ppc_64.cpp Fri May 01 03:56:01 2015 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, 2015, Oracle and/or its affiliates. All rights reserved. * Copyright 2013, 2015 SAP AG. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * @@ -375,23 +375,22 @@ int index_size = wide ? sizeof(u2) : sizeof(u1); const Register Rscratch = R11_scratch1; - Label resolved; + Label is_null; // We are resolved if the resolved reference cache entry contains a // non-null object (CallSite, etc.) __ get_cache_index_at_bcp(Rscratch, 1, index_size); // Load index. - __ load_resolved_reference_at_index(R17_tos, Rscratch); - __ cmpdi(CCR0, R17_tos, 0); - __ bne(CCR0, resolved); + __ load_resolved_reference_at_index(R17_tos, Rscratch, &is_null); + __ verify_oop(R17_tos); + __ dispatch_epilog(atos, Bytecodes::length_for(bytecode())); + + __ bind(is_null); __ load_const_optimized(R3_ARG1, (int)bytecode()); address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_ldc); // First time invocation - must resolve first. __ call_VM(R17_tos, entry, R3_ARG1); - - __ align(32, 12); - __ bind(resolved); __ verify_oop(R17_tos); } @@ -437,6 +436,14 @@ } void TemplateTable::iload() { + iload_internal(); +} + +void TemplateTable::nofast_iload() { + iload_internal(may_not_rewrite); +} + +void TemplateTable::iload_internal(RewriteControl rc) { transition(vtos, itos); // Get the local value into tos @@ -445,7 +452,7 @@ // Rewrite iload,iload pair into fast_iload2 // iload,caload pair into fast_icaload - if (RewriteFrequentPairs) { + if (RewriteFrequentPairs && rc == may_rewrite) { Label Lrewrite, Ldone; Register Rnext_byte = R3_ARG1, Rrewrite_to = R6_ARG4, @@ -709,6 +716,14 @@ } void TemplateTable::aload_0() { + aload_0_internal(); +} + +void TemplateTable::nofast_aload_0() { + aload_0_internal(may_not_rewrite); +} + +void TemplateTable::aload_0_internal(RewriteControl rc) { transition(vtos, atos); // According to bytecode histograms, the pairs: // @@ -732,7 +747,7 @@ // These bytecodes with a small amount of code are most profitable // to rewrite. - if (RewriteFrequentPairs) { + if (RewriteFrequentPairs && rc == may_rewrite) { Label Lrewrite, Ldont_rewrite; Register Rnext_byte = R3_ARG1, @@ -2144,6 +2159,12 @@ __ get_cache_and_index_at_bcp(Rcache, 1, index_size); Label Lresolved, Ldone; + Bytecodes::Code code = bytecode(); + switch (code) { + case Bytecodes::_nofast_getfield: code = Bytecodes::_getfield; break; + case Bytecodes::_nofast_putfield: code = Bytecodes::_putfield; break; + } + assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range"); // We are resolved if the indices offset contains the current bytecode. #if defined(VM_LITTLE_ENDIAN) @@ -2152,24 +2173,11 @@ __ lbz(Rscratch, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::indices_offset()) + 7 - (byte_no + 1), Rcache); #endif // Acquire by cmp-br-isync (see below). 
- __ cmpdi(CCR0, Rscratch, (int)bytecode()); + __ cmpdi(CCR0, Rscratch, (int)code); __ beq(CCR0, Lresolved); - address entry = NULL; - switch (bytecode()) { - case Bytecodes::_getstatic : // fall through - case Bytecodes::_putstatic : // fall through - case Bytecodes::_getfield : // fall through - case Bytecodes::_putfield : entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_get_put); break; - case Bytecodes::_invokevirtual : // fall through - case Bytecodes::_invokespecial : // fall through - case Bytecodes::_invokestatic : // fall through - case Bytecodes::_invokeinterface: entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_invoke); break; - case Bytecodes::_invokehandle : entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_invokehandle); break; - case Bytecodes::_invokedynamic : entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_invokedynamic); break; - default : ShouldNotReachHere(); break; - } - __ li(R4_ARG2, (int)bytecode()); + address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_from_cache); + __ li(R4_ARG2, code); __ call_VM(noreg, entry, R4_ARG2, true); // Update registers with resolved info. @@ -2350,7 +2358,7 @@ } // PPC64: implement volatile loads as fence-store-acquire. -void TemplateTable::getfield_or_static(int byte_no, bool is_static) { +void TemplateTable::getfield_or_static(int byte_no, bool is_static, RewriteControl rc) { transition(vtos, vtos); Label Lacquire, Lisync; @@ -2366,7 +2374,7 @@ static address field_branch_table[number_of_states], static_branch_table[number_of_states]; - address* branch_table = is_static ? static_branch_table : field_branch_table; + address* branch_table = (is_static || rc == may_not_rewrite) ? static_branch_table : field_branch_table; // Get field offset. resolve_cache_and_index(byte_no, Rcache, Rscratch, sizeof(u2)); @@ -2417,7 +2425,14 @@ #ifdef ASSERT __ bind(LFlagInvalid); __ stop("got invalid flag", 0x654); - +#endif + + if (!is_static && rc == may_not_rewrite) { + // We reuse the code from is_static. It's jumped to via the table above. + return; + } + +#ifdef ASSERT // __ bind(Lvtos); address pc_before_fence = __ pc(); __ fence(); // Volatile entry point (one instruction before non-volatile_entry point). @@ -2434,7 +2449,9 @@ branch_table[dtos] = __ pc(); // non-volatile_entry point __ lfdx(F15_ftos, Rclass_or_obj, Roffset); __ push(dtos); - if (!is_static) patch_bytecode(Bytecodes::_fast_dgetfield, Rbc, Rscratch); + if (!is_static && rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_dgetfield, Rbc, Rscratch); + } { Label acquire_double; __ beq(CCR6, acquire_double); // Volatile? @@ -2453,7 +2470,9 @@ branch_table[ftos] = __ pc(); // non-volatile_entry point __ lfsx(F15_ftos, Rclass_or_obj, Roffset); __ push(ftos); - if (!is_static) { patch_bytecode(Bytecodes::_fast_fgetfield, Rbc, Rscratch); } + if (!is_static && rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_fgetfield, Rbc, Rscratch); + } { Label acquire_float; __ beq(CCR6, acquire_float); // Volatile? @@ -2472,7 +2491,9 @@ branch_table[itos] = __ pc(); // non-volatile_entry point __ lwax(R17_tos, Rclass_or_obj, Roffset); __ push(itos); - if (!is_static) patch_bytecode(Bytecodes::_fast_igetfield, Rbc, Rscratch); + if (!is_static && rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_igetfield, Rbc, Rscratch); + } __ beq(CCR6, Lacquire); // Volatile? 
__ dispatch_epilog(vtos, Bytecodes::length_for(bytecode())); @@ -2483,7 +2504,9 @@ branch_table[ltos] = __ pc(); // non-volatile_entry point __ ldx(R17_tos, Rclass_or_obj, Roffset); __ push(ltos); - if (!is_static) patch_bytecode(Bytecodes::_fast_lgetfield, Rbc, Rscratch); + if (!is_static && rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_lgetfield, Rbc, Rscratch); + } __ beq(CCR6, Lacquire); // Volatile? __ dispatch_epilog(vtos, Bytecodes::length_for(bytecode())); @@ -2495,7 +2518,9 @@ __ lbzx(R17_tos, Rclass_or_obj, Roffset); __ extsb(R17_tos, R17_tos); __ push(btos); - if (!is_static) patch_bytecode(Bytecodes::_fast_bgetfield, Rbc, Rscratch); + if (!is_static && rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_bgetfield, Rbc, Rscratch); + } __ beq(CCR6, Lacquire); // Volatile? __ dispatch_epilog(vtos, Bytecodes::length_for(bytecode())); @@ -2506,7 +2531,9 @@ branch_table[ctos] = __ pc(); // non-volatile_entry point __ lhzx(R17_tos, Rclass_or_obj, Roffset); __ push(ctos); - if (!is_static) patch_bytecode(Bytecodes::_fast_cgetfield, Rbc, Rscratch); + if (!is_static && rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_cgetfield, Rbc, Rscratch); + } __ beq(CCR6, Lacquire); // Volatile? __ dispatch_epilog(vtos, Bytecodes::length_for(bytecode())); @@ -2517,7 +2544,9 @@ branch_table[stos] = __ pc(); // non-volatile_entry point __ lhax(R17_tos, Rclass_or_obj, Roffset); __ push(stos); - if (!is_static) patch_bytecode(Bytecodes::_fast_sgetfield, Rbc, Rscratch); + if (!is_static && rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_sgetfield, Rbc, Rscratch); + } __ beq(CCR6, Lacquire); // Volatile? __ dispatch_epilog(vtos, Bytecodes::length_for(bytecode())); @@ -2530,7 +2559,9 @@ __ verify_oop(R17_tos); __ push(atos); //__ dcbt(R17_tos); // prefetch - if (!is_static) patch_bytecode(Bytecodes::_fast_agetfield, Rbc, Rscratch); + if (!is_static && rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_agetfield, Rbc, Rscratch); + } __ beq(CCR6, Lacquire); // Volatile? __ dispatch_epilog(vtos, Bytecodes::length_for(bytecode())); @@ -2553,6 +2584,10 @@ getfield_or_static(byte_no, false); } +void TemplateTable::nofast_getfield(int byte_no) { + getfield_or_static(byte_no, false, may_not_rewrite); +} + void TemplateTable::getstatic(int byte_no) { getfield_or_static(byte_no, true); } @@ -2643,7 +2678,7 @@ } // PPC64: implement volatile stores as release-store (return bytecode contains an additional release). -void TemplateTable::putfield_or_static(int byte_no, bool is_static) { +void TemplateTable::putfield_or_static(int byte_no, bool is_static, RewriteControl rc) { Label Lvolatile; const Register Rcache = R5_ARG3, // Do not use ARG1/2 (causes trouble in jvmti_post_field_mod). @@ -2657,10 +2692,12 @@ Rbc = Rscratch3; const ConditionRegister CR_is_vol = CCR2; // Non-volatile condition register (survives runtime call in do_oop_store). - static address field_branch_table[number_of_states], + static address field_rw_branch_table[number_of_states], + field_norw_branch_table[number_of_states], static_branch_table[number_of_states]; - address* branch_table = is_static ? static_branch_table : field_branch_table; + address* branch_table = is_static ? static_branch_table : + (rc == may_rewrite ? field_rw_branch_table : field_norw_branch_table); // Stack (grows up): // value @@ -2688,7 +2725,9 @@ // Load from branch table and dispatch (volatile case: one instruction ahead). 
__ sldi(Rflags, Rflags, LogBytesPerWord); - if (!support_IRIW_for_not_multiple_copy_atomic_cpu) { __ cmpwi(CR_is_vol, Rscratch, 1); } // Volatile? + if (!support_IRIW_for_not_multiple_copy_atomic_cpu) { + __ cmpwi(CR_is_vol, Rscratch, 1); // Volatile? + } __ sldi(Rscratch, Rscratch, exact_log2(BytesPerInstWord)); // Volatile? size of instruction 1 : 0. __ ldx(Rbtable, Rbtable, Rflags); @@ -2715,9 +2754,13 @@ assert(branch_table[dtos] == 0, "can't compute twice"); branch_table[dtos] = __ pc(); // non-volatile_entry point __ pop(dtos); - if (!is_static) { pop_and_check_object(Rclass_or_obj); } // Kills R11_scratch1. + if (!is_static) { + pop_and_check_object(Rclass_or_obj); // Kills R11_scratch1. + } __ stfdx(F15_ftos, Rclass_or_obj, Roffset); - if (!is_static) { patch_bytecode(Bytecodes::_fast_dputfield, Rbc, Rscratch, true, byte_no); } + if (!is_static && rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_dputfield, Rbc, Rscratch, true, byte_no); + } if (!support_IRIW_for_not_multiple_copy_atomic_cpu) { __ beq(CR_is_vol, Lvolatile); // Volatile? } @@ -2731,7 +2774,9 @@ __ pop(ftos); if (!is_static) { pop_and_check_object(Rclass_or_obj); } // Kills R11_scratch1. __ stfsx(F15_ftos, Rclass_or_obj, Roffset); - if (!is_static) { patch_bytecode(Bytecodes::_fast_fputfield, Rbc, Rscratch, true, byte_no); } + if (!is_static && rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_fputfield, Rbc, Rscratch, true, byte_no); + } if (!support_IRIW_for_not_multiple_copy_atomic_cpu) { __ beq(CR_is_vol, Lvolatile); // Volatile? } @@ -2745,7 +2790,9 @@ __ pop(itos); if (!is_static) { pop_and_check_object(Rclass_or_obj); } // Kills R11_scratch1. __ stwx(R17_tos, Rclass_or_obj, Roffset); - if (!is_static) { patch_bytecode(Bytecodes::_fast_iputfield, Rbc, Rscratch, true, byte_no); } + if (!is_static && rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_iputfield, Rbc, Rscratch, true, byte_no); + } if (!support_IRIW_for_not_multiple_copy_atomic_cpu) { __ beq(CR_is_vol, Lvolatile); // Volatile? } @@ -2759,7 +2806,9 @@ __ pop(ltos); if (!is_static) { pop_and_check_object(Rclass_or_obj); } // Kills R11_scratch1. __ stdx(R17_tos, Rclass_or_obj, Roffset); - if (!is_static) { patch_bytecode(Bytecodes::_fast_lputfield, Rbc, Rscratch, true, byte_no); } + if (!is_static && rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_lputfield, Rbc, Rscratch, true, byte_no); + } if (!support_IRIW_for_not_multiple_copy_atomic_cpu) { __ beq(CR_is_vol, Lvolatile); // Volatile? } @@ -2773,7 +2822,9 @@ __ pop(btos); if (!is_static) { pop_and_check_object(Rclass_or_obj); } // Kills R11_scratch1. __ stbx(R17_tos, Rclass_or_obj, Roffset); - if (!is_static) { patch_bytecode(Bytecodes::_fast_bputfield, Rbc, Rscratch, true, byte_no); } + if (!is_static && rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_bputfield, Rbc, Rscratch, true, byte_no); + } if (!support_IRIW_for_not_multiple_copy_atomic_cpu) { __ beq(CR_is_vol, Lvolatile); // Volatile? } @@ -2787,7 +2838,9 @@ __ pop(ctos); if (!is_static) { pop_and_check_object(Rclass_or_obj); } // Kills R11_scratch1.. __ sthx(R17_tos, Rclass_or_obj, Roffset); - if (!is_static) { patch_bytecode(Bytecodes::_fast_cputfield, Rbc, Rscratch, true, byte_no); } + if (!is_static && rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_cputfield, Rbc, Rscratch, true, byte_no); + } if (!support_IRIW_for_not_multiple_copy_atomic_cpu) { __ beq(CR_is_vol, Lvolatile); // Volatile? 
} @@ -2801,7 +2854,9 @@ __ pop(stos); if (!is_static) { pop_and_check_object(Rclass_or_obj); } // Kills R11_scratch1. __ sthx(R17_tos, Rclass_or_obj, Roffset); - if (!is_static) { patch_bytecode(Bytecodes::_fast_sputfield, Rbc, Rscratch, true, byte_no); } + if (!is_static && rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_sputfield, Rbc, Rscratch, true, byte_no); + } if (!support_IRIW_for_not_multiple_copy_atomic_cpu) { __ beq(CR_is_vol, Lvolatile); // Volatile? } @@ -2815,7 +2870,9 @@ __ pop(atos); if (!is_static) { pop_and_check_object(Rclass_or_obj); } // kills R11_scratch1 do_oop_store(_masm, Rclass_or_obj, Roffset, R17_tos, Rscratch, Rscratch2, Rscratch3, _bs->kind(), false /* precise */, true /* check null */); - if (!is_static) { patch_bytecode(Bytecodes::_fast_aputfield, Rbc, Rscratch, true, byte_no); } + if (!is_static && rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_aputfield, Rbc, Rscratch, true, byte_no); + } if (!support_IRIW_for_not_multiple_copy_atomic_cpu) { __ beq(CR_is_vol, Lvolatile); // Volatile? __ dispatch_epilog(vtos, Bytecodes::length_for(bytecode())); @@ -2839,6 +2896,10 @@ putfield_or_static(byte_no, false); } +void TemplateTable::nofast_putfield(int byte_no) { + putfield_or_static(byte_no, false, may_not_rewrite); +} + void TemplateTable::putstatic(int byte_no) { putfield_or_static(byte_no, true); } @@ -3259,7 +3320,9 @@ __ testbitdi(CCR0, R0, Rflags, ConstantPoolCacheEntry::is_vfinal_shift); __ bfalse(CCR0, LnotFinal); - patch_bytecode(Bytecodes::_fast_invokevfinal, Rnew_bc, R12_scratch2); + if (RewriteBytecodes && !UseSharedSpaces) { + patch_bytecode(Bytecodes::_fast_invokevfinal, Rnew_bc, R12_scratch2); + } invokevfinal_helper(Rvtableindex_or_method, Rflags, R11_scratch1, R12_scratch2); __ align(32, 12); @@ -3795,9 +3858,9 @@ transition(atos, itos); Label Ldone, Lis_null, Lquicked, Lresolved; - Register Roffset = R5_ARG3, + Register Roffset = R6_ARG4, RobjKlass = R4_ARG2, - RspecifiedKlass = R6_ARG4, // Generate_ClassCastException_verbose_handler will expect the value in this register. + RspecifiedKlass = R5_ARG3, Rcpool = R11_scratch1, Rtags = R12_scratch2;
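Editor's note: the templateTable changes above thread a RewriteControl argument through the shared field-access generators so the new _nofast bytecodes reuse the same code but never patch themselves into _fast_* forms, and invokevirtual additionally checks RewriteBytecodes and UseSharedSpaces before rewriting. A small sketch of that gating, with a hypothetical function pointer standing in for the generated patch_bytecode call:

    enum RewriteControl { may_rewrite, may_not_rewrite };   // values as used in the diff

    // Hedged sketch: plain getfield/putfield pass may_rewrite, the _nofast variants
    // pass may_not_rewrite, so only the former ever rewrite the bytecode in place.
    static void maybe_patch_field(bool is_static, RewriteControl rc, void (*patch_bytecode_stub)()) {
      if (!is_static && rc == may_rewrite) {
        patch_bytecode_stub();   // e.g. rewrite getfield -> _fast_igetfield
      }
    }

As the invokevirtual hunk shows, the rewrite to _fast_invokevfinal is further guarded by RewriteBytecodes && !UseSharedSpaces.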
--- a/src/cpu/ppc/vm/vm_version_ppc.cpp Thu Apr 30 17:20:25 2015 -0700 +++ b/src/cpu/ppc/vm/vm_version_ppc.cpp Fri May 01 03:56:01 2015 -0700 @@ -1,6 +1,6 @@ /* - * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. - * Copyright 2012, 2014 SAP AG. All rights reserved. + * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved. + * Copyright 2012, 2015 SAP AG. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -32,12 +32,13 @@ #include "runtime/os.hpp" #include "runtime/stubCodeGenerator.hpp" #include "utilities/defaultStream.hpp" +#include "utilities/globalDefinitions.hpp" #include "vm_version_ppc.hpp" # include <sys/sysinfo.h> int VM_Version::_features = VM_Version::unknown_m; -int VM_Version::_measured_cache_line_size = 128; // default value +int VM_Version::_measured_cache_line_size = 32; // pessimistic init value const char* VM_Version::_features_str = ""; bool VM_Version::_is_determine_features_test_running = false; @@ -55,7 +56,9 @@ // If PowerArchitecturePPC64 hasn't been specified explicitly determine from features. if (FLAG_IS_DEFAULT(PowerArchitecturePPC64)) { - if (VM_Version::has_popcntw()) { + if (VM_Version::has_lqarx()) { + FLAG_SET_ERGO(uintx, PowerArchitecturePPC64, 8); + } else if (VM_Version::has_popcntw()) { FLAG_SET_ERGO(uintx, PowerArchitecturePPC64, 7); } else if (VM_Version::has_cmpb()) { FLAG_SET_ERGO(uintx, PowerArchitecturePPC64, 6); @@ -66,8 +69,14 @@ } } guarantee(PowerArchitecturePPC64 == 0 || PowerArchitecturePPC64 == 5 || - PowerArchitecturePPC64 == 6 || PowerArchitecturePPC64 == 7, - "PowerArchitecturePPC64 should be 0, 5, 6 or 7"); + PowerArchitecturePPC64 == 6 || PowerArchitecturePPC64 == 7 || + PowerArchitecturePPC64 == 8, + "PowerArchitecturePPC64 should be 0, 5, 6, 7, or 8"); + + // Power 8: Configure Data Stream Control Register. + if (PowerArchitecturePPC64 >= 8) { + config_dscr(); + } if (!UseSIGTRAP) { MSG(TrapBasedICMissChecks); @@ -97,7 +106,7 @@ // Create and print feature-string. char buf[(num_features+1) * 16]; // Max 16 chars per feature. jio_snprintf(buf, sizeof(buf), - "ppc64%s%s%s%s%s%s%s%s", + "ppc64%s%s%s%s%s%s%s%s%s%s%s%s", (has_fsqrt() ? " fsqrt" : ""), (has_isel() ? " isel" : ""), (has_lxarxeh() ? " lxarxeh" : ""), @@ -106,11 +115,17 @@ (has_popcntb() ? " popcntb" : ""), (has_popcntw() ? " popcntw" : ""), (has_fcfids() ? " fcfids" : ""), - (has_vand() ? " vand" : "") + (has_vand() ? " vand" : ""), + (has_lqarx() ? " lqarx" : ""), + (has_vcipher() ? " vcipher" : ""), + (has_vpmsumb() ? " vpmsumb" : ""), + (has_tcheck() ? " tcheck" : "") // Make sure number of %s matches num_features! ); _features_str = os::strdup(buf); - NOT_PRODUCT(if (Verbose) print_features();); + if (Verbose) { + print_features(); + } // PPC64 supports 8-byte compare-exchange operations (see // Atomic::cmpxchg and StubGenerator::generate_atomic_cmpxchg_ptr) @@ -171,7 +186,86 @@ FLAG_SET_DEFAULT(UseSHA256Intrinsics, false); FLAG_SET_DEFAULT(UseSHA512Intrinsics, false); } + // Adjust RTM (Restricted Transactional Memory) flags. + if (!has_tcheck() && UseRTMLocking) { + // Can't continue because UseRTMLocking affects UseBiasedLocking flag + // setting during arguments processing. See use_biased_locking(). + // VM_Version_init() is executed after UseBiasedLocking is used + // in Thread::allocate(). 
+ vm_exit_during_initialization("RTM instructions are not available on this CPU"); + } + if (UseRTMLocking) { +#if INCLUDE_RTM_OPT + if (!UnlockExperimentalVMOptions) { + vm_exit_during_initialization("UseRTMLocking is only available as experimental option on this platform. " + "It must be enabled via -XX:+UnlockExperimentalVMOptions flag."); + } else { + warning("UseRTMLocking is only available as experimental option on this platform."); + } + if (!FLAG_IS_CMDLINE(UseRTMLocking)) { + // RTM locking should be used only for applications with + // high lock contention. For now we do not use it by default. + vm_exit_during_initialization("UseRTMLocking flag should be only set on command line"); + } + if (!is_power_of_2(RTMTotalCountIncrRate)) { + warning("RTMTotalCountIncrRate must be a power of 2, resetting it to 64"); + FLAG_SET_DEFAULT(RTMTotalCountIncrRate, 64); + } + if (RTMAbortRatio < 0 || RTMAbortRatio > 100) { + warning("RTMAbortRatio must be in the range 0 to 100, resetting it to 50"); + FLAG_SET_DEFAULT(RTMAbortRatio, 50); + } + FLAG_SET_ERGO(bool, UseNewFastLockPPC64, false); // Does not implement TM. + guarantee(RTMSpinLoopCount > 0, "unsupported"); +#else + // Only C2 does RTM locking optimization. + // Can't continue because UseRTMLocking affects UseBiasedLocking flag + // setting during arguments processing. See use_biased_locking(). + vm_exit_during_initialization("RTM locking optimization is not supported in this VM"); +#endif + } else { // !UseRTMLocking + if (UseRTMForStackLocks) { + if (!FLAG_IS_DEFAULT(UseRTMForStackLocks)) { + warning("UseRTMForStackLocks flag should be off when UseRTMLocking flag is off"); + } + FLAG_SET_DEFAULT(UseRTMForStackLocks, false); + } + if (UseRTMDeopt) { + FLAG_SET_DEFAULT(UseRTMDeopt, false); + } + if (PrintPreciseRTMLockingStatistics) { + FLAG_SET_DEFAULT(PrintPreciseRTMLockingStatistics, false); + } + } + + // This machine does not allow unaligned memory accesses + if (UseUnalignedAccesses) { + if (!FLAG_IS_DEFAULT(UseUnalignedAccesses)) + warning("Unaligned memory access is not available on this CPU"); + FLAG_SET_DEFAULT(UseUnalignedAccesses, false); + } +} + +bool VM_Version::use_biased_locking() { +#if INCLUDE_RTM_OPT + // RTM locking is most useful when there is high lock contention and + // low data contention. With high lock contention the lock is usually + // inflated and biased locking is not suitable for that case. + // RTM locking code requires that biased locking is off. + // Note: we can't switch off UseBiasedLocking in get_processor_features() + // because it is used by Thread::allocate() which is called before + // VM_Version::initialize(). + if (UseRTMLocking && UseBiasedLocking) { + if (FLAG_IS_DEFAULT(UseBiasedLocking)) { + FLAG_SET_DEFAULT(UseBiasedLocking, false); + } else { + warning("Biased locking is not supported with RTM locking; ignoring UseBiasedLocking flag." ); + UseBiasedLocking = false; + } + } +#endif + return UseBiasedLocking; } void VM_Version::print_features() { @@ -437,16 +531,19 @@ // Don't use R0 in ldarx. // Keep R3_ARG1 unmodified, it contains &field (see below). // Keep R4_ARG2 unmodified, it contains offset = 0 (see below). 
- a->fsqrt(F3, F4); // code[0] -> fsqrt_m - a->fsqrts(F3, F4); // code[1] -> fsqrts_m - a->isel(R7, R5, R6, 0); // code[2] -> isel_m - a->ldarx_unchecked(R7, R3_ARG1, R4_ARG2, 1); // code[3] -> lxarx_m - a->cmpb(R7, R5, R6); // code[4] -> bcmp - //a->mftgpr(R7, F3); // code[5] -> mftgpr - a->popcntb(R7, R5); // code[6] -> popcntb - a->popcntw(R7, R5); // code[7] -> popcntw - a->fcfids(F3, F4); // code[8] -> fcfids - a->vand(VR0, VR0, VR0); // code[9] -> vand + a->fsqrt(F3, F4); // code[0] -> fsqrt_m + a->fsqrts(F3, F4); // code[1] -> fsqrts_m + a->isel(R7, R5, R6, 0); // code[2] -> isel_m + a->ldarx_unchecked(R7, R3_ARG1, R4_ARG2, 1); // code[3] -> lxarx_m + a->cmpb(R7, R5, R6); // code[4] -> cmpb + a->popcntb(R7, R5); // code[5] -> popcntb + a->popcntw(R7, R5); // code[6] -> popcntw + a->fcfids(F3, F4); // code[7] -> fcfids + a->vand(VR0, VR0, VR0); // code[8] -> vand + a->lqarx_unchecked(R7, R3_ARG1, R4_ARG2, 1); // code[9] -> lqarx_m + a->vcipher(VR0, VR1, VR2); // code[10] -> vcipher + a->vpmsumb(VR0, VR1, VR2); // code[11] -> vpmsumb + a->tcheck(0); // code[12] -> tcheck a->blr(); // Emit function to set one cache line to zero. Emit function descriptor and get pointer to it. @@ -485,11 +582,14 @@ if (code[feature_cntr++]) features |= isel_m; if (code[feature_cntr++]) features |= lxarxeh_m; if (code[feature_cntr++]) features |= cmpb_m; - //if(code[feature_cntr++])features |= mftgpr_m; if (code[feature_cntr++]) features |= popcntb_m; if (code[feature_cntr++]) features |= popcntw_m; if (code[feature_cntr++]) features |= fcfids_m; if (code[feature_cntr++]) features |= vand_m; + if (code[feature_cntr++]) features |= lqarx_m; + if (code[feature_cntr++]) features |= vcipher_m; + if (code[feature_cntr++]) features |= vpmsumb_m; + if (code[feature_cntr++]) features |= tcheck_m; // Print the detection code. if (PrintAssembly) { @@ -501,6 +601,69 @@ _features = features; } +// Power 8: Configure Data Stream Control Register. +void VM_Version::config_dscr() { + assert(has_tcheck(), "Only execute on Power 8 or later!"); + + // 7 InstWords for each call (function descriptor + blr instruction). + const int code_size = (2+2*7)*BytesPerInstWord; + + // Allocate space for the code. + ResourceMark rm; + CodeBuffer cb("config_dscr", code_size, 0); + MacroAssembler* a = new MacroAssembler(&cb); + + // Emit code. + uint64_t (*get_dscr)() = (uint64_t(*)())(void *)a->emit_fd(); + uint32_t *code = (uint32_t *)a->pc(); + a->mfdscr(R3); + a->blr(); + + void (*set_dscr)(long) = (void(*)(long))(void *)a->emit_fd(); + a->mtdscr(R3); + a->blr(); + + uint32_t *code_end = (uint32_t *)a->pc(); + a->flush(); + + // Print the detection code. + if (PrintAssembly) { + ttyLocker ttyl; + tty->print_cr("Decoding dscr configuration stub at " INTPTR_FORMAT " before execution:", code); + Disassembler::decode((u_char*)code, (u_char*)code_end, tty); + } + + // Apply the configuration if needed. 
+ uint64_t dscr_val = (*get_dscr)(); + if (Verbose) { + tty->print_cr("dscr value was 0x%lx" , dscr_val); + } + bool change_requested = false; + if (DSCR_PPC64 != (uintx)-1) { + dscr_val = DSCR_PPC64; + change_requested = true; + } + if (DSCR_DPFD_PPC64 <= 7) { + uint64_t mask = 0x7; + if ((dscr_val & mask) != DSCR_DPFD_PPC64) { + dscr_val = (dscr_val & ~mask) | (DSCR_DPFD_PPC64); + change_requested = true; + } + } + if (DSCR_URG_PPC64 <= 7) { + uint64_t mask = 0x7 << 6; + if ((dscr_val & mask) != DSCR_DPFD_PPC64 << 6) { + dscr_val = (dscr_val & ~mask) | (DSCR_URG_PPC64 << 6); + change_requested = true; + } + } + if (change_requested) { + (*set_dscr)(dscr_val); + if (Verbose) { + tty->print_cr("dscr was set to 0x%lx" , (*get_dscr)()); + } + } +} static int saved_features = 0;
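The DSCR update above is a plain read-modify-write over two small fields: the low three bits hold the default prefetch depth (DPFD) and bits 6-8 hold the urgency (URG) field. A minimal stand-alone C++ sketch of the same masking, where dpfd and urg are hypothetical stand-ins for the DSCR_DPFD_PPC64 / DSCR_URG_PPC64 flags:

    #include <cstdint>

    // Sketch of the field packing used by config_dscr() above (illustrative only).
    static uint64_t apply_dscr_fields(uint64_t dscr_val, uint64_t dpfd, uint64_t urg) {
      if (dpfd <= 7) {                                  // DPFD lives in bits 0..2
        const uint64_t mask = 0x7;
        dscr_val = (dscr_val & ~mask) | dpfd;
      }
      if (urg <= 7) {                                   // URG lives in bits 6..8
        const uint64_t mask = uint64_t(0x7) << 6;
        dscr_val = (dscr_val & ~mask) | (urg << 6);
      }
      return dscr_val;
    }

The real routine additionally lets DSCR_PPC64 replace the whole register value before these per-field updates are applied, and only writes DSCR back when a change was actually requested.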
--- a/src/cpu/ppc/vm/vm_version_ppc.hpp Thu Apr 30 17:20:25 2015 -0700 +++ b/src/cpu/ppc/vm/vm_version_ppc.hpp Fri May 01 03:56:01 2015 -0700 @@ -1,6 +1,6 @@ /* - * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. - * Copyright 2012, 2014 SAP AG. All rights reserved. + * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved. + * Copyright 2012, 2015 SAP AG. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -41,7 +41,10 @@ popcntw, fcfids, vand, - dcba, + lqarx, + vcipher, + vpmsumb, + tcheck, num_features // last entry to count features }; enum Feature_Flag_Set { @@ -55,7 +58,10 @@ popcntw_m = (1 << popcntw), fcfids_m = (1 << fcfids ), vand_m = (1 << vand ), - dcba_m = (1 << dcba ), + lqarx_m = (1 << lqarx ), + vcipher_m = (1 << vcipher), + vpmsumb_m = (1 << vpmsumb), + tcheck_m = (1 << tcheck ), all_features_m = -1 }; static int _features; @@ -65,12 +71,16 @@ static void print_features(); static void determine_features(); // also measures cache line size + static void config_dscr(); // Power 8: Configure Data Stream Control Register. static void determine_section_size(); static void power6_micro_bench(); public: // Initialization static void initialize(); + // Override Abstract_VM_Version implementation + static bool use_biased_locking(); + static bool is_determine_features_test_running() { return _is_determine_features_test_running; } // CPU instruction support static bool has_fsqrt() { return (_features & fsqrt_m) != 0; } @@ -82,7 +92,10 @@ static bool has_popcntw() { return (_features & popcntw_m) != 0; } static bool has_fcfids() { return (_features & fcfids_m) != 0; } static bool has_vand() { return (_features & vand_m) != 0; } - static bool has_dcba() { return (_features & dcba_m) != 0; } + static bool has_lqarx() { return (_features & lqarx_m) != 0; } + static bool has_vcipher() { return (_features & vcipher_m) != 0; } + static bool has_vpmsumb() { return (_features & vpmsumb_m) != 0; } + static bool has_tcheck() { return (_features & tcheck_m) != 0; } static const char* cpu_features() { return _features_str; }
--- a/src/cpu/ppc/vm/vtableStubs_ppc_64.cpp Thu Apr 30 17:20:25 2015 -0700 +++ b/src/cpu/ppc/vm/vtableStubs_ppc_64.cpp Fri May 01 03:56:01 2015 -0700 @@ -1,6 +1,6 @@ /* - * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. - * Copyright 2012, 2014 SAP AG. All rights reserved. + * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved. + * Copyright 2012, 2015 SAP AG. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -24,7 +24,6 @@ */ #include "precompiled.hpp" -#include "asm/assembler.hpp" #include "asm/macroAssembler.inline.hpp" #include "code/vtableStubs.hpp" #include "interp_masm_ppc_64.hpp"
--- a/src/cpu/sparc/vm/globals_sparc.hpp Thu Apr 30 17:20:25 2015 -0700 +++ b/src/cpu/sparc/vm/globals_sparc.hpp Fri May 01 03:56:01 2015 -0700 @@ -74,6 +74,8 @@ define_pd_global(bool, UseMembar, false); +define_pd_global(bool, PreserveFramePointer, false); + // GC Ergo Flags define_pd_global(size_t, CMSYoungGenPerWorker, 16*M); // default max size of CMS young gen, per GC worker thread
--- a/src/cpu/sparc/vm/interp_masm_sparc.hpp Thu Apr 30 17:20:25 2015 -0700 +++ b/src/cpu/sparc/vm/interp_masm_sparc.hpp Fri May 01 03:56:01 2015 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -203,7 +203,6 @@ void field_offset_at(int n, Register tmp, Register dest, Register base); int field_offset_at(Register object, address bcp, int offset); void fast_iaaccess(int n, address bcp); - void fast_iagetfield(address bcp); void fast_iaputfield(address bcp, bool do_store_check ); void index_check(Register array, Register index, int index_shift, Register tmp, Register res);
--- a/src/cpu/sparc/vm/macroAssembler_sparc.cpp Thu Apr 30 17:20:25 2015 -0700 +++ b/src/cpu/sparc/vm/macroAssembler_sparc.cpp Fri May 01 03:56:01 2015 -0700 @@ -3019,44 +3019,107 @@ // past the store that releases the lock. But TSO is a strong memory model // and that particular flavor of barrier is a noop, so we can safely elide it. // Note that we use 1-0 locking by default for the inflated case. We - // close the resultant (and rare) race by having contented threads in + // close the resultant (and rare) race by having contended threads in // monitorenter periodically poll _owner. - ld_ptr(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), Rscratch); - ld_ptr(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions), Rbox); - xor3(Rscratch, G2_thread, Rscratch); - orcc(Rbox, Rscratch, Rbox); - brx(Assembler::notZero, false, Assembler::pn, done); - delayed()-> - ld_ptr(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList), Rscratch); - ld_ptr(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq), Rbox); - orcc(Rbox, Rscratch, G0); - if (EmitSync & 65536) { - Label LSucc ; - brx(Assembler::notZero, false, Assembler::pn, LSucc); - delayed()->nop(); - ba(done); - delayed()->st_ptr(G0, Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)); - - bind(LSucc); - st_ptr(G0, Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)); - if (os::is_MP()) { membar (StoreLoad); } - ld_ptr(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ), Rscratch); - andcc(Rscratch, Rscratch, G0); - brx(Assembler::notZero, false, Assembler::pt, done); - delayed()->andcc(G0, G0, G0); - add(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), Rmark); - mov(G2_thread, Rscratch); - cas_ptr(Rmark, G0, Rscratch); - // invert icc.zf and goto done - br_notnull(Rscratch, false, Assembler::pt, done); - delayed()->cmp(G0, G0); - ba(done); - delayed()->cmp(G0, 1); + + if (EmitSync & 1024) { + // Emit code to check that _owner == Self + // We could fold the _owner test into subsequent code more efficiently + // than using a stand-alone check, but since _owner checking is off by + // default we don't bother. We also might consider predicating the + // _owner==Self check on Xcheck:jni or running on a debug build. 
+ ld_ptr(Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), Rscratch); + orcc(Rscratch, G0, G0); + brx(Assembler::notZero, false, Assembler::pn, done); + delayed()->nop(); + } + + if (EmitSync & 512) { + // classic lock release code absent 1-0 locking + // m->Owner = null; + // membar #storeload + // if (m->cxq|m->EntryList) == null goto Success + // if (m->succ != null) goto Success + // if CAS (&m->Owner,0,Self) != 0 goto Success + // goto SlowPath + ld_ptr(Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)), Rbox); + orcc(Rbox, G0, G0); + brx(Assembler::notZero, false, Assembler::pn, done); + delayed()->nop(); + st_ptr(G0, Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner))); + if (os::is_MP()) { membar(StoreLoad); } + ld_ptr(Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)), Rscratch); + ld_ptr(Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)), Rbox); + orcc(Rbox, Rscratch, G0); + brx(Assembler::zero, false, Assembler::pt, done); + delayed()-> + ld_ptr(Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), Rscratch); + andcc(Rscratch, Rscratch, G0); + brx(Assembler::notZero, false, Assembler::pt, done); + delayed()->andcc(G0, G0, G0); + add(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), Rmark); + mov(G2_thread, Rscratch); + cas_ptr(Rmark, G0, Rscratch); + cmp(Rscratch, G0); + // invert icc.zf and goto done + brx(Assembler::notZero, false, Assembler::pt, done); + delayed()->cmp(G0, G0); + br(Assembler::always, false, Assembler::pt, done); + delayed()->cmp(G0, 1); } else { - brx(Assembler::notZero, false, Assembler::pn, done); - delayed()->nop(); - ba(done); - delayed()->st_ptr(G0, Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)); + // 1-0 form : avoids CAS and MEMBAR in the common case + // Do not bother to ratify that m->Owner == Self. + ld_ptr(Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)), Rbox); + orcc(Rbox, G0, G0); + brx(Assembler::notZero, false, Assembler::pn, done); + delayed()-> + ld_ptr(Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)), Rscratch); + ld_ptr(Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)), Rbox); + orcc(Rbox, Rscratch, G0); + if (EmitSync & 16384) { + // As an optional optimization, if (EntryList|cxq) != null and _succ is null then + // we should transfer control directly to the slow-path. + // This test makes the reacquire operation below very infrequent. 
+ // The logic is equivalent to : + // if (cxq|EntryList) == null : Owner=null; goto Success + // if succ == null : goto SlowPath + // Owner=null; membar #storeload + // if succ != null : goto Success + // if CAS(&Owner,null,Self) != null goto Success + // goto SlowPath + brx(Assembler::zero, true, Assembler::pt, done); + delayed()-> + st_ptr(G0, Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner))); + ld_ptr(Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), Rscratch); + andcc(Rscratch, Rscratch, G0) ; + brx(Assembler::zero, false, Assembler::pt, done); + delayed()->orcc(G0, 1, G0); + st_ptr(G0, Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner))); + } else { + brx(Assembler::zero, false, Assembler::pt, done); + delayed()-> + st_ptr(G0, Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner))); + } + if (os::is_MP()) { membar(StoreLoad); } + // Check that _succ is (or remains) non-zero + ld_ptr(Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), Rscratch); + andcc(Rscratch, Rscratch, G0); + brx(Assembler::notZero, false, Assembler::pt, done); + delayed()->andcc(G0, G0, G0); + add(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), Rmark); + mov(G2_thread, Rscratch); + cas_ptr(Rmark, G0, Rscratch); + cmp(Rscratch, G0); + // invert icc.zf and goto done + // A slightly better v8+/v9 idiom would be the following: + // movrnz Rscratch,1,Rscratch + // ba done + // xorcc Rscratch,1,G0 + // In v8+ mode the idiom would be valid IFF Rscratch was a G or O register + brx(Assembler::notZero, false, Assembler::pt, done); + delayed()->cmp(G0, G0); + br(Assembler::always, false, Assembler::pt, done); + delayed()->cmp(G0, 1); } bind (LStacked); @@ -3632,23 +3695,11 @@ if (satb_log_enqueue_with_frame == 0) { generate_satb_log_enqueue(with_frame); assert(satb_log_enqueue_with_frame != 0, "postcondition."); - if (G1SATBPrintStubs) { - tty->print_cr("Generated with-frame satb enqueue:"); - Disassembler::decode((u_char*)satb_log_enqueue_with_frame, - satb_log_enqueue_with_frame_end, - tty); - } } } else { if (satb_log_enqueue_frameless == 0) { generate_satb_log_enqueue(with_frame); assert(satb_log_enqueue_frameless != 0, "postcondition."); - if (G1SATBPrintStubs) { - tty->print_cr("Generated frameless satb enqueue:"); - Disassembler::decode((u_char*)satb_log_enqueue_frameless, - satb_log_enqueue_frameless_end, - tty); - } } } } @@ -3841,12 +3892,6 @@ if (dirty_card_log_enqueue == 0) { generate_dirty_card_log_enqueue(byte_map_base); assert(dirty_card_log_enqueue != 0, "postcondition."); - if (G1SATBPrintStubs) { - tty->print_cr("Generated dirty_card enqueue:"); - Disassembler::decode((u_char*)dirty_card_log_enqueue, - dirty_card_log_enqueue_end, - tty); - } } }
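The block comments above spell out the 1-0 monitor exit protocol that the SPARC code implements: clear the owner, fence, and fall into the slow path only when waiters exist, no successor is visible, and the lock can be reacquired. A hedged C++ sketch of that control flow using standard atomics; MonitorSketch and fast_exit_sketch are illustrative names, not the VM's ObjectMonitor:

    #include <atomic>
    #include <cstdint>

    // Hypothetical mirror of the monitor fields named in the comments above.
    struct MonitorSketch {
      std::atomic<void*> owner{nullptr};
      intptr_t           recursions{0};
      std::atomic<void*> cxq{nullptr};
      std::atomic<void*> entry_list{nullptr};
      std::atomic<void*> succ{nullptr};
    };

    // Returns true if the fast path finished the unlock; false means fall back
    // to the runtime slow path (the "goto SlowPath" arm of the comments).
    bool fast_exit_sketch(MonitorSketch* m, void* self) {
      if (m->recursions != 0) return false;                      // recursive exit: slow path
      m->owner.store(nullptr, std::memory_order_release);        // m->Owner = null
      std::atomic_thread_fence(std::memory_order_seq_cst);       // membar #storeload
      if (m->cxq.load() == nullptr && m->entry_list.load() == nullptr) return true;
      if (m->succ.load() != nullptr) return true;                // a successor will take over
      void* expected = nullptr;
      if (!m->owner.compare_exchange_strong(expected, self)) return true;  // lock was retaken
      return false;                                              // reacquired: run the slow path
    }

Returning false here roughly corresponds to the assembly leaving the condition codes set so the compiled caller branches into the runtime slow path.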
--- a/src/cpu/sparc/vm/sharedRuntime_sparc.cpp Thu Apr 30 17:20:25 2015 -0700 +++ b/src/cpu/sparc/vm/sharedRuntime_sparc.cpp Fri May 01 03:56:01 2015 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2003, 2015, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -2664,6 +2664,9 @@ // disallows any pending_exception. __ mov(L3_box, O1); + // Pass in current thread pointer + __ mov(G2_thread, O2); + __ call(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C), relocInfo::runtime_call_type); __ delayed()->mov(L4, O0); // Need oop in O0
--- a/src/cpu/sparc/vm/templateTable_sparc.cpp Thu Apr 30 17:20:25 2015 -0700 +++ b/src/cpu/sparc/vm/templateTable_sparc.cpp Fri May 01 03:56:01 2015 -0700 @@ -385,7 +385,6 @@ __ verify_oop(Otos_i); } - void TemplateTable::ldc2_w() { transition(vtos, vtos); Label Long, exit; @@ -430,22 +429,28 @@ __ bind(exit); } - void TemplateTable::locals_index(Register reg, int offset) { __ ldub( at_bcp(offset), reg ); } - void TemplateTable::locals_index_wide(Register reg) { // offset is 2, not 1, because Lbcp points to wide prefix code __ get_2_byte_integer_at_bcp(2, G4_scratch, reg, InterpreterMacroAssembler::Unsigned); } void TemplateTable::iload() { + iload_internal(); +} + +void TemplateTable::nofast_iload() { + iload_internal(may_not_rewrite); +} + +void TemplateTable::iload_internal(RewriteControl rc) { transition(vtos, itos); // Rewrite iload,iload pair into fast_iload2 // iload,caload pair into fast_icaload - if (RewriteFrequentPairs) { + if (RewriteFrequentPairs && rc == may_rewrite) { Label rewrite, done; // get next byte @@ -672,8 +677,15 @@ __ ld_ptr( Llocals, Interpreter::local_offset_in_bytes(n), Otos_i ); } - void TemplateTable::aload_0() { + aload_0_internal(); +} + +void TemplateTable::nofast_aload_0() { + aload_0_internal(may_not_rewrite); +} + +void TemplateTable::aload_0_internal(RewriteControl rc) { transition(vtos, atos); // According to bytecode histograms, the pairs: @@ -687,7 +699,7 @@ // bytecode into a pair bytecode; otherwise it rewrites the current // bytecode into _fast_aload_0 that doesn't do the pair check anymore. // - if (RewriteFrequentPairs) { + if (RewriteFrequentPairs && rc == may_rewrite) { Label rewrite, done; // get next byte @@ -731,7 +743,6 @@ } } - void TemplateTable::istore() { transition(itos, vtos); locals_index(G3_scratch); @@ -2045,30 +2056,21 @@ Register index, size_t index_size) { // Depends on cpCacheOop layout! + Label resolved; - - assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range"); - __ get_cache_and_index_and_bytecode_at_bcp(Rcache, index, Lbyte_code, byte_no, 1, index_size); - __ cmp(Lbyte_code, (int) bytecode()); // have we resolved this bytecode? 
- __ br(Assembler::equal, false, Assembler::pt, resolved); - __ delayed()->set((int)bytecode(), O1); - - address entry; - switch (bytecode()) { - case Bytecodes::_getstatic : // fall through - case Bytecodes::_putstatic : // fall through - case Bytecodes::_getfield : // fall through - case Bytecodes::_putfield : entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_get_put); break; - case Bytecodes::_invokevirtual : // fall through - case Bytecodes::_invokespecial : // fall through - case Bytecodes::_invokestatic : // fall through - case Bytecodes::_invokeinterface: entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_invoke); break; - case Bytecodes::_invokehandle : entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_invokehandle); break; - case Bytecodes::_invokedynamic : entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_invokedynamic); break; - default: - fatal(err_msg("unexpected bytecode: %s", Bytecodes::name(bytecode()))); - break; + Bytecodes::Code code = bytecode(); + switch (code) { + case Bytecodes::_nofast_getfield: code = Bytecodes::_getfield; break; + case Bytecodes::_nofast_putfield: code = Bytecodes::_putfield; break; } + + assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range"); + __ get_cache_and_index_and_bytecode_at_bcp(Rcache, index, Lbyte_code, byte_no, 1, index_size); + __ cmp(Lbyte_code, code); // have we resolved this bytecode? + __ br(Assembler::equal, false, Assembler::pt, resolved); + __ delayed()->set(code, O1); + + address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_from_cache); // first time invocation - must resolve first __ call_VM(noreg, entry, O1); // Update registers with resolved info @@ -2183,7 +2185,7 @@ } } -void TemplateTable::getfield_or_static(int byte_no, bool is_static) { +void TemplateTable::getfield_or_static(int byte_no, bool is_static, RewriteControl rc) { transition(vtos, vtos); Register Rcache = G3_scratch; @@ -2231,7 +2233,7 @@ __ load_heap_oop(Rclass, Roffset, Otos_i); __ verify_oop(Otos_i); __ push(atos); - if (!is_static) { + if (!is_static && rc == may_rewrite) { patch_bytecode(Bytecodes::_fast_agetfield, G3_scratch, G4_scratch); } __ ba(checkVolatile); @@ -2246,7 +2248,7 @@ // itos __ ld(Rclass, Roffset, Otos_i); __ push(itos); - if (!is_static) { + if (!is_static && rc == may_rewrite) { patch_bytecode(Bytecodes::_fast_igetfield, G3_scratch, G4_scratch); } __ ba(checkVolatile); @@ -2262,7 +2264,7 @@ // load must be atomic __ ld_long(Rclass, Roffset, Otos_l); __ push(ltos); - if (!is_static) { + if (!is_static && rc == may_rewrite) { patch_bytecode(Bytecodes::_fast_lgetfield, G3_scratch, G4_scratch); } __ ba(checkVolatile); @@ -2277,7 +2279,7 @@ // btos __ ldsb(Rclass, Roffset, Otos_i); __ push(itos); - if (!is_static) { + if (!is_static && rc == may_rewrite) { patch_bytecode(Bytecodes::_fast_bgetfield, G3_scratch, G4_scratch); } __ ba(checkVolatile); @@ -2292,7 +2294,7 @@ // ctos __ lduh(Rclass, Roffset, Otos_i); __ push(itos); - if (!is_static) { + if (!is_static && rc == may_rewrite) { patch_bytecode(Bytecodes::_fast_cgetfield, G3_scratch, G4_scratch); } __ ba(checkVolatile); @@ -2307,7 +2309,7 @@ // stos __ ldsh(Rclass, Roffset, Otos_i); __ push(itos); - if (!is_static) { + if (!is_static && rc == may_rewrite) { patch_bytecode(Bytecodes::_fast_sgetfield, G3_scratch, G4_scratch); } __ ba(checkVolatile); @@ -2323,7 +2325,7 @@ // ftos __ ldf(FloatRegisterImpl::S, Rclass, Roffset, Ftos_f); __ push(ftos); - if (!is_static) { + if (!is_static && rc == may_rewrite) 
{ patch_bytecode(Bytecodes::_fast_fgetfield, G3_scratch, G4_scratch); } __ ba(checkVolatile); @@ -2335,7 +2337,7 @@ // dtos __ ldf(FloatRegisterImpl::D, Rclass, Roffset, Ftos_d); __ push(dtos); - if (!is_static) { + if (!is_static && rc == may_rewrite) { patch_bytecode(Bytecodes::_fast_dgetfield, G3_scratch, G4_scratch); } @@ -2350,16 +2352,18 @@ __ bind(exit); } - void TemplateTable::getfield(int byte_no) { getfield_or_static(byte_no, false); } +void TemplateTable::nofast_getfield(int byte_no) { + getfield_or_static(byte_no, false, may_not_rewrite); +} + void TemplateTable::getstatic(int byte_no) { getfield_or_static(byte_no, true); } - void TemplateTable::fast_accessfield(TosState state) { transition(atos, state); Register Rcache = G3_scratch; @@ -2544,7 +2548,7 @@ __ verify_oop(r); } -void TemplateTable::putfield_or_static(int byte_no, bool is_static) { +void TemplateTable::putfield_or_static(int byte_no, bool is_static, RewriteControl rc) { transition(vtos, vtos); Register Rcache = G3_scratch; Register index = G4_scratch; @@ -2620,7 +2624,7 @@ __ pop_i(); pop_and_check_object(Rclass); __ st(Otos_i, Rclass, Roffset); - patch_bytecode(Bytecodes::_fast_iputfield, G3_scratch, G4_scratch, true, byte_no); + if (rc == may_rewrite) patch_bytecode(Bytecodes::_fast_iputfield, G3_scratch, G4_scratch, true, byte_no); __ ba(checkVolatile); __ delayed()->tst(Lscratch); } @@ -2636,7 +2640,7 @@ pop_and_check_object(Rclass); __ verify_oop(Otos_i); do_oop_store(_masm, Rclass, Roffset, 0, Otos_i, G1_scratch, _bs->kind(), false); - patch_bytecode(Bytecodes::_fast_aputfield, G3_scratch, G4_scratch, true, byte_no); + if (rc == may_rewrite) patch_bytecode(Bytecodes::_fast_aputfield, G3_scratch, G4_scratch, true, byte_no); __ ba(checkVolatile); __ delayed()->tst(Lscratch); } @@ -2653,7 +2657,7 @@ __ pop_i(); if (!is_static) pop_and_check_object(Rclass); __ stb(Otos_i, Rclass, Roffset); - if (!is_static) { + if (!is_static && rc == may_rewrite) { patch_bytecode(Bytecodes::_fast_bputfield, G3_scratch, G4_scratch, true, byte_no); } __ ba(checkVolatile); @@ -2670,7 +2674,7 @@ __ pop_l(); if (!is_static) pop_and_check_object(Rclass); __ st_long(Otos_l, Rclass, Roffset); - if (!is_static) { + if (!is_static && rc == may_rewrite) { patch_bytecode(Bytecodes::_fast_lputfield, G3_scratch, G4_scratch, true, byte_no); } __ ba(checkVolatile); @@ -2687,7 +2691,7 @@ __ pop_i(); if (!is_static) pop_and_check_object(Rclass); __ sth(Otos_i, Rclass, Roffset); - if (!is_static) { + if (!is_static && rc == may_rewrite) { patch_bytecode(Bytecodes::_fast_cputfield, G3_scratch, G4_scratch, true, byte_no); } __ ba(checkVolatile); @@ -2704,7 +2708,7 @@ __ pop_i(); if (!is_static) pop_and_check_object(Rclass); __ sth(Otos_i, Rclass, Roffset); - if (!is_static) { + if (!is_static && rc == may_rewrite) { patch_bytecode(Bytecodes::_fast_sputfield, G3_scratch, G4_scratch, true, byte_no); } __ ba(checkVolatile); @@ -2721,7 +2725,7 @@ __ pop_f(); if (!is_static) pop_and_check_object(Rclass); __ stf(FloatRegisterImpl::S, Ftos_f, Rclass, Roffset); - if (!is_static) { + if (!is_static && rc == may_rewrite) { patch_bytecode(Bytecodes::_fast_fputfield, G3_scratch, G4_scratch, true, byte_no); } __ ba(checkVolatile); @@ -2735,7 +2739,7 @@ __ pop_d(); if (!is_static) pop_and_check_object(Rclass); __ stf(FloatRegisterImpl::D, Ftos_d, Rclass, Roffset); - if (!is_static) { + if (!is_static && rc == may_rewrite) { patch_bytecode(Bytecodes::_fast_dputfield, G3_scratch, G4_scratch, true, byte_no); } } @@ -2809,16 +2813,18 @@ } } - void 
TemplateTable::putfield(int byte_no) { putfield_or_static(byte_no, false); } +void TemplateTable::nofast_putfield(int byte_no) { + putfield_or_static(byte_no, false, may_not_rewrite); +} + void TemplateTable::putstatic(int byte_no) { putfield_or_static(byte_no, true); } - void TemplateTable::fast_xaccess(TosState state) { transition(vtos, state); Register Rcache = G3_scratch; @@ -2971,7 +2977,9 @@ __ br(Assembler::zero, false, Assembler::pt, notFinal); __ delayed()->and3(Rret, 0xFF, G4_scratch); // gets number of parameters - patch_bytecode(Bytecodes::_fast_invokevfinal, Rscratch, Rtemp); + if (RewriteBytecodes && !UseSharedSpaces) { + patch_bytecode(Bytecodes::_fast_invokevfinal, Rscratch, Rtemp); + } invokevfinal_helper(Rscratch, Rret);
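Throughout this templateTable change the new _nofast entry points reuse the existing generators but pass a RewriteControl that suppresses patch_bytecode, so bytecode rewriting happens only when it is allowed. A conceptual C++ sketch of that dispatch shape; GeneratorSketch and its members are illustrative, not the interpreter's actual classes:

    enum RewriteControl { may_rewrite, may_not_rewrite };

    struct GeneratorSketch {
      bool rewrite_frequent_pairs = true;   // mirrors the RewriteFrequentPairs flag

      void iload()        { iload_internal(may_rewrite); }      // normal template: may patch
      void nofast_iload() { iload_internal(may_not_rewrite); }  // _nofast_ template: never patch

      void iload_internal(RewriteControl rc) {
        emit_load();                                            // the shared load sequence
        if (rewrite_frequent_pairs && rc == may_rewrite) {
          patch_to_fast_variant();                              // e.g. rewrite into a fast pair form
        }
      }

      void emit_load() {}                 // placeholders for the real code generation
      void patch_to_fast_variant() {}
    };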
--- a/src/cpu/sparc/vm/vm_version_sparc.cpp Thu Apr 30 17:20:25 2015 -0700 +++ b/src/cpu/sparc/vm/vm_version_sparc.cpp Fri May 01 03:56:01 2015 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -32,7 +32,7 @@ int VM_Version::_features = VM_Version::unknown_m; const char* VM_Version::_features_str = ""; -unsigned int VM_Version::_L2_cache_line_size = 0; +unsigned int VM_Version::_L2_data_cache_line_size = 0; void VM_Version::initialize() { _features = determine_features(); @@ -356,10 +356,17 @@ (cache_line_size > ContendedPaddingWidth)) ContendedPaddingWidth = cache_line_size; + // This machine does not allow unaligned memory accesses + if (UseUnalignedAccesses) { + if (!FLAG_IS_DEFAULT(UseUnalignedAccesses)) + warning("Unaligned memory access is not available on this CPU"); + FLAG_SET_DEFAULT(UseUnalignedAccesses, false); + } + #ifndef PRODUCT if (PrintMiscellaneous && Verbose) { tty->print_cr("L1 data cache line size: %u", L1_data_cache_line_size()); - tty->print_cr("L2 cache line size: %u", L2_cache_line_size()); + tty->print_cr("L2 data cache line size: %u", L2_data_cache_line_size()); tty->print("Allocation"); if (AllocatePrefetchStyle <= 0) { tty->print_cr(": no prefetching");
--- a/src/cpu/sparc/vm/vm_version_sparc.hpp Thu Apr 30 17:20:25 2015 -0700 +++ b/src/cpu/sparc/vm/vm_version_sparc.hpp Fri May 01 03:56:01 2015 -0700 @@ -96,8 +96,8 @@ static int _features; static const char* _features_str; - static unsigned int _L2_cache_line_size; - static unsigned int L2_cache_line_size() { return _L2_cache_line_size; } + static unsigned int _L2_data_cache_line_size; + static unsigned int L2_data_cache_line_size() { return _L2_data_cache_line_size; } static void print_features(); static int determine_features(); @@ -171,7 +171,7 @@ static const char* cpu_features() { return _features_str; } // default prefetch block size on sparc - static intx prefetch_data_size() { return L2_cache_line_size(); } + static intx prefetch_data_size() { return L2_data_cache_line_size(); } // Prefetch static intx prefetch_copy_interval_in_bytes() {
--- a/src/cpu/x86/vm/assembler_x86.cpp Thu Apr 30 17:20:25 2015 -0700 +++ b/src/cpu/x86/vm/assembler_x86.cpp Fri May 01 03:56:01 2015 -0700 @@ -3359,6 +3359,20 @@ // Integer vector arithmetic +void Assembler::vphaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { + assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); + int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector256, VEX_OPCODE_0F_38); + emit_int8(0x01); + emit_int8((unsigned char)(0xC0 | encode)); +} + +void Assembler::vphaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { + assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); + int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector256, VEX_OPCODE_0F_38); + emit_int8(0x02); + emit_int8((unsigned char)(0xC0 | encode)); +} + void Assembler::paddb(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); emit_simd_arith(0xFC, dst, src, VEX_SIMD_66); @@ -3379,6 +3393,20 @@ emit_simd_arith(0xD4, dst, src, VEX_SIMD_66); } +void Assembler::phaddw(XMMRegister dst, XMMRegister src) { + NOT_LP64(assert(VM_Version::supports_sse3(), "")); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38); + emit_int8(0x01); + emit_int8((unsigned char)(0xC0 | encode)); +} + +void Assembler::phaddd(XMMRegister dst, XMMRegister src) { + NOT_LP64(assert(VM_Version::supports_sse3(), "")); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38); + emit_int8(0x02); + emit_int8((unsigned char)(0xC0 | encode)); +} + void Assembler::vpaddb(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); emit_vex_arith(0xFC, dst, nds, src, VEX_SIMD_66, vector256); @@ -3804,6 +3832,17 @@ emit_int8(0x01); } +void Assembler::vextractf128h(XMMRegister dst, XMMRegister src) { + assert(VM_Version::supports_avx(), ""); + bool vector256 = true; + int encode = vex_prefix_and_encode(src, xnoreg, dst, VEX_SIMD_66, vector256, VEX_OPCODE_0F_3A); + emit_int8(0x19); + emit_int8((unsigned char)(0xC0 | encode)); + // 0x00 - insert into lower 128 bits + // 0x01 - insert into upper 128 bits + emit_int8(0x01); +} + void Assembler::vextractf128h(Address dst, XMMRegister src) { assert(VM_Version::supports_avx(), ""); InstructionMark im(this);
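The new phaddw/phaddd/vphaddw/vphaddd encodings emit SSSE3/AVX horizontal adds. For reference, a scalar C++ model of what the 128-bit PHADDD form computes: adjacent pairs of the destination, then of the source, are summed and repacked into the destination.

    #include <cstdint>

    // Scalar model of 128-bit PHADDD dst, src (four 32-bit lanes per operand).
    static void phaddd_model(int32_t dst[4], const int32_t src[4]) {
      int32_t r[4];
      r[0] = dst[0] + dst[1];
      r[1] = dst[2] + dst[3];
      r[2] = src[0] + src[1];
      r[3] = src[2] + src[3];
      for (int i = 0; i < 4; i++) dst[i] = r[i];
    }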
--- a/src/cpu/x86/vm/assembler_x86.hpp Thu Apr 30 17:20:25 2015 -0700 +++ b/src/cpu/x86/vm/assembler_x86.hpp Fri May 01 03:56:01 2015 -0700 @@ -142,8 +142,10 @@ #endif // _LP64 -// JSR 292 fixed register usages: -REGISTER_DECLARATION(Register, rbp_mh_SP_save, rbp); +// JSR 292 +// On x86, the SP does not have to be saved when invoking method handle intrinsics +// or compiled lambda forms. We indicate that by setting rbp_mh_SP_save to noreg. +REGISTER_DECLARATION(Register, rbp_mh_SP_save, noreg); // Address is an abstraction used to represent a memory location // using any of the amd64 addressing modes with one object. @@ -1777,6 +1779,12 @@ void vxorpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256); void vxorps(XMMRegister dst, XMMRegister nds, Address src, bool vector256); + // Add horizontal packed integers + void vphaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); + void vphaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); + void phaddw(XMMRegister dst, XMMRegister src); + void phaddd(XMMRegister dst, XMMRegister src); + // Add packed integers void paddb(XMMRegister dst, XMMRegister src); void paddw(XMMRegister dst, XMMRegister src); @@ -1869,6 +1877,7 @@ // Copy low 128bit into high 128bit of YMM registers. void vinsertf128h(XMMRegister dst, XMMRegister nds, XMMRegister src); void vinserti128h(XMMRegister dst, XMMRegister nds, XMMRegister src); + void vextractf128h(XMMRegister dst, XMMRegister src); // Load/store high 128bit of YMM registers which does not destroy other half. void vinsertf128h(XMMRegister dst, Address src);
--- a/src/cpu/x86/vm/c1_FrameMap_x86.cpp Thu Apr 30 17:20:25 2015 -0700 +++ b/src/cpu/x86/vm/c1_FrameMap_x86.cpp Fri May 01 03:56:01 2015 -0700 @@ -343,14 +343,13 @@ return FrameMap::rsp_opr; } - // JSR 292 +// On x86, there is no need to save the SP, because neither +// method handle intrinsics, nor compiled lambda forms modify it. LIR_Opr FrameMap::method_handle_invoke_SP_save_opr() { - assert(rbp == rbp_mh_SP_save, "must be same register"); - return rbp_opr; + return LIR_OprFact::illegalOpr; } - bool FrameMap::validate_frame() { return true; }
--- a/src/cpu/x86/vm/c1_MacroAssembler_x86.cpp Thu Apr 30 17:20:25 2015 -0700 +++ b/src/cpu/x86/vm/c1_MacroAssembler_x86.cpp Fri May 01 03:56:01 2015 -0700 @@ -360,6 +360,9 @@ generate_stack_overflow_check(bang_size_in_bytes); push(rbp); + if (PreserveFramePointer) { + mov(rbp, rsp); + } #ifdef TIERED // c2 leaves fpu stack dirty. Clean it on entry if (UseSSE < 2 ) {
--- a/src/cpu/x86/vm/c1_Runtime1_x86.cpp Thu Apr 30 17:20:25 2015 -0700 +++ b/src/cpu/x86/vm/c1_Runtime1_x86.cpp Fri May 01 03:56:01 2015 -0700 @@ -754,14 +754,9 @@ // WIN64_ONLY: No need to add frame::arg_reg_save_area_bytes to SP // since we do a leave anyway. - // Pop the return address since we are possibly changing SP (restoring from BP). + // Pop the return address. __ leave(); __ pop(rcx); - - // Restore SP from BP if the exception PC is a method handle call site. - NOT_LP64(__ get_thread(thread);) - __ cmpl(Address(thread, JavaThread::is_method_handle_return_offset()), 0); - __ cmovptr(Assembler::notEqual, rsp, rbp_mh_SP_save); __ jmp(rcx); // jump to exception handler break; default: ShouldNotReachHere(); @@ -832,11 +827,6 @@ // the pop is also necessary to simulate the effect of a ret(0) __ pop(exception_pc); - // Restore SP from BP if the exception PC is a method handle call site. - NOT_LP64(__ get_thread(thread);) - __ cmpl(Address(thread, JavaThread::is_method_handle_return_offset()), 0); - __ cmovptr(Assembler::notEqual, rsp, rbp_mh_SP_save); - // continue at exception handler (return address removed) // note: do *not* remove arguments when unwinding the // activation since the caller assumes having
--- a/src/cpu/x86/vm/frame_x86.cpp Thu Apr 30 17:20:25 2015 -0700 +++ b/src/cpu/x86/vm/frame_x86.cpp Fri May 01 03:56:01 2015 -0700 @@ -224,7 +224,8 @@ if (sender_blob->is_nmethod()) { nmethod* nm = sender_blob->as_nmethod_or_null(); if (nm != NULL) { - if (nm->is_deopt_mh_entry(sender_pc) || nm->is_deopt_entry(sender_pc)) { + if (nm->is_deopt_mh_entry(sender_pc) || nm->is_deopt_entry(sender_pc) || + nm->method()->is_method_handle_intrinsic()) { return false; } } @@ -391,10 +392,9 @@ // frame::verify_deopt_original_pc // // Verifies the calculated original PC of a deoptimization PC for the -// given unextended SP. The unextended SP might also be the saved SP -// for MethodHandle call sites. +// given unextended SP. #ifdef ASSERT -void frame::verify_deopt_original_pc(nmethod* nm, intptr_t* unextended_sp, bool is_method_handle_return) { +void frame::verify_deopt_original_pc(nmethod* nm, intptr_t* unextended_sp) { frame fr; // This is ugly but it's better than to change {get,set}_original_pc @@ -404,33 +404,23 @@ address original_pc = nm->get_original_pc(&fr); assert(nm->insts_contains(original_pc), "original PC must be in nmethod"); - assert(nm->is_method_handle_return(original_pc) == is_method_handle_return, "must be"); } #endif //------------------------------------------------------------------------------ // frame::adjust_unextended_sp void frame::adjust_unextended_sp() { - // If we are returning to a compiled MethodHandle call site, the - // saved_fp will in fact be a saved value of the unextended SP. The - // simplest way to tell whether we are returning to such a call site - // is as follows: + // On x86, sites calling method handle intrinsics and lambda forms are treated + // as any other call site. Therefore, no special action is needed when we are + // returning to any of these call sites. nmethod* sender_nm = (_cb == NULL) ? NULL : _cb->as_nmethod_or_null(); if (sender_nm != NULL) { - // If the sender PC is a deoptimization point, get the original - // PC. For MethodHandle call site the unextended_sp is stored in - // saved_fp. - if (sender_nm->is_deopt_mh_entry(_pc)) { - DEBUG_ONLY(verify_deopt_mh_original_pc(sender_nm, _fp)); - _unextended_sp = _fp; - } - else if (sender_nm->is_deopt_entry(_pc)) { + // If the sender PC is a deoptimization point, get the original PC. + if (sender_nm->is_deopt_entry(_pc) || + sender_nm->is_deopt_mh_entry(_pc)) { DEBUG_ONLY(verify_deopt_original_pc(sender_nm, _unextended_sp)); } - else if (sender_nm->is_method_handle_return(_pc)) { - _unextended_sp = _fp; - } } }
--- a/src/cpu/x86/vm/frame_x86.hpp Thu Apr 30 17:20:25 2015 -0700 +++ b/src/cpu/x86/vm/frame_x86.hpp Fri May 01 03:56:01 2015 -0700 @@ -76,11 +76,11 @@ // [locals and parameters ] // <- sender sp -// [1] When the c++ interpreter calls a new method it returns to the frame +// [1] When the C++ interpreter calls a new method it returns to the frame // manager which allocates a new frame on the stack. In that case there // is no real callee of this newly allocated frame. The frame manager is -// aware of the additional frame(s) and will pop them as nested calls -// complete. Howevers tTo make it look good in the debugger the frame +// aware of the additional frame(s) and will pop them as nested calls +// complete. However, to make it look good in the debugger the frame // manager actually installs a dummy pc pointing to RecursiveInterpreterActivation // with a fake interpreter_state* parameter to make it easy to debug // nested calls. @@ -88,7 +88,7 @@ // Note that contrary to the layout for the assembly interpreter the // expression stack allocated for the C++ interpreter is full sized. // However this is not as bad as it seems as the interpreter frame_manager -// will truncate the unused space on succesive method calls. +// will truncate the unused space on successive method calls. // // ------------------------------ C++ interpreter ---------------------------------------- @@ -167,10 +167,7 @@ #ifdef ASSERT // Used in frame::sender_for_{interpreter,compiled}_frame - static void verify_deopt_original_pc( nmethod* nm, intptr_t* unextended_sp, bool is_method_handle_return = false); - static void verify_deopt_mh_original_pc(nmethod* nm, intptr_t* unextended_sp) { - verify_deopt_original_pc(nm, unextended_sp, true); - } + static void verify_deopt_original_pc(nmethod* nm, intptr_t* unextended_sp); #endif public:
--- a/src/cpu/x86/vm/frame_x86.inline.hpp Thu Apr 30 17:20:25 2015 -0700 +++ b/src/cpu/x86/vm/frame_x86.inline.hpp Fri May 01 03:56:01 2015 -0700 @@ -94,7 +94,7 @@ // find_blob call. This is also why we can have no asserts on the validity // of the pc we find here. AsyncGetCallTrace -> pd_get_top_frame_for_signal_handler // -> pd_last_frame should use a specialized version of pd_last_frame which could - // call a specilaized frame constructor instead of this one. + // call a specialized frame constructor instead of this one. // Then we could use the assert below. However this assert is of somewhat dubious // value. // assert(_pc != NULL, "no pc?");
--- a/src/cpu/x86/vm/globalDefinitions_x86.hpp Thu Apr 30 17:20:25 2015 -0700 +++ b/src/cpu/x86/vm/globalDefinitions_x86.hpp Fri May 01 03:56:01 2015 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1999, 2014, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1999, 2015, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -58,4 +58,9 @@ #endif #endif +#if defined(COMPILER2) && !defined(JAVASE_EMBEDDED) +// Include Restricted Transactional Memory lock eliding optimization +#define INCLUDE_RTM_OPT 1 +#endif + #endif // CPU_X86_VM_GLOBALDEFINITIONS_X86_HPP
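The new INCLUDE_RTM_OPT macro compiles the RTM lock-elision support only into C2 builds that are not embedded. A minimal sketch of how dependent code can key off it; the helper name is hypothetical:

    // When the guard is absent (non-C2 or embedded builds) the RTM-only path
    // compiles away entirely.
    #if INCLUDE_RTM_OPT
    static bool rtm_support_compiled_in() { return true; }
    #else
    static bool rtm_support_compiled_in() { return false; }
    #endif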
--- a/src/cpu/x86/vm/globals_x86.hpp Thu Apr 30 17:20:25 2015 -0700 +++ b/src/cpu/x86/vm/globals_x86.hpp Fri May 01 03:56:01 2015 -0700 @@ -82,14 +82,13 @@ define_pd_global(uintx, TypeProfileLevel, 111); +define_pd_global(bool, PreserveFramePointer, false); + #define ARCH_FLAGS(develop, product, diagnostic, experimental, notproduct) \ \ develop(bool, IEEEPrecision, true, \ "Enables IEEE precision (for INTEL only)") \ \ - product(intx, FenceInstruction, 0, \ - "(Unsafe,Unstable) Experimental") \ - \ product(bool, UseStoreImmI16, true, \ "Use store immediate 16-bits value instruction on x86") \ \
--- a/src/cpu/x86/vm/macroAssembler_x86.cpp Thu Apr 30 17:20:25 2015 -0700 +++ b/src/cpu/x86/vm/macroAssembler_x86.cpp Fri May 01 03:56:01 2015 -0700 @@ -1958,6 +1958,11 @@ // Programmer's Guide and Specification" claims that an object locked by jni_monitorenter // should not be unlocked by "normal" java-level locking and vice-versa. The specification // doesn't specify what will occur if a program engages in such mixed-mode locking, however. +// Arguably given that the spec legislates the JNI case as undefined our implementation +// could reasonably *avoid* checking owner in Fast_Unlock(). +// In the interest of performance we elide m->Owner==Self check in unlock. +// A perfectly viable alternative is to elide the owner check except when +// Xcheck:jni is enabled. void MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register tmpReg, bool use_rtm) { assert(boxReg == rax, ""); @@ -1966,24 +1971,6 @@ if (EmitSync & 4) { // Disable - inhibit all inlining. Force control through the slow-path cmpptr (rsp, 0); - } else - if (EmitSync & 8) { - Label DONE_LABEL; - if (UseBiasedLocking) { - biased_locking_exit(objReg, tmpReg, DONE_LABEL); - } - // Classic stack-locking code ... - // Check whether the displaced header is 0 - //(=> recursive unlock) - movptr(tmpReg, Address(boxReg, 0)); - testptr(tmpReg, tmpReg); - jccb(Assembler::zero, DONE_LABEL); - // If not recursive lock, reset the header to displaced header - if (os::is_MP()) { - lock(); - } - cmpxchgptr(tmpReg, Address(objReg, 0)); // Uses RAX which is box - bind(DONE_LABEL); } else { Label DONE_LABEL, Stacked, CheckSucc; @@ -2060,9 +2047,9 @@ // the number of loads below (currently 4) to just 2 or 3. // Refer to the comments in synchronizer.cpp. // In practice the chain of fetches doesn't seem to impact performance, however. + xorptr(boxReg, boxReg); if ((EmitSync & 65536) == 0 && (EmitSync & 256)) { // Attempt to reduce branch density - AMD's branch predictor. - xorptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner))); orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions))); orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList))); orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq))); @@ -2070,7 +2057,6 @@ movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), NULL_WORD); jmpb (DONE_LABEL); } else { - xorptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner))); orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions))); jccb (Assembler::notZero, DONE_LABEL); movptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList))); @@ -2093,10 +2079,8 @@ bind (CheckSucc); // Optional pre-test ... it's safe to elide this - if ((EmitSync & 16) == 0) { - cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), (int32_t)NULL_WORD); - jccb (Assembler::zero, LGoSlowPath); - } + cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), (int32_t)NULL_WORD); + jccb(Assembler::zero, LGoSlowPath); // We have a classic Dekker-style idiom: // ST m->_owner = 0 ; MEMBAR; LD m->_succ @@ -2109,7 +2093,8 @@ // In older IA32 processors MFENCE is slower than lock:add or xchg // particularly if the write-buffer is full as might be the case if // if stores closely precede the fence or fence-equivalent instruction. - // In more modern implementations MFENCE appears faster, however. + // See https://blogs.oracle.com/dave/entry/instruction_selection_for_volatile_fences + // as the situation has changed with Nehalem and Shanghai. 
// (3) In lieu of an explicit fence, use lock:addl to the top-of-stack // The $lines underlying the top-of-stack should be in M-state. // The locked add instruction is serializing, of course. @@ -2126,11 +2111,7 @@ movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), NULL_WORD); if (os::is_MP()) { - if (VM_Version::supports_sse2() && 1 == FenceInstruction) { - mfence(); - } else { - lock (); addptr(Address(rsp, 0), 0); - } + lock(); addptr(Address(rsp, 0), 0); } // Ratify _succ remains non-null cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), 0); @@ -2179,8 +2160,17 @@ } #else // _LP64 // It's inflated - movptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner))); - xorptr(boxReg, r15_thread); + if (EmitSync & 1024) { + // Emit code to check that _owner == Self + // We could fold the _owner test into subsequent code more efficiently + // than using a stand-alone check, but since _owner checking is off by + // default we don't bother. We also might consider predicating the + // _owner==Self check on Xcheck:jni or running on a debug build. + movptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner))); + xorptr(boxReg, r15_thread); + } else { + xorptr(boxReg, boxReg); + } orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions))); jccb (Assembler::notZero, DONE_LABEL); movptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq))); @@ -2190,23 +2180,51 @@ jmpb (DONE_LABEL); if ((EmitSync & 65536) == 0) { + // Try to avoid passing control into the slow_path ... Label LSuccess, LGoSlowPath ; bind (CheckSucc); + + // The following optional optimization can be elided if necessary + // Effectively: if (succ == null) goto SlowPath + // The code reduces the window for a race, however, + // and thus benefits performance. cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), (int32_t)NULL_WORD); jccb (Assembler::zero, LGoSlowPath); - // I'd much rather use lock:andl m->_owner, 0 as it's faster than the - // the explicit ST;MEMBAR combination, but masm doesn't currently support - // "ANDQ M,IMM". Don't use MFENCE here. lock:add to TOS, xchg, etc - // are all faster when the write buffer is populated. - movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), (int32_t)NULL_WORD); - if (os::is_MP()) { - lock (); addl (Address(rsp, 0), 0); + if ((EmitSync & 16) && os::is_MP()) { + orptr(boxReg, boxReg); + xchgptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner))); + } else { + movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), (int32_t)NULL_WORD); + if (os::is_MP()) { + // Memory barrier/fence + // Dekker pivot point -- fulcrum : ST Owner; MEMBAR; LD Succ + // Instead of MFENCE we use a dummy locked add of 0 to the top-of-stack. + // This is faster on Nehalem and AMD Shanghai/Barcelona. + // See https://blogs.oracle.com/dave/entry/instruction_selection_for_volatile_fences + // We might also restructure (ST Owner=0;barrier;LD _Succ) to + // (mov box,0; xchgq box, &m->Owner; LD _succ) . + lock(); addl(Address(rsp, 0), 0); + } } cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), (int32_t)NULL_WORD); jccb (Assembler::notZero, LSuccess); - movptr (boxReg, (int32_t)NULL_WORD); // box is really EAX + // Rare inopportune interleaving - race. + // The successor vanished in the small window above. + // The lock is contended -- (cxq|EntryList) != null -- and there's no apparent successor. + // We need to ensure progress and succession. + // Try to reacquire the lock. 
+ // If that fails then the new owner is responsible for succession and this + // thread needs to take no further action and can exit via the fast path (success). + // If the re-acquire succeeds then pass control into the slow path. + // As implemented, this latter mode is horrible because we generated more + // coherence traffic on the lock *and* artifically extended the critical section + // length while by virtue of passing control into the slow path. + + // box is really RAX -- the following CMPXCHG depends on that binding + // cmpxchg R,[M] is equivalent to rax = CAS(M,rax,R) + movptr(boxReg, (int32_t)NULL_WORD); if (os::is_MP()) { lock(); } cmpxchgptr(r15_thread, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner))); jccb (Assembler::notEqual, LSuccess); @@ -2231,10 +2249,6 @@ } #endif bind(DONE_LABEL); - // Avoid branch to branch on AMD processors - if (EmitSync & 32768) { - nop(); - } } } #endif // COMPILER2 @@ -6090,6 +6104,10 @@ // We always push rbp, so that on return to interpreter rbp, will be // restored correctly and we can correct the stack. push(rbp); + // Save caller's stack pointer into RBP if the frame pointer is preserved. + if (PreserveFramePointer) { + mov(rbp, rsp); + } // Remove word for ebp framesize -= wordSize; @@ -6104,6 +6122,11 @@ // Save RBP register now. framesize -= wordSize; movptr(Address(rsp, framesize), rbp); + // Save caller's stack pointer into RBP if the frame pointer is preserved. + if (PreserveFramePointer) { + movptr(rbp, rsp); + addptr(rbp, framesize + wordSize); + } } if (VerifyStackAtCalls) { // Majik cookie to verify stack depth @@ -6657,7 +6680,7 @@ subl(cnt2, stride2); jccb(Assembler::notZero, COMPARE_WIDE_VECTORS_LOOP); // clean upper bits of YMM registers - vzeroupper(); + vpxor(vec1, vec1); // compare wide vectors tail bind(COMPARE_WIDE_TAIL); @@ -6672,7 +6695,7 @@ // Identifies the mismatching (higher or lower)16-bytes in the 32-byte vectors. bind(VECTOR_NOT_EQUAL); // clean upper bits of YMM registers - vzeroupper(); + vpxor(vec1, vec1); lea(str1, Address(str1, result, scale)); lea(str2, Address(str2, result, scale)); jmp(COMPARE_16_CHARS); @@ -6931,7 +6954,8 @@ bind(DONE); if (UseAVX >= 2) { // clean upper bits of YMM registers - vzeroupper(); + vpxor(vec1, vec1); + vpxor(vec2, vec2); } } @@ -7065,7 +7089,8 @@ BIND(L_check_fill_8_bytes); // clean upper bits of YMM registers - vzeroupper(); + movdl(xtmp, value); + pshufd(xtmp, xtmp, 0); } else { // Fill 32-byte chunks pshufd(xtmp, xtmp, 0); @@ -7228,7 +7253,11 @@ bind(L_copy_16_chars_exit); if (UseAVX >= 2) { // clean upper bits of YMM registers - vzeroupper(); + vpxor(tmp2Reg, tmp2Reg); + vpxor(tmp3Reg, tmp3Reg); + vpxor(tmp4Reg, tmp4Reg); + movdl(tmp1Reg, tmp5); + pshufd(tmp1Reg, tmp1Reg, 0); } subptr(len, 8); jccb(Assembler::greater, L_copy_8_chars_exit);
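The comments above describe the Dekker pivot in the inflated unlock path: the store clearing _owner must not be reordered with the subsequent load of _succ, and on x86 a locked add of zero to the top of stack serves as the StoreLoad barrier. A small C++ sketch of the same ordering constraint expressed with standard atomics; owner and succ are stand-ins for the ObjectMonitor fields:

    #include <atomic>

    std::atomic<void*> owner{nullptr};   // stand-in for ObjectMonitor::_owner
    std::atomic<void*> succ{nullptr};    // stand-in for ObjectMonitor::_succ

    // Exiting thread: the full fence keeps the owner-clearing store from being
    // reordered past the successor load (ST _owner; MEMBAR; LD _succ).  In the
    // generated code, lock addl [rsp], 0 plays the role of this fence.
    bool successor_visible_after_release() {
      owner.store(nullptr, std::memory_order_release);
      std::atomic_thread_fence(std::memory_order_seq_cst);
      return succ.load(std::memory_order_acquire) != nullptr;
    }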
--- a/src/cpu/x86/vm/methodHandles_x86.cpp Thu Apr 30 17:20:25 2015 -0700 +++ b/src/cpu/x86/vm/methodHandles_x86.cpp Fri May 01 03:56:01 2015 -0700 @@ -374,7 +374,7 @@ // member_reg - MemberName that was the trailing argument // temp1_recv_klass - klass of stacked receiver, if needed // rsi/r13 - interpreter linkage (if interpreted) - // rcx, rdx, rsi, rdi, r8, r8 - compiler arguments (if compiled) + // rcx, rdx, rsi, rdi, r8 - compiler arguments (if compiled) Label L_incompatible_class_change_error; switch (iid) {
--- a/src/cpu/x86/vm/rtmLocking.cpp Thu Apr 30 17:20:25 2015 -0700 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,60 +0,0 @@ -/* - * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA - * or visit www.oracle.com if you need additional information or have any - * questions. - * - */ - -#include "precompiled.hpp" -#include "memory/allocation.inline.hpp" -#include "runtime/task.hpp" -#include "runtime/rtmLocking.hpp" - -// One-shot PeriodicTask subclass for enabling RTM locking -uintx RTMLockingCounters::_calculation_flag = 0; - -class RTMLockingCalculationTask : public PeriodicTask { - public: - RTMLockingCalculationTask(size_t interval_time) : PeriodicTask(interval_time){ } - - virtual void task() { - RTMLockingCounters::_calculation_flag = 1; - // Reclaim our storage and disenroll ourself - delete this; - } -}; - -void RTMLockingCounters::init() { - if (UseRTMLocking && RTMLockingCalculationDelay > 0) { - RTMLockingCalculationTask* task = new RTMLockingCalculationTask(RTMLockingCalculationDelay); - task->enroll(); - } else { - _calculation_flag = 1; - } -} - -//------------------------------print_on------------------------------- -void RTMLockingCounters::print_on(outputStream* st) { - tty->print_cr("# rtm locks total (estimated): " UINTX_FORMAT, _total_count * RTMTotalCountIncrRate); - tty->print_cr("# rtm lock aborts : " UINTX_FORMAT, _abort_count); - for (int i = 0; i < ABORT_STATUS_LIMIT; i++) { - tty->print_cr("# rtm lock aborts %d: " UINTX_FORMAT, i, _abortX_count[i]); - } -}
--- a/src/cpu/x86/vm/runtime_x86_32.cpp Thu Apr 30 17:20:25 2015 -0700 +++ b/src/cpu/x86/vm/runtime_x86_32.cpp Fri May 01 03:56:01 2015 -0700 @@ -126,10 +126,6 @@ // rax: exception handler for given <exception oop/exception pc> - // Restore SP from BP if the exception PC is a MethodHandle call site. - __ cmpl(Address(rcx, JavaThread::is_method_handle_return_offset()), 0); - __ cmovptr(Assembler::notEqual, rsp, rbp_mh_SP_save); - // We have a handler in rax, (could be deopt blob) // rdx - throwing pc, deopt blob will need it.
--- a/src/cpu/x86/vm/sharedRuntime_x86_32.cpp Thu Apr 30 17:20:25 2015 -0700 +++ b/src/cpu/x86/vm/sharedRuntime_x86_32.cpp Fri May 01 03:56:01 2015 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2003, 2015, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -2343,12 +2343,14 @@ // should be a peal // +wordSize because of the push above + // args are (oop obj, BasicLock* lock, JavaThread* thread) + __ push(thread); __ lea(rax, Address(rbp, lock_slot_rbp_offset)); __ push(rax); __ push(obj_reg); __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C))); - __ addptr(rsp, 2*wordSize); + __ addptr(rsp, 3*wordSize); #ifdef ASSERT { Label L;
--- a/src/cpu/x86/vm/sharedRuntime_x86_64.cpp Thu Apr 30 17:20:25 2015 -0700 +++ b/src/cpu/x86/vm/sharedRuntime_x86_64.cpp Fri May 01 03:56:01 2015 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2003, 2015, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -2581,6 +2581,7 @@ __ lea(c_rarg1, Address(rsp, lock_slot_offset * VMRegImpl::stack_slot_size)); __ mov(c_rarg0, obj_reg); + __ mov(c_rarg2, r15_thread); __ mov(r12, rsp); // remember sp __ subptr(rsp, frame::arg_reg_save_area_bytes); // windows __ andptr(rsp, -16); // align stack as required by ABI @@ -2590,6 +2591,7 @@ __ movptr(rbx, Address(r15_thread, in_bytes(Thread::pending_exception_offset()))); __ movptr(Address(r15_thread, in_bytes(Thread::pending_exception_offset())), (int32_t)NULL_WORD); + // args are (oop obj, BasicLock* lock, JavaThread* thread) __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C))); __ mov(rsp, r12); // restore sp __ reinit_heapbase(); @@ -3393,8 +3395,8 @@ // Save callee-saved registers. See x86_64.ad. - // rbp is an implicitly saved callee saved register (i.e. the calling - // convention will save restore it in prolog/epilog) Other than that + // rbp is an implicitly saved callee saved register (i.e., the calling + // convention will save/restore it in the prolog/epilog). Other than that // there are no callee save registers now that adapter frames are gone. __ movptr(Address(rsp, SimpleRuntimeFrame::rbp_off << LogBytesPerInt), rbp); @@ -3436,9 +3438,9 @@ // Restore callee-saved registers - // rbp is an implicitly saved callee saved register (i.e. the calling + // rbp is an implicitly saved callee-saved register (i.e., the calling // convention will save restore it in prolog/epilog) Other than that - // there are no callee save registers no that adapter frames are gone. + // there are no callee save registers now that adapter frames are gone. __ movptr(rbp, Address(rsp, SimpleRuntimeFrame::rbp_off << LogBytesPerInt)); @@ -3447,10 +3449,6 @@ // rax: exception handler - // Restore SP from BP if the exception PC is a MethodHandle call site. - __ cmpl(Address(r15_thread, JavaThread::is_method_handle_return_offset()), 0); - __ cmovptr(Assembler::notEqual, rsp, rbp_mh_SP_save); - // We have a handler in rax (could be deopt blob). __ mov(r8, rax);
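As on SPARC and 32-bit x86 earlier in this changeset, the 64-bit native wrapper now passes the current thread explicitly, so the runtime unlock helper is invoked with (oop obj, BasicLock* lock, JavaThread* thread) per the comments above. A purely illustrative C++ sketch of a function with that argument shape; the types and name here are placeholders, not the VM's declaration:

    // The wrappers above load the object into the first argument, the lock
    // slot address into the second, and the current thread into the third
    // before making the runtime call.
    struct OopSketch;  struct BasicLockSketch;  struct JavaThreadSketch;

    static void complete_monitor_unlocking_sketch(OopSketch* obj,
                                                  BasicLockSketch* lock,
                                                  JavaThreadSketch* thread) {
      (void)obj; (void)lock; (void)thread;   // slow-path unlock would happen here
    }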
--- a/src/cpu/x86/vm/stubGenerator_x86_32.cpp Thu Apr 30 17:20:25 2015 -0700 +++ b/src/cpu/x86/vm/stubGenerator_x86_32.cpp Fri May 01 03:56:01 2015 -0700 @@ -835,7 +835,8 @@ if (UseUnalignedLoadStores && (UseAVX >= 2)) { // clean upper bits of YMM registers - __ vzeroupper(); + __ vpxor(xmm0, xmm0); + __ vpxor(xmm1, xmm1); } __ addl(qword_count, 8); __ jccb(Assembler::zero, L_exit);
--- a/src/cpu/x86/vm/stubGenerator_x86_64.cpp Thu Apr 30 17:20:25 2015 -0700 +++ b/src/cpu/x86/vm/stubGenerator_x86_64.cpp Fri May 01 03:56:01 2015 -0700 @@ -1352,7 +1352,8 @@ __ BIND(L_end); if (UseAVX >= 2) { // clean upper bits of YMM registers - __ vzeroupper(); + __ vpxor(xmm0, xmm0); + __ vpxor(xmm1, xmm1); } } else { // Copy 32-bytes per iteration @@ -1429,7 +1430,8 @@ __ BIND(L_end); if (UseAVX >= 2) { // clean upper bits of YMM registers - __ vzeroupper(); + __ vpxor(xmm0, xmm0); + __ vpxor(xmm1, xmm1); } } else { // Copy 32-bytes per iteration
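
[editor note] Both arraycopy stub hunks above replace vzeroupper with explicit vpxor of xmm0 and xmm1, the two registers the copy loop dirtied; the retained comment still reads "clean upper bits of YMM registers". As a rough illustration of why XOR-zeroing achieves that (assuming the vpxor emitted here is VEX-encoded, which zero-extends the destination), here is a small standalone C++ sketch, not stub code:

  #include <immintrin.h>
  // Illustration only: compiled with AVX enabled, this zeroing XOR is VEX-encoded,
  // so the full 256-bit register is cleared and no dirty upper YMM state is left
  // for later legacy-SSE instructions.
  static __m128i zero_xmm() {
    return _mm_setzero_si128();   // assembles to (v)pxor reg,reg
  }
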
--- a/src/cpu/x86/vm/templateTable_x86.cpp Thu Apr 30 17:20:25 2015 -0700 +++ b/src/cpu/x86/vm/templateTable_x86.cpp Fri May 01 03:56:01 2015 -0700 @@ -543,8 +543,16 @@ } void TemplateTable::iload() { + iload_internal(); +} + +void TemplateTable::nofast_iload() { + iload_internal(may_not_rewrite); +} + +void TemplateTable::iload_internal(RewriteControl rc) { transition(vtos, itos); - if (RewriteFrequentPairs) { + if (RewriteFrequentPairs && rc == may_rewrite) { Label rewrite, done; const Register bc = LP64_ONLY(c_rarg3) NOT_LP64(rcx); LP64_ONLY(assert(rbx != bc, "register damaged")); @@ -815,6 +823,14 @@ } void TemplateTable::aload_0() { + aload_0_internal(); +} + +void TemplateTable::nofast_aload_0() { + aload_0_internal(may_not_rewrite); +} + +void TemplateTable::aload_0_internal(RewriteControl rc) { transition(vtos, atos); // According to bytecode histograms, the pairs: // @@ -837,7 +853,7 @@ // aload_0, iload_1 // These bytecodes with a small amount of code are most profitable // to rewrite - if (RewriteFrequentPairs) { + if (RewriteFrequentPairs && rc == may_rewrite) { Label rewrite, done; const Register bc = LP64_ONLY(c_rarg3) NOT_LP64(rcx); @@ -2491,29 +2507,21 @@ assert_different_registers(Rcache, index, temp); Label resolved; - assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range"); - __ get_cache_and_index_and_bytecode_at_bcp(Rcache, index, temp, byte_no, 1, index_size); - __ cmpl(temp, (int) bytecode()); // have we resolved this bytecode? - __ jcc(Assembler::equal, resolved); + + Bytecodes::Code code = bytecode(); + switch (code) { + case Bytecodes::_nofast_getfield: code = Bytecodes::_getfield; break; + case Bytecodes::_nofast_putfield: code = Bytecodes::_putfield; break; + } + + assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range"); + __ get_cache_and_index_and_bytecode_at_bcp(Rcache, index, temp, byte_no, 1, index_size); + __ cmpl(temp, code); // have we resolved this bytecode? 
+ __ jcc(Assembler::equal, resolved); // resolve first time through - address entry; - switch (bytecode()) { - case Bytecodes::_getstatic : // fall through - case Bytecodes::_putstatic : // fall through - case Bytecodes::_getfield : // fall through - case Bytecodes::_putfield : entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_get_put); break; - case Bytecodes::_invokevirtual : // fall through - case Bytecodes::_invokespecial : // fall through - case Bytecodes::_invokestatic : // fall through - case Bytecodes::_invokeinterface: entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_invoke); break; - case Bytecodes::_invokehandle : entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_invokehandle); break; - case Bytecodes::_invokedynamic : entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_invokedynamic); break; - default: - fatal(err_msg("unexpected bytecode: %s", Bytecodes::name(bytecode()))); - break; - } - __ movl(temp, (int)bytecode()); + address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_from_cache); + __ movl(temp, code); __ call_VM(noreg, entry, temp); // Update registers with resolved info __ get_cache_and_index_at_bcp(Rcache, index, 1, index_size); @@ -2628,7 +2636,7 @@ __ verify_oop(r); } -void TemplateTable::getfield_or_static(int byte_no, bool is_static) { +void TemplateTable::getfield_or_static(int byte_no, bool is_static, RewriteControl rc) { transition(vtos, vtos); const Register cache = rcx; @@ -2660,7 +2668,7 @@ __ load_signed_byte(rax, field); __ push(btos); // Rewrite bytecode to be faster - if (!is_static) { + if (!is_static && rc == may_rewrite) { patch_bytecode(Bytecodes::_fast_bgetfield, bc, rbx); } __ jmp(Done); @@ -2671,7 +2679,7 @@ // atos __ load_heap_oop(rax, field); __ push(atos); - if (!is_static) { + if (!is_static && rc == may_rewrite) { patch_bytecode(Bytecodes::_fast_agetfield, bc, rbx); } __ jmp(Done); @@ -2683,7 +2691,7 @@ __ movl(rax, field); __ push(itos); // Rewrite bytecode to be faster - if (!is_static) { + if (!is_static && rc == may_rewrite) { patch_bytecode(Bytecodes::_fast_igetfield, bc, rbx); } __ jmp(Done); @@ -2695,7 +2703,7 @@ __ load_unsigned_short(rax, field); __ push(ctos); // Rewrite bytecode to be faster - if (!is_static) { + if (!is_static && rc == may_rewrite) { patch_bytecode(Bytecodes::_fast_cgetfield, bc, rbx); } __ jmp(Done); @@ -2707,7 +2715,7 @@ __ load_signed_short(rax, field); __ push(stos); // Rewrite bytecode to be faster - if (!is_static) { + if (!is_static && rc == may_rewrite) { patch_bytecode(Bytecodes::_fast_sgetfield, bc, rbx); } __ jmp(Done); @@ -2731,7 +2739,7 @@ __ push(ltos); // Rewrite bytecode to be faster - LP64_ONLY(if (!is_static) patch_bytecode(Bytecodes::_fast_lgetfield, bc, rbx)); + LP64_ONLY(if (!is_static && rc == may_rewrite) patch_bytecode(Bytecodes::_fast_lgetfield, bc, rbx)); __ jmp(Done); __ bind(notLong); @@ -2743,7 +2751,7 @@ NOT_LP64(__ fld_s(field)); __ push(ftos); // Rewrite bytecode to be faster - if (!is_static) { + if (!is_static && rc == may_rewrite) { patch_bytecode(Bytecodes::_fast_fgetfield, bc, rbx); } __ jmp(Done); @@ -2758,7 +2766,7 @@ NOT_LP64(__ fld_d(field)); __ push(dtos); // Rewrite bytecode to be faster - if (!is_static) { + if (!is_static && rc == may_rewrite) { patch_bytecode(Bytecodes::_fast_dgetfield, bc, rbx); } #ifdef ASSERT @@ -2779,6 +2787,10 @@ getfield_or_static(byte_no, false); } +void TemplateTable::nofast_getfield(int byte_no) { + getfield_or_static(byte_no, false, may_not_rewrite); +} + void 
TemplateTable::getstatic(int byte_no) { getfield_or_static(byte_no, true); } @@ -2870,7 +2882,7 @@ } } -void TemplateTable::putfield_or_static(int byte_no, bool is_static) { +void TemplateTable::putfield_or_static(int byte_no, bool is_static, RewriteControl rc) { transition(vtos, vtos); const Register cache = rcx; @@ -2911,7 +2923,7 @@ __ pop(btos); if (!is_static) pop_and_check_object(obj); __ movb(field, rax); - if (!is_static) { + if (!is_static && rc == may_rewrite) { patch_bytecode(Bytecodes::_fast_bputfield, bc, rbx, true, byte_no); } __ jmp(Done); @@ -2927,7 +2939,7 @@ if (!is_static) pop_and_check_object(obj); // Store into the field do_oop_store(_masm, field, rax, _bs->kind(), false); - if (!is_static) { + if (!is_static && rc == may_rewrite) { patch_bytecode(Bytecodes::_fast_aputfield, bc, rbx, true, byte_no); } __ jmp(Done); @@ -2942,7 +2954,7 @@ __ pop(itos); if (!is_static) pop_and_check_object(obj); __ movl(field, rax); - if (!is_static) { + if (!is_static && rc == may_rewrite) { patch_bytecode(Bytecodes::_fast_iputfield, bc, rbx, true, byte_no); } __ jmp(Done); @@ -2957,7 +2969,7 @@ __ pop(ctos); if (!is_static) pop_and_check_object(obj); __ movw(field, rax); - if (!is_static) { + if (!is_static && rc == may_rewrite) { patch_bytecode(Bytecodes::_fast_cputfield, bc, rbx, true, byte_no); } __ jmp(Done); @@ -2972,7 +2984,7 @@ __ pop(stos); if (!is_static) pop_and_check_object(obj); __ movw(field, rax); - if (!is_static) { + if (!is_static && rc == may_rewrite) { patch_bytecode(Bytecodes::_fast_sputfield, bc, rbx, true, byte_no); } __ jmp(Done); @@ -2988,7 +3000,7 @@ __ pop(ltos); if (!is_static) pop_and_check_object(obj); __ movq(field, rax); - if (!is_static) { + if (!is_static && rc == may_rewrite) { patch_bytecode(Bytecodes::_fast_lputfield, bc, rbx, true, byte_no); } __ jmp(Done); @@ -3035,7 +3047,7 @@ if (!is_static) pop_and_check_object(obj); NOT_LP64( __ fstp_s(field);) LP64_ONLY( __ movflt(field, xmm0);) - if (!is_static) { + if (!is_static && rc == may_rewrite) { patch_bytecode(Bytecodes::_fast_fputfield, bc, rbx, true, byte_no); } __ jmp(Done); @@ -3053,7 +3065,7 @@ if (!is_static) pop_and_check_object(obj); NOT_LP64( __ fstp_d(field);) LP64_ONLY( __ movdbl(field, xmm0);) - if (!is_static) { + if (!is_static && rc == may_rewrite) { patch_bytecode(Bytecodes::_fast_dputfield, bc, rbx, true, byte_no); } } @@ -3079,6 +3091,10 @@ putfield_or_static(byte_no, false); } +void TemplateTable::nofast_putfield(int byte_no) { + putfield_or_static(byte_no, false, may_not_rewrite); +} + void TemplateTable::putstatic(int byte_no) { putfield_or_static(byte_no, true); }
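
[editor note] The template table above gains _nofast_ twins (nofast_iload, nofast_aload_0, nofast_getfield, nofast_putfield) that reuse the same generators but pass a RewriteControl value so patch_bytecode is skipped. A minimal sketch of that pattern in plain C++ -- the enum values are taken from the diff, everything else is a simplified stand-in:

  // Sketch only -- not the HotSpot classes.
  enum RewriteControl { may_rewrite, may_not_rewrite };   // values as used in the diff

  struct Generator {
    void getfield(int byte_no)        { getfield_or_static(byte_no, false, may_rewrite); }
    void nofast_getfield(int byte_no) { getfield_or_static(byte_no, false, may_not_rewrite); }

    void getfield_or_static(int byte_no, bool is_static, RewriteControl rc) {
      // ... load the field ...
      if (!is_static && rc == may_rewrite) {
        // patch_bytecode(...): only the rewriting variant upgrades the bytecode
        // to its _fast_ form; the nofast path leaves the bytecode untouched.
      }
    }
  };
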
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/cpu/x86/vm/templateTable_x86.hpp Fri May 01 03:56:01 2015 -0700 @@ -0,0 +1,42 @@ +/* + * Copyright (c) 1998, 2015, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_X86_VM_TEMPLATETABLE_X86_HPP +#define CPU_X86_VM_TEMPLATETABLE_X86_HPP + + static void prepare_invoke(int byte_no, + Register method, // linked method (or i-klass) + Register index = noreg, // itable index, MethodType, etc. + Register recv = noreg, // if caller wants to see it + Register flags = noreg // if caller wants to test it + ); + static void invokevirtual_helper(Register index, Register recv, + Register flags); + static void volatile_barrier(Assembler::Membar_mask_bits order_constraint); + + // Helpers + static void index_check(Register array, Register index); + static void index_check_without_pop(Register array, Register index); + +#endif // CPU_X86_VM_TEMPLATETABLE_X86_HPP
--- a/src/cpu/x86/vm/templateTable_x86_32.hpp Thu Apr 30 17:20:25 2015 -0700 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,42 +0,0 @@ -/* - * Copyright (c) 1998, 2012, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA - * or visit www.oracle.com if you need additional information or have any - * questions. - * - */ - -#ifndef CPU_X86_VM_TEMPLATETABLE_X86_32_HPP -#define CPU_X86_VM_TEMPLATETABLE_X86_32_HPP - - static void prepare_invoke(int byte_no, - Register method, // linked method (or i-klass) - Register index = noreg, // itable index, MethodType, etc. - Register recv = noreg, // if caller wants to see it - Register flags = noreg // if caller wants to test it - ); - static void invokevirtual_helper(Register index, Register recv, - Register flags); - static void volatile_barrier(Assembler::Membar_mask_bits order_constraint); - - // Helpers - static void index_check(Register array, Register index); - static void index_check_without_pop(Register array, Register index); - -#endif // CPU_X86_VM_TEMPLATETABLE_X86_32_HPP
--- a/src/cpu/x86/vm/templateTable_x86_64.hpp Thu Apr 30 17:20:25 2015 -0700 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,42 +0,0 @@ -/* - * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA - * or visit www.oracle.com if you need additional information or have any - * questions. - * - */ - -#ifndef CPU_X86_VM_TEMPLATETABLE_X86_64_HPP -#define CPU_X86_VM_TEMPLATETABLE_X86_64_HPP - - static void prepare_invoke(int byte_no, - Register method, // linked method (or i-klass) - Register index = noreg, // itable index, MethodType, etc. - Register recv = noreg, // if caller wants to see it - Register flags = noreg // if caller wants to test it - ); - static void invokevirtual_helper(Register index, Register recv, - Register flags); - static void volatile_barrier(Assembler::Membar_mask_bits order_constraint); - - // Helpers - static void index_check(Register array, Register index); - static void index_check_without_pop(Register array, Register index); - -#endif // CPU_X86_VM_TEMPLATETABLE_X86_64_HPP
--- a/src/cpu/x86/vm/vm_version_x86.cpp Thu Apr 30 17:20:25 2015 -0700 +++ b/src/cpu/x86/vm/vm_version_x86.cpp Fri May 01 03:56:01 2015 -0700 @@ -379,15 +379,6 @@ }; }; - -void VM_Version::get_cpu_info_wrapper() { - get_cpu_info_stub(&_cpuid_info); -} - -#ifndef CALL_TEST_FUNC_WITH_WRAPPER_IF_NEEDED - #define CALL_TEST_FUNC_WITH_WRAPPER_IF_NEEDED(f) f() -#endif - void VM_Version::get_processor_features() { _cpu = 4; // 486 by default @@ -401,9 +392,7 @@ if (!Use486InstrsOnly) { // Get raw processor info - // Some platforms (like Win*) need a wrapper around here - // in order to properly handle SEGV for YMM registers test. - CALL_TEST_FUNC_WITH_WRAPPER_IF_NEEDED(get_cpu_info_wrapper); + get_cpu_info_stub(&_cpuid_info); assert_is_initialized(); _cpu = extended_cpu_family(); @@ -980,6 +969,11 @@ (cache_line_size > ContendedPaddingWidth)) ContendedPaddingWidth = cache_line_size; + // This machine allows unaligned memory accesses + if (FLAG_IS_DEFAULT(UseUnalignedAccesses)) { + FLAG_SET_DEFAULT(UseUnalignedAccesses, true); + } + #ifndef PRODUCT if (PrintMiscellaneous && Verbose) { tty->print_cr("Logical CPUs per core: %u",
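
[editor note] The new default for UseUnalignedAccesses reflects the fact that x86 handles misaligned loads and stores in hardware. A small portable illustration (not HotSpot code) of reading a misaligned 32-bit value:

  #include <cstdint>
  #include <cstring>
  // Illustration only: on x86 this memcpy typically compiles down to a single mov,
  // because the hardware tolerates the misaligned address.
  static int32_t load_unaligned_s32(const unsigned char* p) {
    int32_t v;
    std::memcpy(&v, p, sizeof v);
    return v;
  }
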
--- a/src/cpu/x86/vm/x86.ad Thu Apr 30 17:20:25 2015 -0700 +++ b/src/cpu/x86/vm/x86.ad Fri May 01 03:56:01 2015 -0700 @@ -490,7 +490,7 @@ class NativeJump; class CallStubImpl { - + //-------------------------------------------------------------- //---< Used for optimization in Compile::shorten_branches >--- //-------------------------------------------------------------- @@ -500,9 +500,9 @@ static uint size_call_trampoline() { return 0; // no call trampolines on this platform } - + // number of relocations needed by a call trampoline stub - static uint reloc_call_trampoline() { + static uint reloc_call_trampoline() { return 0; // no call trampolines on this platform } }; @@ -623,6 +623,22 @@ if ((UseSSE < 4) && (UseAVX < 1)) // only with SSE4_1 or AVX return false; break; + case Op_AddReductionVL: + if (UseAVX < 3) // only EVEX : vector connectivity becomes an issue here + return false; + case Op_AddReductionVI: + if (UseSSE < 3) // requires at least SSE3 + return false; + case Op_MulReductionVI: + if (UseSSE < 4) // requires at least SSE4 + return false; + case Op_AddReductionVF: + case Op_AddReductionVD: + case Op_MulReductionVF: + case Op_MulReductionVD: + if (UseSSE < 1) // requires at least SSE + return false; + break; case Op_CompareAndSwapL: #ifdef _LP64 case Op_CompareAndSwapP: @@ -914,21 +930,6 @@ encode %{ - enc_class preserve_SP %{ - debug_only(int off0 = cbuf.insts_size()); - MacroAssembler _masm(&cbuf); - // RBP is preserved across all calls, even compiled calls. - // Use it to preserve RSP in places where the callee might change the SP. - __ movptr(rbp_mh_SP_save, rsp); - debug_only(int off1 = cbuf.insts_size()); - assert(off1 - off0 == preserve_SP_size(), "correct size prediction"); - %} - - enc_class restore_SP %{ - MacroAssembler _masm(&cbuf); - __ movptr(rsp, rbp_mh_SP_save); - %} - enc_class call_epilog %{ if (VerifyStackAtCalls) { // Check that stack depth is unchanged: find majik cookie on stack @@ -2532,6 +2533,574 @@ ins_pipe( fpu_reg_reg ); %} +// ====================REDUCTION ARITHMETIC======================================= + +instruct rsadd2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{ + predicate(UseSSE > 2 && UseAVX == 0); + match(Set dst (AddReductionVI src1 src2)); + effect(TEMP tmp2, TEMP tmp); + format %{ "movdqu $tmp2,$src2\n\t" + "phaddd $tmp2,$tmp2\n\t" + "movd $tmp,$src1\n\t" + "paddd $tmp,$tmp2\n\t" + "movd $dst,$tmp\t! add reduction2I" %} + ins_encode %{ + __ movdqu($tmp2$$XMMRegister, $src2$$XMMRegister); + __ phaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister); + __ movdl($tmp$$XMMRegister, $src1$$Register); + __ paddd($tmp$$XMMRegister, $tmp2$$XMMRegister); + __ movdl($dst$$Register, $tmp$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct rvadd2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{ + predicate(UseAVX > 0); + match(Set dst (AddReductionVI src1 src2)); + effect(TEMP tmp, TEMP tmp2); + format %{ "vphaddd $tmp,$src2,$src2\n\t" + "movd $tmp2,$src1\n\t" + "vpaddd $tmp2,$tmp2,$tmp\n\t" + "movd $dst,$tmp2\t! 
add reduction2I" %} + ins_encode %{ + __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, false); + __ movdl($tmp2$$XMMRegister, $src1$$Register); + __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, false); + __ movdl($dst$$Register, $tmp2$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct rsadd4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{ + predicate(UseSSE > 2 && UseAVX == 0); + match(Set dst (AddReductionVI src1 src2)); + effect(TEMP tmp2, TEMP tmp); + format %{ "movdqu $tmp2,$src2\n\t" + "phaddd $tmp2,$tmp2\n\t" + "phaddd $tmp2,$tmp2\n\t" + "movd $tmp,$src1\n\t" + "paddd $tmp,$tmp2\n\t" + "movd $dst,$tmp\t! add reduction4I" %} + ins_encode %{ + __ movdqu($tmp2$$XMMRegister, $src2$$XMMRegister); + __ phaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister); + __ phaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister); + __ movdl($tmp$$XMMRegister, $src1$$Register); + __ paddd($tmp$$XMMRegister, $tmp2$$XMMRegister); + __ movdl($dst$$Register, $tmp$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct rvadd4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{ + predicate(UseAVX > 0); + match(Set dst (AddReductionVI src1 src2)); + effect(TEMP tmp, TEMP tmp2); + format %{ "vphaddd $tmp,$src2,$src2\n\t" + "vphaddd $tmp,$tmp,$tmp2\n\t" + "movd $tmp2,$src1\n\t" + "vpaddd $tmp2,$tmp2,$tmp\n\t" + "movd $dst,$tmp2\t! add reduction4I" %} + ins_encode %{ + __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, false); + __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, false); + __ movdl($tmp2$$XMMRegister, $src1$$Register); + __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, false); + __ movdl($dst$$Register, $tmp2$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct rvadd8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{ + predicate(UseAVX > 0); + match(Set dst (AddReductionVI src1 src2)); + effect(TEMP tmp, TEMP tmp2); + format %{ "vphaddd $tmp,$src2,$src2\n\t" + "vphaddd $tmp,$tmp,$tmp2\n\t" + "vextractf128 $tmp2,$tmp\n\t" + "vpaddd $tmp,$tmp,$tmp2\n\t" + "movd $tmp2,$src1\n\t" + "vpaddd $tmp2,$tmp2,$tmp\n\t" + "movd $dst,$tmp2\t! add reduction8I" %} + ins_encode %{ + __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, true); + __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, true); + __ vextractf128h($tmp2$$XMMRegister, $tmp$$XMMRegister); + __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, false); + __ movdl($tmp2$$XMMRegister, $src1$$Register); + __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, false); + __ movdl($dst$$Register, $tmp2$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct rsadd2F_reduction_reg(regF dst, regF src1, vecD src2, regF tmp, regF tmp2) %{ + predicate(UseSSE >= 1 && UseAVX == 0); + match(Set dst (AddReductionVF src1 src2)); + effect(TEMP tmp, TEMP tmp2); + format %{ "movdqu $tmp,$src1\n\t" + "addss $tmp,$src2\n\t" + "pshufd $tmp2,$src2,0x01\n\t" + "addss $tmp,$tmp2\n\t" + "movdqu $dst,$tmp\t! 
add reduction2F" %} + ins_encode %{ + __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister); + __ addss($tmp$$XMMRegister, $src2$$XMMRegister); + __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x01); + __ addss($tmp$$XMMRegister, $tmp2$$XMMRegister); + __ movdqu($dst$$XMMRegister, $tmp$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct rvadd2F_reduction_reg(regF dst, regF src1, vecD src2, regF tmp, regF tmp2) %{ + predicate(UseAVX > 0); + match(Set dst (AddReductionVF src1 src2)); + effect(TEMP tmp2, TEMP tmp); + format %{ "vaddss $tmp2,$src1,$src2\n\t" + "pshufd $tmp,$src2,0x01\n\t" + "vaddss $dst,$tmp2,$tmp\t! add reduction2F" %} + ins_encode %{ + __ vaddss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); + __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
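
[editor note] The new reduction patterns added to x86.ad fold a vector into a scalar: dst = src1 + sum of the lanes of src2. The SSE variant for four ints uses two horizontal adds followed by the scalar add, exactly as the rsadd4I_reduction_reg format string spells out. A rough intrinsics equivalent, illustration only (requires SSSE3 for phaddd):

  #include <immintrin.h>
  // Illustration only: dst = src1 + (lane0 + lane1 + lane2 + lane3) of src2.
  static int add_reduction_4i(int src1, __m128i src2) {
    __m128i t = _mm_hadd_epi32(src2, src2);   // phaddd: [a+b, c+d, a+b, c+d]
    t = _mm_hadd_epi32(t, t);                 // phaddd: lane0 now holds a+b+c+d
    return src1 + _mm_cvtsi128_si32(t);       // movd + paddd + movd in the .ad pattern
  }
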