OpenJDK / jigsaw / jake / hotspot
changeset 11000:7c31312c5725
Merge
author | aph
date | Thu, 17 Mar 2016 17:03:20 +0000
parents | b7ba700456c5 27654175e92a
children | d28f025a1f80
files |
  src/jdk.hotspot.agent/share/classes/images/toolbarButtonGraphics/development/Server16.gif
  src/jdk.hotspot.agent/share/classes/images/toolbarButtonGraphics/development/Server24.gif
  src/jdk.hotspot.agent/share/classes/images/toolbarButtonGraphics/general/About16.gif
  src/jdk.hotspot.agent/share/classes/images/toolbarButtonGraphics/general/About24.gif
  src/jdk.hotspot.agent/share/classes/images/toolbarButtonGraphics/general/Delete16.gif
  src/jdk.hotspot.agent/share/classes/images/toolbarButtonGraphics/general/Delete24.gif
  src/jdk.hotspot.agent/share/classes/images/toolbarButtonGraphics/general/Find16.gif
  src/jdk.hotspot.agent/share/classes/images/toolbarButtonGraphics/general/Help16.gif
  src/jdk.hotspot.agent/share/classes/images/toolbarButtonGraphics/general/Help24.gif
  src/jdk.hotspot.agent/share/classes/images/toolbarButtonGraphics/general/History16.gif
  src/jdk.hotspot.agent/share/classes/images/toolbarButtonGraphics/general/History24.gif
  src/jdk.hotspot.agent/share/classes/images/toolbarButtonGraphics/general/Information16.gif
  src/jdk.hotspot.agent/share/classes/images/toolbarButtonGraphics/general/Information24.gif
  src/jdk.hotspot.agent/share/classes/images/toolbarButtonGraphics/general/New16.gif
  src/jdk.hotspot.agent/share/classes/images/toolbarButtonGraphics/general/New24.gif
  src/jdk.hotspot.agent/share/classes/images/toolbarButtonGraphics/general/Open16.gif
  src/jdk.hotspot.agent/share/classes/images/toolbarButtonGraphics/general/Open24.gif
  src/jdk.hotspot.agent/share/classes/images/toolbarButtonGraphics/general/Save24.gif
  src/jdk.hotspot.agent/share/classes/images/toolbarButtonGraphics/general/SaveAs16.gif
  src/jdk.hotspot.agent/share/classes/images/toolbarButtonGraphics/general/SaveAs24.gif
  src/jdk.hotspot.agent/share/classes/images/toolbarButtonGraphics/general/Zoom16.gif
  src/jdk.hotspot.agent/share/classes/images/toolbarButtonGraphics/general/ZoomIn16.gif
  src/jdk.hotspot.agent/share/classes/images/toolbarButtonGraphics/general/ZoomIn24.gif
  src/jdk.hotspot.agent/share/classes/images/toolbarButtonGraphics/navigation/Down16.gif
  src/jdk.hotspot.agent/share/classes/images/toolbarButtonGraphics/navigation/Up16.gif
  src/jdk.hotspot.agent/share/classes/images/toolbarButtonGraphics/text/AlignCenter16.gif
  src/jdk.hotspot.agent/share/classes/images/toolbarButtonGraphics/text/AlignCenter24.gif
  src/jdk.hotspot.agent/share/classes/images/toolbarButtonGraphics/text/AlignLeft16.gif
  src/jdk.hotspot.agent/share/classes/images/toolbarButtonGraphics/text/AlignLeft24.gif
  src/jdk.hotspot.agent/share/classes/images/toolbarButtonGraphics/text/AlignRight16.gif
  src/jdk.hotspot.agent/share/classes/images/toolbarButtonGraphics/text/AlignRight24.gif
  test/compiler/compilercontrol/jcmd/StressAddSequentiallyTest.java
diffstat | 423 files changed, 8307 insertions(+), 4838 deletions(-)
--- a/.hgtags Thu Feb 25 14:59:44 2016 +0000 +++ b/.hgtags Thu Mar 17 17:03:20 2016 +0000 @@ -509,3 +509,6 @@ 534c50395957c6025fb6627e93b35756f8d48a08 jdk-9+104 266fa9bb5297bf02cb2a7b038b10a109817d2b48 jdk-9+105 7232de4c17c37f60aecec4f3191090bd3d41d334 jdk-9+106 +c5146d4da417f76edfc43097d2e2ced042a65b4e jdk-9+107 +934f6793f5f7dca44f69b4559d525fa64b31840d jdk-9+108 +7e7e50ac4faf19899fc811569e32cfa478759ebb jdk-9+109
--- a/make/aix/makefiles/trace.make Thu Feb 25 14:59:44 2016 +0000 +++ b/make/aix/makefiles/trace.make Thu Mar 17 17:03:20 2016 +0000 @@ -1,5 +1,5 @@ # -# Copyright (c) 2003, 2015, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2003, 2016, Oracle and/or its affiliates. All rights reserved. # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. # # This code is free software; you can redistribute it and/or modify it @@ -57,11 +57,6 @@ TraceGeneratedNames += \ traceRequestables.hpp \ traceEventControl.hpp - -ifneq ($(INCLUDE_TRACE), false) -TraceGeneratedNames += traceProducer.cpp -endif - endif TraceGeneratedFiles = $(TraceGeneratedNames:%=$(TraceOutDir)/%) @@ -100,9 +95,6 @@ $(TraceOutDir)/traceEventClasses.hpp: $(TraceSrcDir)/trace.xml $(TraceAltSrcDir)/traceEventClasses.xsl $(XML_DEPS) $(GENERATE_CODE) -$(TraceOutDir)/traceProducer.cpp: $(TraceSrcDir)/trace.xml $(TraceAltSrcDir)/traceProducer.xsl $(XML_DEPS) - $(GENERATE_CODE) - $(TraceOutDir)/traceRequestables.hpp: $(TraceSrcDir)/trace.xml $(TraceAltSrcDir)/traceRequestables.xsl $(XML_DEPS) $(GENERATE_CODE)
--- a/make/bsd/makefiles/amd64.make Thu Feb 25 14:59:44 2016 +0000 +++ b/make/bsd/makefiles/amd64.make Thu Mar 17 17:03:20 2016 +0000 @@ -1,5 +1,5 @@ # -# Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2003, 2016, Oracle and/or its affiliates. All rights reserved. # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. # # This code is free software; you can redistribute it and/or modify it @@ -37,3 +37,11 @@ endif OPT_CFLAGS/compactingPermGenGen.o = -O1 + +# The debug flag is added to OPT_CFLAGS, but lost in case of per-file overrides +# of OPT_CFLAGS. Restore it here. +ifeq ($(ENABLE_FULL_DEBUG_SYMBOLS),1) + OPT_CFLAGS/sharedRuntimeTrig.o += -g + OPT_CFLAGS/sharedRuntimeTrans.o += -g + OPT_CFLAGS/compactingPermGenGen.o += -g +endif
--- a/make/bsd/makefiles/arm.make Thu Feb 25 14:59:44 2016 +0000 +++ b/make/bsd/makefiles/arm.make Thu Mar 17 17:03:20 2016 +0000 @@ -1,5 +1,5 @@ # -# Copyright (c) 2008, 2013, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2008, 2016, Oracle and/or its affiliates. All rights reserved. # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. # # This code is free software; you can redistribute it and/or modify it @@ -24,8 +24,4 @@ Obj_Files += bsd_arm.o -ifneq ($(EXT_LIBS_PATH),) - LIBS += $(EXT_LIBS_PATH)/sflt_glibc.a -endif - CFLAGS += -DVM_LITTLE_ENDIAN
--- a/make/bsd/makefiles/gcc.make Thu Feb 25 14:59:44 2016 +0000 +++ b/make/bsd/makefiles/gcc.make Thu Mar 17 17:03:20 2016 +0000 @@ -1,5 +1,5 @@ # -# Copyright (c) 1999, 2015, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 1999, 2016, Oracle and/or its affiliates. All rights reserved. # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. # # This code is free software; you can redistribute it and/or modify it @@ -330,6 +330,13 @@ ), 1) OPT_CFLAGS/loopTransform.o += $(OPT_CFLAGS/NOOPT) OPT_CFLAGS/unsafe.o += -O1 + + # The debug flag is added to OPT_CFLAGS, but lost in case of per-file overrides + # of OPT_CFLAGS. Restore it here. + ifeq ($(ENABLE_FULL_DEBUG_SYMBOLS),1) + OPT_CFLAGS/loopTransform.o += -g + OPT_CFLAGS/unsafe.o += -g + endif else $(error "Update compiler workarounds for Clang $(CC_VER_MAJOR).$(CC_VER_MINOR)") endif
--- a/make/bsd/makefiles/trace.make Thu Feb 25 14:59:44 2016 +0000 +++ b/make/bsd/makefiles/trace.make Thu Mar 17 17:03:20 2016 +0000 @@ -1,5 +1,5 @@ # -# Copyright (c) 2003, 2015, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2003, 2016, Oracle and/or its affiliates. All rights reserved. # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. # # This code is free software; you can redistribute it and/or modify it @@ -57,11 +57,6 @@ TraceGeneratedNames += \ traceRequestables.hpp \ traceEventControl.hpp - -ifneq ($(INCLUDE_TRACE), false) -TraceGeneratedNames += traceProducer.cpp -endif - endif @@ -101,9 +96,6 @@ $(TraceOutDir)/traceEventClasses.hpp: $(TraceSrcDir)/trace.xml $(TraceAltSrcDir)/traceEventClasses.xsl $(XML_DEPS) $(GENERATE_CODE) -$(TraceOutDir)/traceProducer.cpp: $(TraceSrcDir)/trace.xml $(TraceAltSrcDir)/traceProducer.xsl $(XML_DEPS) - $(GENERATE_CODE) - $(TraceOutDir)/traceRequestables.hpp: $(TraceSrcDir)/trace.xml $(TraceAltSrcDir)/traceRequestables.xsl $(XML_DEPS) $(GENERATE_CODE)
--- a/make/linux/makefiles/amd64.make Thu Feb 25 14:59:44 2016 +0000 +++ b/make/linux/makefiles/amd64.make Thu Mar 17 17:03:20 2016 +0000 @@ -1,5 +1,5 @@ # -# Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2003, 2016, Oracle and/or its affiliates. All rights reserved. # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. # # This code is free software; you can redistribute it and/or modify it @@ -32,3 +32,11 @@ CFLAGS += -D_LP64=1 OPT_CFLAGS/compactingPermGenGen.o = -O1 + +# The debug flag is added to OPT_CFLAGS, but lost in case of per-file overrides +# of OPT_CFLAGS. Restore it here. +ifeq ($(ENABLE_FULL_DEBUG_SYMBOLS),1) + OPT_CFLAGS/sharedRuntimeTrig.o += -g + OPT_CFLAGS/sharedRuntimeTrans.o += -g + OPT_CFLAGS/compactingPermGenGen.o += -g +endif
--- a/make/linux/makefiles/gcc.make Thu Feb 25 14:59:44 2016 +0000 +++ b/make/linux/makefiles/gcc.make Thu Mar 17 17:03:20 2016 +0000 @@ -1,5 +1,5 @@ # -# Copyright (c) 1999, 2015, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 1999, 2016, Oracle and/or its affiliates. All rights reserved. # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. # # This code is free software; you can redistribute it and/or modify it @@ -265,6 +265,11 @@ # GCC >= 4.3 # Gcc 4.1.2 does not support this flag, nor does it have problems compiling the file. OPT_CFLAGS/vmStructs.o += -fno-var-tracking-assignments + # The debug flag is added to OPT_CFLAGS, but lost in case of per-file overrides + # of OPT_CFLAGS. Restore it here. + ifeq ($(ENABLE_FULL_DEBUG_SYMBOLS),1) + OPT_CFLAGS/vmStructs.o += -g + endif endif # The gcc compiler segv's on ia64 when compiling bytecodeInterpreter.cpp @@ -277,6 +282,11 @@ ifeq ($(USE_CLANG), true) ifeq ($(shell expr $(CC_VER_MAJOR) = 4 \& $(CC_VER_MINOR) = 2), 1) OPT_CFLAGS/loopTransform.o += $(OPT_CFLAGS/NOOPT) + # The debug flag is added to OPT_CFLAGS, but lost in case of per-file overrides + # of OPT_CFLAGS. Restore it here. + ifeq ($(ENABLE_FULL_DEBUG_SYMBOLS),1) + OPT_CFLAGS/loopTransform.o += -g + endif endif else # Do not allow GCC 4.1.1
--- a/make/linux/makefiles/i486.make Thu Feb 25 14:59:44 2016 +0000 +++ b/make/linux/makefiles/i486.make Thu Mar 17 17:03:20 2016 +0000 @@ -1,5 +1,5 @@ # -# Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 1999, 2016, Oracle and/or its affiliates. All rights reserved. # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. # # This code is free software; you can redistribute it and/or modify it @@ -32,3 +32,11 @@ CFLAGS += -DVM_LITTLE_ENDIAN OPT_CFLAGS/compactingPermGenGen.o = -O1 + +# The debug flag is added to OPT_CFLAGS, but lost in case of per-file overrides +# of OPT_CFLAGS. Restore it here. +ifeq ($(ENABLE_FULL_DEBUG_SYMBOLS),1) + OPT_CFLAGS/sharedRuntimeTrig.o += -g + OPT_CFLAGS/sharedRuntimeTrans.o += -g + OPT_CFLAGS/compactingPermGenGen.o += -g +endif
--- a/make/linux/makefiles/trace.make Thu Feb 25 14:59:44 2016 +0000 +++ b/make/linux/makefiles/trace.make Thu Mar 17 17:03:20 2016 +0000 @@ -1,5 +1,5 @@ # -# Copyright (c) 2003, 2015, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2003, 2016, Oracle and/or its affiliates. All rights reserved. # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. # # This code is free software; you can redistribute it and/or modify it @@ -57,11 +57,6 @@ TraceGeneratedNames += \ traceRequestables.hpp \ traceEventControl.hpp - -ifneq ($(INCLUDE_TRACE), false) -TraceGeneratedNames += traceProducer.cpp -endif - endif TraceGeneratedFiles = $(TraceGeneratedNames:%=$(TraceOutDir)/%) @@ -100,9 +95,6 @@ $(TraceOutDir)/traceEventClasses.hpp: $(TraceSrcDir)/trace.xml $(TraceAltSrcDir)/traceEventClasses.xsl $(XML_DEPS) $(GENERATE_CODE) -$(TraceOutDir)/traceProducer.cpp: $(TraceSrcDir)/trace.xml $(TraceAltSrcDir)/traceProducer.xsl $(XML_DEPS) - $(GENERATE_CODE) - $(TraceOutDir)/traceRequestables.hpp: $(TraceSrcDir)/trace.xml $(TraceAltSrcDir)/traceRequestables.xsl $(XML_DEPS) $(GENERATE_CODE)
--- a/make/linux/makefiles/zeroshark.make Thu Feb 25 14:59:44 2016 +0000 +++ b/make/linux/makefiles/zeroshark.make Thu Mar 17 17:03:20 2016 +0000 @@ -1,5 +1,5 @@ # -# Copyright (c) 2003, 2015, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2003, 2016, Oracle and/or its affiliates. All rights reserved. # Copyright 2007, 2008 Red Hat, Inc. # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. # @@ -29,12 +29,6 @@ ifeq ($(USE_CLANG), true) WARNING_FLAGS += -Wno-undef endif -# Suppress some warning flags that are normally turned on for hotspot, -# because some of the zero code has not been updated accordingly. -WARNING_FLAGS += -Wno-return-type \ - -Wno-format-nonliteral -Wno-format-security \ - -Wno-maybe-uninitialized - # The copied fdlibm routines in sharedRuntimeTrig.o must not be optimized OPT_CFLAGS/sharedRuntimeTrig.o = $(OPT_CFLAGS/NOOPT)
--- a/make/solaris/makefiles/amd64.make Thu Feb 25 14:59:44 2016 +0000 +++ b/make/solaris/makefiles/amd64.make Thu Mar 17 17:03:20 2016 +0000 @@ -1,5 +1,5 @@ # -# Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2004, 2016, Oracle and/or its affiliates. All rights reserved. # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. # # This code is free software; you can redistribute it and/or modify it @@ -34,6 +34,14 @@ OPT_CFLAGS/generateOptoStub.o = -xO2 # Temporary util SS12u1 C++ compiler is fixed OPT_CFLAGS/c1_LinearScan.o = -xO2 + +# The debug flag is added to OPT_CFLAGS, but lost in case of per-file overrides +# of OPT_CFLAGS. Restore it here. +ifeq ($(ENABLE_FULL_DEBUG_SYMBOLS),1) + OPT_CFLAGS/generateOptoStub.o += -g0 -xs + OPT_CFLAGS/c1_LinearScan.o += -g0 -xs +endif + else ifeq ("${Platform_compiler}", "gcc")
--- a/make/solaris/makefiles/product.make Thu Feb 25 14:59:44 2016 +0000 +++ b/make/solaris/makefiles/product.make Thu Mar 17 17:03:20 2016 +0000 @@ -1,5 +1,5 @@ # -# Copyright (c) 1999, 2012, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 1999, 2016, Oracle and/or its affiliates. All rights reserved. # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. # # This code is free software; you can redistribute it and/or modify it @@ -35,11 +35,21 @@ # for this method for now. (fix this when dtrace bug 6258412 is fixed) ifndef USE_GCC OPT_CFLAGS/ciEnv.o = $(OPT_CFLAGS) -xinline=no%__1cFciEnvbFpost_compiled_method_load_event6MpnHnmethod__v_ +# The debug flag is added to OPT_CFLAGS, but lost in case of per-file overrides +# of OPT_CFLAGS. Restore it here. +ifeq ($(ENABLE_FULL_DEBUG_SYMBOLS),1) + OPT_CFLAGS/ciEnv.o += -g0 -xs +endif endif # Need extra inlining to get oop_ps_push_contents functions to perform well enough. ifndef USE_GCC OPT_CFLAGS/psPromotionManager.o = $(OPT_CFLAGS) -W2,-Ainline:inc=1000 +# The debug flag is added to OPT_CFLAGS, but lost in case of per-file overrides +# of OPT_CFLAGS. Restore it here. +ifeq ($(ENABLE_FULL_DEBUG_SYMBOLS),1) + OPT_CFLAGS/psPromotionManager.o += -g0 -xs +endif endif # (OPT_CFLAGS/SLOWER is also available, to alter compilation of buggy files) @@ -55,6 +65,12 @@ ifeq ($(shell expr $(COMPILER_REV_NUMERIC) \>= 509), 1) # dtrace cannot handle tail call optimization (6672627, 6693876) OPT_CFLAGS/jni.o = $(OPT_CFLAGS/DEFAULT) $(OPT_CCFLAGS/NO_TAIL_CALL_OPT) +# The -g0 -xs flag is added to OPT_CFLAGS in sparcWorks.make, but lost in case of +# per-file overrides of OPT_CFLAGS. Restore it here. This is mainly needed +# to provide a good baseline to compare the new build against. +ifeq ($(ENABLE_FULL_DEBUG_SYMBOLS),1) + OPT_CFLAGS/jni.o += -g0 -xs +endif endif # COMPILER_NUMERIC_REV >= 509 # Workaround SS11 bug 6345274 (all platforms) (Fixed in SS11 patch and SS12)
--- a/make/solaris/makefiles/sparcWorks.make Thu Feb 25 14:59:44 2016 +0000 +++ b/make/solaris/makefiles/sparcWorks.make Thu Mar 17 17:03:20 2016 +0000 @@ -158,9 +158,20 @@ OPT_CCFLAGS/NO_TAIL_CALL_OPT = -Qoption ube -O~yz OPT_CFLAGS/stubGenerator_x86_32.o = $(OPT_CFLAGS) -xspace OPT_CFLAGS/stubGenerator_x86_64.o = $(OPT_CFLAGS) -xspace +# The debug flag is added to OPT_CFLAGS, but lost in case of per-file overrides +# of OPT_CFLAGS. Restore it here. +ifeq ($(ENABLE_FULL_DEBUG_SYMBOLS),1) + OPT_CFLAGS/stubGenerator_x86_32.o += -g0 -xs + OPT_CFLAGS/stubGenerator_x86_64.o += -g0 -xs +endif endif # Platform_arch == x86 ifeq ("${Platform_arch}", "sparc") OPT_CFLAGS/stubGenerator_sparc.o = $(OPT_CFLAGS) -xspace +# The debug flag is added to OPT_CFLAGS, but lost in case of per-file overrides +# of OPT_CFLAGS. Restore it here. +ifeq ($(ENABLE_FULL_DEBUG_SYMBOLS),1) + OPT_CFLAGS/stubGenerator_sparc.o += -g0 -xs +endif endif endif # COMPILER_REV_NUMERIC >= 509
--- a/make/solaris/makefiles/trace.make Thu Feb 25 14:59:44 2016 +0000 +++ b/make/solaris/makefiles/trace.make Thu Mar 17 17:03:20 2016 +0000 @@ -1,5 +1,5 @@ # -# Copyright (c) 2003, 2015, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2003, 2016, Oracle and/or its affiliates. All rights reserved. # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. # # This code is free software; you can redistribute it and/or modify it @@ -56,8 +56,7 @@ ifeq ($(HAS_ALT_SRC), true) TraceGeneratedNames += \ traceRequestables.hpp \ - traceEventControl.hpp \ - traceProducer.cpp + traceEventControl.hpp endif TraceGeneratedFiles = $(TraceGeneratedNames:%=$(TraceOutDir)/%) @@ -96,9 +95,6 @@ $(TraceOutDir)/traceEventClasses.hpp: $(TraceSrcDir)/trace.xml $(TraceAltSrcDir)/traceEventClasses.xsl $(XML_DEPS) $(GENERATE_CODE) -$(TraceOutDir)/traceProducer.cpp: $(TraceSrcDir)/trace.xml $(TraceAltSrcDir)/traceProducer.xsl $(XML_DEPS) - $(GENERATE_CODE) - $(TraceOutDir)/traceRequestables.hpp: $(TraceSrcDir)/trace.xml $(TraceAltSrcDir)/traceRequestables.xsl $(XML_DEPS) $(GENERATE_CODE)
--- a/make/windows/makefiles/trace.make Thu Feb 25 14:59:44 2016 +0000 +++ b/make/windows/makefiles/trace.make Thu Mar 17 17:03:20 2016 +0000 @@ -1,5 +1,5 @@ # -# Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2003, 2016, Oracle and/or its affiliates. All rights reserved. # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. # # This code is free software; you can redistribute it and/or modify it @@ -43,8 +43,7 @@ !if EXISTS($(TraceAltSrcDir)) TraceGeneratedNames = $(TraceGeneratedNames) \ traceRequestables.hpp \ - traceEventControl.hpp \ - traceProducer.cpp + traceEventControl.hpp !endif @@ -58,8 +57,7 @@ !if EXISTS($(TraceAltSrcDir)) TraceGeneratedFiles = $(TraceGeneratedFiles) \ $(TraceOutDir)/traceRequestables.hpp \ - $(TraceOutDir)/traceEventControl.hpp \ - $(TraceOutDir)/traceProducer.cpp + $(TraceOutDir)/traceEventControl.hpp !endif XSLT = $(QUIETLY) $(REMOTE) $(RUN_JAVA) -classpath $(JvmtiOutDir) jvmtiGen @@ -98,10 +96,6 @@ @echo Generating AltSrc $@ @$(XSLT) -IN $(TraceSrcDir)/trace.xml -XSL $(TraceAltSrcDir)/traceEventClasses.xsl -OUT $(TraceOutDir)/traceEventClasses.hpp -$(TraceOutDir)/traceProducer.cpp: $(TraceSrcDir)/trace.xml $(TraceAltSrcDir)/traceProducer.xsl $(XML_DEPS) - @echo Generating AltSrc $@ - @$(XSLT) -IN $(TraceSrcDir)/trace.xml -XSL $(TraceAltSrcDir)/traceProducer.xsl -OUT $(TraceOutDir)/traceProducer.cpp - $(TraceOutDir)/traceRequestables.hpp: $(TraceSrcDir)/trace.xml $(TraceAltSrcDir)/traceRequestables.xsl $(XML_DEPS) @echo Generating AltSrc $@ @$(XSLT) -IN $(TraceSrcDir)/trace.xml -XSL $(TraceAltSrcDir)/traceRequestables.xsl -OUT $(TraceOutDir)/traceRequestables.hpp
--- a/src/cpu/aarch64/vm/aarch64.ad Thu Feb 25 14:59:44 2016 +0000 +++ b/src/cpu/aarch64/vm/aarch64.ad Thu Mar 17 17:03:20 2016 +0000 @@ -3425,9 +3425,6 @@ // false => size gets scaled to BytesPerLong, ok. const bool Matcher::init_array_count_is_in_bytes = false; -// Threshold size for cleararray. -const int Matcher::init_array_short_size = 18 * BytesPerLong; - // Use conditional move (CMOVL) const int Matcher::long_cmove_cost() { // long cmoves are no more expensive than int cmoves @@ -4135,14 +4132,14 @@ MacroAssembler _masm(&cbuf); guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register, - &Assembler::ldxr, &MacroAssembler::cmp, &Assembler::stlxr); + Assembler::xword, /*acquire*/ false, /*release*/ true); %} enc_class aarch64_enc_cmpxchgw(memory mem, iRegINoSp oldval, iRegINoSp newval) %{ MacroAssembler _masm(&cbuf); guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register, - &Assembler::ldxrw, &MacroAssembler::cmpw, &Assembler::stlxrw); + Assembler::word, /*acquire*/ false, /*release*/ true); %} @@ -4154,14 +4151,14 @@ MacroAssembler _masm(&cbuf); guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register, - &Assembler::ldaxr, &MacroAssembler::cmp, &Assembler::stlxr); + Assembler::xword, /*acquire*/ true, /*release*/ true); %} enc_class aarch64_enc_cmpxchgw_acq(memory mem, iRegINoSp oldval, iRegINoSp newval) %{ MacroAssembler _masm(&cbuf); guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register, - &Assembler::ldaxrw, &MacroAssembler::cmpw, &Assembler::stlxrw); + Assembler::word, /*acquire*/ true, /*release*/ true); %} @@ -4679,8 +4676,14 @@ // Compare object markOop with mark and if equal exchange scratch1 // with object markOop. - { + if (UseLSE) { + __ mov(tmp, disp_hdr); + __ casal(Assembler::xword, tmp, box, oop); + __ cmp(tmp, disp_hdr); + __ br(Assembler::EQ, cont); + } else { Label retry_load; + __ prfm(Address(oop), PSTL1STRM); __ bind(retry_load); __ ldaxr(tmp, oop); __ cmp(tmp, disp_hdr); @@ -4729,8 +4732,13 @@ __ add(tmp, disp_hdr, (ObjectMonitor::owner_offset_in_bytes()-markOopDesc::monitor_value)); __ mov(disp_hdr, zr); - { + if (UseLSE) { + __ mov(rscratch1, disp_hdr); + __ casal(Assembler::xword, rscratch1, rthread, tmp); + __ cmp(rscratch1, disp_hdr); + } else { Label retry_load, fail; + __ prfm(Address(tmp), PSTL1STRM); __ bind(retry_load); __ ldaxr(rscratch1, tmp); __ cmp(disp_hdr, rscratch1); @@ -4818,8 +4826,13 @@ // see the stack address of the basicLock in the markOop of the // object. 
- { + if (UseLSE) { + __ mov(tmp, box); + __ casl(Assembler::xword, tmp, disp_hdr, oop); + __ cmp(tmp, box); + } else { Label retry_load; + __ prfm(Address(oop), PSTL1STRM); __ bind(retry_load); __ ldxr(tmp, oop); __ cmp(box, tmp); @@ -13281,7 +13294,7 @@ __ fmovs($dst$$Register, as_FloatRegister($src$$reg)); %} - ins_pipe(pipe_class_memory); + ins_pipe(fp_f2i); %} @@ -13299,7 +13312,7 @@ __ fmovs(as_FloatRegister($dst$$reg), $src$$Register); %} - ins_pipe(pipe_class_memory); + ins_pipe(fp_i2f); %} @@ -13317,7 +13330,7 @@ __ fmovd($dst$$Register, as_FloatRegister($src$$reg)); %} - ins_pipe(pipe_class_memory); + ins_pipe(fp_d2l); %} @@ -13335,7 +13348,7 @@ __ fmovd(as_FloatRegister($dst$$reg), $src$$Register); %} - ins_pipe(pipe_class_memory); + ins_pipe(fp_l2d); %} @@ -14191,6 +14204,25 @@ ins_pipe(pipe_cmp_branch); %} +instruct cmpN_imm0_branch(cmpOp cmp, iRegN op1, immN0 op2, label labl, rFlagsReg cr) %{ + match(If cmp (CmpN op1 op2)); + predicate(n->in(1)->as_Bool()->_test._test == BoolTest::ne + || n->in(1)->as_Bool()->_test._test == BoolTest::eq); + effect(USE labl); + + ins_cost(BRANCH_COST); + format %{ "cbw$cmp $op1, $labl" %} + ins_encode %{ + Label* L = $labl$$label; + Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode; + if (cond == Assembler::EQ) + __ cbzw($op1$$Register, *L); + else + __ cbnzw($op1$$Register, *L); + %} + ins_pipe(pipe_cmp_branch); +%} + instruct cmpP_narrowOop_imm0_branch(cmpOp cmp, iRegN oop, immP0 zero, label labl, rFlagsReg cr) %{ match(If cmp (CmpP (DecodeN oop) zero)); predicate(n->in(1)->as_Bool()->_test._test == BoolTest::ne @@ -14783,19 +14815,19 @@ %} instruct string_equals(iRegP_R1 str1, iRegP_R3 str2, iRegI_R4 cnt, - iRegI_R0 result, iRegP_R10 tmp, rFlagsReg cr) + iRegI_R0 result, rFlagsReg cr) %{ predicate(!CompactStrings); match(Set result (StrEquals (Binary str1 str2) cnt)); - effect(KILL tmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL cr); - - format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp" %} + effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL cr); + + format %{ "String Equals $str1,$str2,$cnt -> $result" %} ins_encode %{ // Count is in 8-bit bytes; non-Compact chars are 16 bits. 
__ asrw($cnt$$Register, $cnt$$Register, 1); - __ string_equals($str1$$Register, $str2$$Register, - $cnt$$Register, $result$$Register, - $tmp$$Register); + __ arrays_equals($str1$$Register, $str2$$Register, + $result$$Register, $cnt$$Register, + 2, /*is_string*/true); %} ins_pipe(pipe_class_memory); %} @@ -14809,9 +14841,10 @@ format %{ "Array Equals $ary1,ary2 -> $result // KILL $tmp" %} ins_encode %{ - __ byte_arrays_equals($ary1$$Register, $ary2$$Register, - $result$$Register, $tmp$$Register); - %} + __ arrays_equals($ary1$$Register, $ary2$$Register, + $result$$Register, $tmp$$Register, + 1, /*is_string*/false); + %} ins_pipe(pipe_class_memory); %} @@ -14824,12 +14857,14 @@ format %{ "Array Equals $ary1,ary2 -> $result // KILL $tmp" %} ins_encode %{ - __ char_arrays_equals($ary1$$Register, $ary2$$Register, - $result$$Register, $tmp$$Register); + __ arrays_equals($ary1$$Register, $ary2$$Register, + $result$$Register, $tmp$$Register, + 2, /*is_string*/false); %} ins_pipe(pipe_class_memory); %} + // encode char[] to byte[] in ISO_8859_1 instruct encode_iso_array(iRegP_R2 src, iRegP_R1 dst, iRegI_R3 len, vRegD_V0 Vtmp1, vRegD_V1 Vtmp2, @@ -16480,7 +16515,7 @@ as_FloatRegister($src$$reg), as_FloatRegister($shift$$reg)); %} - ins_pipe(vshift64_imm); + ins_pipe(vshift64); %} instruct vsll4I(vecX dst, vecX src, vecX shift) %{ @@ -16494,7 +16529,7 @@ as_FloatRegister($src$$reg), as_FloatRegister($shift$$reg)); %} - ins_pipe(vshift128_imm); + ins_pipe(vshift128); %} instruct vsrl2I(vecD dst, vecD src, vecX shift) %{ @@ -16507,7 +16542,7 @@ as_FloatRegister($src$$reg), as_FloatRegister($shift$$reg)); %} - ins_pipe(vshift64_imm); + ins_pipe(vshift64); %} instruct vsrl4I(vecX dst, vecX src, vecX shift) %{ @@ -16520,7 +16555,7 @@ as_FloatRegister($src$$reg), as_FloatRegister($shift$$reg)); %} - ins_pipe(vshift128_imm); + ins_pipe(vshift128); %} instruct vsll2I_imm(vecD dst, vecD src, immI shift) %{ @@ -16638,7 +16673,7 @@ as_FloatRegister($src$$reg), (int)$shift$$constant & 63); %} - ins_pipe(vshift128); + ins_pipe(vshift128_imm); %} instruct vsra2L_imm(vecX dst, vecX src, immI shift) %{
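The locking fast paths above gain an ARMv8.1 LSE variant: when UseLSE is set, a single casal replaces the ldaxr/stlxr retry loop, and the remaining LL/SC path is prefixed with a prfm PSTL1STRM prefetch. A minimal C++ sketch of the two shapes, with std::atomic standing in for the emitted instructions (helper names here are illustrative, not HotSpot code):

    #include <atomic>
    #include <cstdint>

    // LL/SC shape (pre-v8.1): retry until the store-exclusive succeeds.
    bool cas_llsc(std::atomic<uint64_t>& word, uint64_t expected, uint64_t desired) {
      uint64_t observed = expected;
      // compare_exchange_weak may fail spuriously, like stlxr, so it sits
      // in a loop just as the bind(retry_load) sequences above do.
      while (!word.compare_exchange_weak(observed, desired,
                                         std::memory_order_acq_rel)) {
        if (observed != expected) return false;  // genuine mismatch
      }
      return true;
    }

    // LSE shape (v8.1): a single compare-and-swap instruction, no loop.
    bool cas_lse(std::atomic<uint64_t>& word, uint64_t expected, uint64_t desired) {
      return word.compare_exchange_strong(expected, desired,
                                          std::memory_order_acq_rel);
    }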
--- a/src/cpu/aarch64/vm/assembler_aarch64.hpp Thu Feb 25 14:59:44 2016 +0000 +++ b/src/cpu/aarch64/vm/assembler_aarch64.hpp Thu Mar 17 17:03:20 2016 +0000 @@ -972,7 +972,7 @@ // System void system(int op0, int op1, int CRn, int CRm, int op2, - Register rt = (Register)0b11111) + Register rt = dummy_reg) { starti; f(0b11010101000, 31, 21); @@ -1082,7 +1082,7 @@ #define INSN(NAME, opc) \ void NAME() { \ - branch_reg((Register)0b11111, opc); \ + branch_reg(dummy_reg, opc); \ } INSN(eret, 0b0100); @@ -1094,10 +1094,22 @@ enum operand_size { byte, halfword, word, xword }; void load_store_exclusive(Register Rs, Register Rt1, Register Rt2, - Register Rn, enum operand_size sz, int op, int o0) { + Register Rn, enum operand_size sz, int op, bool ordered) { starti; f(sz, 31, 30), f(0b001000, 29, 24), f(op, 23, 21); - rf(Rs, 16), f(o0, 15), rf(Rt2, 10), rf(Rn, 5), rf(Rt1, 0); + rf(Rs, 16), f(ordered, 15), rf(Rt2, 10), rf(Rn, 5), rf(Rt1, 0); + } + + void load_exclusive(Register dst, Register addr, + enum operand_size sz, bool ordered) { + load_store_exclusive(dummy_reg, dst, dummy_reg, addr, + sz, 0b010, ordered); + } + + void store_exclusive(Register status, Register new_val, Register addr, + enum operand_size sz, bool ordered) { + load_store_exclusive(status, new_val, dummy_reg, addr, + sz, 0b000, ordered); } #define INSN4(NAME, sz, op, o0) /* Four registers */ \ @@ -1109,19 +1121,19 @@ #define INSN3(NAME, sz, op, o0) /* Three registers */ \ void NAME(Register Rs, Register Rt, Register Rn) { \ guarantee(Rs != Rn && Rs != Rt, "unpredictable instruction"); \ - load_store_exclusive(Rs, Rt, (Register)0b11111, Rn, sz, op, o0); \ + load_store_exclusive(Rs, Rt, dummy_reg, Rn, sz, op, o0); \ } #define INSN2(NAME, sz, op, o0) /* Two registers */ \ void NAME(Register Rt, Register Rn) { \ - load_store_exclusive((Register)0b11111, Rt, (Register)0b11111, \ + load_store_exclusive(dummy_reg, Rt, dummy_reg, \ Rn, sz, op, o0); \ } #define INSN_FOO(NAME, sz, op, o0) /* Three registers, encoded differently */ \ void NAME(Register Rt1, Register Rt2, Register Rn) { \ guarantee(Rt1 != Rt2, "unpredictable instruction"); \ - load_store_exclusive((Register)0b11111, Rt1, Rt2, Rn, sz, op, o0); \ + load_store_exclusive(dummy_reg, Rt1, Rt2, Rn, sz, op, o0); \ } // bytes @@ -1169,6 +1181,46 @@ #undef INSN4 #undef INSN_FOO + // 8.1 Compare and swap extensions + void lse_cas(Register Rs, Register Rt, Register Rn, + enum operand_size sz, bool a, bool r, bool not_pair) { + starti; + if (! not_pair) { // Pair + assert(sz == word || sz == xword, "invalid size"); + /* The size bit is in bit 30, not 31 */ + sz = (operand_size)(sz == word ? 
0b00:0b01); + } + f(sz, 31, 30), f(0b001000, 29, 24), f(1, 23), f(a, 22), f(1, 21); + rf(Rs, 16), f(r, 15), f(0b11111, 14, 10), rf(Rn, 5), rf(Rt, 0); + } + + // CAS +#define INSN(NAME, a, r) \ + void NAME(operand_size sz, Register Rs, Register Rt, Register Rn) { \ + assert(Rs != Rn && Rs != Rt, "unpredictable instruction"); \ + lse_cas(Rs, Rt, Rn, sz, a, r, true); \ + } + INSN(cas, false, false) + INSN(casa, true, false) + INSN(casl, false, true) + INSN(casal, true, true) +#undef INSN + + // CASP +#define INSN(NAME, a, r) \ + void NAME(operand_size sz, Register Rs, Register Rs1, \ + Register Rt, Register Rt1, Register Rn) { \ + assert((Rs->encoding() & 1) == 0 && (Rt->encoding() & 1) == 0 && \ + Rs->successor() == Rs1 && Rt->successor() == Rt1 && \ + Rs != Rn && Rs1 != Rn && Rs != Rt, "invalid registers"); \ + lse_cas(Rs, Rt, Rn, sz, a, r, false); \ + } + INSN(casp, false, false) + INSN(caspa, true, false) + INSN(caspl, false, true) + INSN(caspal, true, true) +#undef INSN + // Load register (literal) #define INSN(NAME, opc, V) \ void NAME(Register Rt, address dest) { \
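Throughout this change, the new CAS family (cas/casa/casl/casal, plus the casp pair forms) always writes the value found in memory back into Rs, which is why the call sites first mov the compare value into a scratch register and cmp it against the result afterwards. A plain C++ sketch of that in/out convention (the helper name is illustrative only):

    #include <atomic>
    #include <cstdint>

    // Models the Rs register of A64 CAS: the compare value goes in and
    // the old memory value comes back out, whether or not the swap hit.
    uint64_t cas_rs_inout(std::atomic<uint64_t>& mem,
                          uint64_t compare, uint64_t new_val) {
      uint64_t rs = compare;                     // mov(tmp, cmpval)
      mem.compare_exchange_strong(rs, new_val);  // casal(sz, tmp, newval, addr)
      return rs;  // caller then does cmp(tmp, cmpval) to test success
    }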
--- a/src/cpu/aarch64/vm/c1_LIRAssembler_aarch64.cpp Thu Feb 25 14:59:44 2016 +0000 +++ b/src/cpu/aarch64/vm/c1_LIRAssembler_aarch64.cpp Thu Mar 17 17:03:20 2016 +0000 @@ -1556,38 +1556,54 @@ } void LIR_Assembler::casw(Register addr, Register newval, Register cmpval) { - Label retry_load, nope; - // flush and load exclusive from the memory location - // and fail if it is not what we expect - __ bind(retry_load); - __ ldaxrw(rscratch1, addr); - __ cmpw(rscratch1, cmpval); - __ cset(rscratch1, Assembler::NE); - __ br(Assembler::NE, nope); - // if we store+flush with no intervening write rscratch1 wil be zero - __ stlxrw(rscratch1, newval, addr); - // retry so we only ever return after a load fails to compare - // ensures we don't return a stale value after a failed write. - __ cbnzw(rscratch1, retry_load); - __ bind(nope); + if (UseLSE) { + __ mov(rscratch1, cmpval); + __ casal(Assembler::word, rscratch1, newval, addr); + __ cmpw(rscratch1, cmpval); + __ cset(rscratch1, Assembler::NE); + } else { + Label retry_load, nope; + // flush and load exclusive from the memory location + // and fail if it is not what we expect + __ prfm(Address(addr), PSTL1STRM); + __ bind(retry_load); + __ ldaxrw(rscratch1, addr); + __ cmpw(rscratch1, cmpval); + __ cset(rscratch1, Assembler::NE); + __ br(Assembler::NE, nope); + // if we store+flush with no intervening write rscratch1 wil be zero + __ stlxrw(rscratch1, newval, addr); + // retry so we only ever return after a load fails to compare + // ensures we don't return a stale value after a failed write. + __ cbnzw(rscratch1, retry_load); + __ bind(nope); + } __ membar(__ AnyAny); } void LIR_Assembler::casl(Register addr, Register newval, Register cmpval) { - Label retry_load, nope; - // flush and load exclusive from the memory location - // and fail if it is not what we expect - __ bind(retry_load); - __ ldaxr(rscratch1, addr); - __ cmp(rscratch1, cmpval); - __ cset(rscratch1, Assembler::NE); - __ br(Assembler::NE, nope); - // if we store+flush with no intervening write rscratch1 wil be zero - __ stlxr(rscratch1, newval, addr); - // retry so we only ever return after a load fails to compare - // ensures we don't return a stale value after a failed write. - __ cbnz(rscratch1, retry_load); - __ bind(nope); + if (UseLSE) { + __ mov(rscratch1, cmpval); + __ casal(Assembler::xword, rscratch1, newval, addr); + __ cmp(rscratch1, cmpval); + __ cset(rscratch1, Assembler::NE); + } else { + Label retry_load, nope; + // flush and load exclusive from the memory location + // and fail if it is not what we expect + __ prfm(Address(addr), PSTL1STRM); + __ bind(retry_load); + __ ldaxr(rscratch1, addr); + __ cmp(rscratch1, cmpval); + __ cset(rscratch1, Assembler::NE); + __ br(Assembler::NE, nope); + // if we store+flush with no intervening write rscratch1 wil be zero + __ stlxr(rscratch1, newval, addr); + // retry so we only ever return after a load fails to compare + // ensures we don't return a stale value after a failed write. + __ cbnz(rscratch1, retry_load); + __ bind(nope); + } __ membar(__ AnyAny); } @@ -3156,6 +3172,7 @@ } Label again; __ lea(tmp, addr); + __ prfm(Address(tmp), PSTL1STRM); __ bind(again); (_masm->*lda)(dst, tmp); (_masm->*add)(rscratch1, dst, inc); @@ -3175,6 +3192,7 @@ assert_different_registers(obj, addr.base(), tmp, rscratch2, dst); Label again; __ lea(tmp, addr); + __ prfm(Address(tmp), PSTL1STRM); __ bind(again); (_masm->*lda)(dst, tmp); (_masm->*stl)(rscratch2, obj, tmp);
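Both C1 paths above (and the atomic helpers later in this changeset) now issue prfm ... PSTL1STRM before entering the exclusive-load loop: a streaming store prefetch that pulls the line into L1 in a writable state before ldaxr takes the exclusive monitor, reducing the chance of a failed stlxr. In portable terms it corresponds roughly to the GCC/Clang prefetch builtin (an approximation, not an exact mapping):

    // Prefetch for write (rw = 1) with low temporal locality (0), which is
    // roughly what prfm PSTL1STRM requests of the cache hierarchy.
    static inline void prefetch_for_cas(const void* addr) {
      __builtin_prefetch(addr, /*rw=*/1, /*locality=*/0);
    }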
--- a/src/cpu/aarch64/vm/globals_aarch64.hpp Thu Feb 25 14:59:44 2016 +0000 +++ b/src/cpu/aarch64/vm/globals_aarch64.hpp Thu Mar 17 17:03:20 2016 +0000 @@ -76,6 +76,8 @@ // avoid biased locking while we are bootstrapping the aarch64 build define_pd_global(bool, UseBiasedLocking, false); +define_pd_global(intx, InitArrayShortSize, 18*BytesPerLong); + #if defined(COMPILER1) || defined(COMPILER2) define_pd_global(intx, InlineSmallCode, 1000); #endif @@ -101,9 +103,13 @@ \ product(bool, UseCRC32, false, \ "Use CRC32 instructions for CRC32 computation") \ + \ + product(bool, UseLSE, false, \ + "Use LSE instructions") \ // Don't attempt to use Neon on builtin sim until builtin sim supports it #define UseCRC32 false +#define UseSIMDForMemoryOps false #else #define UseBuiltinSim false @@ -121,6 +127,10 @@ "Use Neon for CRC32 computation") \ product(bool, UseCRC32, false, \ "Use CRC32 instructions for CRC32 computation") \ + product(bool, UseSIMDForMemoryOps, false, \ + "Use SIMD instructions in generated memory move code") \ + product(bool, UseLSE, false, \ + "Use LSE instructions") \ product(bool, TraceTraps, false, "Trace all traps the signal handler") #endif
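Both new flags are product booleans defaulting to false, so the LSE and SIMD paths are strictly opt-in here (enabled with -XX:+UseLSE and -XX:+UseSIMDForMemoryOps, per the usual command-line convention for product(bool, ...) flags). The code generators then select instruction sequences with the pattern used throughout this changeset:

    // Selection happens once, at code-emission time, not per execution.
    if (UseLSE) {
      // emit the single-instruction CAS sequence (casal)
    } else {
      // emit the ldaxr/stlxr retry loop
    }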
--- a/src/cpu/aarch64/vm/jvmciCodeInstaller_aarch64.cpp Thu Feb 25 14:59:44 2016 +0000 +++ b/src/cpu/aarch64/vm/jvmciCodeInstaller_aarch64.cpp Thu Mar 17 17:03:20 2016 +0000 @@ -74,7 +74,7 @@ void CodeInstaller::pd_patch_DataSectionReference(int pc_offset, int data_offset, TRAPS) { address pc = _instructions->start() + pc_offset; NativeInstruction* inst = nativeInstruction_at(pc); - if (inst->is_adr_aligned()) { + if (inst->is_adr_aligned() || inst->is_ldr_literal()) { address dest = _constants->start() + data_offset; _instructions->relocate(pc, section_word_Relocation::spec((address) dest, CodeBuffer::SECT_CONSTS)); TRACE_jvmci_3("relocating at " PTR_FORMAT " (+%d) with destination at %d", p2i(pc), pc_offset, data_offset);
--- a/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp Thu Feb 25 14:59:44 2016 +0000 +++ b/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp Thu Mar 17 17:03:20 2016 +0000 @@ -1638,6 +1638,7 @@ void MacroAssembler::atomic_incw(Register counter_addr, Register tmp, Register tmp2) { Label retry_load; + prfm(Address(counter_addr), PSTL1STRM); bind(retry_load); // flush and load exclusive from the memory location ldxrw(tmp, counter_addr); @@ -2070,25 +2071,32 @@ // oldv holds comparison value // newv holds value to write in exchange // addr identifies memory word to compare against/update - // tmp returns 0/1 for success/failure - Label retry_load, nope; - - bind(retry_load); - // flush and load exclusive from the memory location - // and fail if it is not what we expect - ldaxr(tmp, addr); - cmp(tmp, oldv); - br(Assembler::NE, nope); - // if we store+flush with no intervening write tmp wil be zero - stlxr(tmp, newv, addr); - cbzw(tmp, succeed); - // retry so we only ever return after a load fails to compare - // ensures we don't return a stale value after a failed write. - b(retry_load); - // if the memory word differs we return it in oldv and signal a fail - bind(nope); - membar(AnyAny); - mov(oldv, tmp); + if (UseLSE) { + mov(tmp, oldv); + casal(Assembler::xword, oldv, newv, addr); + cmp(tmp, oldv); + br(Assembler::EQ, succeed); + membar(AnyAny); + } else { + Label retry_load, nope; + prfm(Address(addr), PSTL1STRM); + bind(retry_load); + // flush and load exclusive from the memory location + // and fail if it is not what we expect + ldaxr(tmp, addr); + cmp(tmp, oldv); + br(Assembler::NE, nope); + // if we store+flush with no intervening write tmp wil be zero + stlxr(tmp, newv, addr); + cbzw(tmp, succeed); + // retry so we only ever return after a load fails to compare + // ensures we don't return a stale value after a failed write. + b(retry_load); + // if the memory word differs we return it in oldv and signal a fail + bind(nope); + membar(AnyAny); + mov(oldv, tmp); + } if (fail) b(*fail); } @@ -2099,28 +2107,64 @@ // newv holds value to write in exchange // addr identifies memory word to compare against/update // tmp returns 0/1 for success/failure - Label retry_load, nope; - - bind(retry_load); - // flush and load exclusive from the memory location - // and fail if it is not what we expect - ldaxrw(tmp, addr); - cmp(tmp, oldv); - br(Assembler::NE, nope); - // if we store+flush with no intervening write tmp wil be zero - stlxrw(tmp, newv, addr); - cbzw(tmp, succeed); - // retry so we only ever return after a load fails to compare - // ensures we don't return a stale value after a failed write. - b(retry_load); - // if the memory word differs we return it in oldv and signal a fail - bind(nope); - membar(AnyAny); - mov(oldv, tmp); + if (UseLSE) { + mov(tmp, oldv); + casal(Assembler::word, oldv, newv, addr); + cmp(tmp, oldv); + br(Assembler::EQ, succeed); + membar(AnyAny); + } else { + Label retry_load, nope; + prfm(Address(addr), PSTL1STRM); + bind(retry_load); + // flush and load exclusive from the memory location + // and fail if it is not what we expect + ldaxrw(tmp, addr); + cmp(tmp, oldv); + br(Assembler::NE, nope); + // if we store+flush with no intervening write tmp wil be zero + stlxrw(tmp, newv, addr); + cbzw(tmp, succeed); + // retry so we only ever return after a load fails to compare + // ensures we don't return a stale value after a failed write. 
+ b(retry_load); + // if the memory word differs we return it in oldv and signal a fail + bind(nope); + membar(AnyAny); + mov(oldv, tmp); + } if (fail) b(*fail); } +// A generic CAS; success or failure is in the EQ flag. +void MacroAssembler::cmpxchg(Register addr, Register expected, + Register new_val, + enum operand_size size, + bool acquire, bool release, + Register tmp) { + if (UseLSE) { + mov(tmp, expected); + lse_cas(tmp, new_val, addr, size, acquire, release, /*not_pair*/ true); + cmp(tmp, expected); + } else { + BLOCK_COMMENT("cmpxchg {"); + Label retry_load, done; + prfm(Address(addr), PSTL1STRM); + bind(retry_load); + load_exclusive(tmp, addr, size, acquire); + if (size == xword) + cmp(tmp, expected); + else + cmpw(tmp, expected); + br(Assembler::NE, done); + store_exclusive(tmp, new_val, addr, size, release); + cbnzw(tmp, retry_load); + bind(done); + BLOCK_COMMENT("} cmpxchg"); + } +} + static bool different(Register a, RegisterOrConstant b, Register c) { if (b.is_constant()) return a != c; @@ -2135,6 +2179,7 @@ result = different(prev, incr, addr) ? prev : rscratch2; \ \ Label retry_load; \ + prfm(Address(addr), PSTL1STRM); \ bind(retry_load); \ LDXR(result, addr); \ OP(rscratch1, result, incr); \ @@ -2157,6 +2202,7 @@ result = different(prev, newv, addr) ? prev : rscratch2; \ \ Label retry_load; \ + prfm(Address(addr), PSTL1STRM); \ bind(retry_load); \ LDXR(result, addr); \ STXR(rscratch1, newv, addr); \ @@ -4481,224 +4527,125 @@ BLOCK_COMMENT("} string_compare"); } - -void MacroAssembler::string_equals(Register str1, Register str2, - Register cnt, Register result, - Register tmp1) { - Label SAME_CHARS, DONE, SHORT_LOOP, SHORT_STRING, - NEXT_WORD; - - const Register tmp2 = rscratch1; - assert_different_registers(str1, str2, cnt, result, tmp1, tmp2, rscratch2); - - BLOCK_COMMENT("string_equals {"); - - // Start by assuming that the strings are not equal. - mov(result, zr); - - // A very short string - cmpw(cnt, 4); - br(Assembler::LT, SHORT_STRING); - - // Check if the strings start at the same location. - cmp(str1, str2); - br(Assembler::EQ, SAME_CHARS); - - // Compare longwords - { - subw(cnt, cnt, 4); // The last longword is a special case - - // Move both string pointers to the last longword of their - // strings, negate the remaining count, and convert it to bytes. - lea(str1, Address(str1, cnt, Address::uxtw(1))); - lea(str2, Address(str2, cnt, Address::uxtw(1))); - sub(cnt, zr, cnt, LSL, 1); - - // Loop, loading longwords and comparing them into rscratch2. - bind(NEXT_WORD); - ldr(tmp1, Address(str1, cnt)); - ldr(tmp2, Address(str2, cnt)); - adds(cnt, cnt, wordSize); - eor(rscratch2, tmp1, tmp2); - cbnz(rscratch2, DONE); - br(Assembler::LT, NEXT_WORD); - - // Last longword. In the case where length == 4 we compare the - // same longword twice, but that's still faster than another - // conditional branch. - - ldr(tmp1, Address(str1)); - ldr(tmp2, Address(str2)); - eor(rscratch2, tmp1, tmp2); - cbz(rscratch2, SAME_CHARS); - b(DONE); - } - - bind(SHORT_STRING); - // Is the length zero? - cbz(cnt, SAME_CHARS); - - bind(SHORT_LOOP); - load_unsigned_short(tmp1, Address(post(str1, 2))); - load_unsigned_short(tmp2, Address(post(str2, 2))); - subw(tmp1, tmp1, tmp2); - cbnz(tmp1, DONE); - sub(cnt, cnt, 1); - cbnz(cnt, SHORT_LOOP); - - // Strings are equal. 
- bind(SAME_CHARS); - mov(result, true); - - // That's it - bind(DONE); - - BLOCK_COMMENT("} string_equals"); -} - - -void MacroAssembler::byte_arrays_equals(Register ary1, Register ary2, - Register result, Register tmp1) +// Compare Strings or char/byte arrays. + +// is_string is true iff this is a string comparison. + +// For Strings we're passed the address of the first characters in a1 +// and a2 and the length in cnt1. + +// For byte and char arrays we're passed the arrays themselves and we +// have to extract length fields and do null checks here. + +// elem_size is the element size in bytes: either 1 or 2. + +// There are two implementations. For arrays >= 8 bytes, all +// comparisons (including the final one, which may overlap) are +// performed 8 bytes at a time. For arrays < 8 bytes, we compare a +// halfword, then a short, and then a byte. + +void MacroAssembler::arrays_equals(Register a1, Register a2, + Register result, Register cnt1, + int elem_size, bool is_string) { - Register cnt1 = rscratch1; - Register cnt2 = rscratch2; + Label SAME, DONE, SHORT, NEXT_WORD, ONE; + Register tmp1 = rscratch1; Register tmp2 = rscratch2; - - Label SAME, DIFFER, NEXT, TAIL07, TAIL03, TAIL01; - - int length_offset = arrayOopDesc::length_offset_in_bytes(); - int base_offset = arrayOopDesc::base_offset_in_bytes(T_BYTE); - - BLOCK_COMMENT("byte_arrays_equals {"); - - // different until proven equal - mov(result, false); - - // same array? - cmp(ary1, ary2); - br(Assembler::EQ, SAME); - - // ne if either null - cbz(ary1, DIFFER); - cbz(ary2, DIFFER); - - // lengths ne? - ldrw(cnt1, Address(ary1, length_offset)); - ldrw(cnt2, Address(ary2, length_offset)); - cmp(cnt1, cnt2); - br(Assembler::NE, DIFFER); - - lea(ary1, Address(ary1, base_offset)); - lea(ary2, Address(ary2, base_offset)); - - subs(cnt1, cnt1, 8); - br(LT, TAIL07); - - BIND(NEXT); - ldr(tmp1, Address(post(ary1, 8))); - ldr(tmp2, Address(post(ary2, 8))); - subs(cnt1, cnt1, 8); + Register cnt2 = tmp2; // cnt2 only used in array length compare + int elem_per_word = wordSize/elem_size; + int log_elem_size = exact_log2(elem_size); + int length_offset = arrayOopDesc::length_offset_in_bytes(); + int base_offset + = arrayOopDesc::base_offset_in_bytes(elem_size == 2 ? T_CHAR : T_BYTE); + + assert(elem_size == 1 || elem_size == 2, "must be char or byte"); + assert_different_registers(a1, a2, result, cnt1, rscratch1, rscratch2); + + BLOCK_COMMENT(is_string ? "string_equals {" : "array_equals {"); + + mov(result, false); + + if (!is_string) { + // if (a==a2) + // return true; + eor(rscratch1, a1, a2); + cbz(rscratch1, SAME); + // if (a==null || a2==null) + // return false; + cbz(a1, DONE); + cbz(a2, DONE); + // if (a1.length != a2.length) + // return false; + ldrw(cnt1, Address(a1, length_offset)); + ldrw(cnt2, Address(a2, length_offset)); + eorw(tmp1, cnt1, cnt2); + cbnzw(tmp1, DONE); + + lea(a1, Address(a1, base_offset)); + lea(a2, Address(a2, base_offset)); + } + + // Check for short strings, i.e. smaller than wordSize. + subs(cnt1, cnt1, elem_per_word); + br(Assembler::LT, SHORT); + // Main 8 byte comparison loop. 
+ bind(NEXT_WORD); { + ldr(tmp1, Address(post(a1, wordSize))); + ldr(tmp2, Address(post(a2, wordSize))); + subs(cnt1, cnt1, elem_per_word); eor(tmp1, tmp1, tmp2); - cbnz(tmp1, DIFFER); - br(GE, NEXT); - - BIND(TAIL07); // 0-7 bytes left, cnt1 = #bytes left - 4 - tst(cnt1, 0b100); - br(EQ, TAIL03); - ldrw(tmp1, Address(post(ary1, 4))); - ldrw(tmp2, Address(post(ary2, 4))); - cmp(tmp1, tmp2); - br(NE, DIFFER); - - BIND(TAIL03); // 0-3 bytes left, cnt1 = #bytes left - 4 - tst(cnt1, 0b10); - br(EQ, TAIL01); - ldrh(tmp1, Address(post(ary1, 2))); - ldrh(tmp2, Address(post(ary2, 2))); - cmp(tmp1, tmp2); - br(NE, DIFFER); - BIND(TAIL01); // 0-1 byte left - tst(cnt1, 0b01); - br(EQ, SAME); - ldrb(tmp1, ary1); - ldrb(tmp2, ary2); - cmp(tmp1, tmp2); - br(NE, DIFFER); - - BIND(SAME); - mov(result, true); - BIND(DIFFER); // result already set - - BLOCK_COMMENT("} byte_arrays_equals"); + cbnz(tmp1, DONE); + } br(GT, NEXT_WORD); + // Last longword. In the case where length == 4 we compare the + // same longword twice, but that's still faster than another + // conditional branch. + // cnt1 could be 0, -1, -2, -3, -4 for chars; -4 only happens when + // length == 4. + if (log_elem_size > 0) + lsl(cnt1, cnt1, log_elem_size); + ldr(tmp1, Address(a1, cnt1)); + ldr(tmp2, Address(a2, cnt1)); + eor(tmp1, tmp1, tmp2); + cbnz(tmp1, DONE); + b(SAME); + + bind(SHORT); + Label TAIL03, TAIL01; + + tbz(cnt1, 2 - log_elem_size, TAIL03); // 0-7 bytes left. + { + ldrw(tmp1, Address(post(a1, 4))); + ldrw(tmp2, Address(post(a2, 4))); + eorw(tmp1, tmp1, tmp2); + cbnzw(tmp1, DONE); + } + bind(TAIL03); + tbz(cnt1, 1 - log_elem_size, TAIL01); // 0-3 bytes left. + { + ldrh(tmp1, Address(post(a1, 2))); + ldrh(tmp2, Address(post(a2, 2))); + eorw(tmp1, tmp1, tmp2); + cbnzw(tmp1, DONE); + } + bind(TAIL01); + if (elem_size == 1) { // Only needed when comparing byte arrays. + tbz(cnt1, 0, SAME); // 0-1 bytes left. + { + ldrb(tmp1, a1); + ldrb(tmp2, a2); + eorw(tmp1, tmp1, tmp2); + cbnzw(tmp1, DONE); + } + } + // Arrays are equal. + bind(SAME); + mov(result, true); + + // That's it. + bind(DONE); + BLOCK_COMMENT(is_string ? "} string_equals" : "} array_equals"); } -// Compare char[] arrays aligned to 4 bytes -void MacroAssembler::char_arrays_equals(Register ary1, Register ary2, - Register result, Register tmp1) -{ - Register cnt1 = rscratch1; - Register cnt2 = rscratch2; - Register tmp2 = rscratch2; - - Label SAME, DIFFER, NEXT, TAIL03, TAIL01; - - int length_offset = arrayOopDesc::length_offset_in_bytes(); - int base_offset = arrayOopDesc::base_offset_in_bytes(T_CHAR); - - BLOCK_COMMENT("char_arrays_equals {"); - - // different until proven equal - mov(result, false); - - // same array? - cmp(ary1, ary2); - br(Assembler::EQ, SAME); - - // ne if either null - cbz(ary1, DIFFER); - cbz(ary2, DIFFER); - - // lengths ne? 
- ldrw(cnt1, Address(ary1, length_offset)); - ldrw(cnt2, Address(ary2, length_offset)); - cmp(cnt1, cnt2); - br(Assembler::NE, DIFFER); - - lea(ary1, Address(ary1, base_offset)); - lea(ary2, Address(ary2, base_offset)); - - subs(cnt1, cnt1, 4); - br(LT, TAIL03); - - BIND(NEXT); - ldr(tmp1, Address(post(ary1, 8))); - ldr(tmp2, Address(post(ary2, 8))); - subs(cnt1, cnt1, 4); - eor(tmp1, tmp1, tmp2); - cbnz(tmp1, DIFFER); - br(GE, NEXT); - - BIND(TAIL03); // 0-3 chars left, cnt1 = #chars left - 4 - tst(cnt1, 0b10); - br(EQ, TAIL01); - ldrw(tmp1, Address(post(ary1, 4))); - ldrw(tmp2, Address(post(ary2, 4))); - cmp(tmp1, tmp2); - br(NE, DIFFER); - BIND(TAIL01); // 0-1 chars left - tst(cnt1, 0b01); - br(EQ, SAME); - ldrh(tmp1, ary1); - ldrh(tmp2, ary2); - cmp(tmp1, tmp2); - br(NE, DIFFER); - - BIND(SAME); - mov(result, true); - BIND(DIFFER); // result already set - - BLOCK_COMMENT("} char_arrays_equals"); -} // encode char[] to byte[] in ISO_8859_1 void MacroAssembler::encode_iso_array(Register src, Register dst,
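The three specialized routines (string_equals, byte_arrays_equals, char_arrays_equals) collapse into one arrays_equals parameterized by elem_size (1 or 2 bytes) and is_string (strings skip the null and length checks, which their callers already perform). The comparison strategy: inputs of at least 8 bytes are compared a word at a time, with the final word deliberately overlapping the previous one, since re-comparing a few bytes is cheaper than another conditional branch; shorter inputs fall through 4/2/1-byte tails. A standalone C++ sketch of the >= 8-byte idea (simplified; the short path here is reduced to a byte loop):

    #include <cstddef>
    #include <cstdint>
    #include <cstring>

    // Word-at-a-time equality with an overlapping final load, mirroring
    // the NEXT_WORD loop of arrays_equals. n is the length in bytes.
    static bool bytes_equal_sketch(const std::uint8_t* a,
                                   const std::uint8_t* b, std::size_t n) {
      if (n >= 8) {
        std::uint64_t x, y;
        std::size_t i = 0;
        for (; i + 8 < n; i += 8) {        // main loop: 8 bytes per step
          std::memcpy(&x, a + i, 8);
          std::memcpy(&y, b + i, 8);
          if (x != y) return false;
        }
        std::memcpy(&x, a + n - 8, 8);     // last word may overlap the
        std::memcpy(&y, b + n - 8, 8);     // previous one; that's fine
        return x == y;
      }
      for (std::size_t i = 0; i < n; i++)  // SHORT path, simplified
        if (a[i] != b[i]) return false;
      return true;
    }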
--- a/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp Thu Feb 25 14:59:44 2016 +0000 +++ b/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp Thu Mar 17 17:03:20 2016 +0000 @@ -971,21 +971,10 @@ } // A generic CAS; success or failure is in the EQ flag. - template <typename T1, typename T2> void cmpxchg(Register addr, Register expected, Register new_val, - T1 load_insn, - void (MacroAssembler::*cmp_insn)(Register, Register), - T2 store_insn, - Register tmp = rscratch1) { - Label retry_load, done; - bind(retry_load); - (this->*load_insn)(tmp, addr); - (this->*cmp_insn)(tmp, expected); - br(Assembler::NE, done); - (this->*store_insn)(tmp, new_val, addr); - cbnzw(tmp, retry_load); - bind(done); - } + enum operand_size size, + bool acquire, bool release, + Register tmp = rscratch1); // Calls @@ -1186,13 +1175,11 @@ void string_compare(Register str1, Register str2, Register cnt1, Register cnt2, Register result, Register tmp1); - void string_equals(Register str1, Register str2, - Register cnt, Register result, - Register tmp1); - void char_arrays_equals(Register ary1, Register ary2, - Register result, Register tmp1); - void byte_arrays_equals(Register ary1, Register ary2, - Register result, Register tmp1); + + void arrays_equals(Register a1, Register a2, + Register result, Register cnt1, + int elem_size, bool is_string); + void encode_iso_array(Register src, Register dst, Register len, Register result, FloatRegister Vtmp1, FloatRegister Vtmp2,
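The old cmpxchg was a template that threaded the exact load/compare/store member functions through as pointers; the replacement is one out-of-line routine taking an operand_size and explicit acquire/release flags, which lets it choose between an LSE cas and an exclusive-load loop internally. The call sites in aarch64.ad above now read:

    // Plain CAS, release-only ordering (aarch64_enc_cmpxchg):
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ false, /*release*/ true);

    // Acquiring variant (aarch64_enc_cmpxchg_acq):
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ true, /*release*/ true);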
--- a/src/cpu/aarch64/vm/nativeInst_aarch64.hpp Thu Feb 25 14:59:44 2016 +0000 +++ b/src/cpu/aarch64/vm/nativeInst_aarch64.hpp Thu Mar 17 17:03:20 2016 +0000 @@ -105,13 +105,20 @@ inline friend NativeInstruction* nativeInstruction_at(address address); static bool is_adrp_at(address instr); + static bool is_ldr_literal_at(address instr); + + bool is_ldr_literal() { + return is_ldr_literal_at(addr_at(0)); + } + static bool is_ldrw_to_zr(address instr); static bool is_call_at(address instr) { const uint32_t insn = (*(uint32_t*)instr); return (insn >> 26) == 0b100101; } + bool is_call() { return is_call_at(addr_at(0)); }
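The body of is_ldr_literal_at is not part of this hunk; for orientation, a check against the A64 "load register (literal)" encoding group (bits 29..27 = 0b011, bits 25..24 = 0b00 in the ARM ARM) could plausibly look like the following. This is an assumption for illustration, not the actual HotSpot implementation:

    #include <cstdint>

    // Hypothetical sketch: match the A64 load-register-literal group,
    // i.e. instructions of the form xx011x00 in bits 31..24.
    static bool is_ldr_literal_sketch(const void* pc) {
      const std::uint32_t insn = *static_cast<const std::uint32_t*>(pc);
      return (insn & 0x3B000000u) == 0x18000000u;
    }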
--- a/src/cpu/aarch64/vm/register_aarch64.hpp Thu Feb 25 14:59:44 2016 +0000 +++ b/src/cpu/aarch64/vm/register_aarch64.hpp Thu Mar 17 17:03:20 2016 +0000 @@ -107,6 +107,9 @@ CONSTANT_REGISTER_DECLARATION(Register, zr, (32)); CONSTANT_REGISTER_DECLARATION(Register, sp, (33)); +// Used as a filler in instructions where a register field is unused. +const Register dummy_reg = r31_sp; + // Use FloatRegister as shortcut class FloatRegisterImpl; typedef FloatRegisterImpl* FloatRegister;
--- a/src/cpu/aarch64/vm/stubGenerator_aarch64.cpp Thu Feb 25 14:59:44 2016 +0000 +++ b/src/cpu/aarch64/vm/stubGenerator_aarch64.cpp Thu Mar 17 17:03:20 2016 +0000 @@ -163,30 +163,20 @@ sp_after_call_off = -26, d15_off = -26, - d14_off = -25, d13_off = -24, - d12_off = -23, d11_off = -22, - d10_off = -21, d9_off = -20, - d8_off = -19, r28_off = -18, - r27_off = -17, r26_off = -16, - r25_off = -15, r24_off = -14, - r23_off = -13, r22_off = -12, - r21_off = -11, r20_off = -10, - r19_off = -9, call_wrapper_off = -8, result_off = -7, result_type_off = -6, method_off = -5, entry_point_off = -4, - parameters_off = -3, parameter_size_off = -2, thread_off = -1, fp_f = 0, @@ -208,30 +198,20 @@ const Address result_type (rfp, result_type_off * wordSize); const Address method (rfp, method_off * wordSize); const Address entry_point (rfp, entry_point_off * wordSize); - const Address parameters (rfp, parameters_off * wordSize); const Address parameter_size(rfp, parameter_size_off * wordSize); const Address thread (rfp, thread_off * wordSize); const Address d15_save (rfp, d15_off * wordSize); - const Address d14_save (rfp, d14_off * wordSize); const Address d13_save (rfp, d13_off * wordSize); - const Address d12_save (rfp, d12_off * wordSize); const Address d11_save (rfp, d11_off * wordSize); - const Address d10_save (rfp, d10_off * wordSize); const Address d9_save (rfp, d9_off * wordSize); - const Address d8_save (rfp, d8_off * wordSize); const Address r28_save (rfp, r28_off * wordSize); - const Address r27_save (rfp, r27_off * wordSize); const Address r26_save (rfp, r26_off * wordSize); - const Address r25_save (rfp, r25_off * wordSize); const Address r24_save (rfp, r24_off * wordSize); - const Address r23_save (rfp, r23_off * wordSize); const Address r22_save (rfp, r22_off * wordSize); - const Address r21_save (rfp, r21_off * wordSize); const Address r20_save (rfp, r20_off * wordSize); - const Address r19_save (rfp, r19_off * wordSize); // stub code @@ -254,31 +234,20 @@ // rthread because we want to sanity check rthread later __ str(c_rarg7, thread); __ strw(c_rarg6, parameter_size); - __ str(c_rarg5, parameters); - __ str(c_rarg4, entry_point); - __ str(c_rarg3, method); - __ str(c_rarg2, result_type); - __ str(c_rarg1, result); - __ str(c_rarg0, call_wrapper); - __ str(r19, r19_save); - __ str(r20, r20_save); - __ str(r21, r21_save); - __ str(r22, r22_save); - __ str(r23, r23_save); - __ str(r24, r24_save); - __ str(r25, r25_save); - __ str(r26, r26_save); - __ str(r27, r27_save); - __ str(r28, r28_save); - - __ strd(v8, d8_save); - __ strd(v9, d9_save); - __ strd(v10, d10_save); - __ strd(v11, d11_save); - __ strd(v12, d12_save); - __ strd(v13, d13_save); - __ strd(v14, d14_save); - __ strd(v15, d15_save); + __ stp(c_rarg4, c_rarg5, entry_point); + __ stp(c_rarg2, c_rarg3, result_type); + __ stp(c_rarg0, c_rarg1, call_wrapper); + + __ stp(r20, r19, r20_save); + __ stp(r22, r21, r22_save); + __ stp(r24, r23, r24_save); + __ stp(r26, r25, r26_save); + __ stp(r28, r27, r28_save); + + __ stpd(v9, v8, d9_save); + __ stpd(v11, v10, d11_save); + __ stpd(v13, v12, d13_save); + __ stpd(v15, v14, d15_save); // install Java thread in global register now we have saved // whatever value it held @@ -385,33 +354,22 @@ #endif // restore callee-save registers - __ ldrd(v15, d15_save); - __ ldrd(v14, d14_save); - __ ldrd(v13, d13_save); - __ ldrd(v12, d12_save); - __ ldrd(v11, d11_save); - __ ldrd(v10, d10_save); - __ ldrd(v9, d9_save); - __ ldrd(v8, d8_save); - - __ ldr(r28, r28_save); - __ ldr(r27, r27_save); - 
__ ldr(r26, r26_save); - __ ldr(r25, r25_save); - __ ldr(r24, r24_save); - __ ldr(r23, r23_save); - __ ldr(r22, r22_save); - __ ldr(r21, r21_save); - __ ldr(r20, r20_save); - __ ldr(r19, r19_save); - __ ldr(c_rarg0, call_wrapper); - __ ldr(c_rarg1, result); + __ ldpd(v15, v14, d15_save); + __ ldpd(v13, v12, d13_save); + __ ldpd(v11, v10, d11_save); + __ ldpd(v9, v8, d9_save); + + __ ldp(r28, r27, r28_save); + __ ldp(r26, r25, r26_save); + __ ldp(r24, r23, r24_save); + __ ldp(r22, r21, r22_save); + __ ldp(r20, r19, r20_save); + + __ ldp(c_rarg0, c_rarg1, call_wrapper); __ ldrw(c_rarg2, result_type); __ ldr(c_rarg3, method); - __ ldr(c_rarg4, entry_point); - __ ldr(c_rarg5, parameters); - __ ldr(c_rarg6, parameter_size); - __ ldr(c_rarg7, thread); + __ ldp(c_rarg4, c_rarg5, entry_point); + __ ldp(c_rarg6, c_rarg7, parameter_size); #ifndef PRODUCT // tell the simulator we are about to end Java execution @@ -771,7 +729,7 @@ // // count is a count of words. // - // Precondition: count >= 2 + // Precondition: count >= 8 // // Postconditions: // @@ -783,6 +741,7 @@ void generate_copy_longs(Label &start, Register s, Register d, Register count, copy_direction direction) { int unit = wordSize * direction; + int bias = (UseSIMDForMemoryOps ? 4:2) * wordSize; int offset; const Register t0 = r3, t1 = r4, t2 = r5, t3 = r6, @@ -792,7 +751,7 @@ assert_different_registers(rscratch1, t0, t1, t2, t3, t4, t5, t6, t7); assert_different_registers(s, d, count, rscratch1); - Label again, large, small; + Label again, drain; const char *stub_name; if (direction == copy_forwards) stub_name = "foward_copy_longs"; @@ -801,57 +760,35 @@ StubCodeMark mark(this, "StubRoutines", stub_name); __ align(CodeEntryAlignment); __ bind(start); - __ cmp(count, 8); - __ br(Assembler::LO, small); if (direction == copy_forwards) { - __ sub(s, s, 2 * wordSize); - __ sub(d, d, 2 * wordSize); + __ sub(s, s, bias); + __ sub(d, d, bias); } - __ subs(count, count, 16); - __ br(Assembler::GE, large); - - // 8 <= count < 16 words. Copy 8. 
- __ ldp(t0, t1, Address(s, 2 * unit)); - __ ldp(t2, t3, Address(s, 4 * unit)); - __ ldp(t4, t5, Address(s, 6 * unit)); - __ ldp(t6, t7, Address(__ pre(s, 8 * unit))); - - __ stp(t0, t1, Address(d, 2 * unit)); - __ stp(t2, t3, Address(d, 4 * unit)); - __ stp(t4, t5, Address(d, 6 * unit)); - __ stp(t6, t7, Address(__ pre(d, 8 * unit))); - - if (direction == copy_forwards) { - __ add(s, s, 2 * wordSize); - __ add(d, d, 2 * wordSize); - } - + +#ifdef ASSERT + // Make sure we are never given < 8 words { - Label L1, L2; - __ bind(small); - __ tbz(count, exact_log2(4), L1); - __ ldp(t0, t1, Address(__ adjust(s, 2 * unit, direction == copy_backwards))); - __ ldp(t2, t3, Address(__ adjust(s, 2 * unit, direction == copy_backwards))); - __ stp(t0, t1, Address(__ adjust(d, 2 * unit, direction == copy_backwards))); - __ stp(t2, t3, Address(__ adjust(d, 2 * unit, direction == copy_backwards))); - __ bind(L1); - - __ tbz(count, 1, L2); - __ ldp(t0, t1, Address(__ adjust(s, 2 * unit, direction == copy_backwards))); - __ stp(t0, t1, Address(__ adjust(d, 2 * unit, direction == copy_backwards))); - __ bind(L2); + Label L; + __ cmp(count, 8); + __ br(Assembler::GE, L); + __ stop("generate_copy_longs called with < 8 words"); + __ bind(L); } - - __ ret(lr); - - __ align(CodeEntryAlignment); - __ bind(large); +#endif // Fill 8 registers - __ ldp(t0, t1, Address(s, 2 * unit)); - __ ldp(t2, t3, Address(s, 4 * unit)); - __ ldp(t4, t5, Address(s, 6 * unit)); - __ ldp(t6, t7, Address(__ pre(s, 8 * unit))); + if (UseSIMDForMemoryOps) { + __ ldpq(v0, v1, Address(s, 4 * unit)); + __ ldpq(v2, v3, Address(__ pre(s, 8 * unit))); + } else { + __ ldp(t0, t1, Address(s, 2 * unit)); + __ ldp(t2, t3, Address(s, 4 * unit)); + __ ldp(t4, t5, Address(s, 6 * unit)); + __ ldp(t6, t7, Address(__ pre(s, 8 * unit))); + } + + __ subs(count, count, 16); + __ br(Assembler::LO, drain); int prefetch = PrefetchCopyIntervalInBytes; bool use_stride = false; @@ -866,38 +803,56 @@ if (PrefetchCopyIntervalInBytes > 0) __ prfm(use_stride ? 
Address(s, stride) : Address(s, prefetch), PLDL1KEEP); - __ stp(t0, t1, Address(d, 2 * unit)); - __ ldp(t0, t1, Address(s, 2 * unit)); - __ stp(t2, t3, Address(d, 4 * unit)); - __ ldp(t2, t3, Address(s, 4 * unit)); - __ stp(t4, t5, Address(d, 6 * unit)); - __ ldp(t4, t5, Address(s, 6 * unit)); - __ stp(t6, t7, Address(__ pre(d, 8 * unit))); - __ ldp(t6, t7, Address(__ pre(s, 8 * unit))); + if (UseSIMDForMemoryOps) { + __ stpq(v0, v1, Address(d, 4 * unit)); + __ ldpq(v0, v1, Address(s, 4 * unit)); + __ stpq(v2, v3, Address(__ pre(d, 8 * unit))); + __ ldpq(v2, v3, Address(__ pre(s, 8 * unit))); + } else { + __ stp(t0, t1, Address(d, 2 * unit)); + __ ldp(t0, t1, Address(s, 2 * unit)); + __ stp(t2, t3, Address(d, 4 * unit)); + __ ldp(t2, t3, Address(s, 4 * unit)); + __ stp(t4, t5, Address(d, 6 * unit)); + __ ldp(t4, t5, Address(s, 6 * unit)); + __ stp(t6, t7, Address(__ pre(d, 8 * unit))); + __ ldp(t6, t7, Address(__ pre(s, 8 * unit))); + } __ subs(count, count, 8); __ br(Assembler::HS, again); // Drain - __ stp(t0, t1, Address(d, 2 * unit)); - __ stp(t2, t3, Address(d, 4 * unit)); - __ stp(t4, t5, Address(d, 6 * unit)); - __ stp(t6, t7, Address(__ pre(d, 8 * unit))); - - if (direction == copy_forwards) { - __ add(s, s, 2 * wordSize); - __ add(d, d, 2 * wordSize); + __ bind(drain); + if (UseSIMDForMemoryOps) { + __ stpq(v0, v1, Address(d, 4 * unit)); + __ stpq(v2, v3, Address(__ pre(d, 8 * unit))); + } else { + __ stp(t0, t1, Address(d, 2 * unit)); + __ stp(t2, t3, Address(d, 4 * unit)); + __ stp(t4, t5, Address(d, 6 * unit)); + __ stp(t6, t7, Address(__ pre(d, 8 * unit))); } { Label L1, L2; __ tbz(count, exact_log2(4), L1); - __ ldp(t0, t1, Address(__ adjust(s, 2 * unit, direction == copy_backwards))); - __ ldp(t2, t3, Address(__ adjust(s, 2 * unit, direction == copy_backwards))); - __ stp(t0, t1, Address(__ adjust(d, 2 * unit, direction == copy_backwards))); - __ stp(t2, t3, Address(__ adjust(d, 2 * unit, direction == copy_backwards))); + if (UseSIMDForMemoryOps) { + __ ldpq(v0, v1, Address(__ pre(s, 4 * unit))); + __ stpq(v0, v1, Address(__ pre(d, 4 * unit))); + } else { + __ ldp(t0, t1, Address(s, 2 * unit)); + __ ldp(t2, t3, Address(__ pre(s, 4 * unit))); + __ stp(t0, t1, Address(d, 2 * unit)); + __ stp(t2, t3, Address(__ pre(d, 4 * unit))); + } __ bind(L1); + if (direction == copy_forwards) { + __ add(s, s, bias); + __ add(d, d, bias); + } + __ tbz(count, 1, L2); __ ldp(t0, t1, Address(__ adjust(s, 2 * unit, direction == copy_backwards))); __ stp(t0, t1, Address(__ adjust(d, 2 * unit, direction == copy_backwards))); @@ -973,16 +928,135 @@ int granularity = uabs(step); const Register t0 = r3, t1 = r4; + // <= 96 bytes do inline. Direction doesn't matter because we always + // load all the data before writing anything + Label copy4, copy8, copy16, copy32, copy80, copy128, copy_big, finish; + const Register t2 = r5, t3 = r6, t4 = r7, t5 = r8; + const Register t6 = r9, t7 = r10, t8 = r11, t9 = r12; + const Register send = r17, dend = r18; + + if (PrefetchCopyIntervalInBytes > 0) + __ prfm(Address(s, 0), PLDL1KEEP); + __ cmp(count, (UseSIMDForMemoryOps ? 
96:80)/granularity); + __ br(Assembler::HI, copy_big); + + __ lea(send, Address(s, count, Address::lsl(exact_log2(granularity)))); + __ lea(dend, Address(d, count, Address::lsl(exact_log2(granularity)))); + + __ cmp(count, 16/granularity); + __ br(Assembler::LS, copy16); + + __ cmp(count, 64/granularity); + __ br(Assembler::HI, copy80); + + __ cmp(count, 32/granularity); + __ br(Assembler::LS, copy32); + + // 33..64 bytes + if (UseSIMDForMemoryOps) { + __ ldpq(v0, v1, Address(s, 0)); + __ ldpq(v2, v3, Address(send, -32)); + __ stpq(v0, v1, Address(d, 0)); + __ stpq(v2, v3, Address(dend, -32)); + } else { + __ ldp(t0, t1, Address(s, 0)); + __ ldp(t2, t3, Address(s, 16)); + __ ldp(t4, t5, Address(send, -32)); + __ ldp(t6, t7, Address(send, -16)); + + __ stp(t0, t1, Address(d, 0)); + __ stp(t2, t3, Address(d, 16)); + __ stp(t4, t5, Address(dend, -32)); + __ stp(t6, t7, Address(dend, -16)); + } + __ b(finish); + + // 17..32 bytes + __ bind(copy32); + __ ldp(t0, t1, Address(s, 0)); + __ ldp(t2, t3, Address(send, -16)); + __ stp(t0, t1, Address(d, 0)); + __ stp(t2, t3, Address(dend, -16)); + __ b(finish); + + // 65..80/96 bytes + // (96 bytes if SIMD because we do 32 bytes per instruction) + __ bind(copy80); + if (UseSIMDForMemoryOps) { + __ ldpq(v0, v1, Address(s, 0)); + __ ldpq(v2, v3, Address(s, 32)); + __ ldpq(v4, v5, Address(send, -32)); + __ stpq(v0, v1, Address(d, 0)); + __ stpq(v2, v3, Address(d, 32)); + __ stpq(v4, v5, Address(dend, -32)); + } else { + __ ldp(t0, t1, Address(s, 0)); + __ ldp(t2, t3, Address(s, 16)); + __ ldp(t4, t5, Address(s, 32)); + __ ldp(t6, t7, Address(s, 48)); + __ ldp(t8, t9, Address(send, -16)); + + __ stp(t0, t1, Address(d, 0)); + __ stp(t2, t3, Address(d, 16)); + __ stp(t4, t5, Address(d, 32)); + __ stp(t6, t7, Address(d, 48)); + __ stp(t8, t9, Address(dend, -16)); + } + __ b(finish); + + // 0..16 bytes + __ bind(copy16); + __ cmp(count, 8/granularity); + __ br(Assembler::LO, copy8); + + // 8..16 bytes + __ ldr(t0, Address(s, 0)); + __ ldr(t1, Address(send, -8)); + __ str(t0, Address(d, 0)); + __ str(t1, Address(dend, -8)); + __ b(finish); + + if (granularity < 8) { + // 4..7 bytes + __ bind(copy8); + __ tbz(count, 2 - exact_log2(granularity), copy4); + __ ldrw(t0, Address(s, 0)); + __ ldrw(t1, Address(send, -4)); + __ strw(t0, Address(d, 0)); + __ strw(t1, Address(dend, -4)); + __ b(finish); + if (granularity < 4) { + // 0..3 bytes + __ bind(copy4); + __ cbz(count, finish); // get rid of 0 case + if (granularity == 2) { + __ ldrh(t0, Address(s, 0)); + __ strh(t0, Address(d, 0)); + } else { // granularity == 1 + // Now 1..3 bytes. Handle the 1 and 2 byte case by copying + // the first and last byte. + // Handle the 3 byte case by loading and storing base + count/2 + // (count == 1 (s+0)->(d+0), count == 2,3 (s+1) -> (d+1)) + // This does mean in the 1 byte case we load/store the same + // byte 3 times. + __ lsr(count, count, 1); + __ ldrb(t0, Address(s, 0)); + __ ldrb(t1, Address(send, -1)); + __ ldrb(t2, Address(s, count)); + __ strb(t0, Address(d, 0)); + __ strb(t1, Address(dend, -1)); + __ strb(t2, Address(d, count)); + } + __ b(finish); + } + } + + __ bind(copy_big); if (is_backwards) { __ lea(s, Address(s, count, Address::lsl(exact_log2(-step)))); __ lea(d, Address(d, count, Address::lsl(exact_log2(-step)))); } - Label tail; - - __ cmp(count, 16/granularity); - __ br(Assembler::LO, tail); - // Now we've got the small case out of the way we can align the // source address on a 2-word boundary. 
@@ -1028,8 +1102,6 @@ #endif } - __ cmp(count, 16/granularity); - __ br(Assembler::LT, tail); __ bind(aligned); // s is now 2-word-aligned. @@ -1043,9 +1115,11 @@ __ bl(copy_b); // And the tail. - - __ bind(tail); copy_memory_small(s, d, count, tmp, step); + + if (granularity >= 8) __ bind(copy8); + if (granularity >= 4) __ bind(copy4); + __ bind(finish); }
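The rewritten copy_memory prologue above deserves a gloss: for counts at or below 80 bytes (96 with SIMD) it copies entirely with straight-line code, and direction never matters because every load is issued before any store, with the head of the range and the tail (addressed off send/dend) fetched first. A minimal C++ sketch of the same overlap trick, illustrative only and not HotSpot code:

#include <cstdint>
#include <cstring>

// 8..16 bytes: two possibly-overlapping 8-byte accesses cover the whole
// range, mirroring the stub's ldr/str of Address(s, 0) and Address(send, -8).
static void copy_8_to_16(uint8_t* d, const uint8_t* s, std::size_t count) {
  uint64_t head, tail;
  std::memcpy(&head, s, 8);              // first 8 bytes
  std::memcpy(&tail, s + count - 8, 8);  // last 8 bytes, may overlap head
  std::memcpy(d, &head, 8);
  std::memcpy(d + count - 8, &tail, 8);
}

// 1..3 bytes: first byte, last byte, and the byte at s + count/2.
// count == 1 hits the same byte three times; count == 2 or 3 puts the
// middle access on s + 1, exactly like the stub's lsr(count, count, 1).
static void copy_1_to_3(uint8_t* d, const uint8_t* s, std::size_t count) {
  uint8_t t0 = s[0], t1 = s[count - 1], t2 = s[count >> 1];
  d[0] = t0;
  d[count - 1] = t1;
  d[count >> 1] = t2;
}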
--- a/src/cpu/aarch64/vm/templateInterpreterGenerator_aarch64.cpp Thu Feb 25 14:59:44 2016 +0000 +++ b/src/cpu/aarch64/vm/templateInterpreterGenerator_aarch64.cpp Thu Mar 17 17:03:20 2016 +0000 @@ -1984,6 +1984,7 @@ __ push(rscratch3); Label L; __ mov(rscratch2, (address) &BytecodeCounter::_counter_value); + __ prfm(Address(rscratch2), PSTL1STRM); __ bind(L); __ ldxr(rscratch1, rscratch2); __ add(rscratch1, rscratch1, 1);
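The single added instruction here is a prefetch-for-store hint: PSTL1STRM requests the line in a writable state before the LDXR/STXR increment of BytecodeCounter::_counter_value, which makes the store-exclusive less likely to fail. Linux's AArch64 atomics use the same idiom; a hedged sketch in GCC inline assembly (AArch64-only, illustrative, not HotSpot code):

#include <cstdint>

// Increment *p with a load-exclusive/store-exclusive retry loop, preceded
// by the same PSTL1STRM prefetch the interpreter now emits.
static inline void atomic_inc(volatile uint64_t* p) {
  uint64_t tmp;
  uint32_t status;
  __asm__ volatile(
    "   prfm  pstl1strm, %[v]   \n"  // prime the cache line for store
    "1: ldxr  %[t], %[v]        \n"  // load-exclusive
    "   add   %[t], %[t], #1    \n"
    "   stxr  %w[s], %[t], %[v] \n"  // store-exclusive, 0 on success
    "   cbnz  %w[s], 1b         \n"  // lost the reservation: retry
    : [t] "=&r" (tmp), [s] "=&r" (status), [v] "+Q" (*p)
    :
    : "memory");
}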
--- a/src/cpu/aarch64/vm/vm_version_aarch64.cpp Thu Feb 25 14:59:44 2016 +0000 +++ b/src/cpu/aarch64/vm/vm_version_aarch64.cpp Thu Mar 17 17:03:20 2016 +0000 @@ -61,6 +61,10 @@ #define HWCAP_CRC32 (1<<7) #endif +#ifndef HWCAP_ATOMICS +#define HWCAP_ATOMICS (1<<8) +#endif + int VM_Version::_cpu; int VM_Version::_model; int VM_Version::_model2; @@ -172,6 +176,7 @@ if (auxv & HWCAP_AES) strcat(buf, ", aes"); if (auxv & HWCAP_SHA1) strcat(buf, ", sha1"); if (auxv & HWCAP_SHA2) strcat(buf, ", sha256"); + if (auxv & HWCAP_ATOMICS) strcat(buf, ", lse"); _features_string = os::strdup(buf); @@ -191,6 +196,15 @@ FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false); } + if (auxv & HWCAP_ATOMICS) { + if (FLAG_IS_DEFAULT(UseLSE)) + FLAG_SET_DEFAULT(UseLSE, true); + } else { + if (UseLSE) { + warning("UseLSE specified, but not supported on this CPU"); + } + } + if (auxv & HWCAP_AES) { UseAES = UseAES || FLAG_IS_DEFAULT(UseAES); UseAESIntrinsics =
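For context on the detection itself: on Linux the VM learns about CPU features from the AT_HWCAP word of the auxiliary vector, and bit 8 (HWCAP_ATOMICS) advertises the ARMv8.1 LSE atomic instructions (LDADD, CAS, SWP and friends), which is what turns UseLSE on by default above. A small standalone probe along the same lines (Linux-specific; the fallback #define mirrors the one the patch adds, and the printout is illustrative):

#include <cstdio>
#include <sys/auxv.h>   // getauxval, AT_HWCAP

#ifndef HWCAP_ATOMICS
#define HWCAP_ATOMICS (1 << 8)   // ARMv8.1 LSE atomics
#endif

int main() {
  unsigned long hwcaps = getauxval(AT_HWCAP);
  std::printf("lse atomics: %s\n",
              (hwcaps & HWCAP_ATOMICS) ? "supported" : "not supported");
  return 0;
}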
--- a/src/cpu/ppc/vm/globalDefinitions_ppc.hpp Thu Feb 25 14:59:44 2016 +0000 +++ b/src/cpu/ppc/vm/globalDefinitions_ppc.hpp Thu Mar 17 17:03:20 2016 +0000 @@ -47,7 +47,7 @@ // The expected size in bytes of a cache line, used to pad data structures. #define DEFAULT_CACHE_LINE_SIZE 128 -#if defined(COMPILER2) && defined(AIX) +#if defined(COMPILER2) && (defined(AIX) || defined(linux)) // Include Transactional Memory lock eliding optimization #define INCLUDE_RTM_OPT 1 #endif
--- a/src/cpu/ppc/vm/globals_ppc.hpp Thu Feb 25 14:59:44 2016 +0000 +++ b/src/cpu/ppc/vm/globals_ppc.hpp Thu Mar 17 17:03:20 2016 +0000 @@ -76,6 +76,8 @@ define_pd_global(bool, CompactStrings, true); +define_pd_global(intx, InitArrayShortSize, 8*BytesPerLong); + // Platform dependent flag handling: flags only defined on this platform. #define ARCH_FLAGS(develop, product, diagnostic, experimental, notproduct, range, constraint) \ \
--- a/src/cpu/ppc/vm/ppc.ad Thu Feb 25 14:59:44 2016 +0000 +++ b/src/cpu/ppc/vm/ppc.ad Thu Mar 17 17:03:20 2016 +0000 @@ -2137,8 +2137,6 @@ return decode; } */ -// Threshold size for cleararray. -const int Matcher::init_array_short_size = 8 * BytesPerLong; // false => size gets scaled to BytesPerLong, ok. const bool Matcher::init_array_count_is_in_bytes = false;
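Together with the globals_ppc.hpp hunk above and the globals_sparc.hpp hunk below, this swaps the hard-coded Matcher::init_array_short_size constant for a platform-default flag, InitArrayShortSize, so the short-array-initialization threshold becomes a startup-tunable value (presumably via -XX:InitArrayShortSize=<bytes>) rather than requiring a rebuild. A rough sketch of the decision that threshold drives, simplified from C2's ClearArray handling and not the real matcher code:

#include <cstdint>
#include <cstring>

// Arrays at or below the threshold are zeroed with inline stores; longer
// ones go through a bulk-clear call. 8 * BytesPerLong = 64 bytes is the
// default the pd_globals set.
static const long kInitArrayShortSize = 8 * 8;

void clear_array(int64_t* base, long byte_size) {
  if (byte_size <= kInitArrayShortSize) {
    for (long i = 0; i < byte_size / 8; i++) base[i] = 0;  // short: inline
  } else {
    std::memset(base, 0, (std::size_t)byte_size);          // long: bulk call
  }
}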
--- a/src/cpu/ppc/vm/vm_version_ppc.cpp Thu Feb 25 14:59:44 2016 +0000 +++ b/src/cpu/ppc/vm/vm_version_ppc.cpp Thu Mar 17 17:03:20 2016 +0000 @@ -255,7 +255,16 @@ } #endif #ifdef linux - // TODO: check kernel version (we currently have too old versions only) + // Linux kernel 4.2 or later is required, as the problematic behavior of + // syscalls being called in the middle of a transaction was fixed there. + // Please refer to commit b4b56f9ecab40f3b4ef53e130c9f6663be491894 + // in the Linux kernel source tree: https://goo.gl/Kc5i7A + if (os::Linux::os_version_is_known()) { + if (os::Linux::os_version() >= 0x040200) + os_too_old = false; + } else { + vm_exit_during_initialization("RTM cannot be enabled: kernel version is unknown."); + } #endif if (os_too_old) { vm_exit_during_initialization("RTM is not supported on this OS version.");
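The 0x040200 comparison reads naturally once the encoding is spelled out: assuming os::Linux::os_version() packs the kernel's major, minor and patch numbers into one byte each, 4.2.0 becomes 0x040200 and a plain integer compare orders versions correctly. A sketch of that packing (the format is inferred from the comparison above, not quoted from the os_linux sources):

#include <cstdio>

// "4.2.0" -> (4 << 16) | (2 << 8) | 0 == 0x040200
static unsigned pack_kernel_version(const char* release) {
  unsigned major = 0, minor = 0, patch = 0;
  std::sscanf(release, "%u.%u.%u", &major, &minor, &patch);
  return (major << 16) | (minor << 8) | patch;
}

int main() {
  std::printf("4.2.0  -> 0x%06x\n", pack_kernel_version("4.2.0"));   // 0x040200
  std::printf("3.16.7 -> RTM ok? %d\n",
              pack_kernel_version("3.16.7") >= 0x040200);            // 0
  return 0;
}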
--- a/src/cpu/sparc/vm/globals_sparc.hpp Thu Feb 25 14:59:44 2016 +0000 +++ b/src/cpu/sparc/vm/globals_sparc.hpp Thu Mar 17 17:03:20 2016 +0000 @@ -90,6 +90,8 @@ define_pd_global(bool, CompactStrings, true); +define_pd_global(intx, InitArrayShortSize, 8*BytesPerLong); + #define ARCH_FLAGS(develop, product, diagnostic, experimental, notproduct, range, constraint) \ \ product(intx, UseVIS, 99, \
--- a/src/cpu/sparc/vm/sparc.ad Thu Feb 25 14:59:44 2016 +0000 +++ b/src/cpu/sparc/vm/sparc.ad Thu Mar 17 17:03:20 2016 +0000 @@ -948,28 +948,28 @@ } #endif - uint instr; - instr = (Assembler::ldst_op << 30) - | (dst_enc << 25) - | (primary << 19) - | (src1_enc << 14); + uint instr = (Assembler::ldst_op << 30) + | (dst_enc << 25) + | (primary << 19) + | (src1_enc << 14); uint index = src2_enc; int disp = disp32; if (src1_enc == R_SP_enc || src1_enc == R_FP_enc) { disp += STACK_BIAS; - // Quick fix for JDK-8029668: check that stack offset fits, bailout if not + // Check that stack offset fits, load into O7 if not if (!Assembler::is_simm13(disp)) { - ra->C->record_method_not_compilable("unable to handle large constant offsets"); - return; + MacroAssembler _masm(&cbuf); + __ set(disp, O7); + if (index != R_G0_enc) { + __ add(O7, reg_to_register_object(index), O7); + } + index = R_O7_enc; + disp = 0; } } - // We should have a compiler bailout here rather than a guarantee. - // Better yet would be some mechanism to handle variable-size matches correctly. - guarantee(Assembler::is_simm13(disp), "Do not match large constant offsets" ); - if( disp == 0 ) { // use reg-reg form // bit 13 is already zero @@ -983,7 +983,7 @@ cbuf.insts()->emit_int32(instr); #ifdef ASSERT - { + if (VerifyOops) { MacroAssembler _masm(&cbuf); if (is_verified_oop_base) { __ verify_oop(reg_to_register_object(src1_enc)); @@ -1342,7 +1342,7 @@ // Figure out which register class each belongs in: rc_int, rc_float, rc_stack enum RC { rc_bad, rc_int, rc_float, rc_stack }; static enum RC rc_class( OptoReg::Name reg ) { - if( !OptoReg::is_valid(reg) ) return rc_bad; + if (!OptoReg::is_valid(reg)) return rc_bad; if (OptoReg::is_stack(reg)) return rc_stack; VMReg r = OptoReg::as_VMReg(reg); if (r->is_Register()) return rc_int; @@ -1350,66 +1350,79 @@ return rc_float; } -static int impl_helper(const MachNode* mach, CodeBuffer* cbuf, PhaseRegAlloc* ra, bool do_size, bool is_load, int offset, int reg, int opcode, const char *op_str, int size, outputStream* st ) { +#ifndef PRODUCT +ATTRIBUTE_PRINTF(2, 3) +static void print_helper(outputStream* st, const char* format, ...) { + if (st->position() > 0) { + st->cr(); + st->sp(); + } + va_list ap; + va_start(ap, format); + st->vprint(format, ap); + va_end(ap); +} +#endif // !PRODUCT + +static void impl_helper(const MachNode* mach, CodeBuffer* cbuf, PhaseRegAlloc* ra, bool is_load, int offset, int reg, int opcode, const char *op_str, outputStream* st) { if (cbuf) { emit_form3_mem_reg(*cbuf, ra, mach, opcode, -1, R_SP_enc, offset, 0, Matcher::_regEncode[reg]); } #ifndef PRODUCT - else if (!do_size) { - if (size != 0) st->print("\n\t"); - if (is_load) st->print("%s [R_SP + #%d],R_%s\t! spill",op_str,offset,OptoReg::regname(reg)); - else st->print("%s R_%s,[R_SP + #%d]\t! spill",op_str,OptoReg::regname(reg),offset); + else { + if (is_load) { + print_helper(st, "%s [R_SP + #%d],R_%s\t! spill", op_str, offset, OptoReg::regname(reg)); + } else { + print_helper(st, "%s R_%s,[R_SP + #%d]\t! 
spill", op_str, OptoReg::regname(reg), offset); + } } #endif - return size+4; } -static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, int op1, int op2, const char *op_str, int size, outputStream* st ) { - if( cbuf ) emit3( *cbuf, Assembler::arith_op, Matcher::_regEncode[dst], op1, 0, op2, Matcher::_regEncode[src] ); +static void impl_mov_helper(CodeBuffer *cbuf, int src, int dst, int op1, int op2, const char *op_str, outputStream* st) { + if (cbuf) { + emit3(*cbuf, Assembler::arith_op, Matcher::_regEncode[dst], op1, 0, op2, Matcher::_regEncode[src]); + } #ifndef PRODUCT - else if( !do_size ) { - if( size != 0 ) st->print("\n\t"); - st->print("%s R_%s,R_%s\t! spill",op_str,OptoReg::regname(src),OptoReg::regname(dst)); + else { + print_helper(st, "%s R_%s,R_%s\t! spill", op_str, OptoReg::regname(src), OptoReg::regname(dst)); } #endif - return size+4; } -uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, - PhaseRegAlloc *ra_, - bool do_size, - outputStream* st ) const { +static void mach_spill_copy_implementation_helper(const MachNode* mach, + CodeBuffer *cbuf, + PhaseRegAlloc *ra_, + outputStream* st) { // Get registers to move - OptoReg::Name src_second = ra_->get_reg_second(in(1)); - OptoReg::Name src_first = ra_->get_reg_first(in(1)); - OptoReg::Name dst_second = ra_->get_reg_second(this ); - OptoReg::Name dst_first = ra_->get_reg_first(this ); + OptoReg::Name src_second = ra_->get_reg_second(mach->in(1)); + OptoReg::Name src_first = ra_->get_reg_first(mach->in(1)); + OptoReg::Name dst_second = ra_->get_reg_second(mach); + OptoReg::Name dst_first = ra_->get_reg_first(mach); enum RC src_second_rc = rc_class(src_second); - enum RC src_first_rc = rc_class(src_first); + enum RC src_first_rc = rc_class(src_first); enum RC dst_second_rc = rc_class(dst_second); - enum RC dst_first_rc = rc_class(dst_first); - - assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" ); - - // Generate spill code! - int size = 0; - - if( src_first == dst_first && src_second == dst_second ) - return size; // Self copy, no move + enum RC dst_first_rc = rc_class(dst_first); + + assert(OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register"); + + if (src_first == dst_first && src_second == dst_second) { + return; // Self copy, no move + } // -------------------------------------- // Check for mem-mem move. Load into unused float registers and fall into // the float-store case. 
- if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) { + if (src_first_rc == rc_stack && dst_first_rc == rc_stack) { int offset = ra_->reg2offset(src_first); // Further check for aligned-adjacent pair, so we can use a double load - if( (src_first&1)==0 && src_first+1 == src_second ) { + if ((src_first&1) == 0 && src_first+1 == src_second) { src_second = OptoReg::Name(R_F31_num); src_second_rc = rc_float; - size = impl_helper(this,cbuf,ra_,do_size,true,offset,R_F30_num,Assembler::lddf_op3,"LDDF",size, st); + impl_helper(mach, cbuf, ra_, true, offset, R_F30_num, Assembler::lddf_op3, "LDDF", st); } else { - size = impl_helper(this,cbuf,ra_,do_size,true,offset,R_F30_num,Assembler::ldf_op3 ,"LDF ",size, st); + impl_helper(mach, cbuf, ra_, true, offset, R_F30_num, Assembler::ldf_op3, "LDF ", st); } src_first = OptoReg::Name(R_F30_num); src_first_rc = rc_float; @@ -1417,7 +1430,7 @@ if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { int offset = ra_->reg2offset(src_second); - size = impl_helper(this,cbuf,ra_,do_size,true,offset,R_F31_num,Assembler::ldf_op3,"LDF ",size, st); + impl_helper(mach, cbuf, ra_, true, offset, R_F31_num, Assembler::ldf_op3, "LDF ", st); src_second = OptoReg::Name(R_F31_num); src_second_rc = rc_float; } @@ -1427,36 +1440,38 @@ if (src_first_rc == rc_float && dst_first_rc == rc_int && UseVIS < 3) { int offset = frame::register_save_words*wordSize; if (cbuf) { - emit3_simm13( *cbuf, Assembler::arith_op, R_SP_enc, Assembler::sub_op3, R_SP_enc, 16 ); - impl_helper(this,cbuf,ra_,do_size,false,offset,src_first,Assembler::stf_op3 ,"STF ",size, st); - impl_helper(this,cbuf,ra_,do_size,true ,offset,dst_first,Assembler::lduw_op3,"LDUW",size, st); - emit3_simm13( *cbuf, Assembler::arith_op, R_SP_enc, Assembler::add_op3, R_SP_enc, 16 ); + emit3_simm13(*cbuf, Assembler::arith_op, R_SP_enc, Assembler::sub_op3, R_SP_enc, 16); + impl_helper(mach, cbuf, ra_, false, offset, src_first, Assembler::stf_op3, "STF ", st); + impl_helper(mach, cbuf, ra_, true, offset, dst_first, Assembler::lduw_op3, "LDUW", st); + emit3_simm13(*cbuf, Assembler::arith_op, R_SP_enc, Assembler::add_op3, R_SP_enc, 16); } #ifndef PRODUCT - else if (!do_size) { - if (size != 0) st->print("\n\t"); - st->print( "SUB R_SP,16,R_SP\n"); - impl_helper(this,cbuf,ra_,do_size,false,offset,src_first,Assembler::stf_op3 ,"STF ",size, st); - impl_helper(this,cbuf,ra_,do_size,true ,offset,dst_first,Assembler::lduw_op3,"LDUW",size, st); - st->print("\tADD R_SP,16,R_SP\n"); + else { + print_helper(st, "SUB R_SP,16,R_SP"); + impl_helper(mach, cbuf, ra_, false, offset, src_first, Assembler::stf_op3, "STF ", st); + impl_helper(mach, cbuf, ra_, true, offset, dst_first, Assembler::lduw_op3, "LDUW", st); + print_helper(st, "ADD R_SP,16,R_SP"); } #endif - size += 16; } // Check for float->int copy on T4 if (src_first_rc == rc_float && dst_first_rc == rc_int && UseVIS >= 3) { // Further check for aligned-adjacent pair, so we can use a double move - if ((src_first&1)==0 && src_first+1 == src_second && (dst_first&1)==0 && dst_first+1 == dst_second) - return impl_mov_helper(cbuf,do_size,src_first,dst_first,Assembler::mftoi_op3,Assembler::mdtox_opf,"MOVDTOX",size, st); - size = impl_mov_helper(cbuf,do_size,src_first,dst_first,Assembler::mftoi_op3,Assembler::mstouw_opf,"MOVSTOUW",size, st); + if ((src_first & 1) == 0 && src_first + 1 == src_second && (dst_first & 1) == 0 && dst_first + 1 == dst_second) { + impl_mov_helper(cbuf, src_first, dst_first, Assembler::mftoi_op3, Assembler::mdtox_opf, "MOVDTOX", st); + return; + } + 
impl_mov_helper(cbuf, src_first, dst_first, Assembler::mftoi_op3, Assembler::mstouw_opf, "MOVSTOUW", st); } // Check for int->float copy on T4 if (src_first_rc == rc_int && dst_first_rc == rc_float && UseVIS >= 3) { // Further check for aligned-adjacent pair, so we can use a double move - if ((src_first&1)==0 && src_first+1 == src_second && (dst_first&1)==0 && dst_first+1 == dst_second) - return impl_mov_helper(cbuf,do_size,src_first,dst_first,Assembler::mftoi_op3,Assembler::mxtod_opf,"MOVXTOD",size, st); - size = impl_mov_helper(cbuf,do_size,src_first,dst_first,Assembler::mftoi_op3,Assembler::mwtos_opf,"MOVWTOS",size, st); + if ((src_first & 1) == 0 && src_first + 1 == src_second && (dst_first & 1) == 0 && dst_first + 1 == dst_second) { + impl_mov_helper(cbuf, src_first, dst_first, Assembler::mftoi_op3, Assembler::mxtod_opf, "MOVXTOD", st); + return; + } + impl_mov_helper(cbuf, src_first, dst_first, Assembler::mftoi_op3, Assembler::mwtos_opf, "MOVWTOS", st); } // -------------------------------------- @@ -1466,10 +1481,10 @@ // there. Misaligned sources only come from native-long-returns (handled // special below). #ifndef _LP64 - if( src_first_rc == rc_int && // source is already big-endian src_second_rc != rc_bad && // 64-bit move - ((dst_first&1)!=0 || dst_second != dst_first+1) ) { // misaligned dst - assert( (src_first&1)==0 && src_second == src_first+1, "source must be aligned" ); + if (src_first_rc == rc_int && // source is already big-endian src_second_rc != rc_bad && // 64-bit move + ((dst_first & 1) != 0 || dst_second != dst_first + 1)) { // misaligned dst + assert((src_first & 1) == 0 && src_second == src_first + 1, "source must be aligned"); // Do the big-endian flop. OptoReg::Name tmp = dst_first ; dst_first = dst_second ; dst_second = tmp ; enum RC tmp_rc = dst_first_rc; dst_first_rc = dst_second_rc; dst_second_rc = tmp_rc; @@ -1478,30 +1493,28 @@ // -------------------------------------- // Check for integer reg-reg copy - if( src_first_rc == rc_int && dst_first_rc == rc_int ) { + if (src_first_rc == rc_int && dst_first_rc == rc_int) { #ifndef _LP64 - if( src_first == R_O0_num && src_second == R_O1_num ) { // Check for the evil O0/O1 native long-return case + if (src_first == R_O0_num && src_second == R_O1_num) { // Check for the evil O0/O1 native long-return case // Note: The _first and _second suffixes refer to the addresses of the 2 halves of the 64-bit value // as stored in memory. On a big-endian machine like SPARC, this means that the _second // operand contains the least significant word of the 64-bit value and vice versa. 
OptoReg::Name tmp = OptoReg::Name(R_O7_num); - assert( (dst_first&1)==0 && dst_second == dst_first+1, "return a native O0/O1 long to an aligned-adjacent 64-bit reg" ); + assert((dst_first & 1) == 0 && dst_second == dst_first + 1, "return a native O0/O1 long to an aligned-adjacent 64-bit reg" ); // Shift O0 left in-place, zero-extend O1, then OR them into the dst - if( cbuf ) { - emit3_simm13( *cbuf, Assembler::arith_op, Matcher::_regEncode[tmp], Assembler::sllx_op3, Matcher::_regEncode[src_first], 0x1020 ); - emit3_simm13( *cbuf, Assembler::arith_op, Matcher::_regEncode[src_second], Assembler::srl_op3, Matcher::_regEncode[src_second], 0x0000 ); - emit3 ( *cbuf, Assembler::arith_op, Matcher::_regEncode[dst_first], Assembler:: or_op3, Matcher::_regEncode[tmp], 0, Matcher::_regEncode[src_second] ); + if ( cbuf ) { + emit3_simm13(*cbuf, Assembler::arith_op, Matcher::_regEncode[tmp], Assembler::sllx_op3, Matcher::_regEncode[src_first], 0x1020); + emit3_simm13(*cbuf, Assembler::arith_op, Matcher::_regEncode[src_second], Assembler::srl_op3, Matcher::_regEncode[src_second], 0x0000); + emit3 (*cbuf, Assembler::arith_op, Matcher::_regEncode[dst_first], Assembler:: or_op3, Matcher::_regEncode[tmp], 0, Matcher::_regEncode[src_second]); #ifndef PRODUCT - } else if( !do_size ) { - if( size != 0 ) st->print("\n\t"); - st->print("SLLX R_%s,32,R_%s\t! Move O0-first to O7-high\n\t", OptoReg::regname(src_first), OptoReg::regname(tmp)); - st->print("SRL R_%s, 0,R_%s\t! Zero-extend O1\n\t", OptoReg::regname(src_second), OptoReg::regname(src_second)); - st->print("OR R_%s,R_%s,R_%s\t! spill",OptoReg::regname(tmp), OptoReg::regname(src_second), OptoReg::regname(dst_first)); + } else { + print_helper(st, "SLLX R_%s,32,R_%s\t! Move O0-first to O7-high\n\t", OptoReg::regname(src_first), OptoReg::regname(tmp)); + print_helper(st, "SRL R_%s, 0,R_%s\t! Zero-extend O1\n\t", OptoReg::regname(src_second), OptoReg::regname(src_second)); + print_helper(st, "OR R_%s,R_%s,R_%s\t! 
spill",OptoReg::regname(tmp), OptoReg::regname(src_second), OptoReg::regname(dst_first)); #endif } - return size+12; - } - else if( dst_first == R_I0_num && dst_second == R_I1_num ) { + return; + } else if (dst_first == R_I0_num && dst_second == R_I1_num) { // returning a long value in I0/I1 // a SpillCopy must be able to target a return instruction's reg_class // Note: The _first and _second suffixes refer to the addresses of the 2 halves of the 64-bit value @@ -1511,27 +1524,25 @@ if (src_first == dst_first) { tdest = OptoReg::Name(R_O7_num); - size += 4; } - if( cbuf ) { - assert( (src_first&1) == 0 && (src_first+1) == src_second, "return value was in an aligned-adjacent 64-bit reg"); + if (cbuf) { + assert((src_first & 1) == 0 && (src_first + 1) == src_second, "return value was in an aligned-adjacent 64-bit reg"); // Shift value in upper 32-bits of src to lower 32-bits of I0; move lower 32-bits to I1 // ShrL_reg_imm6 - emit3_simm13( *cbuf, Assembler::arith_op, Matcher::_regEncode[tdest], Assembler::srlx_op3, Matcher::_regEncode[src_second], 32 | 0x1000 ); + emit3_simm13(*cbuf, Assembler::arith_op, Matcher::_regEncode[tdest], Assembler::srlx_op3, Matcher::_regEncode[src_second], 32 | 0x1000); // ShrR_reg_imm6 src, 0, dst - emit3_simm13( *cbuf, Assembler::arith_op, Matcher::_regEncode[dst_second], Assembler::srl_op3, Matcher::_regEncode[src_first], 0x0000 ); + emit3_simm13(*cbuf, Assembler::arith_op, Matcher::_regEncode[dst_second], Assembler::srl_op3, Matcher::_regEncode[src_first], 0x0000); if (tdest != dst_first) { - emit3 ( *cbuf, Assembler::arith_op, Matcher::_regEncode[dst_first], Assembler::or_op3, 0/*G0*/, 0/*op2*/, Matcher::_regEncode[tdest] ); + emit3 (*cbuf, Assembler::arith_op, Matcher::_regEncode[dst_first], Assembler::or_op3, 0/*G0*/, 0/*op2*/, Matcher::_regEncode[tdest]); } } #ifndef PRODUCT - else if( !do_size ) { - if( size != 0 ) st->print("\n\t"); // %%%%% !!!!! - st->print("SRLX R_%s,32,R_%s\t! Extract MSW\n\t",OptoReg::regname(src_second),OptoReg::regname(tdest)); - st->print("SRL R_%s, 0,R_%s\t! Extract LSW\n\t",OptoReg::regname(src_first),OptoReg::regname(dst_second)); + else { + print_helper(st, "SRLX R_%s,32,R_%s\t! Extract MSW\n\t",OptoReg::regname(src_second),OptoReg::regname(tdest)); + print_helper(st, "SRL R_%s, 0,R_%s\t! Extract LSW\n\t",OptoReg::regname(src_first),OptoReg::regname(dst_second)); if (tdest != dst_first) { - st->print("MOV R_%s,R_%s\t! spill\n\t", OptoReg::regname(tdest), OptoReg::regname(dst_first)); + print_helper(st, "MOV R_%s,R_%s\t! spill\n\t", OptoReg::regname(tdest), OptoReg::regname(dst_first)); } } #endif // PRODUCT @@ -1539,65 +1550,77 @@ } #endif // !_LP64 // Else normal reg-reg copy - assert( src_second != dst_first, "smashed second before evacuating it" ); - size = impl_mov_helper(cbuf,do_size,src_first,dst_first,Assembler::or_op3,0,"MOV ",size, st); - assert( (src_first&1) == 0 && (dst_first&1) == 0, "never move second-halves of int registers" ); + assert(src_second != dst_first, "smashed second before evacuating it"); + impl_mov_helper(cbuf, src_first, dst_first, Assembler::or_op3, 0, "MOV ", st); + assert((src_first & 1) == 0 && (dst_first & 1) == 0, "never move second-halves of int registers"); // This moves an aligned adjacent pair. // See if we are done. 
- if( src_first+1 == src_second && dst_first+1 == dst_second ) - return size; + if (src_first + 1 == src_second && dst_first + 1 == dst_second) { + return; + } } // Check for integer store - if( src_first_rc == rc_int && dst_first_rc == rc_stack ) { + if (src_first_rc == rc_int && dst_first_rc == rc_stack) { int offset = ra_->reg2offset(dst_first); // Further check for aligned-adjacent pair, so we can use a double store - if( (src_first&1)==0 && src_first+1 == src_second && (dst_first&1)==0 && dst_first+1 == dst_second ) - return impl_helper(this,cbuf,ra_,do_size,false,offset,src_first,Assembler::stx_op3,"STX ",size, st); - size = impl_helper(this,cbuf,ra_,do_size,false,offset,src_first,Assembler::stw_op3,"STW ",size, st); + if ((src_first & 1) == 0 && src_first + 1 == src_second && (dst_first & 1) == 0 && dst_first + 1 == dst_second) { + impl_helper(mach, cbuf, ra_, false, offset, src_first, Assembler::stx_op3, "STX ", st); + return; + } + impl_helper(mach, cbuf, ra_, false, offset, src_first, Assembler::stw_op3, "STW ", st); } // Check for integer load - if( dst_first_rc == rc_int && src_first_rc == rc_stack ) { + if (dst_first_rc == rc_int && src_first_rc == rc_stack) { int offset = ra_->reg2offset(src_first); // Further check for aligned-adjacent pair, so we can use a double load - if( (src_first&1)==0 && src_first+1 == src_second && (dst_first&1)==0 && dst_first+1 == dst_second ) - return impl_helper(this,cbuf,ra_,do_size,true,offset,dst_first,Assembler::ldx_op3 ,"LDX ",size, st); - size = impl_helper(this,cbuf,ra_,do_size,true,offset,dst_first,Assembler::lduw_op3,"LDUW",size, st); + if ((src_first & 1) == 0 && src_first + 1 == src_second && (dst_first & 1) == 0 && dst_first + 1 == dst_second) { + impl_helper(mach, cbuf, ra_, true, offset, dst_first, Assembler::ldx_op3, "LDX ", st); + return; + } + impl_helper(mach, cbuf, ra_, true, offset, dst_first, Assembler::lduw_op3, "LDUW", st); } // Check for float reg-reg copy - if( src_first_rc == rc_float && dst_first_rc == rc_float ) { + if (src_first_rc == rc_float && dst_first_rc == rc_float) { // Further check for aligned-adjacent pair, so we can use a double move - if( (src_first&1)==0 && src_first+1 == src_second && (dst_first&1)==0 && dst_first+1 == dst_second ) - return impl_mov_helper(cbuf,do_size,src_first,dst_first,Assembler::fpop1_op3,Assembler::fmovd_opf,"FMOVD",size, st); - size = impl_mov_helper(cbuf,do_size,src_first,dst_first,Assembler::fpop1_op3,Assembler::fmovs_opf,"FMOVS",size, st); + if ((src_first & 1) == 0 && src_first + 1 == src_second && (dst_first & 1) == 0 && dst_first + 1 == dst_second) { + impl_mov_helper(cbuf, src_first, dst_first, Assembler::fpop1_op3, Assembler::fmovd_opf, "FMOVD", st); + return; + } + impl_mov_helper(cbuf, src_first, dst_first, Assembler::fpop1_op3, Assembler::fmovs_opf, "FMOVS", st); } // Check for float store - if( src_first_rc == rc_float && dst_first_rc == rc_stack ) { + if (src_first_rc == rc_float && dst_first_rc == rc_stack) { int offset = ra_->reg2offset(dst_first); // Further check for aligned-adjacent pair, so we can use a double store - if( (src_first&1)==0 && src_first+1 == src_second && (dst_first&1)==0 && dst_first+1 == dst_second ) - return impl_helper(this,cbuf,ra_,do_size,false,offset,src_first,Assembler::stdf_op3,"STDF",size, st); - size = impl_helper(this,cbuf,ra_,do_size,false,offset,src_first,Assembler::stf_op3 ,"STF ",size, st); + if ((src_first & 1) == 0 && src_first + 1 == src_second && (dst_first & 1) == 0 && dst_first + 1 == dst_second) { + impl_helper(mach, cbuf, 
ra_, false, offset, src_first, Assembler::stdf_op3, "STDF", st); + return; + } + impl_helper(mach, cbuf, ra_, false, offset, src_first, Assembler::stf_op3, "STF ", st); } // Check for float load - if( dst_first_rc == rc_float && src_first_rc == rc_stack ) { + if (dst_first_rc == rc_float && src_first_rc == rc_stack) { int offset = ra_->reg2offset(src_first); // Further check for aligned-adjacent pair, so we can use a double load - if( (src_first&1)==0 && src_first+1 == src_second && (dst_first&1)==0 && dst_first+1 == dst_second ) - return impl_helper(this,cbuf,ra_,do_size,true,offset,dst_first,Assembler::lddf_op3,"LDDF",size, st); - size = impl_helper(this,cbuf,ra_,do_size,true,offset,dst_first,Assembler::ldf_op3 ,"LDF ",size, st); + if ((src_first & 1) == 0 && src_first + 1 == src_second && (dst_first & 1) == 0 && dst_first + 1 == dst_second) { + impl_helper(mach, cbuf, ra_, true, offset, dst_first, Assembler::lddf_op3, "LDDF", st); + return; + } + impl_helper(mach, cbuf, ra_, true, offset, dst_first, Assembler::ldf_op3, "LDF ", st); } // -------------------------------------------------------------------- // Check for hi bits still needing moving. Only happens for misaligned // arguments to native calls. - if( src_second == dst_second ) - return size; // Self copy; no move - assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" ); + if (src_second == dst_second) { + return; // Self copy; no move + } + assert(src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad"); #ifndef _LP64 // In the LP64 build, all registers can be moved as aligned/adjacent @@ -1609,52 +1632,57 @@ // 32-bits of a 64-bit register, but are needed in low bits of another // register (else it's a hi-bits-to-hi-bits copy which should have // happened already as part of a 64-bit move) - if( src_second_rc == rc_int && dst_second_rc == rc_int ) { - assert( (src_second&1)==1, "its the evil O0/O1 native return case" ); - assert( (dst_second&1)==0, "should have moved with 1 64-bit move" ); + if (src_second_rc == rc_int && dst_second_rc == rc_int) { + assert((src_second & 1) == 1, "its the evil O0/O1 native return case"); + assert((dst_second & 1) == 0, "should have moved with 1 64-bit move"); // Shift src_second down to dst_second's low bits. - if( cbuf ) { - emit3_simm13( *cbuf, Assembler::arith_op, Matcher::_regEncode[dst_second], Assembler::srlx_op3, Matcher::_regEncode[src_second-1], 0x1020 ); + if (cbuf) { + emit3_simm13(*cbuf, Assembler::arith_op, Matcher::_regEncode[dst_second], Assembler::srlx_op3, Matcher::_regEncode[src_second-1], 0x1020); #ifndef PRODUCT - } else if( !do_size ) { - if( size != 0 ) st->print("\n\t"); - st->print("SRLX R_%s,32,R_%s\t! spill: Move high bits down low",OptoReg::regname(src_second-1),OptoReg::regname(dst_second)); + } else { + print_helper(st, "SRLX R_%s,32,R_%s\t! spill: Move high bits down low", OptoReg::regname(src_second - 1), OptoReg::regname(dst_second)); #endif } - return size+4; + return; } // Check for high word integer store. Must down-shift the hi bits // into a temp register, then fall into the case of storing int bits. - if( src_second_rc == rc_int && dst_second_rc == rc_stack && (src_second&1)==1 ) { + if (src_second_rc == rc_int && dst_second_rc == rc_stack && (src_second & 1) == 1) { // Shift src_second down to dst_second's low bits. 
- if( cbuf ) { - emit3_simm13( *cbuf, Assembler::arith_op, Matcher::_regEncode[R_O7_num], Assembler::srlx_op3, Matcher::_regEncode[src_second-1], 0x1020 ); + if (cbuf) { + emit3_simm13(*cbuf, Assembler::arith_op, Matcher::_regEncode[R_O7_num], Assembler::srlx_op3, Matcher::_regEncode[src_second-1], 0x1020); #ifndef PRODUCT - } else if( !do_size ) { - if( size != 0 ) st->print("\n\t"); - st->print("SRLX R_%s,32,R_%s\t! spill: Move high bits down low",OptoReg::regname(src_second-1),OptoReg::regname(R_O7_num)); + } else { + print_helper(st, "SRLX R_%s,32,R_%s\t! spill: Move high bits down low", OptoReg::regname(src_second-1), OptoReg::regname(R_O7_num)); #endif } - size+=4; src_second = OptoReg::Name(R_O7_num); // Not R_O7H_num! } // Check for high word integer load - if( dst_second_rc == rc_int && src_second_rc == rc_stack ) - return impl_helper(this,cbuf,ra_,do_size,true ,ra_->reg2offset(src_second),dst_second,Assembler::lduw_op3,"LDUW",size, st); + if (dst_second_rc == rc_int && src_second_rc == rc_stack) + return impl_helper(mach, cbuf, ra_, true, ra_->reg2offset(src_second), dst_second, Assembler::lduw_op3, "LDUW", st); // Check for high word integer store - if( src_second_rc == rc_int && dst_second_rc == rc_stack ) - return impl_helper(this,cbuf,ra_,do_size,false,ra_->reg2offset(dst_second),src_second,Assembler::stw_op3 ,"STW ",size, st); + if (src_second_rc == rc_int && dst_second_rc == rc_stack) + return impl_helper(mach, cbuf, ra_, false, ra_->reg2offset(dst_second), src_second, Assembler::stw_op3, "STW ", st); // Check for high word float store - if( src_second_rc == rc_float && dst_second_rc == rc_stack ) - return impl_helper(this,cbuf,ra_,do_size,false,ra_->reg2offset(dst_second),src_second,Assembler::stf_op3 ,"STF ",size, st); + if (src_second_rc == rc_float && dst_second_rc == rc_stack) + return impl_helper(mach, cbuf, ra_, false, ra_->reg2offset(dst_second), src_second, Assembler::stf_op3, "STF ", st); #endif // !_LP64 Unimplemented(); +} + +uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, + PhaseRegAlloc *ra_, + bool do_size, + outputStream* st) const { + assert(!do_size, "not supported"); + mach_spill_copy_implementation_helper(this, cbuf, ra_, st); return 0; } @@ -1669,19 +1697,19 @@ } uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const { - return implementation( NULL, ra_, true, NULL ); + return MachNode::size(ra_); } //============================================================================= #ifndef PRODUCT -void MachNopNode::format( PhaseRegAlloc *, outputStream *st ) const { +void MachNopNode::format(PhaseRegAlloc *, outputStream *st) const { st->print("NOP \t# %d bytes pad for loops and calls", 4 * _count); } #endif -void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc * ) const { +void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *) const { MacroAssembler _masm(&cbuf); - for(int i = 0; i < _count; i += 1) { + for (int i = 0; i < _count; i += 1) { __ nop(); } } @@ -1952,9 +1980,6 @@ // No scaling for the parameter of the ClearArray node. const bool Matcher::init_array_count_is_in_bytes = true; -// Threshold size for cleararray. -const int Matcher::init_array_short_size = 8 * BytesPerLong; - // No additional cost for CMOVL. const int Matcher::long_cmove_cost() { return 0; } @@ -5197,7 +5222,6 @@ // No match rule to avoid chain rule match. effect(DEF dst, USE src); ins_cost(MEMORY_REF_COST); - size(4); format %{ "LDF $src,$dst\t! 
stkI to regF" %} opcode(Assembler::ldf_op3); ins_encode(simple_form3_mem_reg(src, dst)); @@ -5208,7 +5232,6 @@ // No match rule to avoid chain rule match. effect(DEF dst, USE src); ins_cost(MEMORY_REF_COST); - size(4); format %{ "LDDF $src,$dst\t! stkL to regD" %} opcode(Assembler::lddf_op3); ins_encode(simple_form3_mem_reg(src, dst)); @@ -5219,7 +5242,6 @@ // No match rule to avoid chain rule match. effect(DEF dst, USE src); ins_cost(MEMORY_REF_COST); - size(4); format %{ "STF $src,$dst\t! regF to stkI" %} opcode(Assembler::stf_op3); ins_encode(simple_form3_mem_reg(dst, src)); @@ -5230,7 +5252,6 @@ // No match rule to avoid chain rule match. effect(DEF dst, USE src); ins_cost(MEMORY_REF_COST); - size(4); format %{ "STDF $src,$dst\t! regD to stkL" %} opcode(Assembler::stdf_op3); ins_encode(simple_form3_mem_reg(dst, src)); @@ -5240,7 +5261,6 @@ instruct regI_to_stkLHi(stackSlotL dst, iRegI src) %{ effect(DEF dst, USE src); ins_cost(MEMORY_REF_COST*2); - size(8); format %{ "STW $src,$dst.hi\t! long\n\t" "STW R_G0,$dst.lo" %} opcode(Assembler::stw_op3); @@ -5252,7 +5272,6 @@ // No match rule to avoid chain rule match. effect(DEF dst, USE src); ins_cost(MEMORY_REF_COST); - size(4); format %{ "STX $src,$dst\t! regL to stkD" %} opcode(Assembler::stx_op3); ins_encode(simple_form3_mem_reg( dst, src ) ); @@ -5266,7 +5285,6 @@ match(Set dst src); ins_cost(MEMORY_REF_COST); - size(4); format %{ "LDUW $src,$dst\t!stk" %} opcode(Assembler::lduw_op3); ins_encode(simple_form3_mem_reg( src, dst ) ); @@ -5278,7 +5296,6 @@ match(Set dst src); ins_cost(MEMORY_REF_COST); - size(4); format %{ "STW $src,$dst\t!stk" %} opcode(Assembler::stw_op3); ins_encode(simple_form3_mem_reg( dst, src ) ); @@ -5290,7 +5307,6 @@ match(Set dst src); ins_cost(MEMORY_REF_COST); - size(4); format %{ "LDX $src,$dst\t! long" %} opcode(Assembler::ldx_op3); ins_encode(simple_form3_mem_reg( src, dst ) ); @@ -5302,7 +5318,6 @@ match(Set dst src); ins_cost(MEMORY_REF_COST); - size(4); format %{ "STX $src,$dst\t! long" %} opcode(Assembler::stx_op3); ins_encode(simple_form3_mem_reg( dst, src ) ); @@ -5314,7 +5329,6 @@ instruct stkP_to_regP( iRegP dst, stackSlotP src ) %{ match(Set dst src); ins_cost(MEMORY_REF_COST); - size(4); format %{ "LDX $src,$dst\t!ptr" %} opcode(Assembler::ldx_op3); ins_encode(simple_form3_mem_reg( src, dst ) ); @@ -5325,7 +5339,6 @@ instruct regP_to_stkP(stackSlotP dst, iRegP src) %{ match(Set dst src); ins_cost(MEMORY_REF_COST); - size(4); format %{ "STX $src,$dst\t!ptr" %} opcode(Assembler::stx_op3); ins_encode(simple_form3_mem_reg( dst, src ) ); @@ -5771,7 +5784,6 @@ match(Set dst (LoadL_unaligned mem)); effect(KILL tmp); ins_cost(MEMORY_REF_COST*2+DEFAULT_COST); - size(16); format %{ "LDUW $mem+4,R_O7\t! misaligned long\n" "\tLDUW $mem ,$dst\n" "\tSLLX #32, $dst, $dst\n" @@ -5786,7 +5798,6 @@ match(Set dst (LoadRange mem)); ins_cost(MEMORY_REF_COST); - size(4); format %{ "LDUW $mem,$dst\t! range" %} opcode(Assembler::lduw_op3); ins_encode(simple_form3_mem_reg( mem, dst ) ); @@ -5797,7 +5808,6 @@ instruct loadI_freg(regF dst, memory mem) %{ match(Set dst (LoadI mem)); ins_cost(MEMORY_REF_COST); - size(4); format %{ "LDF $mem,$dst\t! 
for fitos/fitod" %} opcode(Assembler::ldf_op3); @@ -5876,7 +5886,6 @@ match(Set dst (LoadD mem)); ins_cost(MEMORY_REF_COST); - size(4); format %{ "LDDF $mem,$dst" %} opcode(Assembler::lddf_op3); ins_encode(simple_form3_mem_reg( mem, dst ) ); @@ -5887,7 +5896,6 @@ instruct loadD_unaligned(regD_low dst, memory mem ) %{ match(Set dst (LoadD_unaligned mem)); ins_cost(MEMORY_REF_COST*2+DEFAULT_COST); - size(8); format %{ "LDF $mem ,$dst.hi\t! misaligned double\n" "\tLDF $mem+4,$dst.lo\t!" %} opcode(Assembler::ldf_op3); @@ -5900,7 +5908,6 @@ match(Set dst (LoadF mem)); ins_cost(MEMORY_REF_COST); - size(4); format %{ "LDF $mem,$dst" %} opcode(Assembler::ldf_op3); ins_encode(simple_form3_mem_reg( mem, dst ) ); @@ -6119,7 +6126,6 @@ predicate(AllocatePrefetchInstr == 0); match( PrefetchAllocation mem ); ins_cost(MEMORY_REF_COST); - size(4); format %{ "PREFETCH $mem,2\t! Prefetch allocation" %} opcode(Assembler::prefetch_op3); @@ -6175,7 +6181,6 @@ match(Set mem (StoreB mem src)); ins_cost(MEMORY_REF_COST); - size(4); format %{ "STB $src,$mem\t! byte" %} opcode(Assembler::stb_op3); ins_encode(simple_form3_mem_reg( mem, src ) ); @@ -6186,7 +6191,6 @@ match(Set mem (StoreB mem src)); ins_cost(MEMORY_REF_COST); - size(4); format %{ "STB $src,$mem\t! byte" %} opcode(Assembler::stb_op3); ins_encode(simple_form3_mem_reg( mem, R_G0 ) ); @@ -6197,7 +6201,6 @@ match(Set mem (StoreCM mem src)); ins_cost(MEMORY_REF_COST); - size(4); format %{ "STB $src,$mem\t! CMS card-mark byte 0" %} opcode(Assembler::stb_op3); ins_encode(simple_form3_mem_reg( mem, R_G0 ) ); @@ -6209,7 +6212,6 @@ match(Set mem (StoreC mem src)); ins_cost(MEMORY_REF_COST); - size(4); format %{ "STH $src,$mem\t! short" %} opcode(Assembler::sth_op3); ins_encode(simple_form3_mem_reg( mem, src ) ); @@ -6220,7 +6222,6 @@ match(Set mem (StoreC mem src)); ins_cost(MEMORY_REF_COST); - size(4); format %{ "STH $src,$mem\t! short" %} opcode(Assembler::sth_op3); ins_encode(simple_form3_mem_reg( mem, R_G0 ) ); @@ -6232,7 +6233,6 @@ match(Set mem (StoreI mem src)); ins_cost(MEMORY_REF_COST); - size(4); format %{ "STW $src,$mem" %} opcode(Assembler::stw_op3); ins_encode(simple_form3_mem_reg( mem, src ) ); @@ -6243,7 +6243,6 @@ instruct storeL(memory mem, iRegL src) %{ match(Set mem (StoreL mem src)); ins_cost(MEMORY_REF_COST); - size(4); format %{ "STX $src,$mem\t! long" %} opcode(Assembler::stx_op3); ins_encode(simple_form3_mem_reg( mem, src ) ); @@ -6254,7 +6253,6 @@ match(Set mem (StoreI mem src)); ins_cost(MEMORY_REF_COST); - size(4); format %{ "STW $src,$mem" %} opcode(Assembler::stw_op3); ins_encode(simple_form3_mem_reg( mem, R_G0 ) ); @@ -6265,7 +6263,6 @@ match(Set mem (StoreL mem src)); ins_cost(MEMORY_REF_COST); - size(4); format %{ "STX $src,$mem" %} opcode(Assembler::stx_op3); ins_encode(simple_form3_mem_reg( mem, R_G0 ) ); @@ -6277,7 +6274,6 @@ match(Set mem (StoreI mem src)); ins_cost(MEMORY_REF_COST); - size(4); format %{ "STF $src,$mem\t! after fstoi/fdtoi" %} opcode(Assembler::stf_op3); ins_encode(simple_form3_mem_reg( mem, src ) ); @@ -6288,7 +6284,6 @@ instruct storeP(memory dst, sp_ptr_RegP src) %{ match(Set dst (StoreP dst src)); ins_cost(MEMORY_REF_COST); - size(4); #ifndef _LP64 format %{ "STW $src,$dst\t! ptr" %} @@ -6304,7 +6299,6 @@ instruct storeP0(memory dst, immP0 src) %{ match(Set dst (StoreP dst src)); ins_cost(MEMORY_REF_COST); - size(4); #ifndef _LP64 format %{ "STW $src,$dst\t! 
ptr" %} @@ -6379,7 +6373,6 @@ match(Set mem (StoreD mem src)); ins_cost(MEMORY_REF_COST); - size(4); format %{ "STDF $src,$mem" %} opcode(Assembler::stdf_op3); ins_encode(simple_form3_mem_reg( mem, src ) ); @@ -6390,7 +6383,6 @@ match(Set mem (StoreD mem src)); ins_cost(MEMORY_REF_COST); - size(4); format %{ "STX $src,$mem" %} opcode(Assembler::stx_op3); ins_encode(simple_form3_mem_reg( mem, R_G0 ) ); @@ -6402,7 +6394,6 @@ match(Set mem (StoreF mem src)); ins_cost(MEMORY_REF_COST); - size(4); format %{ "STF $src,$mem" %} opcode(Assembler::stf_op3); ins_encode(simple_form3_mem_reg( mem, src ) ); @@ -6413,7 +6404,6 @@ match(Set mem (StoreF mem src)); ins_cost(MEMORY_REF_COST); - size(4); format %{ "STW $src,$mem\t! storeF0" %} opcode(Assembler::stw_op3); ins_encode(simple_form3_mem_reg( mem, R_G0 ) ); @@ -7068,7 +7058,6 @@ ins_cost(MEMORY_REF_COST); #ifndef _LP64 - size(4); format %{ "LDUW $mem,$dst\t! ptr" %} opcode(Assembler::lduw_op3, 0, REGP_OP); #else @@ -8138,7 +8127,6 @@ effect(DEF dst, USE src); ins_cost(MEMORY_REF_COST); - size(4); format %{ "LDUW $src,$dst\t! MoveF2I" %} opcode(Assembler::lduw_op3); ins_encode(simple_form3_mem_reg( src, dst ) ); @@ -8150,7 +8138,6 @@ effect(DEF dst, USE src); ins_cost(MEMORY_REF_COST); - size(4); format %{ "LDF $src,$dst\t! MoveI2F" %} opcode(Assembler::ldf_op3); ins_encode(simple_form3_mem_reg(src, dst)); @@ -8162,7 +8149,6 @@ effect(DEF dst, USE src); ins_cost(MEMORY_REF_COST); - size(4); format %{ "LDX $src,$dst\t! MoveD2L" %} opcode(Assembler::ldx_op3); ins_encode(simple_form3_mem_reg( src, dst ) ); @@ -8174,7 +8160,6 @@ effect(DEF dst, USE src); ins_cost(MEMORY_REF_COST); - size(4); format %{ "LDDF $src,$dst\t! MoveL2D" %} opcode(Assembler::lddf_op3); ins_encode(simple_form3_mem_reg(src, dst)); @@ -8186,7 +8171,6 @@ effect(DEF dst, USE src); ins_cost(MEMORY_REF_COST); - size(4); format %{ "STF $src,$dst\t! MoveF2I" %} opcode(Assembler::stf_op3); ins_encode(simple_form3_mem_reg(dst, src)); @@ -8198,7 +8182,6 @@ effect(DEF dst, USE src); ins_cost(MEMORY_REF_COST); - size(4); format %{ "STW $src,$dst\t! MoveI2F" %} opcode(Assembler::stw_op3); ins_encode(simple_form3_mem_reg( dst, src ) ); @@ -8210,7 +8193,6 @@ effect(DEF dst, USE src); ins_cost(MEMORY_REF_COST); - size(4); format %{ "STDF $src,$dst\t! MoveD2L" %} opcode(Assembler::stdf_op3); ins_encode(simple_form3_mem_reg(dst, src)); @@ -8222,7 +8204,6 @@ effect(DEF dst, USE src); ins_cost(MEMORY_REF_COST); - size(4); format %{ "STX $src,$dst\t! MoveL2D" %} opcode(Assembler::stx_op3); ins_encode(simple_form3_mem_reg( dst, src ) ); @@ -8427,7 +8408,6 @@ instruct convI2D_mem(regD_low dst, memory mem) %{ match(Set dst (ConvI2D (LoadI mem))); ins_cost(DEFAULT_COST + MEMORY_REF_COST); - size(8); format %{ "LDF $mem,$dst\n\t" "FITOD $dst,$dst" %} opcode(Assembler::ldf_op3, Assembler::fitod_opf); @@ -8468,7 +8448,6 @@ instruct convI2F_mem( regF dst, memory mem ) %{ match(Set dst (ConvI2F (LoadI mem))); ins_cost(DEFAULT_COST + MEMORY_REF_COST); - size(8); format %{ "LDF $mem,$dst\n\t" "FITOS $dst,$dst" %} opcode(Assembler::ldf_op3, Assembler::fitos_opf);
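A long tail of this file's hunks deletes size(4)/size(8) annotations from instructs, and MachSpillCopyNode::size() now simply delegates to MachNode::size(ra_), which measures an instruction by emitting it into a scratch buffer. That is why the do_size/size bookkeeping threaded through the old helpers could be dropped. A schematic of the measure-by-emission idea, with invented stand-in types rather than the real CodeBuffer machinery:

#include <cstddef>
#include <vector>

struct ScratchBuffer { std::vector<unsigned char> bytes; };  // stand-in for CodeBuffer

struct Instr {
  void emit(ScratchBuffer& buf) const {
    buf.bytes.insert(buf.bytes.end(), 4, 0);   // pretend: one 4-byte instruction
  }
  std::size_t size() const {
    ScratchBuffer scratch;
    emit(scratch);                  // emit once into a throwaway buffer...
    return scratch.bytes.size();    // ...and the size falls out, no bookkeeping
  }
};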
--- a/src/cpu/sparc/vm/vm_version_sparc.cpp Thu Feb 25 14:59:44 2016 +0000 +++ b/src/cpu/sparc/vm/vm_version_sparc.cpp Thu Mar 17 17:03:20 2016 +0000 @@ -463,3 +463,37 @@ } return result; } + + +int VM_Version::parse_features(const char* implementation) { + int features = unknown_m; + // Convert to UPPER case before compare. + char* impl = os::strdup_check_oom(implementation); + + for (int i = 0; impl[i] != 0; i++) + impl[i] = (char)toupper((uint)impl[i]); + + if (strstr(impl, "SPARC64") != NULL) { + features |= sparc64_family_m; + } else if (strstr(impl, "SPARC-M") != NULL) { + // M-series SPARC is based on T-series. + features |= (M_family_m | T_family_m); + } else if (strstr(impl, "SPARC-T") != NULL) { + features |= T_family_m; + if (strstr(impl, "SPARC-T1") != NULL) { + features |= T1_model_m; + } + } else { + if (strstr(impl, "SPARC") == NULL) { +#ifndef PRODUCT + // kstat on Solaris 8 virtual machines (branded zones) + // returns "(unsupported)" implementation. Solaris 8 is not + // supported anymore, but include this check to be on the + // safe side. + warning("Can't parse CPU implementation = '%s', assume generic SPARC", impl); +#endif + } + } + os::free((void*)impl); + return features; +}
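The new parse_features() classifies the chip by substring after upper-casing the kstat implementation string: SPARC64 marks the Fujitsu line, SPARC-M implies the T family as well, and SPARC-T gets a special case for T1. A standalone rendering of the same classification (the mask values here are placeholders; the real ones live in vm_version_sparc.hpp):

#include <cctype>
#include <string>

enum {
  unknown_m        = 0,
  sparc64_family_m = 1 << 0,   // placeholder bit assignments
  M_family_m       = 1 << 1,
  T_family_m       = 1 << 2,
  T1_model_m       = 1 << 3
};

int classify(std::string impl) {
  for (char& c : impl) c = (char)std::toupper((unsigned char)c);
  int features = unknown_m;
  if (impl.find("SPARC64") != std::string::npos) {
    features |= sparc64_family_m;
  } else if (impl.find("SPARC-M") != std::string::npos) {
    features |= M_family_m | T_family_m;   // M-series is T-series based
  } else if (impl.find("SPARC-T") != std::string::npos) {
    features |= T_family_m;
    if (impl.find("SPARC-T1") != std::string::npos) {
      features |= T1_model_m;
    }
  }
  return features;   // e.g. classify("SPARC-T4") == T_family_m
}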
--- a/src/cpu/sparc/vm/vm_version_sparc.hpp Thu Feb 25 14:59:44 2016 +0000 +++ b/src/cpu/sparc/vm/vm_version_sparc.hpp Thu Mar 17 17:03:20 2016 +0000 @@ -121,7 +121,7 @@ static bool is_T1_model(int features) { return is_T_family(features) && ((features & T1_model_m) != 0); } static int maximum_niagara1_processor_count() { return 32; } - + static int parse_features(const char* implementation); public: // Initialization static void initialize();
--- a/src/cpu/x86/vm/assembler_x86.cpp Thu Feb 25 14:59:44 2016 +0000 +++ b/src/cpu/x86/vm/assembler_x86.cpp Thu Mar 17 17:03:20 2016 +0000 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -777,6 +777,7 @@ case 0x6E: // movd case 0x7E: // movd case 0xAE: // ldmxcsr, stmxcsr, fxrstor, fxsave, clflush + case 0xFE: // paddd debug_only(has_disp32 = true); break; @@ -926,6 +927,7 @@ ip++; // skip P2, move to opcode // To find the end of instruction (which == end_pc_operand). switch (0xFF & *ip) { + case 0x22: // pinsrd r, r/a, #8 case 0x61: // pcmpestri r, r/a, #8 case 0x70: // pshufd r, r/a, #8 case 0x73: // psrldq r, #8 @@ -3953,6 +3955,83 @@ emit_int8((unsigned char)(0xC0 | encode)); } +void Assembler::palignr(XMMRegister dst, XMMRegister src, int imm8) { + assert(VM_Version::supports_ssse3(), ""); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ false); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); + emit_int8((unsigned char)0x0F); + emit_int8((unsigned char)(0xC0 | encode)); + emit_int8(imm8); +} + +void Assembler::pblendw(XMMRegister dst, XMMRegister src, int imm8) { + assert(VM_Version::supports_sse4_1(), ""); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false); + int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); + emit_int8((unsigned char)0x0E); + emit_int8((unsigned char)(0xC0 | encode)); + emit_int8(imm8); +} + +void Assembler::sha1rnds4(XMMRegister dst, XMMRegister src, int imm8) { + assert(VM_Version::supports_sha(), ""); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false); + int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F_3A, &attributes); + emit_int8((unsigned char)0xCC); + emit_int8((unsigned char)(0xC0 | encode)); + emit_int8((unsigned char)imm8); +} + +void Assembler::sha1nexte(XMMRegister dst, XMMRegister src) { + assert(VM_Version::supports_sha(), ""); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false); + int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes); + emit_int8((unsigned char)0xC8); + emit_int8((unsigned char)(0xC0 | encode)); +} + +void Assembler::sha1msg1(XMMRegister dst, XMMRegister src) { + assert(VM_Version::supports_sha(), ""); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false); + int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes); + emit_int8((unsigned char)0xC9); + emit_int8((unsigned char)(0xC0 | encode)); +} + +void Assembler::sha1msg2(XMMRegister dst, XMMRegister src) { + assert(VM_Version::supports_sha(), ""); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false); + int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes); + emit_int8((unsigned 
char)0xCA); + emit_int8((unsigned char)(0xC0 | encode)); +} + +// xmm0 is implicit additional source to this instruction. +void Assembler::sha256rnds2(XMMRegister dst, XMMRegister src) { + assert(VM_Version::supports_sha(), ""); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false); + int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes); + emit_int8((unsigned char)0xCB); + emit_int8((unsigned char)(0xC0 | encode)); +} + +void Assembler::sha256msg1(XMMRegister dst, XMMRegister src) { + assert(VM_Version::supports_sha(), ""); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false); + int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes); + emit_int8((unsigned char)0xCC); + emit_int8((unsigned char)(0xC0 | encode)); +} + +void Assembler::sha256msg2(XMMRegister dst, XMMRegister src) { + assert(VM_Version::supports_sha(), ""); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false); + int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes); + emit_int8((unsigned char)0xCD); + emit_int8((unsigned char)(0xC0 | encode)); +} + + void Assembler::shll(Register dst, int imm8) { assert(isShiftCount(imm8), "illegal shift count"); int encode = prefix_and_encode(dst->encoding()); @@ -4931,6 +5010,15 @@ emit_int8((unsigned char)(0xC0 | encode)); } +void Assembler::paddd(XMMRegister dst, Address src) { + NOT_LP64(assert(VM_Version::supports_sse2(), "")); + InstructionMark im(this); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8((unsigned char)0xFE); + emit_operand(dst, src); +} + void Assembler::paddq(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); @@ -5611,8 +5699,9 @@ } -void Assembler::vinsertf128h(XMMRegister dst, XMMRegister nds, XMMRegister src) { +void Assembler::vinsertf128(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) { assert(VM_Version::supports_avx(), ""); + assert(imm8 <= 0x01, "imm8: %u", imm8); int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit; InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false); int nds_enc = nds->is_valid() ? nds->encoding() : 0; @@ -5621,11 +5710,12 @@ emit_int8((unsigned char)(0xC0 | encode)); // 0x00 - insert into lower 128 bits // 0x01 - insert into upper 128 bits - emit_int8(0x01); -} - -void Assembler::vinsertf64x4h(XMMRegister dst, XMMRegister nds, XMMRegister src, int value) { + emit_int8(imm8 & 0x01); +} + +void Assembler::vinsertf64x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) { assert(VM_Version::supports_evex(), ""); + assert(imm8 <= 0x01, "imm8: %u", imm8); InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false); int nds_enc = nds->is_valid() ? 
nds->encoding() : 0; int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); @@ -5633,26 +5723,29 @@ emit_int8((unsigned char)(0xC0 | encode)); // 0x00 - insert into lower 256 bits // 0x01 - insert into upper 256 bits - emit_int8(value & 0x01); -} - -void Assembler::vinsertf64x4h(XMMRegister dst, Address src, int value) { + emit_int8(imm8 & 0x01); +} + +void Assembler::vinsertf64x4(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8) { assert(VM_Version::supports_evex(), ""); assert(dst != xnoreg, "sanity"); + assert(imm8 <= 0x01, "imm8: %u", imm8); InstructionMark im(this); InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false); + int nds_enc = nds->is_valid() ? nds->encoding() : 0; attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_64bit); // swap src<->dst for encoding - vex_prefix(src, dst->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); + vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); emit_int8(0x1A); emit_operand(dst, src); // 0x00 - insert into lower 256 bits - // 0x01 - insert into upper 128 bits - emit_int8(value & 0x01); -} - -void Assembler::vinsertf32x4h(XMMRegister dst, XMMRegister nds, XMMRegister src, int value) { + // 0x01 - insert into upper 256 bits + emit_int8(imm8 & 0x01); +} + +void Assembler::vinsertf32x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) { assert(VM_Version::supports_evex(), ""); + assert(imm8 <= 0x03, "imm8: %u", imm8); InstructionAttr attributes(AVX_512bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false); int nds_enc = nds->is_valid() ? nds->encoding() : 0; int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); @@ -5662,57 +5755,64 @@ // 0x01 - insert into q1 128 bits (128..255) // 0x02 - insert into q2 128 bits (256..383) // 0x03 - insert into q3 128 bits (384..511) - emit_int8(value & 0x3); -} - -void Assembler::vinsertf32x4h(XMMRegister dst, Address src, int value) { + emit_int8(imm8 & 0x03); +} + +void Assembler::vinsertf32x4(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8) { assert(VM_Version::supports_avx(), ""); assert(dst != xnoreg, "sanity"); + assert(imm8 <= 0x03, "imm8: %u", imm8); int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit; + int nds_enc = nds->is_valid() ? 
nds->encoding() : 0; InstructionMark im(this); InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false); attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit); // swap src<->dst for encoding - vex_prefix(src, dst->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); + vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); emit_int8(0x18); emit_operand(dst, src); // 0x00 - insert into q0 128 bits (0..127) // 0x01 - insert into q1 128 bits (128..255) // 0x02 - insert into q2 128 bits (256..383) // 0x03 - insert into q3 128 bits (384..511) - emit_int8(value & 0x3); -} - -void Assembler::vinsertf128h(XMMRegister dst, Address src) { + emit_int8(imm8 & 0x03); +} + +void Assembler::vinsertf128(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8) { assert(VM_Version::supports_avx(), ""); assert(dst != xnoreg, "sanity"); + assert(imm8 <= 0x01, "imm8: %u", imm8); int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit; + int nds_enc = nds->is_valid() ? nds->encoding() : 0; InstructionMark im(this); InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false); attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit); // swap src<->dst for encoding - vex_prefix(src, dst->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); + vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); emit_int8(0x18); emit_operand(dst, src); + // 0x00 - insert into lower 128 bits // 0x01 - insert into upper 128 bits - emit_int8(0x01); -} - -void Assembler::vextractf128h(XMMRegister dst, XMMRegister src) { + emit_int8(imm8 & 0x01); +} + +void Assembler::vextractf128(XMMRegister dst, XMMRegister src, uint8_t imm8) { assert(VM_Version::supports_avx(), ""); + assert(imm8 <= 0x01, "imm8: %u", imm8); int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit; InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false); int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); emit_int8(0x19); emit_int8((unsigned char)(0xC0 | encode)); - // 0x00 - insert into lower 128 bits - // 0x01 - insert into upper 128 bits - emit_int8(0x01); -} - -void Assembler::vextractf128h(Address dst, XMMRegister src) { + // 0x00 - extract from lower 128 bits + // 0x01 - extract from upper 128 bits + emit_int8(imm8 & 0x01); +} + +void Assembler::vextractf128(Address dst, XMMRegister src, uint8_t imm8) { assert(VM_Version::supports_avx(), ""); assert(src != xnoreg, "sanity"); + assert(imm8 <= 0x01, "imm8: %u", imm8); int vector_len = VM_Version::supports_evex() ? 
AVX_512bit : AVX_256bit; InstructionMark im(this); InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false); @@ -5720,12 +5820,14 @@ vex_prefix(dst, 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); emit_int8(0x19); emit_operand(src, dst); + // 0x00 - extract from lower 128 bits // 0x01 - extract from upper 128 bits - emit_int8(0x01); -} - -void Assembler::vinserti128h(XMMRegister dst, XMMRegister nds, XMMRegister src) { + emit_int8(imm8 & 0x01); +} + +void Assembler::vinserti128(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) { assert(VM_Version::supports_avx2(), ""); + assert(imm8 <= 0x01, "imm8: %u", imm8); int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit; InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false); int nds_enc = nds->is_valid() ? nds->encoding() : 0; @@ -5734,11 +5836,12 @@ emit_int8((unsigned char)(0xC0 | encode)); // 0x00 - insert into lower 128 bits // 0x01 - insert into upper 128 bits - emit_int8(0x01); -} - -void Assembler::vinserti64x4h(XMMRegister dst, XMMRegister nds, XMMRegister src, int value) { + emit_int8(imm8 & 0x01); +} + +void Assembler::vinserti64x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) { assert(VM_Version::supports_evex(), ""); + assert(imm8 <= 0x01, "imm8: %u", imm8); InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false); int nds_enc = nds->is_valid() ? nds->encoding() : 0; int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); @@ -5746,39 +5849,44 @@ emit_int8((unsigned char)(0xC0 | encode)); // 0x00 - insert into lower 256 bits // 0x01 - insert into upper 256 bits - emit_int8(value & 0x01); -} - -void Assembler::vinserti128h(XMMRegister dst, Address src) { + emit_int8(imm8 & 0x01); +} + +void Assembler::vinserti128(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8) { assert(VM_Version::supports_avx2(), ""); assert(dst != xnoreg, "sanity"); + assert(imm8 <= 0x01, "imm8: %u", imm8); int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit; + int nds_enc = nds->is_valid() ? nds->encoding() : 0; InstructionMark im(this); InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false); attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit); // swap src<->dst for encoding - vex_prefix(src, dst->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); + vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); emit_int8(0x38); emit_operand(dst, src); + // 0x00 - insert into lower 128 bits // 0x01 - insert into upper 128 bits - emit_int8(0x01); -} - -void Assembler::vextracti128h(XMMRegister dst, XMMRegister src) { + emit_int8(imm8 & 0x01); +} + +void Assembler::vextracti128(XMMRegister dst, XMMRegister src, uint8_t imm8) { assert(VM_Version::supports_avx(), ""); + assert(imm8 <= 0x01, "imm8: %u", imm8); int vector_len = VM_Version::supports_evex() ? 
AVX_512bit : AVX_256bit; InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false); int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); emit_int8(0x39); emit_int8((unsigned char)(0xC0 | encode)); - // 0x00 - insert into lower 128 bits - // 0x01 - insert into upper 128 bits - emit_int8(0x01); -} - -void Assembler::vextracti128h(Address dst, XMMRegister src) { + // 0x00 - extract from lower 128 bits + // 0x01 - extract from upper 128 bits + emit_int8(imm8 & 0x01); +} + +void Assembler::vextracti128(Address dst, XMMRegister src, uint8_t imm8) { assert(VM_Version::supports_avx2(), ""); assert(src != xnoreg, "sanity"); + assert(imm8 <= 0x01, "imm8: %u", imm8); int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit; InstructionMark im(this); InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false); @@ -5786,47 +5894,53 @@ vex_prefix(dst, 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); emit_int8(0x39); emit_operand(src, dst); + // 0x00 - extract from lower 128 bits // 0x01 - extract from upper 128 bits - emit_int8(0x01); -} - -void Assembler::vextracti64x4h(XMMRegister dst, XMMRegister src, int value) { + emit_int8(imm8 & 0x01); +} + +void Assembler::vextracti64x4(XMMRegister dst, XMMRegister src, uint8_t imm8) { assert(VM_Version::supports_evex(), ""); + assert(imm8 <= 0x01, "imm8: %u", imm8); InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false); int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); emit_int8(0x3B); emit_int8((unsigned char)(0xC0 | encode)); // 0x00 - extract from lower 256 bits // 0x01 - extract from upper 256 bits - emit_int8(value & 0x01); -} - -void Assembler::vextracti64x2h(XMMRegister dst, XMMRegister src, int value) { + emit_int8(imm8 & 0x01); +} + +void Assembler::vextracti64x2(XMMRegister dst, XMMRegister src, uint8_t imm8) { assert(VM_Version::supports_evex(), ""); + assert(imm8 <= 0x03, "imm8: %u", imm8); InstructionAttr attributes(AVX_512bit, /* vex_w */ !_legacy_mode_dq, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false); int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); emit_int8(0x39); emit_int8((unsigned char)(0xC0 | encode)); + // 0x00 - extract from bits 127:0 // 0x01 - extract from bits 255:128 // 0x02 - extract from bits 383:256 // 0x03 - extract from bits 511:384 - emit_int8(value & 0x3); -} - -void Assembler::vextractf64x4h(XMMRegister dst, XMMRegister src, int value) { + emit_int8(imm8 & 0x03); +} + +void Assembler::vextractf64x4(XMMRegister dst, XMMRegister src, uint8_t imm8) { assert(VM_Version::supports_evex(), ""); + assert(imm8 <= 0x01, "imm8: %u", imm8); InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false); int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); emit_int8(0x1B); emit_int8((unsigned char)(0xC0 | encode)); // 0x00 - extract from lower 256 bits // 0x01 - extract from upper 256 bits - emit_int8(value & 0x1); -} - -void Assembler::vextractf64x4h(Address dst, XMMRegister src, int value) { + emit_int8(imm8 & 0x01); +} + +void 
Assembler::vextractf64x4(Address dst, XMMRegister src, uint8_t imm8) { assert(VM_Version::supports_evex(), ""); assert(src != xnoreg, "sanity"); + assert(imm8 <= 0x01, "imm8: %u", imm8); InstructionMark im(this); InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false); attributes.set_address_attributes(/* tuple_type */ EVEX_T4,/* input_size_in_bits */ EVEX_64bit); @@ -5835,11 +5949,12 @@ emit_operand(src, dst); // 0x00 - extract from lower 256 bits // 0x01 - extract from upper 256 bits - emit_int8(value & 0x01); -} - -void Assembler::vextractf32x4h(XMMRegister dst, XMMRegister src, int value) { + emit_int8(imm8 & 0x01); +} + +void Assembler::vextractf32x4(XMMRegister dst, XMMRegister src, uint8_t imm8) { assert(VM_Version::supports_avx(), ""); + assert(imm8 <= 0x03, "imm8: %u", imm8); int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit; InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false); int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); @@ -5849,12 +5964,13 @@ // 0x01 - extract from bits 255:128 // 0x02 - extract from bits 383:256 // 0x03 - extract from bits 511:384 - emit_int8(value & 0x3); -} - -void Assembler::vextractf32x4h(Address dst, XMMRegister src, int value) { + emit_int8(imm8 & 0x03); +} + +void Assembler::vextractf32x4(Address dst, XMMRegister src, uint8_t imm8) { assert(VM_Version::supports_evex(), ""); assert(src != xnoreg, "sanity"); + assert(imm8 <= 0x03, "imm8: %u", imm8); InstructionMark im(this); InstructionAttr attributes(AVX_512bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false); attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit); @@ -5865,19 +5981,21 @@ // 0x01 - extract from bits 255:128 // 0x02 - extract from bits 383:256 // 0x03 - extract from bits 511:384 - emit_int8(value & 0x3); -} - -void Assembler::vextractf64x2h(XMMRegister dst, XMMRegister src, int value) { + emit_int8(imm8 & 0x03); +} + +void Assembler::vextractf64x2(XMMRegister dst, XMMRegister src, uint8_t imm8) { assert(VM_Version::supports_evex(), ""); + assert(imm8 <= 0x03, "imm8: %u", imm8); InstructionAttr attributes(AVX_512bit, /* vex_w */ !_legacy_mode_dq, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false); int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); emit_int8(0x19); emit_int8((unsigned char)(0xC0 | encode)); + // 0x00 - extract from bits 127:0 // 0x01 - extract from bits 255:128 // 0x02 - extract from bits 383:256 // 0x03 - extract from bits 511:384 - emit_int8(value & 0x3); + emit_int8(imm8 & 0x03); } // duplicate 4-bytes integer data from src into 8 locations in dest
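The reworked insert/extract entry points above take the lane selector as an explicit imm8 operand instead of hard-coding the upper half, and the new asserts reject out-of-range immediates. A minimal semantic model of the 128-bit forms, in illustrative C++ (the struct and function names here are hypothetical modeling code, not VM code):

#include <cstdint>
#include <cstring>

// A YMM register modeled as two 128-bit lanes of four 32-bit words each.
struct Ymm { uint32_t lane[2][4]; };

// vinsertf128 dst, nds, src, imm8: copy nds, then overwrite lane imm8 with src.
static Ymm vinsertf128_model(const Ymm& nds, const uint32_t src[4], int imm8) {
  Ymm dst = nds;
  std::memcpy(dst.lane[imm8 & 0x01], src, 16);  // imm8 & 0x01 selects the lane
  return dst;
}

// vextractf128 dst, src, imm8: read out lane imm8.
static void vextractf128_model(uint32_t dst[4], const Ymm& src, int imm8) {
  std::memcpy(dst, src.lane[imm8 & 0x01], 16);
}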
--- a/src/cpu/x86/vm/assembler_x86.hpp Thu Feb 25 14:59:44 2016 +0000 +++ b/src/cpu/x86/vm/assembler_x86.hpp Thu Mar 17 17:03:20 2016 +0000 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -1672,6 +1672,18 @@ void setb(Condition cc, Register dst); + void palignr(XMMRegister dst, XMMRegister src, int imm8); + void pblendw(XMMRegister dst, XMMRegister src, int imm8); + + void sha1rnds4(XMMRegister dst, XMMRegister src, int imm8); + void sha1nexte(XMMRegister dst, XMMRegister src); + void sha1msg1(XMMRegister dst, XMMRegister src); + void sha1msg2(XMMRegister dst, XMMRegister src); + // xmm0 is implicit additional source to the following instruction. + void sha256rnds2(XMMRegister dst, XMMRegister src); + void sha256msg1(XMMRegister dst, XMMRegister src); + void sha256msg2(XMMRegister dst, XMMRegister src); + void shldl(Register dst, Register src); void shldl(Register dst, Register src, int8_t imm8); @@ -1868,6 +1880,7 @@ void paddb(XMMRegister dst, XMMRegister src); void paddw(XMMRegister dst, XMMRegister src); void paddd(XMMRegister dst, XMMRegister src); + void paddd(XMMRegister dst, Address src); void paddq(XMMRegister dst, XMMRegister src); void vpaddb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); void vpaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); @@ -1958,33 +1971,31 @@ void vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); void vpxor(XMMRegister dst, XMMRegister nds, Address src, int vector_len); - // Copy low 128bit into high 128bit of YMM registers. - void vinsertf128h(XMMRegister dst, XMMRegister nds, XMMRegister src); - void vinserti128h(XMMRegister dst, XMMRegister nds, XMMRegister src); - void vextractf128h(XMMRegister dst, XMMRegister src); - void vextracti128h(XMMRegister dst, XMMRegister src); - - // Load/store high 128bit of YMM registers which does not destroy other half. - void vinsertf128h(XMMRegister dst, Address src); - void vinserti128h(XMMRegister dst, Address src); - void vextractf128h(Address dst, XMMRegister src); - void vextracti128h(Address dst, XMMRegister src); - - // Copy low 256bit into high 256bit of ZMM registers. 
- void vinserti64x4h(XMMRegister dst, XMMRegister nds, XMMRegister src, int value); - void vinsertf64x4h(XMMRegister dst, XMMRegister nds, XMMRegister src, int value); - void vextracti64x4h(XMMRegister dst, XMMRegister src, int value); - void vextractf64x4h(XMMRegister dst, XMMRegister src, int value); - void vextractf64x4h(Address dst, XMMRegister src, int value); - void vinsertf64x4h(XMMRegister dst, Address src, int value); - - // Copy targeted 128bit segments of the ZMM registers - void vextracti64x2h(XMMRegister dst, XMMRegister src, int value); - void vextractf64x2h(XMMRegister dst, XMMRegister src, int value); - void vextractf32x4h(XMMRegister dst, XMMRegister src, int value); - void vextractf32x4h(Address dst, XMMRegister src, int value); - void vinsertf32x4h(XMMRegister dst, XMMRegister nds, XMMRegister src, int value); - void vinsertf32x4h(XMMRegister dst, Address src, int value); + // 128bit copy from/to 256bit (YMM) vector registers + void vinsertf128(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8); + void vinserti128(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8); + void vextractf128(XMMRegister dst, XMMRegister src, uint8_t imm8); + void vextracti128(XMMRegister dst, XMMRegister src, uint8_t imm8); + void vinsertf128(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8); + void vinserti128(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8); + void vextractf128(Address dst, XMMRegister src, uint8_t imm8); + void vextracti128(Address dst, XMMRegister src, uint8_t imm8); + + // 256bit copy from/to 512bit (ZMM) vector registers + void vinserti64x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8); + void vinsertf64x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8); + void vextracti64x4(XMMRegister dst, XMMRegister src, uint8_t imm8); + void vextractf64x4(XMMRegister dst, XMMRegister src, uint8_t imm8); + void vextractf64x4(Address dst, XMMRegister src, uint8_t imm8); + void vinsertf64x4(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8); + + // 128bit copy from/to 256bit (YMM) or 512bit (ZMM) vector registers + void vextracti64x2(XMMRegister dst, XMMRegister src, uint8_t imm8); + void vextractf64x2(XMMRegister dst, XMMRegister src, uint8_t imm8); + void vextractf32x4(XMMRegister dst, XMMRegister src, uint8_t imm8); + void vextractf32x4(Address dst, XMMRegister src, uint8_t imm8); + void vinsertf32x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8); + void vinsertf32x4(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8); // duplicate 4-bytes integer data from src into 8 locations in dest void vpbroadcastd(XMMRegister dst, XMMRegister src);
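For the 512-bit 32x4 forms the immediate instead selects one of four 128-bit quadrants, matching the 0x00-0x03 comments in the .cpp changes above; the same kind of sketch (again hypothetical modeling code, not VM code):

#include <cstdint>
#include <cstring>

// A ZMM register modeled as four 128-bit quadrants.
struct Zmm { uint32_t q[4][4]; };

// vextractf32x4 dst, src, imm8: 0x00..0x03 pick bits 127:0 .. 511:384.
static void vextractf32x4_model(uint32_t dst[4], const Zmm& src, int imm8) {
  std::memcpy(dst, src.q[imm8 & 0x03], 16);
}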
--- a/src/cpu/x86/vm/globals_x86.hpp Thu Feb 25 14:59:44 2016 +0000 +++ b/src/cpu/x86/vm/globals_x86.hpp Thu Mar 17 17:03:20 2016 +0000 @@ -97,6 +97,8 @@ define_pd_global(bool, PreserveFramePointer, false); +define_pd_global(intx, InitArrayShortSize, 8*BytesPerLong); + #define ARCH_FLAGS(develop, product, diagnostic, experimental, notproduct, range, constraint) \ \ develop(bool, IEEEPrecision, true, \
--- a/src/cpu/x86/vm/macroAssembler_x86.cpp Thu Feb 25 14:59:44 2016 +0000 +++ b/src/cpu/x86/vm/macroAssembler_x86.cpp Thu Mar 17 17:03:20 2016 +0000 @@ -3445,7 +3445,7 @@ void MacroAssembler::movdqu(Address dst, XMMRegister src) { if (UseAVX > 2 && !VM_Version::supports_avx512vl() && (src->encoding() > 15)) { - Assembler::vextractf32x4h(dst, src, 0); + Assembler::vextractf32x4(dst, src, 0); } else { Assembler::movdqu(dst, src); } @@ -3453,7 +3453,7 @@ void MacroAssembler::movdqu(XMMRegister dst, Address src) { if (UseAVX > 2 && !VM_Version::supports_avx512vl() && (dst->encoding() > 15)) { - Assembler::vinsertf32x4h(dst, src, 0); + Assembler::vinsertf32x4(dst, dst, src, 0); } else { Assembler::movdqu(dst, src); } @@ -3478,7 +3478,7 @@ void MacroAssembler::vmovdqu(Address dst, XMMRegister src) { if (UseAVX > 2 && !VM_Version::supports_avx512vl() && (src->encoding() > 15)) { - Assembler::vextractf64x4h(dst, src, 0); + vextractf64x4_low(dst, src); } else { Assembler::vmovdqu(dst, src); } @@ -3486,7 +3486,7 @@ void MacroAssembler::vmovdqu(XMMRegister dst, Address src) { if (UseAVX > 2 && !VM_Version::supports_avx512vl() && (dst->encoding() > 15)) { - Assembler::vinsertf64x4h(dst, src, 0); + vinsertf64x4_low(dst, src); } else { Assembler::vmovdqu(dst, src); } @@ -5649,14 +5649,14 @@ // Save upper half of ZMM registers subptr(rsp, 32*num_xmm_regs); for (int n = 0; n < num_xmm_regs; n++) { - vextractf64x4h(Address(rsp, n*32), as_XMMRegister(n), 1); + vextractf64x4_high(Address(rsp, n*32), as_XMMRegister(n)); } } assert(UseAVX > 0, "256 bit vectors are supported only with AVX"); // Save upper half of YMM registers subptr(rsp, 16*num_xmm_regs); for (int n = 0; n < num_xmm_regs; n++) { - vextractf128h(Address(rsp, n*16), as_XMMRegister(n)); + vextractf128_high(Address(rsp, n*16), as_XMMRegister(n)); } } #endif @@ -5665,7 +5665,7 @@ #ifdef _LP64 if (VM_Version::supports_evex()) { for (int n = 0; n < num_xmm_regs; n++) { - vextractf32x4h(Address(rsp, n*16), as_XMMRegister(n), 0); + vextractf32x4(Address(rsp, n*16), as_XMMRegister(n), 0); } } else { for (int n = 0; n < num_xmm_regs; n++) { @@ -5753,7 +5753,7 @@ #ifdef _LP64 if (VM_Version::supports_evex()) { for (int n = 0; n < num_xmm_regs; n++) { - vinsertf32x4h(as_XMMRegister(n), Address(rsp, n*16), 0); + vinsertf32x4(as_XMMRegister(n), as_XMMRegister(n), Address(rsp, n*16), 0); } } else { for (int n = 0; n < num_xmm_regs; n++) { @@ -5771,12 +5771,12 @@ if (MaxVectorSize > 16) { // Restore upper half of YMM registers. for (int n = 0; n < num_xmm_regs; n++) { - vinsertf128h(as_XMMRegister(n), Address(rsp, n*16)); + vinsertf128_high(as_XMMRegister(n), Address(rsp, n*16)); } addptr(rsp, 16*num_xmm_regs); if(UseAVX > 2) { for (int n = 0; n < num_xmm_regs; n++) { - vinsertf64x4h(as_XMMRegister(n), Address(rsp, n*32), 1); + vinsertf64x4_high(as_XMMRegister(n), Address(rsp, n*32)); } addptr(rsp, 32*num_xmm_regs); } @@ -7198,21 +7198,50 @@ } -void MacroAssembler::clear_mem(Register base, Register cnt, Register tmp) { +void MacroAssembler::clear_mem(Register base, Register cnt, Register tmp, bool is_large) { // cnt - number of qwords (8-byte words). // base - start address, qword aligned. 
+ // is_large - set if the optimizer knows cnt is larger than InitArrayShortSize assert(base==rdi, "base register must be edi for rep stos"); assert(tmp==rax, "tmp register must be eax for rep stos"); assert(cnt==rcx, "cnt register must be ecx for rep stos"); + assert(InitArrayShortSize % BytesPerLong == 0, + "InitArrayShortSize should be a multiple of BytesPerLong"); + + Label DONE; xorptr(tmp, tmp); + + if (!is_large) { + Label LOOP, LONG; + cmpptr(cnt, InitArrayShortSize/BytesPerLong); + jccb(Assembler::greater, LONG); + + NOT_LP64(shlptr(cnt, 1);) // convert to number of 32-bit words for 32-bit VM + + decrement(cnt); + jccb(Assembler::negative, DONE); // Zero length + + // Use individual pointer-sized stores for small counts: + BIND(LOOP); + movptr(Address(base, cnt, Address::times_ptr), tmp); + decrement(cnt); + jccb(Assembler::greaterEqual, LOOP); + jmpb(DONE); + + BIND(LONG); + } + + // Use longer rep-prefixed ops for non-small counts: if (UseFastStosb) { - shlptr(cnt,3); // convert to number of bytes + shlptr(cnt, 3); // convert to number of bytes rep_stosb(); } else { - NOT_LP64(shlptr(cnt,1);) // convert to number of dwords for 32-bit VM + NOT_LP64(shlptr(cnt, 1);) // convert to number of 32-bit words for 32-bit VM rep_stos(); } + + BIND(DONE); } #ifdef COMPILER2
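With the default InitArrayShortSize of 8*BytesPerLong = 64 bytes, counts of at most eight qwords take the open-coded store loop and everything else falls through to the rep-prefixed path. In effect the emitted code behaves like the following C++ sketch (clear_mem_model is a hypothetical name; the real loop stores downward through the count register):

#include <cstddef>
#include <cstdint>
#include <cstring>

const std::size_t kInitArrayShortSize = 64;  // 8 * BytesPerLong, see globals_x86.hpp

// Illustrative model of MacroAssembler::clear_mem.
static void clear_mem_model(uint64_t* base, std::size_t cnt, bool is_large) {
  if (!is_large && cnt <= kInitArrayShortSize / sizeof(uint64_t)) {
    for (std::size_t i = cnt; i-- > 0; ) {       // short case: open-coded stores
      base[i] = 0;
    }
    return;
  }
  std::memset(base, 0, cnt * sizeof(uint64_t));  // long case: rep stosb / rep stos
}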
--- a/src/cpu/x86/vm/macroAssembler_x86.hpp Thu Feb 25 14:59:44 2016 +0000 +++ b/src/cpu/x86/vm/macroAssembler_x86.hpp Thu Mar 17 17:03:20 2016 +0000 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -48,7 +48,6 @@ // This is the base routine called by the different versions of call_VM_leaf. The interpreter // may customize this version by overriding it for its purposes (e.g., to save/restore // additional registers when doing a VM call). -#define COMMA , virtual void call_VM_leaf_base( address entry_point, // the entry point @@ -903,35 +902,66 @@ void ldmxcsr(Address src) { Assembler::ldmxcsr(src); } void ldmxcsr(AddressLiteral src); + void fast_sha1(XMMRegister abcd, XMMRegister e0, XMMRegister e1, XMMRegister msg0, + XMMRegister msg1, XMMRegister msg2, XMMRegister msg3, XMMRegister shuf_mask, + Register buf, Register state, Register ofs, Register limit, Register rsp, + bool multi_block); + +#ifdef _LP64 + void fast_sha256(XMMRegister msg, XMMRegister state0, XMMRegister state1, XMMRegister msgtmp0, + XMMRegister msgtmp1, XMMRegister msgtmp2, XMMRegister msgtmp3, XMMRegister msgtmp4, + Register buf, Register state, Register ofs, Register limit, Register rsp, + bool multi_block, XMMRegister shuf_mask); +#else + void fast_sha256(XMMRegister msg, XMMRegister state0, XMMRegister state1, XMMRegister msgtmp0, + XMMRegister msgtmp1, XMMRegister msgtmp2, XMMRegister msgtmp3, XMMRegister msgtmp4, + Register buf, Register state, Register ofs, Register limit, Register rsp, + bool multi_block); +#endif + void fast_exp(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register rax, Register rcx, Register rdx, Register tmp); +#ifdef _LP64 void fast_log(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, - Register rax, Register rcx, Register rdx, Register tmp1 LP64_ONLY(COMMA Register tmp2)); + Register rax, Register rcx, Register rdx, Register tmp1, Register tmp2); void fast_pow(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register rax, Register rcx, - Register rdx NOT_LP64(COMMA Register tmp) LP64_ONLY(COMMA Register tmp1) - LP64_ONLY(COMMA Register tmp2) LP64_ONLY(COMMA Register tmp3) LP64_ONLY(COMMA Register tmp4)); + Register rdx, Register tmp1, Register tmp2, Register tmp3, Register tmp4); void fast_sin(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, - Register rax, Register rbx LP64_ONLY(COMMA Register rcx), Register rdx - LP64_ONLY(COMMA Register tmp1) LP64_ONLY(COMMA Register tmp2) - LP64_ONLY(COMMA Register tmp3) LP64_ONLY(COMMA Register tmp4)); + Register rax, Register rbx, Register rcx, Register rdx, Register tmp1, Register tmp2, + Register tmp3, Register tmp4); void fast_cos(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, - Register rax, Register rcx, Register rdx NOT_LP64(COMMA Register tmp) - LP64_ONLY(COMMA Register r8) LP64_ONLY(COMMA Register r9) - 
LP64_ONLY(COMMA Register r10) LP64_ONLY(COMMA Register r11)); + Register rax, Register rcx, Register rdx, Register tmp1, + Register tmp2, Register tmp3, Register tmp4); +#else + void fast_log(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, + XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, + Register rax, Register rcx, Register rdx, Register tmp1); -#ifndef _LP64 + void fast_pow(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, + XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register rax, Register rcx, + Register rdx, Register tmp); + + void fast_sin(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, + XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, + Register rax, Register rbx, Register rdx); + + void fast_cos(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, + XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, + Register rax, Register rcx, Register rdx, Register tmp); + void libm_sincos_huge(XMMRegister xmm0, XMMRegister xmm1, Register eax, Register ecx, Register edx, Register ebx, Register esi, Register edi, Register ebp, Register esp); + void libm_reduce_pi04l(Register eax, Register ecx, Register edx, Register ebx, Register esi, Register edi, Register ebp, Register esp); #endif @@ -1185,13 +1215,130 @@ void vpxor(XMMRegister dst, XMMRegister src) { Assembler::vpxor(dst, dst, src, true); } void vpxor(XMMRegister dst, Address src) { Assembler::vpxor(dst, dst, src, true); } - // Move packed integer values from low 128 bit to hign 128 bit in 256 bit vector. - void vinserti128h(XMMRegister dst, XMMRegister nds, XMMRegister src) { - if (UseAVX > 1) // vinserti128h is available only in AVX2 - Assembler::vinserti128h(dst, nds, src); - else - Assembler::vinsertf128h(dst, nds, src); + void vinserti128(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) { + if (UseAVX > 1) { // vinserti128 is available only in AVX2 + Assembler::vinserti128(dst, nds, src, imm8); + } else { + Assembler::vinsertf128(dst, nds, src, imm8); + } + } + + void vinserti128(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8) { + if (UseAVX > 1) { // vinserti128 is available only in AVX2 + Assembler::vinserti128(dst, nds, src, imm8); + } else { + Assembler::vinsertf128(dst, nds, src, imm8); + } + } + + void vextracti128(XMMRegister dst, XMMRegister src, uint8_t imm8) { + if (UseAVX > 1) { // vextracti128 is available only in AVX2 + Assembler::vextracti128(dst, src, imm8); + } else { + Assembler::vextractf128(dst, src, imm8); + } + } + + void vextracti128(Address dst, XMMRegister src, uint8_t imm8) { + if (UseAVX > 1) { // vextracti128 is available only in AVX2 + Assembler::vextracti128(dst, src, imm8); + } else { + Assembler::vextractf128(dst, src, imm8); + } + } + + // 128bit copy to/from high 128 bits of 256bit (YMM) vector registers + void vinserti128_high(XMMRegister dst, XMMRegister src) { + vinserti128(dst, dst, src, 1); + } + void vinserti128_high(XMMRegister dst, Address src) { + vinserti128(dst, dst, src, 1); + } + void vextracti128_high(XMMRegister dst, XMMRegister src) { + vextracti128(dst, src, 1); + } + void vextracti128_high(Address dst, XMMRegister src) { + vextracti128(dst, src, 1); + } + void vinsertf128_high(XMMRegister dst, XMMRegister src) { + vinsertf128(dst, dst, src, 1); + } + void vinsertf128_high(XMMRegister dst, Address src) { + vinsertf128(dst, dst, src, 1); + } + void 
vextractf128_high(XMMRegister dst, XMMRegister src) { + vextractf128(dst, src, 1); + } + void vextractf128_high(Address dst, XMMRegister src) { + vextractf128(dst, src, 1); + } + + // 256bit copy to/from high 256 bits of 512bit (ZMM) vector registers + void vinserti64x4_high(XMMRegister dst, XMMRegister src) { + vinserti64x4(dst, dst, src, 1); } + void vinsertf64x4_high(XMMRegister dst, XMMRegister src) { + vinsertf64x4(dst, dst, src, 1); + } + void vextracti64x4_high(XMMRegister dst, XMMRegister src) { + vextracti64x4(dst, src, 1); + } + void vextractf64x4_high(XMMRegister dst, XMMRegister src) { + vextractf64x4(dst, src, 1); + } + void vextractf64x4_high(Address dst, XMMRegister src) { + vextractf64x4(dst, src, 1); + } + void vinsertf64x4_high(XMMRegister dst, Address src) { + vinsertf64x4(dst, dst, src, 1); + } + + // 128bit copy to/from low 128 bits of 256bit (YMM) vector registers + void vinserti128_low(XMMRegister dst, XMMRegister src) { + vinserti128(dst, dst, src, 0); + } + void vinserti128_low(XMMRegister dst, Address src) { + vinserti128(dst, dst, src, 0); + } + void vextracti128_low(XMMRegister dst, XMMRegister src) { + vextracti128(dst, src, 0); + } + void vextracti128_low(Address dst, XMMRegister src) { + vextracti128(dst, src, 0); + } + void vinsertf128_low(XMMRegister dst, XMMRegister src) { + vinsertf128(dst, dst, src, 0); + } + void vinsertf128_low(XMMRegister dst, Address src) { + vinsertf128(dst, dst, src, 0); + } + void vextractf128_low(XMMRegister dst, XMMRegister src) { + vextractf128(dst, src, 0); + } + void vextractf128_low(Address dst, XMMRegister src) { + vextractf128(dst, src, 0); + } + + // 256bit copy to/from low 256 bits of 512bit (ZMM) vector registers + void vinserti64x4_low(XMMRegister dst, XMMRegister src) { + vinserti64x4(dst, dst, src, 0); + } + void vinsertf64x4_low(XMMRegister dst, XMMRegister src) { + vinsertf64x4(dst, dst, src, 0); + } + void vextracti64x4_low(XMMRegister dst, XMMRegister src) { + vextracti64x4(dst, src, 0); + } + void vextractf64x4_low(XMMRegister dst, XMMRegister src) { + vextractf64x4(dst, src, 0); + } + void vextractf64x4_low(Address dst, XMMRegister src) { + vextractf64x4(dst, src, 0); + } + void vinsertf64x4_low(XMMRegister dst, Address src) { + vinsertf64x4(dst, dst, src, 0); + } + // Carry-Less Multiplication Quadword void vpclmulldq(XMMRegister dst, XMMRegister nds, XMMRegister src) { @@ -1284,8 +1431,9 @@ // C2 compiled method's prolog code. void verified_entry(int framesize, int stack_bang_size, bool fp_mode_24b); - // clear memory of size 'cnt' qwords, starting at 'base'. - void clear_mem(Register base, Register cnt, Register rtmp); + // clear memory of size 'cnt' qwords, starting at 'base'; + // if 'is_large' is set, do not try to produce short loop + void clear_mem(Register base, Register cnt, Register rtmp, bool is_large); #ifdef COMPILER2 void string_indexof_char(Register str1, Register cnt1, Register ch, Register result,
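The new _high/_low wrappers make call sites self-describing; for instance, spilling and reloading the upper half of ymm7 around clobbering code now reads as follows (register and offset choices are illustrative, written in stub-generator style where the __ macro expands to the MacroAssembler pointer; the real call sites are in the sharedRuntime changes below):

__ vextractf128_high(Address(rsp, 0), xmm7);  // store bits 255:128 of ymm7
// ... code that may clobber the upper half ...
__ vinsertf128_high(xmm7, Address(rsp, 0));   // restore bits 255:128 of ymm7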
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/cpu/x86/vm/macroAssembler_x86_sha.cpp Thu Mar 17 17:03:20 2016 +0000 @@ -0,0 +1,495 @@ +/* +* Copyright (c) 2016, Intel Corporation. +* +* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +* +* This code is free software; you can redistribute it and/or modify it +* under the terms of the GNU General Public License version 2 only, as +* published by the Free Software Foundation. +* +* This code is distributed in the hope that it will be useful, but WITHOUT +* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +* version 2 for more details (a copy is included in the LICENSE file that +* accompanied this code). +* +* You should have received a copy of the GNU General Public License version +* 2 along with this work; if not, write to the Free Software Foundation, +* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +* +* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +* or visit www.oracle.com if you need additional information or have any +* questions. +* +*/ + +#include "precompiled.hpp" +#include "asm/assembler.hpp" +#include "asm/assembler.inline.hpp" +#include "runtime/stubRoutines.hpp" +#include "macroAssembler_x86.hpp" + +// ofs and limit are used for multi-block byte array. +// int com.sun.security.provider.DigestBase.implCompressMultiBlock(byte[] b, int ofs, int limit) +void MacroAssembler::fast_sha1(XMMRegister abcd, XMMRegister e0, XMMRegister e1, XMMRegister msg0, + XMMRegister msg1, XMMRegister msg2, XMMRegister msg3, XMMRegister shuf_mask, + Register buf, Register state, Register ofs, Register limit, Register rsp, bool multi_block) { + + Label start, done_hash, loop0; + + address upper_word_mask = StubRoutines::x86::upper_word_mask_addr(); + address shuffle_byte_flip_mask = StubRoutines::x86::shuffle_byte_flip_mask_addr(); + + bind(start); + movdqu(abcd, Address(state, 0)); + pinsrd(e0, Address(state, 16), 3); + movdqu(shuf_mask, ExternalAddress(upper_word_mask)); // 0xFFFFFFFF000000000000000000000000 + pand(e0, shuf_mask); + pshufd(abcd, abcd, 0x1B); + movdqu(shuf_mask, ExternalAddress(shuffle_byte_flip_mask)); //0x000102030405060708090a0b0c0d0e0f + + bind(loop0); + // Save hash values for addition after rounds + movdqu(Address(rsp, 0), e0); + movdqu(Address(rsp, 16), abcd); + + + // Rounds 0 - 3 + movdqu(msg0, Address(buf, 0)); + pshufb(msg0, shuf_mask); + paddd(e0, msg0); + movdqa(e1, abcd); + sha1rnds4(abcd, e0, 0); + + // Rounds 4 - 7 + movdqu(msg1, Address(buf, 16)); + pshufb(msg1, shuf_mask); + sha1nexte(e1, msg1); + movdqa(e0, abcd); + sha1rnds4(abcd, e1, 0); + sha1msg1(msg0, msg1); + + // Rounds 8 - 11 + movdqu(msg2, Address(buf, 32)); + pshufb(msg2, shuf_mask); + sha1nexte(e0, msg2); + movdqa(e1, abcd); + sha1rnds4(abcd, e0, 0); + sha1msg1(msg1, msg2); + pxor(msg0, msg2); + + // Rounds 12 - 15 + movdqu(msg3, Address(buf, 48)); + pshufb(msg3, shuf_mask); + sha1nexte(e1, msg3); + movdqa(e0, abcd); + sha1msg2(msg0, msg3); + sha1rnds4(abcd, e1, 0); + sha1msg1(msg2, msg3); + pxor(msg1, msg3); + + // Rounds 16 - 19 + sha1nexte(e0, msg0); + movdqa(e1, abcd); + sha1msg2(msg1, msg0); + sha1rnds4(abcd, e0, 0); + sha1msg1(msg3, msg0); + pxor(msg2, msg0); + + // Rounds 20 - 23 + sha1nexte(e1, msg1); + movdqa(e0, abcd); + sha1msg2(msg2, msg1); + sha1rnds4(abcd, e1, 1); + sha1msg1(msg0, msg1); + pxor(msg3, msg1); + + // Rounds 24 - 27 + sha1nexte(e0, msg2); + movdqa(e1, abcd); + 
sha1msg2(msg3, msg2); + sha1rnds4(abcd, e0, 1); + sha1msg1(msg1, msg2); + pxor(msg0, msg2); + + // Rounds 28 - 31 + sha1nexte(e1, msg3); + movdqa(e0, abcd); + sha1msg2(msg0, msg3); + sha1rnds4(abcd, e1, 1); + sha1msg1(msg2, msg3); + pxor(msg1, msg3); + + // Rounds 32 - 35 + sha1nexte(e0, msg0); + movdqa(e1, abcd); + sha1msg2(msg1, msg0); + sha1rnds4(abcd, e0, 1); + sha1msg1(msg3, msg0); + pxor(msg2, msg0); + + // Rounds 36 - 39 + sha1nexte(e1, msg1); + movdqa(e0, abcd); + sha1msg2(msg2, msg1); + sha1rnds4(abcd, e1, 1); + sha1msg1(msg0, msg1); + pxor(msg3, msg1); + + // Rounds 40 - 43 + sha1nexte(e0, msg2); + movdqa(e1, abcd); + sha1msg2(msg3, msg2); + sha1rnds4(abcd, e0, 2); + sha1msg1(msg1, msg2); + pxor(msg0, msg2); + + // Rounds 44 - 47 + sha1nexte(e1, msg3); + movdqa(e0, abcd); + sha1msg2(msg0, msg3); + sha1rnds4(abcd, e1, 2); + sha1msg1(msg2, msg3); + pxor(msg1, msg3); + + // Rounds 48 - 51 + sha1nexte(e0, msg0); + movdqa(e1, abcd); + sha1msg2(msg1, msg0); + sha1rnds4(abcd, e0, 2); + sha1msg1(msg3, msg0); + pxor(msg2, msg0); + + // Rounds 52 - 55 + sha1nexte(e1, msg1); + movdqa(e0, abcd); + sha1msg2(msg2, msg1); + sha1rnds4(abcd, e1, 2); + sha1msg1(msg0, msg1); + pxor(msg3, msg1); + + // Rounds 56 - 59 + sha1nexte(e0, msg2); + movdqa(e1, abcd); + sha1msg2(msg3, msg2); + sha1rnds4(abcd, e0, 2); + sha1msg1(msg1, msg2); + pxor(msg0, msg2); + + // Rounds 60 - 63 + sha1nexte(e1, msg3); + movdqa(e0, abcd); + sha1msg2(msg0, msg3); + sha1rnds4(abcd, e1, 3); + sha1msg1(msg2, msg3); + pxor(msg1, msg3); + + // Rounds 64 - 67 + sha1nexte(e0, msg0); + movdqa(e1, abcd); + sha1msg2(msg1, msg0); + sha1rnds4(abcd, e0, 3); + sha1msg1(msg3, msg0); + pxor(msg2, msg0); + + // Rounds 68 - 71 + sha1nexte(e1, msg1); + movdqa(e0, abcd); + sha1msg2(msg2, msg1); + sha1rnds4(abcd, e1, 3); + pxor(msg3, msg1); + + // Rounds 72 - 75 + sha1nexte(e0, msg2); + movdqa(e1, abcd); + sha1msg2(msg3, msg2); + sha1rnds4(abcd, e0, 3); + + // Rounds 76 - 79 + sha1nexte(e1, msg3); + movdqa(e0, abcd); + sha1rnds4(abcd, e1, 3); + + // add the current hash values to the previously saved ones + movdqu(msg0, Address(rsp, 0)); + sha1nexte(e0, msg0); + movdqu(msg0, Address(rsp, 16)); + paddd(abcd, msg0); + + if (multi_block) { + // increment data pointer and loop if more to process + addptr(buf, 64); + addptr(ofs, 64); + cmpptr(ofs, limit); + jcc(Assembler::belowEqual, loop0); + movptr(rax, ofs); //return ofs + } + // write hash values back in the correct order + pshufd(abcd, abcd, 0x1b); + movdqu(Address(state, 0), abcd); + pextrd(Address(state, 16), e0, 3); + + bind(done_hash); + +} + +// xmm0 (msg) is used as an implicit argument to sha256rnds2, +// so state0 and state1 can never use the xmm0 register. +// ofs and limit are used for multi-block byte array. 
+// int com.sun.security.provider.DigestBase.implCompressMultiBlock(byte[] b, int ofs, int limit) +#ifdef _LP64 +void MacroAssembler::fast_sha256(XMMRegister msg, XMMRegister state0, XMMRegister state1, XMMRegister msgtmp0, + XMMRegister msgtmp1, XMMRegister msgtmp2, XMMRegister msgtmp3, XMMRegister msgtmp4, + Register buf, Register state, Register ofs, Register limit, Register rsp, + bool multi_block, XMMRegister shuf_mask) { +#else +void MacroAssembler::fast_sha256(XMMRegister msg, XMMRegister state0, XMMRegister state1, XMMRegister msgtmp0, + XMMRegister msgtmp1, XMMRegister msgtmp2, XMMRegister msgtmp3, XMMRegister msgtmp4, + Register buf, Register state, Register ofs, Register limit, Register rsp, + bool multi_block) { +#endif + Label start, done_hash, loop0; + + address K256 = StubRoutines::x86::k256_addr(); + address pshuffle_byte_flip_mask = StubRoutines::x86::pshuffle_byte_flip_mask_addr(); + + bind(start); + movdqu(state0, Address(state, 0)); + movdqu(state1, Address(state, 16)); + + pshufd(state0, state0, 0xB1); + pshufd(state1, state1, 0x1B); + movdqa(msgtmp4, state0); + palignr(state0, state1, 8); + pblendw(state1, msgtmp4, 0xF0); + +#ifdef _LP64 + movdqu(shuf_mask, ExternalAddress(pshuffle_byte_flip_mask)); +#endif + lea(rax, ExternalAddress(K256)); + + bind(loop0); + movdqu(Address(rsp, 0), state0); + movdqu(Address(rsp, 16), state1); + + // Rounds 0-3 + movdqu(msg, Address(buf, 0)); +#ifdef _LP64 + pshufb(msg, shuf_mask); +#else + pshufb(msg, ExternalAddress(pshuffle_byte_flip_mask)); +#endif + movdqa(msgtmp0, msg); + paddd(msg, Address(rax, 0)); + sha256rnds2(state1, state0); + pshufd(msg, msg, 0x0E); + sha256rnds2(state0, state1); + + // Rounds 4-7 + movdqu(msg, Address(buf, 16)); +#ifdef _LP64 + pshufb(msg, shuf_mask); +#else + pshufb(msg, ExternalAddress(pshuffle_byte_flip_mask)); +#endif + movdqa(msgtmp1, msg); + paddd(msg, Address(rax, 16)); + sha256rnds2(state1, state0); + pshufd(msg, msg, 0x0E); + sha256rnds2(state0, state1); + sha256msg1(msgtmp0, msgtmp1); + + // Rounds 8-11 + movdqu(msg, Address(buf, 32)); +#ifdef _LP64 + pshufb(msg, shuf_mask); +#else + pshufb(msg, ExternalAddress(pshuffle_byte_flip_mask)); +#endif + movdqa(msgtmp2, msg); + paddd(msg, Address(rax, 32)); + sha256rnds2(state1, state0); + pshufd(msg, msg, 0x0E); + sha256rnds2(state0, state1); + sha256msg1(msgtmp1, msgtmp2); + + // Rounds 12-15 + movdqu(msg, Address(buf, 48)); +#ifdef _LP64 + pshufb(msg, shuf_mask); +#else + pshufb(msg, ExternalAddress(pshuffle_byte_flip_mask)); +#endif + movdqa(msgtmp3, msg); + paddd(msg, Address(rax, 48)); + sha256rnds2(state1, state0); + movdqa(msgtmp4, msgtmp3); + palignr(msgtmp4, msgtmp2, 4); + paddd(msgtmp0, msgtmp4); + sha256msg2(msgtmp0, msgtmp3); + pshufd(msg, msg, 0x0E); + sha256rnds2(state0, state1); + sha256msg1(msgtmp2, msgtmp3); + + // Rounds 16-19 + movdqa(msg, msgtmp0); + paddd(msg, Address(rax, 64)); + sha256rnds2(state1, state0); + movdqa(msgtmp4, msgtmp0); + palignr(msgtmp4, msgtmp3, 4); + paddd(msgtmp1, msgtmp4); + sha256msg2(msgtmp1, msgtmp0); + pshufd(msg, msg, 0x0E); + sha256rnds2(state0, state1); + sha256msg1(msgtmp3, msgtmp0); + + // Rounds 20-23 + movdqa(msg, msgtmp1); + paddd(msg, Address(rax, 80)); + sha256rnds2(state1, state0); + movdqa(msgtmp4, msgtmp1); + palignr(msgtmp4, msgtmp0, 4); + paddd(msgtmp2, msgtmp4); + sha256msg2(msgtmp2, msgtmp1); + pshufd(msg, msg, 0x0E); + sha256rnds2(state0, state1); + sha256msg1(msgtmp0, msgtmp1); + + // Rounds 24-27 + movdqa(msg, msgtmp2); + paddd(msg, Address(rax, 96)); + sha256rnds2(state1, state0); + 
movdqa(msgtmp4, msgtmp2); + palignr(msgtmp4, msgtmp1, 4); + paddd(msgtmp3, msgtmp4); + sha256msg2(msgtmp3, msgtmp2); + pshufd(msg, msg, 0x0E); + sha256rnds2(state0, state1); + sha256msg1(msgtmp1, msgtmp2); + + // Rounds 28-31 + movdqa(msg, msgtmp3); + paddd(msg, Address(rax, 112)); + sha256rnds2(state1, state0); + movdqa(msgtmp4, msgtmp3); + palignr(msgtmp4, msgtmp2, 4); + paddd(msgtmp0, msgtmp4); + sha256msg2(msgtmp0, msgtmp3); + pshufd(msg, msg, 0x0E); + sha256rnds2(state0, state1); + sha256msg1(msgtmp2, msgtmp3); + + // Rounds 32-35 + movdqa(msg, msgtmp0); + paddd(msg, Address(rax, 128)); + sha256rnds2(state1, state0); + movdqa(msgtmp4, msgtmp0); + palignr(msgtmp4, msgtmp3, 4); + paddd(msgtmp1, msgtmp4); + sha256msg2(msgtmp1, msgtmp0); + pshufd(msg, msg, 0x0E); + sha256rnds2(state0, state1); + sha256msg1(msgtmp3, msgtmp0); + + // Rounds 36-39 + movdqa(msg, msgtmp1); + paddd(msg, Address(rax, 144)); + sha256rnds2(state1, state0); + movdqa(msgtmp4, msgtmp1); + palignr(msgtmp4, msgtmp0, 4); + paddd(msgtmp2, msgtmp4); + sha256msg2(msgtmp2, msgtmp1); + pshufd(msg, msg, 0x0E); + sha256rnds2(state0, state1); + sha256msg1(msgtmp0, msgtmp1); + + // Rounds 40-43 + movdqa(msg, msgtmp2); + paddd(msg, Address(rax, 160)); + sha256rnds2(state1, state0); + movdqa(msgtmp4, msgtmp2); + palignr(msgtmp4, msgtmp1, 4); + paddd(msgtmp3, msgtmp4); + sha256msg2(msgtmp3, msgtmp2); + pshufd(msg, msg, 0x0E); + sha256rnds2(state0, state1); + sha256msg1(msgtmp1, msgtmp2); + + // Rounds 44-47 + movdqa(msg, msgtmp3); + paddd(msg, Address(rax, 176)); + sha256rnds2(state1, state0); + movdqa(msgtmp4, msgtmp3); + palignr(msgtmp4, msgtmp2, 4); + paddd(msgtmp0, msgtmp4); + sha256msg2(msgtmp0, msgtmp3); + pshufd(msg, msg, 0x0E); + sha256rnds2(state0, state1); + sha256msg1(msgtmp2, msgtmp3); + + // Rounds 48-51 + movdqa(msg, msgtmp0); + paddd(msg, Address(rax, 192)); + sha256rnds2(state1, state0); + movdqa(msgtmp4, msgtmp0); + palignr(msgtmp4, msgtmp3, 4); + paddd(msgtmp1, msgtmp4); + sha256msg2(msgtmp1, msgtmp0); + pshufd(msg, msg, 0x0E); + sha256rnds2(state0, state1); + sha256msg1(msgtmp3, msgtmp0); + + // Rounds 52-55 + movdqa(msg, msgtmp1); + paddd(msg, Address(rax, 208)); + sha256rnds2(state1, state0); + movdqa(msgtmp4, msgtmp1); + palignr(msgtmp4, msgtmp0, 4); + paddd(msgtmp2, msgtmp4); + sha256msg2(msgtmp2, msgtmp1); + pshufd(msg, msg, 0x0E); + sha256rnds2(state0, state1); + + // Rounds 56-59 + movdqa(msg, msgtmp2); + paddd(msg, Address(rax, 224)); + sha256rnds2(state1, state0); + movdqa(msgtmp4, msgtmp2); + palignr(msgtmp4, msgtmp1, 4); + paddd(msgtmp3, msgtmp4); + sha256msg2(msgtmp3, msgtmp2); + pshufd(msg, msg, 0x0E); + sha256rnds2(state0, state1); + + // Rounds 60-63 + movdqa(msg, msgtmp3); + paddd(msg, Address(rax, 240)); + sha256rnds2(state1, state0); + pshufd(msg, msg, 0x0E); + sha256rnds2(state0, state1); + movdqu(msg, Address(rsp, 0)); + paddd(state0, msg); + movdqu(msg, Address(rsp, 16)); + paddd(state1, msg); + + if (multi_block) { + // increment data pointer and loop if more to process + addptr(buf, 64); + addptr(ofs, 64); + cmpptr(ofs, limit); + jcc(Assembler::belowEqual, loop0); + movptr(rax, ofs); //return ofs + } + + pshufd(state0, state0, 0x1B); + pshufd(state1, state1, 0xB1); + movdqa(msgtmp4, state0); + pblendw(state0, state1, 0xF0); + palignr(state1, msgtmp4, 8); + + movdqu(Address(state, 0), state0); + movdqu(Address(state, 16), state1); + + bind(done_hash); + +}
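From round 16 on, every four SHA-256 rounds in fast_sha256 above follow one fixed template (the first four groups load msg from buf via pshufb instead, and the schedule updates taper off in the last groups). With k standing for the group index and msgtmpN for that group's schedule register (both illustrative abstractions, not variables in the code), the repeating core is:

// Rounds 4k .. 4k+3 (message-schedule updates elided):
movdqa(msg, msgtmpN);              // next four 32-bit message words
paddd(msg, Address(rax, k * 16));  // add round constants _k256[4k .. 4k+3]
sha256rnds2(state1, state0);       // rounds 4k, 4k+1; xmm0 (msg) is implicit
pshufd(msg, msg, 0x0E);            // move the upper two words into the low half
sha256rnds2(state0, state1);       // rounds 4k+2, 4k+3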
--- a/src/cpu/x86/vm/sharedRuntime_x86_32.cpp Thu Feb 25 14:59:44 2016 +0000 +++ b/src/cpu/x86/vm/sharedRuntime_x86_32.cpp Thu Mar 17 17:03:20 2016 +0000 @@ -208,13 +208,13 @@ __ subptr(rsp, ymm_bytes); // Save upper half of YMM registers for (int n = 0; n < num_xmm_regs; n++) { - __ vextractf128h(Address(rsp, n*16), as_XMMRegister(n)); + __ vextractf128_high(Address(rsp, n*16), as_XMMRegister(n)); } if (UseAVX > 2) { __ subptr(rsp, zmm_bytes); // Save upper half of ZMM registers for (int n = 0; n < num_xmm_regs; n++) { - __ vextractf64x4h(Address(rsp, n*32), as_XMMRegister(n), 1); + __ vextractf64x4_high(Address(rsp, n*32), as_XMMRegister(n)); } } } @@ -304,13 +304,13 @@ if (UseAVX > 2) { // Restore upper half of ZMM registers. for (int n = 0; n < num_xmm_regs; n++) { - __ vinsertf64x4h(as_XMMRegister(n), Address(rsp, n*32), 1); + __ vinsertf64x4_high(as_XMMRegister(n), Address(rsp, n*32)); } __ addptr(rsp, zmm_bytes); } // Restore upper half of YMM registers. for (int n = 0; n < num_xmm_regs; n++) { - __ vinsertf128h(as_XMMRegister(n), Address(rsp, n*16)); + __ vinsertf128_high(as_XMMRegister(n), Address(rsp, n*16)); } __ addptr(rsp, ymm_bytes); }
--- a/src/cpu/x86/vm/sharedRuntime_x86_64.cpp Thu Feb 25 14:59:44 2016 +0000 +++ b/src/cpu/x86/vm/sharedRuntime_x86_64.cpp Thu Mar 17 17:03:20 2016 +0000 @@ -179,13 +179,13 @@ // Save upper half of YMM registers(0..15) int base_addr = XSAVE_AREA_YMM_BEGIN; for (int n = 0; n < 16; n++) { - __ vextractf128h(Address(rsp, base_addr+n*16), as_XMMRegister(n)); + __ vextractf128_high(Address(rsp, base_addr+n*16), as_XMMRegister(n)); } if (VM_Version::supports_evex()) { // Save upper half of ZMM registers(0..15) base_addr = XSAVE_AREA_ZMM_BEGIN; for (int n = 0; n < 16; n++) { - __ vextractf64x4h(Address(rsp, base_addr+n*32), as_XMMRegister(n), 1); + __ vextractf64x4_high(Address(rsp, base_addr+n*32), as_XMMRegister(n)); } // Save full ZMM registers(16..num_xmm_regs) base_addr = XSAVE_AREA_UPPERBANK; @@ -333,13 +333,13 @@ // Restore upper half of YMM registers (0..15) int base_addr = XSAVE_AREA_YMM_BEGIN; for (int n = 0; n < 16; n++) { - __ vinsertf128h(as_XMMRegister(n), Address(rsp, base_addr+n*16)); + __ vinsertf128_high(as_XMMRegister(n), Address(rsp, base_addr+n*16)); } if (VM_Version::supports_evex()) { // Restore upper half of ZMM registers (0..15) base_addr = XSAVE_AREA_ZMM_BEGIN; for (int n = 0; n < 16; n++) { - __ vinsertf64x4h(as_XMMRegister(n), Address(rsp, base_addr+n*32), 1); + __ vinsertf64x4_high(as_XMMRegister(n), Address(rsp, base_addr+n*32)); } // Restore full ZMM registers(16..num_xmm_regs) base_addr = XSAVE_AREA_UPPERBANK;
--- a/src/cpu/x86/vm/stubGenerator_x86_32.cpp Thu Feb 25 14:59:44 2016 +0000 +++ b/src/cpu/x86/vm/stubGenerator_x86_32.cpp Thu Mar 17 17:03:20 2016 +0000 @@ -3068,6 +3068,136 @@ return start; } + address generate_upper_word_mask() { + __ align(64); + StubCodeMark mark(this, "StubRoutines", "upper_word_mask"); + address start = __ pc(); + __ emit_data(0x00000000, relocInfo::none, 0); + __ emit_data(0x00000000, relocInfo::none, 0); + __ emit_data(0x00000000, relocInfo::none, 0); + __ emit_data(0xFFFFFFFF, relocInfo::none, 0); + return start; + } + + address generate_shuffle_byte_flip_mask() { + __ align(64); + StubCodeMark mark(this, "StubRoutines", "shuffle_byte_flip_mask"); + address start = __ pc(); + __ emit_data(0x0c0d0e0f, relocInfo::none, 0); + __ emit_data(0x08090a0b, relocInfo::none, 0); + __ emit_data(0x04050607, relocInfo::none, 0); + __ emit_data(0x00010203, relocInfo::none, 0); + return start; + } + + // ofs and limit are used for multi-block byte array. + // int com.sun.security.provider.DigestBase.implCompressMultiBlock(byte[] b, int ofs, int limit) + address generate_sha1_implCompress(bool multi_block, const char *name) { + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", name); + address start = __ pc(); + + Register buf = rax; + Register state = rdx; + Register ofs = rcx; + Register limit = rdi; + + const Address buf_param(rbp, 8 + 0); + const Address state_param(rbp, 8 + 4); + const Address ofs_param(rbp, 8 + 8); + const Address limit_param(rbp, 8 + 12); + + const XMMRegister abcd = xmm0; + const XMMRegister e0 = xmm1; + const XMMRegister e1 = xmm2; + const XMMRegister msg0 = xmm3; + + const XMMRegister msg1 = xmm4; + const XMMRegister msg2 = xmm5; + const XMMRegister msg3 = xmm6; + const XMMRegister shuf_mask = xmm7; + + __ enter(); + __ subptr(rsp, 8 * wordSize); + if (multi_block) { + __ push(limit); + } + __ movptr(buf, buf_param); + __ movptr(state, state_param); + if (multi_block) { + __ movptr(ofs, ofs_param); + __ movptr(limit, limit_param); + } + + __ fast_sha1(abcd, e0, e1, msg0, msg1, msg2, msg3, shuf_mask, + buf, state, ofs, limit, rsp, multi_block); + + if (multi_block) { + __ pop(limit); + } + __ addptr(rsp, 8 * wordSize); + __ leave(); + __ ret(0); + return start; + } + + address generate_pshuffle_byte_flip_mask() { + __ align(64); + StubCodeMark mark(this, "StubRoutines", "pshuffle_byte_flip_mask"); + address start = __ pc(); + __ emit_data(0x00010203, relocInfo::none, 0); + __ emit_data(0x04050607, relocInfo::none, 0); + __ emit_data(0x08090a0b, relocInfo::none, 0); + __ emit_data(0x0c0d0e0f, relocInfo::none, 0); + return start; + } + + // ofs and limit are used for multi-block byte array. 
+ // int com.sun.security.provider.DigestBase.implCompressMultiBlock(byte[] b, int ofs, int limit) + address generate_sha256_implCompress(bool multi_block, const char *name) { + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", name); + address start = __ pc(); + + Register buf = rbx; + Register state = rsi; + Register ofs = rdx; + Register limit = rcx; + + const Address buf_param(rbp, 8 + 0); + const Address state_param(rbp, 8 + 4); + const Address ofs_param(rbp, 8 + 8); + const Address limit_param(rbp, 8 + 12); + + const XMMRegister msg = xmm0; + const XMMRegister state0 = xmm1; + const XMMRegister state1 = xmm2; + const XMMRegister msgtmp0 = xmm3; + + const XMMRegister msgtmp1 = xmm4; + const XMMRegister msgtmp2 = xmm5; + const XMMRegister msgtmp3 = xmm6; + const XMMRegister msgtmp4 = xmm7; + + __ enter(); + __ subptr(rsp, 8 * wordSize); + handleSOERegisters(true /*saving*/); + __ movptr(buf, buf_param); + __ movptr(state, state_param); + if (multi_block) { + __ movptr(ofs, ofs_param); + __ movptr(limit, limit_param); + } + + __ fast_sha256(msg, state0, state1, msgtmp0, msgtmp1, msgtmp2, msgtmp3, msgtmp4, + buf, state, ofs, limit, rsp, multi_block); + + handleSOERegisters(false); + __ addptr(rsp, 8 * wordSize); + __ leave(); + __ ret(0); + return start; + } // byte swap x86 long address generate_ghash_long_swap_mask() { @@ -3772,6 +3902,19 @@ StubRoutines::_counterMode_AESCrypt = generate_counterMode_AESCrypt_Parallel(); } + if (UseSHA1Intrinsics) { + StubRoutines::x86::_upper_word_mask_addr = generate_upper_word_mask(); + StubRoutines::x86::_shuffle_byte_flip_mask_addr = generate_shuffle_byte_flip_mask(); + StubRoutines::_sha1_implCompress = generate_sha1_implCompress(false, "sha1_implCompress"); + StubRoutines::_sha1_implCompressMB = generate_sha1_implCompress(true, "sha1_implCompressMB"); + } + if (UseSHA256Intrinsics) { + StubRoutines::x86::_k256_adr = (address)StubRoutines::x86::_k256; + StubRoutines::x86::_pshuffle_byte_flip_mask_addr = generate_pshuffle_byte_flip_mask(); + StubRoutines::_sha256_implCompress = generate_sha256_implCompress(false, "sha256_implCompress"); + StubRoutines::_sha256_implCompressMB = generate_sha256_implCompress(true, "sha256_implCompressMB"); + } + // Generate GHASH intrinsics code if (UseGHASHIntrinsics) { StubRoutines::x86::_ghash_long_swap_mask_addr = generate_ghash_long_swap_mask();
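The pshuffle_byte_flip_mask generated above lays down the bytes 03 02 01 00 07 06 05 04 ... in memory, so pshufb with it byte-swaps each 32-bit lane, turning the big-endian SHA-256 message words into host order. A scalar equivalent (illustrative):

#include <cstdint>

// What pshufb with the flip mask does to each 32-bit message word.
static uint32_t flip32(uint32_t w) {
  return (w >> 24) | ((w >> 8) & 0x0000FF00u) |
         ((w << 8) & 0x00FF0000u) | (w << 24);
}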
--- a/src/cpu/x86/vm/stubGenerator_x86_64.cpp Thu Feb 25 14:59:44 2016 +0000 +++ b/src/cpu/x86/vm/stubGenerator_x86_64.cpp Thu Mar 17 17:03:20 2016 +0000 @@ -275,7 +275,7 @@ } if (VM_Version::supports_evex()) { for (int i = xmm_save_first; i <= last_reg; i++) { - __ vextractf32x4h(xmm_save(i), as_XMMRegister(i), 0); + __ vextractf32x4(xmm_save(i), as_XMMRegister(i), 0); } } else { for (int i = xmm_save_first; i <= last_reg; i++) { @@ -393,7 +393,7 @@ // emit the restores for xmm regs if (VM_Version::supports_evex()) { for (int i = xmm_save_first; i <= last_reg; i++) { - __ vinsertf32x4h(as_XMMRegister(i), xmm_save(i), 0); + __ vinsertf32x4(as_XMMRegister(i), as_XMMRegister(i), xmm_save(i), 0); } } else { for (int i = xmm_save_first; i <= last_reg; i++) { @@ -3695,6 +3695,133 @@ return start; } + address generate_upper_word_mask() { + __ align(64); + StubCodeMark mark(this, "StubRoutines", "upper_word_mask"); + address start = __ pc(); + __ emit_data64(0x0000000000000000, relocInfo::none); + __ emit_data64(0xFFFFFFFF00000000, relocInfo::none); + return start; + } + + address generate_shuffle_byte_flip_mask() { + __ align(64); + StubCodeMark mark(this, "StubRoutines", "shuffle_byte_flip_mask"); + address start = __ pc(); + __ emit_data64(0x08090a0b0c0d0e0f, relocInfo::none); + __ emit_data64(0x0001020304050607, relocInfo::none); + return start; + } + + // ofs and limit are used for multi-block byte array. + // int com.sun.security.provider.DigestBase.implCompressMultiBlock(byte[] b, int ofs, int limit) + address generate_sha1_implCompress(bool multi_block, const char *name) { + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", name); + address start = __ pc(); + + Register buf = c_rarg0; + Register state = c_rarg1; + Register ofs = c_rarg2; + Register limit = c_rarg3; + + const XMMRegister abcd = xmm0; + const XMMRegister e0 = xmm1; + const XMMRegister e1 = xmm2; + const XMMRegister msg0 = xmm3; + + const XMMRegister msg1 = xmm4; + const XMMRegister msg2 = xmm5; + const XMMRegister msg3 = xmm6; + const XMMRegister shuf_mask = xmm7; + + __ enter(); + +#ifdef _WIN64 + // save the xmm registers which must be preserved, xmm6-xmm7 + __ subptr(rsp, 4 * wordSize); + __ movdqu(Address(rsp, 0), xmm6); + __ movdqu(Address(rsp, 2 * wordSize), xmm7); +#endif + + __ subptr(rsp, 4 * wordSize); + + __ fast_sha1(abcd, e0, e1, msg0, msg1, msg2, msg3, shuf_mask, + buf, state, ofs, limit, rsp, multi_block); + + __ addptr(rsp, 4 * wordSize); +#ifdef _WIN64 + // restore xmm regs belonging to calling function + __ movdqu(xmm6, Address(rsp, 0)); + __ movdqu(xmm7, Address(rsp, 2 * wordSize)); + __ addptr(rsp, 4 * wordSize); +#endif + + __ leave(); + __ ret(0); + return start; + } + + address generate_pshuffle_byte_flip_mask() { + __ align(64); + StubCodeMark mark(this, "StubRoutines", "pshuffle_byte_flip_mask"); + address start = __ pc(); + __ emit_data64(0x0405060700010203, relocInfo::none); + __ emit_data64(0x0c0d0e0f08090a0b, relocInfo::none); + return start; + } + +// ofs and limit are used for multi-block byte array. 
+// int com.sun.security.provider.DigestBase.implCompressMultiBlock(byte[] b, int ofs, int limit) + address generate_sha256_implCompress(bool multi_block, const char *name) { + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", name); + address start = __ pc(); + + Register buf = c_rarg0; + Register state = c_rarg1; + Register ofs = c_rarg2; + Register limit = c_rarg3; + + const XMMRegister msg = xmm0; + const XMMRegister state0 = xmm1; + const XMMRegister state1 = xmm2; + const XMMRegister msgtmp0 = xmm3; + + const XMMRegister msgtmp1 = xmm4; + const XMMRegister msgtmp2 = xmm5; + const XMMRegister msgtmp3 = xmm6; + const XMMRegister msgtmp4 = xmm7; + + const XMMRegister shuf_mask = xmm8; + + __ enter(); +#ifdef _WIN64 + // save the xmm registers which must be preserved 6-7 + __ subptr(rsp, 6 * wordSize); + __ movdqu(Address(rsp, 0), xmm6); + __ movdqu(Address(rsp, 2 * wordSize), xmm7); + __ movdqu(Address(rsp, 4 * wordSize), xmm8); +#endif + + __ subptr(rsp, 4 * wordSize); + + __ fast_sha256(msg, state0, state1, msgtmp0, msgtmp1, msgtmp2, msgtmp3, msgtmp4, + buf, state, ofs, limit, rsp, multi_block, shuf_mask); + + __ addptr(rsp, 4 * wordSize); +#ifdef _WIN64 + // restore xmm regs belonging to calling function + __ movdqu(xmm6, Address(rsp, 0)); + __ movdqu(xmm7, Address(rsp, 2 * wordSize)); + __ movdqu(xmm8, Address(rsp, 4 * wordSize)); + __ addptr(rsp, 6 * wordSize); +#endif + __ leave(); + __ ret(0); + return start; + } + // This is a version of CTR/AES crypt which does 6 blocks in a loop at a time // to hide instruction latency // @@ -4974,6 +5101,19 @@ StubRoutines::_counterMode_AESCrypt = generate_counterMode_AESCrypt_Parallel(); } + if (UseSHA1Intrinsics) { + StubRoutines::x86::_upper_word_mask_addr = generate_upper_word_mask(); + StubRoutines::x86::_shuffle_byte_flip_mask_addr = generate_shuffle_byte_flip_mask(); + StubRoutines::_sha1_implCompress = generate_sha1_implCompress(false, "sha1_implCompress"); + StubRoutines::_sha1_implCompressMB = generate_sha1_implCompress(true, "sha1_implCompressMB"); + } + if (UseSHA256Intrinsics) { + StubRoutines::x86::_k256_adr = (address)StubRoutines::x86::_k256; + StubRoutines::x86::_pshuffle_byte_flip_mask_addr = generate_pshuffle_byte_flip_mask(); + StubRoutines::_sha256_implCompress = generate_sha256_implCompress(false, "sha256_implCompress"); + StubRoutines::_sha256_implCompressMB = generate_sha256_implCompress(true, "sha256_implCompressMB"); + } + // Generate GHASH intrinsics code if (UseGHASHIntrinsics) { StubRoutines::x86::_ghash_long_swap_mask_addr = generate_ghash_long_swap_mask();
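Both generators back the Java method named in their comments. A rough model of the multi-block contract they implement (illustrative C++, assuming the 64-byte block size of SHA-1/SHA-256; compress_block stands in for the fast_sha1/fast_sha256 kernels and is not a VM function):

    #include <cstdint>

    // Sketch of int DigestBase.implCompressMultiBlock(byte[] b, int ofs, int limit):
    // compress successive 64-byte blocks while ofs <= limit, return the new ofs.
    int implCompressMultiBlock(const uint8_t* b, int ofs, int limit,
                               void (*compress_block)(const uint8_t*)) {
      for (; ofs <= limit; ofs += 64) {  // one 64-byte block per iteration
        compress_block(b + ofs);
      }
      return ofs;                        // caller resumes from the returned offset
    }

In the single-block variants (multi_block == false) ofs and limit are never consumed, which is why both generators guard the two movptr loads behind the multi_block flag.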
--- a/src/cpu/x86/vm/stubRoutines_x86.cpp Thu Feb 25 14:59:44 2016 +0000 +++ b/src/cpu/x86/vm/stubRoutines_x86.cpp Thu Mar 17 17:03:20 2016 +0000 @@ -29,6 +29,12 @@ #include "runtime/thread.inline.hpp" #include "crc32c.h" +#ifdef _MSC_VER +#define ALIGNED_(x) __declspec(align(x)) +#else +#define ALIGNED_(x) __attribute__ ((aligned(x))) +#endif + // Implementation of the platform-specific part of StubRoutines - for // a description of how to extend it, see the stubRoutines.hpp file. @@ -37,6 +43,10 @@ address StubRoutines::x86::_counter_shuffle_mask_addr = NULL; address StubRoutines::x86::_ghash_long_swap_mask_addr = NULL; address StubRoutines::x86::_ghash_byte_swap_mask_addr = NULL; +address StubRoutines::x86::_upper_word_mask_addr = NULL; +address StubRoutines::x86::_shuffle_byte_flip_mask_addr = NULL; +address StubRoutines::x86::_k256_adr = NULL; +address StubRoutines::x86::_pshuffle_byte_flip_mask_addr = NULL; uint64_t StubRoutines::x86::_crc_by128_masks[] = { @@ -236,3 +246,23 @@ _crc32c_table = (juint*)pclmulqdq_table; } } + +ALIGNED_(64) juint StubRoutines::x86::_k256[] = +{ + 0x428a2f98UL, 0x71374491UL, 0xb5c0fbcfUL, 0xe9b5dba5UL, + 0x3956c25bUL, 0x59f111f1UL, 0x923f82a4UL, 0xab1c5ed5UL, + 0xd807aa98UL, 0x12835b01UL, 0x243185beUL, 0x550c7dc3UL, + 0x72be5d74UL, 0x80deb1feUL, 0x9bdc06a7UL, 0xc19bf174UL, + 0xe49b69c1UL, 0xefbe4786UL, 0x0fc19dc6UL, 0x240ca1ccUL, + 0x2de92c6fUL, 0x4a7484aaUL, 0x5cb0a9dcUL, 0x76f988daUL, + 0x983e5152UL, 0xa831c66dUL, 0xb00327c8UL, 0xbf597fc7UL, + 0xc6e00bf3UL, 0xd5a79147UL, 0x06ca6351UL, 0x14292967UL, + 0x27b70a85UL, 0x2e1b2138UL, 0x4d2c6dfcUL, 0x53380d13UL, + 0x650a7354UL, 0x766a0abbUL, 0x81c2c92eUL, 0x92722c85UL, + 0xa2bfe8a1UL, 0xa81a664bUL, 0xc24b8b70UL, 0xc76c51a3UL, + 0xd192e819UL, 0xd6990624UL, 0xf40e3585UL, 0x106aa070UL, + 0x19a4c116UL, 0x1e376c08UL, 0x2748774cUL, 0x34b0bcb5UL, + 0x391c0cb3UL, 0x4ed8aa4aUL, 0x5b9cca4fUL, 0x682e6ff3UL, + 0x748f82eeUL, 0x78a5636fUL, 0x84c87814UL, 0x8cc70208UL, + 0x90befffaUL, 0xa4506cebUL, 0xbef9a3f7UL, 0xc67178f2UL +};
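The appended _k256 table is the standard SHA-256 round-constant schedule from FIPS 180-4 (section 4.2.2): the first 32 bits of the fractional parts of the cube roots of the first 64 primes. A stand-alone sanity sketch that regenerates it (not VM code; double rounding could in principle disturb a final bit, so compare against the published table rather than trusting the arithmetic blindly):

    #include <cmath>
    #include <cstdint>
    #include <cstdio>

    int main() {
      int primes[64], n = 0;
      for (int c = 2; n < 64; c++) {              // first 64 primes: 2, 3, 5, ...
        bool p = true;
        for (int d = 2; d * d <= c; d++) if (c % d == 0) { p = false; break; }
        if (p) primes[n++] = c;
      }
      for (int i = 0; i < 64; i++) {
        double r = std::cbrt((double)primes[i]);
        uint32_t k = (uint32_t)((r - std::floor(r)) * 4294967296.0);  // frac * 2^32
        std::printf("0x%08xUL%s", k, (i % 4 == 3) ? ",\n" : ", ");    // 4 per row
      }
      return 0;
    }

First entry check: cbrt(2) = 1.25992104..., fractional part times 2^32 truncates to 0x428a2f98, matching the table.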
--- a/src/cpu/x86/vm/stubRoutines_x86.hpp Thu Feb 25 14:59:44 2016 +0000 +++ b/src/cpu/x86/vm/stubRoutines_x86.hpp Thu Mar 17 17:03:20 2016 +0000 @@ -46,6 +46,17 @@ static address _ghash_long_swap_mask_addr; static address _ghash_byte_swap_mask_addr; + // upper word mask for sha1 + static address _upper_word_mask_addr; + // byte flip mask for sha1 + static address _shuffle_byte_flip_mask_addr; + + //k256 table for sha256 + static juint _k256[]; + static address _k256_adr; + // byte flip mask for sha256 + static address _pshuffle_byte_flip_mask_addr; + public: static address verify_mxcsr_entry() { return _verify_mxcsr_entry; } static address key_shuffle_mask_addr() { return _key_shuffle_mask_addr; } @@ -53,5 +64,9 @@ static address crc_by128_masks_addr() { return (address)_crc_by128_masks; } static address ghash_long_swap_mask_addr() { return _ghash_long_swap_mask_addr; } static address ghash_byte_swap_mask_addr() { return _ghash_byte_swap_mask_addr; } + static address upper_word_mask_addr() { return _upper_word_mask_addr; } + static address shuffle_byte_flip_mask_addr() { return _shuffle_byte_flip_mask_addr; } + static address k256_addr() { return _k256_adr; } + static address pshuffle_byte_flip_mask_addr() { return _pshuffle_byte_flip_mask_addr; } static void generate_CRC32C_table(bool is_pclmulqdq_supported); #endif // CPU_X86_VM_STUBROUTINES_X86_32_HPP
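The new accessors follow the file's existing getter pattern; generated kernels would reach the tables through these addresses. A hypothetical use site, for orientation only (the getter names mirror the ones above, but the instruction sequence is illustrative, not quoted from the assembler):

    // e.g. inside a SHA-256 kernel (illustrative):
    //   __ lea(rax, ExternalAddress(StubRoutines::x86::k256_addr()));
    //   __ movdqu(xmm_k, Address(rax, round_group * 16));  // four juint constants per load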
--- a/src/cpu/x86/vm/vmStructs_x86.hpp Thu Feb 25 14:59:44 2016 +0000 +++ b/src/cpu/x86/vm/vmStructs_x86.hpp Thu Mar 17 17:03:20 2016 +0000 @@ -68,10 +68,11 @@ declare_constant(VM_Version::CPU_AVX512DQ) \ declare_constant(VM_Version::CPU_AVX512PF) \ declare_constant(VM_Version::CPU_AVX512ER) \ - declare_constant(VM_Version::CPU_AVX512CD) \ - declare_constant(VM_Version::CPU_AVX512BW) + declare_constant(VM_Version::CPU_AVX512CD) #define VM_LONG_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) \ - declare_preprocessor_constant("VM_Version::CPU_AVX512VL", CPU_AVX512VL) + declare_preprocessor_constant("VM_Version::CPU_AVX512BW", CPU_AVX512BW) \ + declare_preprocessor_constant("VM_Version::CPU_AVX512VL", CPU_AVX512VL) \ + declare_preprocessor_constant("VM_Version::CPU_SHA", CPU_SHA) #endif // CPU_X86_VM_VMSTRUCTS_X86_HPP
--- a/src/cpu/x86/vm/vm_version_x86.cpp Thu Feb 25 14:59:44 2016 +0000 +++ b/src/cpu/x86/vm/vm_version_x86.cpp Thu Mar 17 17:03:20 2016 +0000 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -385,7 +385,7 @@ __ movdl(xmm0, rcx); __ pshufd(xmm0, xmm0, 0x00); - __ vinsertf128h(xmm0, xmm0, xmm0); + __ vinsertf128_high(xmm0, xmm0); __ vmovdqu(xmm7, xmm0); #ifdef _LP64 __ vmovdqu(xmm8, xmm0); @@ -577,7 +577,7 @@ } char buf[256]; - jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", + jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", cores_per_cpu(), threads_per_core(), cpu_family(), _model, _stepping, (supports_cmov() ? ", cmov" : ""), @@ -608,7 +608,8 @@ (supports_bmi1() ? ", bmi1" : ""), (supports_bmi2() ? ", bmi2" : ""), (supports_adx() ? ", adx" : ""), - (supports_evex() ? ", evex" : "")); + (supports_evex() ? ", evex" : ""), + (supports_sha() ? ", sha" : "")); _features_string = os::strdup(buf); // UseSSE is set to the smaller of what hardware supports and what @@ -730,17 +731,29 @@ FLAG_SET_DEFAULT(UseGHASHIntrinsics, false); } - if (UseSHA) { + if (supports_sha()) { + if (FLAG_IS_DEFAULT(UseSHA)) { + UseSHA = true; + } + } else if (UseSHA) { warning("SHA instructions are not available on this CPU"); FLAG_SET_DEFAULT(UseSHA, false); } - if (UseSHA1Intrinsics) { + if (UseSHA) { + if (FLAG_IS_DEFAULT(UseSHA1Intrinsics)) { + FLAG_SET_DEFAULT(UseSHA1Intrinsics, true); + } + } else if (UseSHA1Intrinsics) { warning("Intrinsics for SHA-1 crypto hash functions not available on this CPU."); FLAG_SET_DEFAULT(UseSHA1Intrinsics, false); } - if (UseSHA256Intrinsics) { + if (UseSHA) { + if (FLAG_IS_DEFAULT(UseSHA256Intrinsics)) { + FLAG_SET_DEFAULT(UseSHA256Intrinsics, true); + } + } else if (UseSHA256Intrinsics) { warning("Intrinsics for SHA-224 and SHA-256 crypto hash functions not available on this CPU."); FLAG_SET_DEFAULT(UseSHA256Intrinsics, false); } @@ -750,6 +763,10 @@ FLAG_SET_DEFAULT(UseSHA512Intrinsics, false); } + if (!(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics)) { + FLAG_SET_DEFAULT(UseSHA, false); + } + if (UseAdler32Intrinsics) { warning("Adler32Intrinsics not available on this CPU."); FLAG_SET_DEFAULT(UseAdler32Intrinsics, false);
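Summarizing the reworked flag ergonomics above (a reading of the hunk, not new behavior):

    // supports_sha() && UseSHA at default        -> UseSHA turned on
    // UseSHA && UseSHA1Intrinsics at default     -> SHA-1 intrinsics turned on
    // UseSHA && UseSHA256Intrinsics at default   -> SHA-224/256 intrinsics turned on
    // UseSHA512Intrinsics                        -> still rejected on x86
    // no SHA*Intrinsics enabled after all that   -> UseSHA forced back off
    // explicit flags the CPU cannot honor        -> warning + reset, as before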
--- a/src/cpu/x86/vm/vm_version_x86.hpp Thu Feb 25 14:59:44 2016 +0000 +++ b/src/cpu/x86/vm/vm_version_x86.hpp Thu Mar 17 17:03:20 2016 +0000 @@ -221,7 +221,7 @@ avx512pf : 1, avx512er : 1, avx512cd : 1, - : 1, + sha : 1, avx512bw : 1, avx512vl : 1; } bits; @@ -282,11 +282,13 @@ CPU_AVX512DQ = (1 << 27), CPU_AVX512PF = (1 << 28), CPU_AVX512ER = (1 << 29), - CPU_AVX512CD = (1 << 30), - CPU_AVX512BW = (1 << 31) + CPU_AVX512CD = (1 << 30) + // Keeping sign bit 31 unassigned. }; -#define CPU_AVX512VL UCONST64(0x100000000) // EVEX instructions with smaller vector length : enums are limited to 32bit +#define CPU_AVX512BW ((uint64_t)UCONST64(0x100000000)) // enums are limited to 31 bit +#define CPU_AVX512VL ((uint64_t)UCONST64(0x200000000)) // EVEX instructions with smaller vector length +#define CPU_SHA ((uint64_t)UCONST64(0x400000000)) // SHA instructions enum Extended_Family { // AMD @@ -516,6 +518,8 @@ result |= CPU_ADX; if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0) result |= CPU_BMI2; + if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0) + result |= CPU_SHA; if(_cpuid_info.ext_cpuid1_ecx.bits.lzcnt_intel != 0) result |= CPU_LZCNT; // for Intel, ecx.bits.misalignsse bit (bit 8) indicates support for prefetchw @@ -721,6 +725,7 @@ static bool supports_avx512nobw() { return (supports_evex() && !supports_avx512bw()); } static bool supports_avx256only() { return (supports_avx2() && !supports_evex()); } static bool supports_avxonly() { return ((supports_avx2() || supports_avx()) && !supports_evex()); } + static bool supports_sha() { return (_features & CPU_SHA) != 0; } // Intel features static bool is_intel_family_core() { return is_intel() && extended_cpu_family() == CPU_FAMILY_INTEL_CORE; }
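The new sef_cpuid7_ebx bit sits where both Intel and AMD report the SHA extensions: CPUID leaf 7, sub-leaf 0, EBX bit 29. A stand-alone probe under GCC/Clang on x86 might look like this (sketch only; a robust version would first confirm the maximum basic leaf is at least 7):

    #include <cstdint>

    bool cpu_supports_sha() {
      uint32_t eax = 7, ebx = 0, ecx = 0, edx = 0;  // leaf 7, sub-leaf 0
      __asm__ __volatile__("cpuid"
                           : "+a"(eax), "=b"(ebx), "+c"(ecx), "=d"(edx));
      return (ebx >> 29) & 1;                       // EBX bit 29 == SHA extensions
    }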
--- a/src/cpu/x86/vm/x86.ad Thu Feb 25 14:59:44 2016 +0000 +++ b/src/cpu/x86/vm/x86.ad Thu Mar 17 17:03:20 2016 +0000 @@ -3179,13 +3179,13 @@ "punpcklbw $dst,$dst\n\t" "pshuflw $dst,$dst,0x00\n\t" "punpcklqdq $dst,$dst\n\t" - "vinserti128h $dst,$dst,$dst\t! replicate32B" %} + "vinserti128_high $dst,$dst\t! replicate32B" %} ins_encode %{ __ movdl($dst$$XMMRegister, $src$$Register); __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); - __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); + __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); %} ins_pipe( pipe_slow ); %} @@ -3196,12 +3196,12 @@ format %{ "punpcklbw $dst,$mem\n\t" "pshuflw $dst,$dst,0x00\n\t" "punpcklqdq $dst,$dst\n\t" - "vinserti128h $dst,$dst,$dst\t! replicate32B" %} + "vinserti128_high $dst,$dst\t! replicate32B" %} ins_encode %{ __ punpcklbw($dst$$XMMRegister, $mem$$Address); __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); - __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); + __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); %} ins_pipe( pipe_slow ); %} @@ -3223,11 +3223,11 @@ match(Set dst (ReplicateB con)); format %{ "movq $dst,[$constantaddress]\n\t" "punpcklqdq $dst,$dst\n\t" - "vinserti128h $dst,$dst,$dst\t! lreplicate32B($con)" %} + "vinserti128_high $dst,$dst\t! lreplicate32B($con)" %} ins_encode %{ __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); - __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); + __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); %} ins_pipe( pipe_slow ); %} @@ -3298,12 +3298,12 @@ format %{ "movd $dst,$src\n\t" "pshuflw $dst,$dst,0x00\n\t" "punpcklqdq $dst,$dst\n\t" - "vinserti128h $dst,$dst,$dst\t! replicate16S" %} + "vinserti128_high $dst,$dst\t! replicate16S" %} ins_encode %{ __ movdl($dst$$XMMRegister, $src$$Register); __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); - __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); + __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); %} ins_pipe( pipe_slow ); %} @@ -3313,11 +3313,11 @@ match(Set dst (ReplicateS (LoadS mem))); format %{ "pshuflw $dst,$mem,0x00\n\t" "punpcklqdq $dst,$dst\n\t" - "vinserti128h $dst,$dst,$dst\t! replicate16S" %} + "vinserti128_high $dst,$dst\t! replicate16S" %} ins_encode %{ __ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00); __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); - __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); + __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); %} ins_pipe( pipe_slow ); %} @@ -3327,11 +3327,11 @@ match(Set dst (ReplicateS con)); format %{ "movq $dst,[$constantaddress]\n\t" "punpcklqdq $dst,$dst\n\t" - "vinserti128h $dst,$dst,$dst\t! replicate16S($con)" %} + "vinserti128_high $dst,$dst\t! 
replicate16S($con)" %} ins_encode %{ __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); - __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); + __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); %} ins_pipe( pipe_slow ); %} @@ -3363,11 +3363,11 @@ match(Set dst (ReplicateI src)); format %{ "movd $dst,$src\n\t" "pshufd $dst,$dst,0x00\n\t" - "vinserti128h $dst,$dst,$dst\t! replicate8I" %} + "vinserti128_high $dst,$dst\t! replicate8I" %} ins_encode %{ __ movdl($dst$$XMMRegister, $src$$Register); __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); - __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); + __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); %} ins_pipe( pipe_slow ); %} @@ -3376,10 +3376,10 @@ predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); match(Set dst (ReplicateI (LoadI mem))); format %{ "pshufd $dst,$mem,0x00\n\t" - "vinserti128h $dst,$dst,$dst\t! replicate8I" %} + "vinserti128_high $dst,$dst\t! replicate8I" %} ins_encode %{ __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); - __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); + __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); %} ins_pipe( pipe_slow ); %} @@ -3401,11 +3401,11 @@ match(Set dst (ReplicateI con)); format %{ "movq $dst,[$constantaddress]\t! replicate8I($con)\n\t" "punpcklqdq $dst,$dst\n\t" - "vinserti128h $dst,$dst,$dst" %} + "vinserti128_high $dst,$dst" %} ins_encode %{ __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); - __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); + __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); %} ins_pipe( pipe_slow ); %} @@ -3430,11 +3430,11 @@ match(Set dst (ReplicateL src)); format %{ "movdq $dst,$src\n\t" "punpcklqdq $dst,$dst\n\t" - "vinserti128h $dst,$dst,$dst\t! replicate4L" %} + "vinserti128_high $dst,$dst\t! replicate4L" %} ins_encode %{ __ movdq($dst$$XMMRegister, $src$$Register); __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); - __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); + __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); %} ins_pipe( pipe_slow ); %} @@ -3447,13 +3447,13 @@ "movdl $tmp,$src.hi\n\t" "punpckldq $dst,$tmp\n\t" "punpcklqdq $dst,$dst\n\t" - "vinserti128h $dst,$dst,$dst\t! replicate4L" %} + "vinserti128_high $dst,$dst\t! replicate4L" %} ins_encode %{ __ movdl($dst$$XMMRegister, $src$$Register); __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); - __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); + __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); %} ins_pipe( pipe_slow ); %} @@ -3464,11 +3464,11 @@ match(Set dst (ReplicateL con)); format %{ "movq $dst,[$constantaddress]\n\t" "punpcklqdq $dst,$dst\n\t" - "vinserti128h $dst,$dst,$dst\t! replicate4L($con)" %} + "vinserti128_high $dst,$dst\t! 
replicate4L($con)" %} ins_encode %{ __ movq($dst$$XMMRegister, $constantaddress($con)); __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); - __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); + __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); %} ins_pipe( pipe_slow ); %} @@ -3478,11 +3478,11 @@ match(Set dst (ReplicateL (LoadL mem))); format %{ "movq $dst,$mem\n\t" "punpcklqdq $dst,$dst\n\t" - "vinserti128h $dst,$dst,$dst\t! replicate4L" %} + "vinserti128_high $dst,$dst\t! replicate4L" %} ins_encode %{ __ movq($dst$$XMMRegister, $mem$$Address); __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); - __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); + __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); %} ins_pipe( pipe_slow ); %} @@ -3511,10 +3511,10 @@ predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); match(Set dst (ReplicateF src)); format %{ "pshufd $dst,$src,0x00\n\t" - "vinsertf128h $dst,$dst,$dst\t! replicate8F" %} + "vinsertf128_high $dst,$dst\t! replicate8F" %} ins_encode %{ __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); - __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); + __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); %} ins_pipe( pipe_slow ); %} @@ -3523,10 +3523,10 @@ predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); match(Set dst (ReplicateF (LoadF mem))); format %{ "pshufd $dst,$mem,0x00\n\t" - "vinsertf128h $dst,$dst,$dst\t! replicate8F" %} + "vinsertf128_high $dst,$dst\t! replicate8F" %} ins_encode %{ __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); - __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); + __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); %} ins_pipe( pipe_slow ); %} @@ -3576,10 +3576,10 @@ predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); match(Set dst (ReplicateD src)); format %{ "pshufd $dst,$src,0x44\n\t" - "vinsertf128h $dst,$dst,$dst\t! replicate4D" %} + "vinsertf128_high $dst,$dst\t! replicate4D" %} ins_encode %{ __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44); - __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); + __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); %} ins_pipe( pipe_slow ); %} @@ -3588,10 +3588,10 @@ predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); match(Set dst (ReplicateD (LoadD mem))); format %{ "pshufd $dst,$mem,0x44\n\t" - "vinsertf128h $dst,$dst,$dst\t! replicate4D" %} + "vinsertf128_high $dst,$dst\t! 
replicate4D" %} ins_encode %{ __ pshufd($dst$$XMMRegister, $mem$$Address, 0x44); - __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); + __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); %} ins_pipe( pipe_slow ); %} @@ -4791,7 +4791,7 @@ effect(TEMP tmp, TEMP tmp2); format %{ "vphaddd $tmp,$src2,$src2\n\t" "vphaddd $tmp,$tmp,$tmp2\n\t" - "vextracti128 $tmp2,$tmp\n\t" + "vextracti128_high $tmp2,$tmp\n\t" "vpaddd $tmp,$tmp,$tmp2\n\t" "movd $tmp2,$src1\n\t" "vpaddd $tmp2,$tmp2,$tmp\n\t" @@ -4800,7 +4800,7 @@ int vector_len = 1; __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len); __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); - __ vextracti128h($tmp2$$XMMRegister, $tmp$$XMMRegister); + __ vextracti128_high($tmp2$$XMMRegister, $tmp$$XMMRegister); __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); __ movdl($tmp2$$XMMRegister, $src1$$Register); __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); @@ -4813,7 +4813,7 @@ predicate(UseAVX > 2); match(Set dst (AddReductionVI src1 src2)); effect(TEMP tmp, TEMP tmp2); - format %{ "vextracti128 $tmp,$src2\n\t" + format %{ "vextracti128_high $tmp,$src2\n\t" "vpaddd $tmp,$tmp,$src2\n\t" "pshufd $tmp2,$tmp,0xE\n\t" "vpaddd $tmp,$tmp,$tmp2\n\t" @@ -4824,7 +4824,7 @@ "movd $dst,$tmp2\t! add reduction8I" %} ins_encode %{ int vector_len = 0; - __ vextracti128h($tmp$$XMMRegister, $src2$$XMMRegister); + __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister); __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len); __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); @@ -4841,9 +4841,9 @@ predicate(UseAVX > 2); match(Set dst (AddReductionVI src1 src2)); effect(TEMP tmp, TEMP tmp2, TEMP tmp3); - format %{ "vextracti64x4 $tmp3,$src2,0x1\n\t" + format %{ "vextracti64x4_high $tmp3,$src2\n\t" "vpaddd $tmp3,$tmp3,$src2\n\t" - "vextracti128 $tmp,$tmp3\n\t" + "vextracti128_high $tmp,$tmp3\n\t" "vpaddd $tmp,$tmp,$tmp3\n\t" "pshufd $tmp2,$tmp,0xE\n\t" "vpaddd $tmp,$tmp,$tmp2\n\t" @@ -4853,9 +4853,9 @@ "vpaddd $tmp2,$tmp,$tmp2\n\t" "movd $dst,$tmp2\t! mul reduction16I" %} ins_encode %{ - __ vextracti64x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 1); + __ vextracti64x4_high($tmp3$$XMMRegister, $src2$$XMMRegister); __ vpaddd($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1); - __ vextracti128h($tmp$$XMMRegister, $tmp3$$XMMRegister); + __ vextracti128_high($tmp$$XMMRegister, $tmp3$$XMMRegister); __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, 0); __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); @@ -4892,7 +4892,7 @@ predicate(UseAVX > 2); match(Set dst (AddReductionVL src1 src2)); effect(TEMP tmp, TEMP tmp2); - format %{ "vextracti128 $tmp,$src2\n\t" + format %{ "vextracti128_high $tmp,$src2\n\t" "vpaddq $tmp2,$tmp,$src2\n\t" "pshufd $tmp,$tmp2,0xE\n\t" "vpaddq $tmp2,$tmp2,$tmp\n\t" @@ -4900,7 +4900,7 @@ "vpaddq $tmp2,$tmp2,$tmp\n\t" "movdq $dst,$tmp2\t! 
add reduction4L" %} ins_encode %{ - __ vextracti128h($tmp$$XMMRegister, $src2$$XMMRegister); + __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister); __ vpaddq($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); @@ -4915,9 +4915,9 @@ predicate(UseAVX > 2); match(Set dst (AddReductionVL src1 src2)); effect(TEMP tmp, TEMP tmp2); - format %{ "vextracti64x4 $tmp2,$src2,0x1\n\t" + format %{ "vextracti64x4_high $tmp2,$src2\n\t" "vpaddq $tmp2,$tmp2,$src2\n\t" - "vextracti128 $tmp,$tmp2\n\t" + "vextracti128_high $tmp,$tmp2\n\t" "vpaddq $tmp2,$tmp2,$tmp\n\t" "pshufd $tmp,$tmp2,0xE\n\t" "vpaddq $tmp2,$tmp2,$tmp\n\t" @@ -4925,9 +4925,9 @@ "vpaddq $tmp2,$tmp2,$tmp\n\t" "movdq $dst,$tmp2\t! add reduction8L" %} ins_encode %{ - __ vextracti64x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 1); + __ vextracti64x4_high($tmp2$$XMMRegister, $src2$$XMMRegister); __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1); - __ vextracti128h($tmp$$XMMRegister, $tmp2$$XMMRegister); + __ vextracti128_high($tmp$$XMMRegister, $tmp2$$XMMRegister); __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); @@ -5026,7 +5026,7 @@ "vaddss $dst,$dst,$tmp\n\t" "pshufd $tmp,$src2,0x03\n\t" "vaddss $dst,$dst,$tmp\n\t" - "vextractf128 $tmp2,$src2\n\t" + "vextractf128_high $tmp2,$src2\n\t" "vaddss $dst,$dst,$tmp2\n\t" "pshufd $tmp,$tmp2,0x01\n\t" "vaddss $dst,$dst,$tmp\n\t" @@ -5042,7 +5042,7 @@ __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); - __ vextractf128h($tmp2$$XMMRegister, $src2$$XMMRegister); + __ vextractf128_high($tmp2$$XMMRegister, $src2$$XMMRegister); __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); @@ -5065,7 +5065,7 @@ "vaddss $dst,$dst,$tmp\n\t" "pshufd $tmp,$src2,0x03\n\t" "vaddss $dst,$dst,$tmp\n\t" - "vextractf32x4 $tmp2,$src2, 0x1\n\t" + "vextractf32x4 $tmp2,$src2,0x1\n\t" "vaddss $dst,$dst,$tmp2\n\t" "pshufd $tmp,$tmp2,0x01\n\t" "vaddss $dst,$dst,$tmp\n\t" @@ -5073,7 +5073,7 @@ "vaddss $dst,$dst,$tmp\n\t" "pshufd $tmp,$tmp2,0x03\n\t" "vaddss $dst,$dst,$tmp\n\t" - "vextractf32x4 $tmp2,$src2, 0x2\n\t" + "vextractf32x4 $tmp2,$src2,0x2\n\t" "vaddss $dst,$dst,$tmp2\n\t" "pshufd $tmp,$tmp2,0x01\n\t" "vaddss $dst,$dst,$tmp\n\t" @@ -5081,7 +5081,7 @@ "vaddss $dst,$dst,$tmp\n\t" "pshufd $tmp,$tmp2,0x03\n\t" "vaddss $dst,$dst,$tmp\n\t" - "vextractf32x4 $tmp2,$src2, 0x3\n\t" + "vextractf32x4 $tmp2,$src2,0x3\n\t" "vaddss $dst,$dst,$tmp2\n\t" "pshufd $tmp,$tmp2,0x01\n\t" "vaddss $dst,$dst,$tmp\n\t" @@ -5097,7 +5097,7 @@ __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); - __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); + __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); @@ -5105,7 +5105,7 
@@ __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); - __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2); + __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2); __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); @@ -5113,7 +5113,7 @@ __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); - __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3); + __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3); __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); @@ -5162,7 +5162,7 @@ format %{ "vaddsd $dst,$dst,$src2\n\t" "pshufd $tmp,$src2,0xE\n\t" "vaddsd $dst,$dst,$tmp\n\t" - "vextractf32x4h $tmp2,$src2, 0x1\n\t" + "vextractf32x4 $tmp2,$src2,0x1\n\t" "vaddsd $dst,$dst,$tmp2\n\t" "pshufd $tmp,$tmp2,0xE\n\t" "vaddsd $dst,$dst,$tmp\t! add reduction4D" %} @@ -5170,7 +5170,7 @@ __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); - __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); + __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); @@ -5185,15 +5185,15 @@ format %{ "vaddsd $dst,$dst,$src2\n\t" "pshufd $tmp,$src2,0xE\n\t" "vaddsd $dst,$dst,$tmp\n\t" - "vextractf32x4 $tmp2,$src2, 0x1\n\t" + "vextractf32x4 $tmp2,$src2,0x1\n\t" "vaddsd $dst,$dst,$tmp2\n\t" "pshufd $tmp,$tmp2,0xE\n\t" "vaddsd $dst,$dst,$tmp\n\t" - "vextractf32x4 $tmp2,$src2, 0x2\n\t" + "vextractf32x4 $tmp2,$src2,0x2\n\t" "vaddsd $dst,$dst,$tmp2\n\t" "pshufd $tmp,$tmp2,0xE\n\t" "vaddsd $dst,$dst,$tmp\n\t" - "vextractf32x4 $tmp2,$src2, 0x3\n\t" + "vextractf32x4 $tmp2,$src2,0x3\n\t" "vaddsd $dst,$dst,$tmp2\n\t" "pshufd $tmp,$tmp2,0xE\n\t" "vaddsd $dst,$dst,$tmp\t! 
add reduction8D" %} @@ -5201,15 +5201,15 @@ __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); - __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); + __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); - __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2); + __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2); __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); - __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3); + __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3); __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); @@ -5307,7 +5307,7 @@ predicate(UseAVX > 0); match(Set dst (MulReductionVI src1 src2)); effect(TEMP tmp, TEMP tmp2); - format %{ "vextracti128 $tmp,$src2\n\t" + format %{ "vextracti128_high $tmp,$src2\n\t" "vpmulld $tmp,$tmp,$src2\n\t" "pshufd $tmp2,$tmp,0xE\n\t" "vpmulld $tmp,$tmp,$tmp2\n\t" @@ -5318,7 +5318,7 @@ "movd $dst,$tmp2\t! mul reduction8I" %} ins_encode %{ int vector_len = 0; - __ vextracti128h($tmp$$XMMRegister, $src2$$XMMRegister); + __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister); __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len); __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); @@ -5335,9 +5335,9 @@ predicate(UseAVX > 2); match(Set dst (MulReductionVI src1 src2)); effect(TEMP tmp, TEMP tmp2, TEMP tmp3); - format %{ "vextracti64x4 $tmp3,$src2,0x1\n\t" + format %{ "vextracti64x4_high $tmp3,$src2\n\t" "vpmulld $tmp3,$tmp3,$src2\n\t" - "vextracti128 $tmp,$tmp3\n\t" + "vextracti128_high $tmp,$tmp3\n\t" "vpmulld $tmp,$tmp,$src2\n\t" "pshufd $tmp2,$tmp,0xE\n\t" "vpmulld $tmp,$tmp,$tmp2\n\t" @@ -5347,9 +5347,9 @@ "vpmulld $tmp2,$tmp,$tmp2\n\t" "movd $dst,$tmp2\t! mul reduction16I" %} ins_encode %{ - __ vextracti64x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 1); + __ vextracti64x4_high($tmp3$$XMMRegister, $src2$$XMMRegister); __ vpmulld($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1); - __ vextracti128h($tmp$$XMMRegister, $tmp3$$XMMRegister); + __ vextracti128_high($tmp$$XMMRegister, $tmp3$$XMMRegister); __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, 0); __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); @@ -5386,7 +5386,7 @@ predicate(UseAVX > 2 && VM_Version::supports_avx512dq()); match(Set dst (MulReductionVL src1 src2)); effect(TEMP tmp, TEMP tmp2); - format %{ "vextracti128 $tmp,$src2\n\t" + format %{ "vextracti128_high $tmp,$src2\n\t" "vpmullq $tmp2,$tmp,$src2\n\t" "pshufd $tmp,$tmp2,0xE\n\t" "vpmullq $tmp2,$tmp2,$tmp\n\t" @@ -5394,7 +5394,7 @@ "vpmullq $tmp2,$tmp2,$tmp\n\t" "movdq $dst,$tmp2\t! 
mul reduction4L" %} ins_encode %{ - __ vextracti128h($tmp$$XMMRegister, $src2$$XMMRegister); + __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister); __ vpmullq($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); @@ -5409,9 +5409,9 @@ predicate(UseAVX > 2 && VM_Version::supports_avx512dq()); match(Set dst (MulReductionVL src1 src2)); effect(TEMP tmp, TEMP tmp2); - format %{ "vextracti64x4 $tmp2,$src2,0x1\n\t" + format %{ "vextracti64x4_high $tmp2,$src2\n\t" "vpmullq $tmp2,$tmp2,$src2\n\t" - "vextracti128 $tmp,$tmp2\n\t" + "vextracti128_high $tmp,$tmp2\n\t" "vpmullq $tmp2,$tmp2,$tmp\n\t" "pshufd $tmp,$tmp2,0xE\n\t" "vpmullq $tmp2,$tmp2,$tmp\n\t" @@ -5419,9 +5419,9 @@ "vpmullq $tmp2,$tmp2,$tmp\n\t" "movdq $dst,$tmp2\t! mul reduction8L" %} ins_encode %{ - __ vextracti64x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 1); + __ vextracti64x4_high($tmp2$$XMMRegister, $src2$$XMMRegister); __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1); - __ vextracti128h($tmp$$XMMRegister, $tmp2$$XMMRegister); + __ vextracti128_high($tmp$$XMMRegister, $tmp2$$XMMRegister); __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); @@ -5520,7 +5520,7 @@ "vmulss $dst,$dst,$tmp\n\t" "pshufd $tmp,$src2,0x03\n\t" "vmulss $dst,$dst,$tmp\n\t" - "vextractf128 $tmp2,$src2\n\t" + "vextractf128_high $tmp2,$src2\n\t" "vmulss $dst,$dst,$tmp2\n\t" "pshufd $tmp,$tmp2,0x01\n\t" "vmulss $dst,$dst,$tmp\n\t" @@ -5536,7 +5536,7 @@ __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); - __ vextractf128h($tmp2$$XMMRegister, $src2$$XMMRegister); + __ vextractf128_high($tmp2$$XMMRegister, $src2$$XMMRegister); __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); @@ -5559,7 +5559,7 @@ "vmulss $dst,$dst,$tmp\n\t" "pshufd $tmp,$src2,0x03\n\t" "vmulss $dst,$dst,$tmp\n\t" - "vextractf32x4 $tmp2,$src2, 0x1\n\t" + "vextractf32x4 $tmp2,$src2,0x1\n\t" "vmulss $dst,$dst,$tmp2\n\t" "pshufd $tmp,$tmp2,0x01\n\t" "vmulss $dst,$dst,$tmp\n\t" @@ -5567,7 +5567,7 @@ "vmulss $dst,$dst,$tmp\n\t" "pshufd $tmp,$tmp2,0x03\n\t" "vmulss $dst,$dst,$tmp\n\t" - "vextractf32x4 $tmp2,$src2, 0x2\n\t" + "vextractf32x4 $tmp2,$src2,0x2\n\t" "vmulss $dst,$dst,$tmp2\n\t" "pshufd $tmp,$tmp2,0x01\n\t" "vmulss $dst,$dst,$tmp\n\t" @@ -5575,7 +5575,7 @@ "vmulss $dst,$dst,$tmp\n\t" "pshufd $tmp,$tmp2,0x03\n\t" "vmulss $dst,$dst,$tmp\n\t" - "vextractf32x4 $tmp2,$src2, 0x3\n\t" + "vextractf32x4 $tmp2,$src2,0x3\n\t" "vmulss $dst,$dst,$tmp2\n\t" "pshufd $tmp,$tmp2,0x01\n\t" "vmulss $dst,$dst,$tmp\n\t" @@ -5591,7 +5591,7 @@ __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); - __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); + __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); __ vmulss($dst$$XMMRegister, 
$dst$$XMMRegister, $tmp$$XMMRegister); @@ -5599,7 +5599,7 @@ __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); - __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2); + __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2); __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); @@ -5607,7 +5607,7 @@ __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); - __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3); + __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3); __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); @@ -5656,7 +5656,7 @@ format %{ "vmulsd $dst,$dst,$src2\n\t" "pshufd $tmp,$src2,0xE\n\t" "vmulsd $dst,$dst,$tmp\n\t" - "vextractf128 $tmp2,$src2\n\t" + "vextractf128_high $tmp2,$src2\n\t" "vmulsd $dst,$dst,$tmp2\n\t" "pshufd $tmp,$tmp2,0xE\n\t" "vmulsd $dst,$dst,$tmp\t! mul reduction4D" %} @@ -5664,7 +5664,7 @@ __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); - __ vextractf128h($tmp2$$XMMRegister, $src2$$XMMRegister); + __ vextractf128_high($tmp2$$XMMRegister, $src2$$XMMRegister); __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); @@ -5679,15 +5679,15 @@ format %{ "vmulsd $dst,$dst,$src2\n\t" "pshufd $tmp,$src2,0xE\n\t" "vmulsd $dst,$dst,$tmp\n\t" - "vextractf32x4 $tmp2,$src2, 0x1\n\t" + "vextractf32x4 $tmp2,$src2,0x1\n\t" "vmulsd $dst,$dst,$tmp2\n\t" "pshufd $tmp,$src2,0xE\n\t" "vmulsd $dst,$dst,$tmp\n\t" - "vextractf32x4 $tmp2,$src2, 0x2\n\t" + "vextractf32x4 $tmp2,$src2,0x2\n\t" "vmulsd $dst,$dst,$tmp2\n\t" "pshufd $tmp,$tmp2,0xE\n\t" "vmulsd $dst,$dst,$tmp\n\t" - "vextractf32x4 $tmp2,$src2, 0x3\n\t" + "vextractf32x4 $tmp2,$src2,0x3\n\t" "vmulsd $dst,$dst,$tmp2\n\t" "pshufd $tmp,$tmp2,0xE\n\t" "vmulsd $dst,$dst,$tmp\t! 
mul reduction8D" %} @@ -5695,15 +5695,15 @@ __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); - __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); + __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); - __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2); + __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2); __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); - __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3); + __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3); __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
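All of the x86.ad edits above are mechanical renames tracking the assembler cleanup seen earlier in this changeset: the lane-ambiguous 'h' suffix and its redundant destination operand give way to names that state the high 128/256-bit lane explicitly. The correspondence, as far as these hunks show it:

    // vinserti128_high(dst, src)    replaces  vinserti128h(dst, dst, src)
    // vextracti128_high(dst, src)   replaces  vextracti128h(dst, src)
    // vextracti64x4_high(dst, src)  replaces  vextracti64x4h(dst, src, 1)
    // vextractf32x4(dst, src, i)    replaces  vextractf32x4h(dst, src, i)   // lane index kept explicit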
--- a/src/cpu/x86/vm/x86_32.ad Thu Feb 25 14:59:44 2016 +0000 +++ b/src/cpu/x86/vm/x86_32.ad Thu Mar 17 17:03:20 2016 +0000 @@ -1420,9 +1420,6 @@ // The ecx parameter to rep stos for the ClearArray node is in dwords. const bool Matcher::init_array_count_is_in_bytes = false; -// Threshold size for cleararray. -const int Matcher::init_array_short_size = 8 * BytesPerLong; - // Needs 2 CMOV's for longs. const int Matcher::long_cmove_cost() { return 1; } @@ -11369,27 +11366,54 @@ // ======================================================================= // fast clearing of an array instruct rep_stos(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{ - predicate(!UseFastStosb); + predicate(!((ClearArrayNode*)n)->is_large()); match(Set dummy (ClearArray cnt base)); effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr); - format %{ "XOR EAX,EAX\t# ClearArray:\n\t" - "SHL ECX,1\t# Convert doublewords to words\n\t" - "REP STOS\t# store EAX into [EDI++] while ECX--" %} - ins_encode %{ - __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register); - %} - ins_pipe( pipe_slow ); -%} - -instruct rep_fast_stosb(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{ - predicate(UseFastStosb); + + format %{ $$template + $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t" + $$emit$$"CMP InitArrayShortSize,rcx\n\t" + $$emit$$"JG LARGE\n\t" + $$emit$$"SHL ECX, 1\n\t" + $$emit$$"DEC ECX\n\t" + $$emit$$"JS DONE\t# Zero length\n\t" + $$emit$$"MOV EAX,(EDI,ECX,4)\t# LOOP\n\t" + $$emit$$"DEC ECX\n\t" + $$emit$$"JGE LOOP\n\t" + $$emit$$"JMP DONE\n\t" + $$emit$$"# LARGE:\n\t" + if (UseFastStosb) { + $$emit$$"SHL ECX,3\t# Convert doublewords to bytes\n\t" + $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t" + } else { + $$emit$$"SHL ECX,1\t# Convert doublewords to words\n\t" + $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t" + } + $$emit$$"# DONE" + %} + ins_encode %{ + __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register, false); + %} + ins_pipe( pipe_slow ); +%} + +instruct rep_stos_large(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{ + predicate(((ClearArrayNode*)n)->is_large()); match(Set dummy (ClearArray cnt base)); effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr); - format %{ "XOR EAX,EAX\t# ClearArray:\n\t" - "SHL ECX,3\t# Convert doublewords to bytes\n\t" - "REP STOSB\t# store EAX into [EDI++] while ECX--" %} - ins_encode %{ - __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register); + format %{ $$template + $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t" + if (UseFastStosb) { + $$emit$$"SHL ECX,3\t# Convert doublewords to bytes\n\t" + $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t" + } else { + $$emit$$"SHL ECX,1\t# Convert doublewords to words\n\t" + $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t" + } + $$emit$$"# DONE" + %} + ins_encode %{ + __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register, true); %} ins_pipe( pipe_slow ); %}
--- a/src/cpu/x86/vm/x86_64.ad Thu Feb 25 14:59:44 2016 +0000 +++ b/src/cpu/x86/vm/x86_64.ad Thu Mar 17 17:03:20 2016 +0000 @@ -1637,9 +1637,6 @@ // The ecx parameter to rep stosq for the ClearArray node is in words. const bool Matcher::init_array_count_is_in_bytes = false; -// Threshold size for cleararray. -const int Matcher::init_array_short_size = 8 * BytesPerLong; - // No additional cost for CMOVL. const int Matcher::long_cmove_cost() { return 0; } @@ -10460,31 +10457,55 @@ instruct rep_stos(rcx_RegL cnt, rdi_RegP base, rax_RegI zero, Universe dummy, rFlagsReg cr) %{ - predicate(!UseFastStosb); + predicate(!((ClearArrayNode*)n)->is_large()); match(Set dummy (ClearArray cnt base)); effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr); - format %{ "xorq rax, rax\t# ClearArray:\n\t" - "rep stosq\t# Store rax to *rdi++ while rcx--" %} - ins_encode %{ - __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register); + format %{ $$template + $$emit$$"xorq rax, rax\t# ClearArray:\n\t" + $$emit$$"cmp InitArrayShortSize,rcx\n\t" + $$emit$$"jg LARGE\n\t" + $$emit$$"dec rcx\n\t" + $$emit$$"js DONE\t# Zero length\n\t" + $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t" + $$emit$$"dec rcx\n\t" + $$emit$$"jge LOOP\n\t" + $$emit$$"jmp DONE\n\t" + $$emit$$"# LARGE:\n\t" + if (UseFastStosb) { + $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t" + $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t" + } else { + $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t" + } + $$emit$$"# DONE" + %} + ins_encode %{ + __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register, false); %} ins_pipe(pipe_slow); %} -instruct rep_fast_stosb(rcx_RegL cnt, rdi_RegP base, rax_RegI zero, Universe dummy, - rFlagsReg cr) -%{ - predicate(UseFastStosb); +instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, rax_RegI zero, Universe dummy, + rFlagsReg cr) +%{ + predicate(((ClearArrayNode*)n)->is_large()); match(Set dummy (ClearArray cnt base)); effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr); - format %{ "xorq rax, rax\t# ClearArray:\n\t" - "shlq rcx,3\t# Convert doublewords to bytes\n\t" - "rep stosb\t# Store rax to *rdi++ while rcx--" %} - ins_encode %{ - __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register); - %} - ins_pipe( pipe_slow ); + + format %{ $$template + $$emit$$"xorq rax, rax\t# ClearArray:\n\t" + if (UseFastStosb) { + $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t" + $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--" + } else { + $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--" + } + %} + ins_encode %{ + __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register, true); + %} + ins_pipe(pipe_slow); %} instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
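The two .ad files above make the same change: the old UseFastStosb predicate split becomes a size split. ClearArray nodes now carry an is_large() flag; the non-large form compares the count against InitArrayShortSize at runtime and clears short arrays with an inline loop, falling through to rep stos{q,b} otherwise, while the large form goes straight to rep stos. A hedged C++ model of the selection (units simplified to 8-byte words; the real threshold flag is byte-based):

    #include <cstdint>
    #include <cstring>

    void clear_mem_model(int64_t* base, int64_t word_cnt, bool large,
                         int64_t short_limit_words) {
      if (!large && word_cnt <= short_limit_words) {
        for (int64_t i = word_cnt - 1; i >= 0; i--) base[i] = 0;  // inline loop
      } else {
        std::memset(base, 0, (size_t)word_cnt * 8);  // rep stosq / rep stosb path
      }
    }

The point of the split is to spare the very common small clears from the fixed startup cost of rep stos.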
--- a/src/cpu/zero/vm/cppInterpreter_zero.cpp Thu Feb 25 14:59:44 2016 +0000 +++ b/src/cpu/zero/vm/cppInterpreter_zero.cpp Thu Mar 17 17:03:20 2016 +0000 @@ -773,7 +773,7 @@ } BasicType CppInterpreter::result_type_of(Method* method) { - BasicType t; + BasicType t = T_ILLEGAL; // silence compiler warnings switch (method->result_index()) { case 0 : t = T_BOOLEAN; break; case 1 : t = T_CHAR; break;
--- a/src/cpu/zero/vm/interpreterRT_zero.cpp Thu Feb 25 14:59:44 2016 +0000 +++ b/src/cpu/zero/vm/interpreterRT_zero.cpp Thu Mar 17 17:03:20 2016 +0000 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2003, 2016, Oracle and/or its affiliates. All rights reserved. * Copyright 2007, 2008, 2010 Red Hat, Inc. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * @@ -62,7 +62,7 @@ } void InterpreterRuntime::SignatureHandlerGeneratorBase::push(BasicType type) { - ffi_type *ftype; + ffi_type *ftype = NULL; switch (type) { case T_VOID: ftype = &ffi_type_void;
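The two zero-interpreter hunks above are the same warning fix: a local that every reachable switch case assigns can still trip GCC's -Wmaybe-uninitialized when the compiler cannot prove the switch exhaustive. Distilled to its shape (illustrative, not the VM's code):

    int result_kind(int idx) {
      int t;                       // gcc: 't' may be used uninitialized
      switch (idx) {
        case 0: t = 1; break;
        case 1: t = 2; break;
        // no provably-exhaustive default, so the use below looks unguarded
      }
      return t;
    }
    // Fix, as in both hunks: give the local a sentinel initializer (T_ILLEGAL / NULL).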
Binary file src/jdk.hotspot.agent/share/classes/images/toolbarButtonGraphics/development/Server16.gif has changed
Binary file src/jdk.hotspot.agent/share/classes/images/toolbarButtonGraphics/development/Server24.gif has changed
Binary file src/jdk.hotspot.agent/share/classes/images/toolbarButtonGraphics/general/About16.gif has changed
Binary file src/jdk.hotspot.agent/share/classes/images/toolbarButtonGraphics/general/About24.gif has changed
Binary file src/jdk.hotspot.agent/share/classes/images/toolbarButtonGraphics/general/Delete16.gif has changed
Binary file src/jdk.hotspot.agent/share/classes/images/toolbarButtonGraphics/general/Delete24.gif has changed
Binary file src/jdk.hotspot.agent/share/classes/images/toolbarButtonGraphics/general/Find16.gif has changed
Binary file src/jdk.hotspot.agent/share/classes/images/toolbarButtonGraphics/general/Help16.gif has changed
Binary file src/jdk.hotspot.agent/share/classes/images/toolbarButtonGraphics/general/Help24.gif has changed
Binary file src/jdk.hotspot.agent/share/classes/images/toolbarButtonGraphics/general/History16.gif has changed
Binary file src/jdk.hotspot.agent/share/classes/images/toolbarButtonGraphics/general/History24.gif has changed
Binary file src/jdk.hotspot.agent/share/classes/images/toolbarButtonGraphics/general/Information16.gif has changed
Binary file src/jdk.hotspot.agent/share/classes/images/toolbarButtonGraphics/general/Information24.gif has changed
Binary file src/jdk.hotspot.agent/share/classes/images/toolbarButtonGraphics/general/New16.gif has changed
Binary file src/jdk.hotspot.agent/share/classes/images/toolbarButtonGraphics/general/New24.gif has changed
Binary file src/jdk.hotspot.agent/share/classes/images/toolbarButtonGraphics/general/Open16.gif has changed
Binary file src/jdk.hotspot.agent/share/classes/images/toolbarButtonGraphics/general/Open24.gif has changed
Binary file src/jdk.hotspot.agent/share/classes/images/toolbarButtonGraphics/general/Save24.gif has changed
Binary file src/jdk.hotspot.agent/share/classes/images/toolbarButtonGraphics/general/SaveAs16.gif has changed
Binary file src/jdk.hotspot.agent/share/classes/images/toolbarButtonGraphics/general/SaveAs24.gif has changed
Binary file src/jdk.hotspot.agent/share/classes/images/toolbarButtonGraphics/general/Zoom16.gif has changed
Binary file src/jdk.hotspot.agent/share/classes/images/toolbarButtonGraphics/general/ZoomIn16.gif has changed
Binary file src/jdk.hotspot.agent/share/classes/images/toolbarButtonGraphics/general/ZoomIn24.gif has changed
Binary file src/jdk.hotspot.agent/share/classes/images/toolbarButtonGraphics/navigation/Down16.gif has changed
Binary file src/jdk.hotspot.agent/share/classes/images/toolbarButtonGraphics/navigation/Up16.gif has changed
Binary file src/jdk.hotspot.agent/share/classes/images/toolbarButtonGraphics/text/AlignCenter16.gif has changed
Binary file src/jdk.hotspot.agent/share/classes/images/toolbarButtonGraphics/text/AlignCenter24.gif has changed
Binary file src/jdk.hotspot.agent/share/classes/images/toolbarButtonGraphics/text/AlignLeft16.gif has changed
Binary file src/jdk.hotspot.agent/share/classes/images/toolbarButtonGraphics/text/AlignLeft24.gif has changed
Binary file src/jdk.hotspot.agent/share/classes/images/toolbarButtonGraphics/text/AlignRight16.gif has changed
Binary file src/jdk.hotspot.agent/share/classes/images/toolbarButtonGraphics/text/AlignRight24.gif has changed
Binary file src/jdk.hotspot.agent/share/classes/toolbarButtonGraphics/development/Server16.gif has changed
Binary file src/jdk.hotspot.agent/share/classes/toolbarButtonGraphics/development/Server24.gif has changed
Binary file src/jdk.hotspot.agent/share/classes/toolbarButtonGraphics/general/About16.gif has changed
Binary file src/jdk.hotspot.agent/share/classes/toolbarButtonGraphics/general/About24.gif has changed
Binary file src/jdk.hotspot.agent/share/classes/toolbarButtonGraphics/general/Delete16.gif has changed
Binary file src/jdk.hotspot.agent/share/classes/toolbarButtonGraphics/general/Delete24.gif has changed
Binary file src/jdk.hotspot.agent/share/classes/toolbarButtonGraphics/general/Find16.gif has changed
Binary file src/jdk.hotspot.agent/share/classes/toolbarButtonGraphics/general/Help16.gif has changed
Binary file src/jdk.hotspot.agent/share/classes/toolbarButtonGraphics/general/Help24.gif has changed
Binary file src/jdk.hotspot.agent/share/classes/toolbarButtonGraphics/general/History16.gif has changed
Binary file src/jdk.hotspot.agent/share/classes/toolbarButtonGraphics/general/History24.gif has changed
Binary file src/jdk.hotspot.agent/share/classes/toolbarButtonGraphics/general/Information16.gif has changed
Binary file src/jdk.hotspot.agent/share/classes/toolbarButtonGraphics/general/Information24.gif has changed
Binary file src/jdk.hotspot.agent/share/classes/toolbarButtonGraphics/general/Open16.gif has changed
Binary file src/jdk.hotspot.agent/share/classes/toolbarButtonGraphics/general/Open24.gif has changed
Binary file src/jdk.hotspot.agent/share/classes/toolbarButtonGraphics/general/Save24.gif has changed
Binary file src/jdk.hotspot.agent/share/classes/toolbarButtonGraphics/general/SaveAs16.gif has changed
Binary file src/jdk.hotspot.agent/share/classes/toolbarButtonGraphics/general/SaveAs24.gif has changed
Binary file src/jdk.hotspot.agent/share/classes/toolbarButtonGraphics/general/Zoom16.gif has changed
Binary file src/jdk.hotspot.agent/share/classes/toolbarButtonGraphics/general/ZoomIn16.gif has changed
Binary file src/jdk.hotspot.agent/share/classes/toolbarButtonGraphics/general/ZoomIn24.gif has changed
Binary file src/jdk.hotspot.agent/share/classes/toolbarButtonGraphics/navigation/Down16.gif has changed
Binary file src/jdk.hotspot.agent/share/classes/toolbarButtonGraphics/navigation/Up16.gif has changed
Binary file src/jdk.hotspot.agent/share/classes/toolbarButtonGraphics/text/AlignCenter16.gif has changed
Binary file src/jdk.hotspot.agent/share/classes/toolbarButtonGraphics/text/AlignCenter24.gif has changed
Binary file src/jdk.hotspot.agent/share/classes/toolbarButtonGraphics/text/AlignLeft16.gif has changed
Binary file src/jdk.hotspot.agent/share/classes/toolbarButtonGraphics/text/AlignLeft24.gif has changed
Binary file src/jdk.hotspot.agent/share/classes/toolbarButtonGraphics/text/AlignRight16.gif has changed
Binary file src/jdk.hotspot.agent/share/classes/toolbarButtonGraphics/text/AlignRight24.gif has changed
--- a/src/jdk.vm.ci/share/classes/jdk.vm.ci.amd64/src/jdk/vm/ci/amd64/AMD64.java Thu Feb 25 14:59:44 2016 +0000 +++ b/src/jdk.vm.ci/share/classes/jdk.vm.ci.amd64/src/jdk/vm/ci/amd64/AMD64.java Thu Mar 17 17:03:20 2016 +0000 @@ -22,6 +22,7 @@ */ package jdk.vm.ci.amd64; +import static jdk.vm.ci.code.MemoryBarriers.LOAD_LOAD; import static jdk.vm.ci.code.MemoryBarriers.LOAD_STORE; import static jdk.vm.ci.code.MemoryBarriers.STORE_STORE; import static jdk.vm.ci.code.Register.SPECIAL; @@ -202,7 +203,8 @@ AVX512ER, AVX512CD, AVX512BW, - AVX512VL + AVX512VL, + SHA } private final EnumSet<CPUFeature> features; @@ -220,7 +222,7 @@ private final AMD64Kind largestKind; public AMD64(EnumSet<CPUFeature> features, EnumSet<Flag> flags) { - super("AMD64", AMD64Kind.QWORD, ByteOrder.LITTLE_ENDIAN, true, allRegisters, LOAD_STORE | STORE_STORE, 1, 8); + super("AMD64", AMD64Kind.QWORD, ByteOrder.LITTLE_ENDIAN, true, allRegisters, LOAD_LOAD | LOAD_STORE | STORE_STORE, 1, 8); this.features = features; this.flags = flags; assert features.contains(CPUFeature.SSE2) : "minimum config for x64";
--- a/src/jdk.vm.ci/share/classes/jdk.vm.ci.hotspot.amd64/src/jdk/vm/ci/hotspot/amd64/AMD64HotSpotJVMCIBackendFactory.java Thu Feb 25 14:59:44 2016 +0000 +++ b/src/jdk.vm.ci/share/classes/jdk.vm.ci.hotspot.amd64/src/jdk/vm/ci/hotspot/amd64/AMD64HotSpotJVMCIBackendFactory.java Thu Mar 17 17:03:20 2016 +0000 @@ -122,6 +122,9 @@ if ((config.vmVersionFeatures & config.amd64AVX512VL) != 0) { features.add(AMD64.CPUFeature.AVX512VL); } + if ((config.vmVersionFeatures & config.amd64SHA) != 0) { + features.add(AMD64.CPUFeature.SHA); + } return features; }
--- a/src/jdk.vm.ci/share/classes/jdk.vm.ci.hotspot/src/jdk/vm/ci/hotspot/HotSpotConstantReflectionProvider.java Thu Feb 25 14:59:44 2016 +0000 +++ b/src/jdk.vm.ci/share/classes/jdk.vm.ci.hotspot/src/jdk/vm/ci/hotspot/HotSpotConstantReflectionProvider.java Thu Mar 17 17:03:20 2016 +0000 @@ -339,7 +339,7 @@ public JavaConstant readStableFieldValue(ResolvedJavaField field, JavaConstant receiver, boolean isDefaultStable) { JavaConstant fieldValue = readNonStableFieldValue(field, receiver); - if (fieldValue.isNonNull()) { + if (fieldValue != null && fieldValue.isNonNull()) { JavaType declaredType = field.getType(); if (declaredType.getComponentType() != null) { int stableDimension = getArrayDimension(declaredType);
--- a/src/jdk.vm.ci/share/classes/jdk.vm.ci.hotspot/src/jdk/vm/ci/hotspot/HotSpotJVMCICompilerConfig.java Thu Feb 25 14:59:44 2016 +0000 +++ b/src/jdk.vm.ci/share/classes/jdk.vm.ci.hotspot/src/jdk/vm/ci/hotspot/HotSpotJVMCICompilerConfig.java Thu Mar 17 17:03:20 2016 +0000 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2016, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -25,6 +25,7 @@ import jdk.vm.ci.code.CompilationRequest; import jdk.vm.ci.code.CompilationRequestResult; import jdk.vm.ci.common.JVMCIError; +import jdk.vm.ci.hotspot.HotSpotJVMCIRuntime.Option; import jdk.vm.ci.runtime.JVMCICompiler; import jdk.vm.ci.runtime.JVMCICompilerFactory; import jdk.vm.ci.runtime.JVMCIRuntime; @@ -47,29 +48,33 @@ } } + /** + * Factory of the selected system compiler. + */ private static JVMCICompilerFactory compilerFactory; /** - * Selects the system compiler. + * Gets the selected system compiler factory. * - * Called from VM. This method has an object return type to allow it to be called with a VM - * utility function used to call other static initialization methods. + * @return the selected system compiler factory */ - static Boolean selectCompiler(String compilerName) { - assert compilerFactory == null; - for (JVMCICompilerFactory factory : Services.load(JVMCICompilerFactory.class)) { - if (factory.getCompilerName().equals(compilerName)) { - compilerFactory = factory; - return Boolean.TRUE; - } - } - - throw new JVMCIError("JVMCI compiler '%s' not found", compilerName); - } - static JVMCICompilerFactory getCompilerFactory() { if (compilerFactory == null) { - compilerFactory = new DummyCompilerFactory(); + JVMCICompilerFactory factory = null; + String compilerName = Option.Compiler.getString(); + if (compilerName != null) { + for (JVMCICompilerFactory f : Services.load(JVMCICompilerFactory.class)) { + if (f.getCompilerName().equals(compilerName)) { + factory = f; + } + } + if (factory == null) { + throw new JVMCIError("JVMCI compiler '%s' not found", compilerName); + } + } else { + factory = new DummyCompilerFactory(); + } + compilerFactory = factory; } return compilerFactory; }
--- a/src/jdk.vm.ci/share/classes/jdk.vm.ci.hotspot/src/jdk/vm/ci/hotspot/HotSpotJVMCIRuntime.java Thu Feb 25 14:59:44 2016 +0000 +++ b/src/jdk.vm.ci/share/classes/jdk.vm.ci.hotspot/src/jdk/vm/ci/hotspot/HotSpotJVMCIRuntime.java Thu Mar 17 17:03:20 2016 +0000 @@ -91,6 +91,7 @@ * A list of all supported JVMCI options. */ public enum Option { + Compiler(String.class, null, "Selects the system compiler."), ImplicitStableValues(boolean.class, true, "Mark well-known stable fields as such."), // Note: The following one is not used (see InitTimer.ENABLED). InitTimer(boolean.class, false, "Specifies if initialization timing is enabled."),
--- a/src/jdk.vm.ci/share/classes/jdk.vm.ci.hotspot/src/jdk/vm/ci/hotspot/HotSpotMetaAccessProvider.java Thu Feb 25 14:59:44 2016 +0000 +++ b/src/jdk.vm.ci/share/classes/jdk.vm.ci.hotspot/src/jdk/vm/ci/hotspot/HotSpotMetaAccessProvider.java Thu Mar 17 17:03:20 2016 +0000 @@ -41,7 +41,6 @@ import jdk.vm.ci.meta.DeoptimizationReason; import jdk.vm.ci.meta.JavaConstant; import jdk.vm.ci.meta.JavaKind; -import jdk.vm.ci.meta.JavaType; import jdk.vm.ci.meta.MetaAccessProvider; import jdk.vm.ci.meta.ResolvedJavaField; import jdk.vm.ci.meta.ResolvedJavaMethod; @@ -111,23 +110,26 @@ } public ResolvedJavaField lookupJavaField(Field reflectionField) { - String name = reflectionField.getName(); Class<?> fieldHolder = reflectionField.getDeclaringClass(); - Class<?> fieldType = reflectionField.getType(); - // java.lang.reflect.Field's modifiers should be enough here since VM internal modifier bits - // are not used (yet). - final int modifiers = reflectionField.getModifiers(); - final long offset = Modifier.isStatic(modifiers) ? UNSAFE.staticFieldOffset(reflectionField) : UNSAFE.objectFieldOffset(reflectionField); HotSpotResolvedObjectType holder = fromObjectClass(fieldHolder); - JavaType type = runtime.fromClass(fieldType); + if (Modifier.isStatic(reflectionField.getModifiers())) { + final long offset = UNSAFE.staticFieldOffset(reflectionField); + for (ResolvedJavaField field : holder.getStaticFields()) { + if (offset == ((HotSpotResolvedJavaField) field).offset()) { + return field; + } + } + } else { + final long offset = UNSAFE.objectFieldOffset(reflectionField); + for (ResolvedJavaField field : holder.getInstanceFields(false)) { + if (offset == ((HotSpotResolvedJavaField) field).offset()) { + return field; + } + } + } - if (offset != -1) { - HotSpotResolvedObjectType resolved = holder; - return resolved.createField(name, type, offset, modifiers); - } else { - throw new JVMCIError("unresolved field %s", reflectionField); - } + throw new JVMCIError("unresolved field %s", reflectionField); } private static int intMaskRight(int n) {
--- a/src/jdk.vm.ci/share/classes/jdk.vm.ci.hotspot/src/jdk/vm/ci/hotspot/HotSpotVMConfig.java Thu Feb 25 14:59:44 2016 +0000 +++ b/src/jdk.vm.ci/share/classes/jdk.vm.ci.hotspot/src/jdk/vm/ci/hotspot/HotSpotVMConfig.java Thu Mar 17 17:03:20 2016 +0000 @@ -945,6 +945,7 @@ @HotSpotVMConstant(name = "VM_Version::CPU_AVX512CD", archs = {"amd64"}) @Stable public long amd64AVX512CD; @HotSpotVMConstant(name = "VM_Version::CPU_AVX512BW", archs = {"amd64"}) @Stable public long amd64AVX512BW; @HotSpotVMConstant(name = "VM_Version::CPU_AVX512VL", archs = {"amd64"}) @Stable public long amd64AVX512VL; + @HotSpotVMConstant(name = "VM_Version::CPU_SHA", archs = {"amd64"}) @Stable public long amd64SHA; // SPARC specific values @HotSpotVMConstant(name = "VM_Version::vis3_instructions_m", archs = {"sparc"}) @Stable public int sparcVis3Instructions; @@ -1141,7 +1142,7 @@ @HotSpotVMField(name = "JavaFrameAnchor::_last_Java_sp", type = "intptr_t*", get = HotSpotVMField.Type.OFFSET) @Stable private int javaFrameAnchorLastJavaSpOffset; @HotSpotVMField(name = "JavaFrameAnchor::_last_Java_pc", type = "address", get = HotSpotVMField.Type.OFFSET) @Stable private int javaFrameAnchorLastJavaPcOffset; - @HotSpotVMField(name = "JavaFrameAnchor::_last_Java_fp", type = "intptr_t*", get = HotSpotVMField.Type.OFFSET, archs = {"amd64"}) @Stable private int javaFrameAnchorLastJavaFpOffset; + @HotSpotVMField(name = "JavaFrameAnchor::_last_Java_fp", type = "intptr_t*", get = HotSpotVMField.Type.OFFSET, archs = {"aarch64, amd64"}) @Stable private int javaFrameAnchorLastJavaFpOffset; @HotSpotVMField(name = "JavaFrameAnchor::_flags", type = "int", get = HotSpotVMField.Type.OFFSET, archs = {"sparc"}) @Stable private int javaFrameAnchorFlagsOffset; public int threadLastJavaSpOffset() { @@ -1152,11 +1153,8 @@ return javaThreadAnchorOffset + javaFrameAnchorLastJavaPcOffset; } - /** - * This value is only valid on AMD64. - */ public int threadLastJavaFpOffset() { - // TODO add an assert for AMD64 + assert getHostArchitectureName().equals("aarch64") || getHostArchitectureName().equals("amd64"); return javaThreadAnchorOffset + javaFrameAnchorLastJavaFpOffset; }
--- a/src/os/aix/vm/attachListener_aix.cpp Thu Feb 25 14:59:44 2016 +0000 +++ b/src/os/aix/vm/attachListener_aix.cpp Thu Mar 17 17:03:20 2016 +0000 @@ -1,6 +1,6 @@ /* - * Copyright (c) 2005, 2015, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012, 2015 SAP SE. All rights reserved. + * Copyright (c) 2005, 2016, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012, 2016 SAP SE. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -225,7 +225,7 @@ // We must call bind with the actual socketaddr length. This is obligatory for AS400. int res = ::bind(listener, (struct sockaddr*)&addr, SUN_LEN(&addr)); if (res == -1) { - RESTARTABLE(::close(listener), res); + ::close(listener); return -1; } @@ -238,7 +238,7 @@ } } if (res == -1) { - RESTARTABLE(::close(listener), res); + ::close(listener); ::unlink(initial_path); return -1; } @@ -400,7 +400,7 @@ AixAttachOperation* op = read_request(s); if (op == NULL) { int res; - RESTARTABLE(::close(s), res); + ::close(s); continue; } else { return op; @@ -452,7 +452,7 @@ } // done - RESTARTABLE(::close(this->socket()), rc); + ::close(this->socket()); // were we externally suspended while we were waiting? thread->check_and_wait_while_suspended();
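The hunk above replaces every RESTARTABLE(::close(...), res) with a plain ::close(): unlike read() or write(), close() must not be retried on EINTR, because by then the descriptor's state is unspecified and a retry can close an unrelated descriptor that another thread has just been handed. A minimal POSIX sketch of the distinction (restartable_read and safe_close are illustrative names, not HotSpot's):

    #include <cerrno>
    #include <cstddef>
    #include <unistd.h>

    // Retrying on EINTR is safe for read(): an interrupted read had no effect.
    static ssize_t restartable_read(int fd, void* buf, size_t len) {
      ssize_t n;
      do {
        n = ::read(fd, buf, len);
      } while (n == -1 && errno == EINTR);
      return n;
    }

    // close() is different: after EINTR the fd may already be released, so a
    // retry races with other threads reusing the descriptor. Call it once.
    static void safe_close(int fd) {
      (void)::close(fd);
    }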
--- a/src/os/aix/vm/os_aix.cpp Thu Feb 25 14:59:44 2016 +0000 +++ b/src/os/aix/vm/os_aix.cpp Thu Mar 17 17:03:20 2016 +0000 @@ -1,6 +1,6 @@ /* - * Copyright (c) 1999, 2015, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012, 2015 SAP SE. All rights reserved. + * Copyright (c) 1999, 2016, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012, 2016 SAP SE. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -36,6 +36,7 @@ #include "compiler/compileBroker.hpp" #include "interpreter/interpreter.hpp" #include "jvm_aix.h" +#include "logging/log.hpp" #include "libo4.hpp" #include "libperfstat_aix.hpp" #include "libodm_aix.hpp" @@ -791,13 +792,8 @@ const pthread_t pthread_id = ::pthread_self(); const tid_t kernel_thread_id = ::thread_self(); - trcVerbose("newborn Thread : pthread-id %u, ktid " UINT64_FORMAT - ", stack %p ... %p, stacksize 0x%IX (%IB)", - pthread_id, kernel_thread_id, - thread->stack_end(), - thread->stack_base(), - thread->stack_size(), - thread->stack_size()); + log_info(os, thread)("Thread is alive (tid: " UINTX_FORMAT ", kernel thread id: " UINTX_FORMAT ").", + os::current_thread_id(), (uintx) kernel_thread_id); // Normally, pthread stacks on AIX live in the data segment (are allocated with malloc() // by the pthread library). In rare cases, this may not be the case, e.g. when third-party @@ -805,7 +801,7 @@ // guard pages on those stacks, because the stacks may reside in memory which is not // protectable (shmated). if (thread->stack_base() > ::sbrk(0)) { - trcVerbose("Thread " UINT64_FORMAT ": stack not in data segment.", (uint64_t) pthread_id); + log_warning(os, thread)("Thread stack not in data segment."); } // Try to randomize the cache line index of hot stack frames. @@ -839,8 +835,8 @@ // Call one more level start routine. thread->run(); - trcVerbose("Thread finished : pthread-id %u, ktid " UINT64_FORMAT ".", - pthread_id, kernel_thread_id); + log_info(os, thread)("Thread finished (tid: " UINTX_FORMAT ", kernel thread id: " UINTX_FORMAT ").", + os::current_thread_id(), (uintx) kernel_thread_id); return 0; } @@ -908,20 +904,19 @@ pthread_t tid; int ret = pthread_create(&tid, &attr, (void* (*)(void*)) java_start, thread); - pthread_attr_destroy(&attr); - + + char buf[64]; if (ret == 0) { - trcVerbose("Created New Thread : pthread-id %u", tid); + log_info(os, thread)("Thread started (pthread id: " UINTX_FORMAT ", attributes: %s). ", + (uintx) tid, os::Posix::describe_pthread_attr(buf, sizeof(buf), &attr)); } else { - if (os::Aix::on_pase()) { - // QIBM_MULTI_THREADED=Y is needed when the launcher is started on iSeries - // using QSH. Otherwise pthread_create fails with errno=11. - trcVerbose("(Please make sure you set the environment variable " - "QIBM_MULTI_THREADED=Y before running this program.)"); - } - if (PrintMiscellaneous && (Verbose || WizardMode)) { - perror("pthread_create()"); - } + log_warning(os, thread)("Failed to start thread - pthread_create failed (%s) for attributes: %s.", + strerror(ret), os::Posix::describe_pthread_attr(buf, sizeof(buf), &attr)); + } + + pthread_attr_destroy(&attr); + + if (ret != 0) { // Need to clean up stuff we've allocated so far thread->set_osthread(NULL); delete osthread; @@ -958,13 +953,6 @@ const pthread_t pthread_id = ::pthread_self(); const tid_t kernel_thread_id = ::thread_self(); - trcVerbose("attaching Thread : pthread-id %u, ktid " UINT64_FORMAT ", stack %p ... %p, stacksize 0x%IX (%IB)", - pthread_id, kernel_thread_id, - thread->stack_end(), - thread->stack_base(), - thread->stack_size(), - thread->stack_size()); - // OSThread::thread_id is the pthread id. osthread->set_thread_id(pthread_id); @@ -990,6 +978,9 @@ // and save the caller's signal mask os::Aix::hotspot_sigmask(thread); + log_info(os, thread)("Thread attached (tid: " UINTX_FORMAT ", kernel thread id: " UINTX_FORMAT ").", + os::current_thread_id(), (uintx) kernel_thread_id); + return true; }
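The trcVerbose() calls above move onto the Unified Logging os+thread tag set introduced in JDK 9, so these lifecycle messages can be enabled with java -Xlog:os+thread=info. A rough stand-in for the call shape, with tag routing reduced to a printf prefix (a sketch only, not HotSpot's actual log_info machinery):

    #include <cstdarg>
    #include <cstdio>

    // Prints in the same "[info][os,thread] message" shape as -Xlog output.
    static void log_info_os_thread(const char* fmt, ...) {
      std::fputs("[info][os,thread] ", stdout);
      va_list ap;
      va_start(ap, fmt);
      std::vfprintf(stdout, fmt, ap);
      va_end(ap);
      std::fputc('\n', stdout);
    }

    int main() {
      unsigned long tid = 4242;  // illustrative thread id
      log_info_os_thread("Thread is alive (tid: %lu).", tid);
      log_info_os_thread("Thread finished (tid: %lu).", tid);
      return 0;
    }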
--- a/src/os/aix/vm/perfMemory_aix.cpp Thu Feb 25 14:59:44 2016 +0000 +++ b/src/os/aix/vm/perfMemory_aix.cpp Thu Mar 17 17:03:20 2016 +0000 @@ -1,6 +1,6 @@ /* - * Copyright (c) 2001, 2015, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012, 2013 SAP SE. All rights reserved. + * Copyright (c) 2001, 2016, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012, 2016 SAP SE. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -121,7 +121,7 @@ addr += result; } - RESTARTABLE(::close(fd), result); + result = ::close(fd); if (PrintMiscellaneous && Verbose) { if (result == OS_ERR) { warning("Could not close %s: %s\n", destfile, strerror(errno)); } @@ -299,10 +299,13 @@ bool create; int error; int fd; + int result; create = false; - if (lstat(path, &orig_st) != 0) { + RESTARTABLE(::lstat(path, &orig_st), result); + + if (result == OS_ERR) { if (errno == ENOENT && (oflag & O_CREAT) != 0) { // File doesn't exist, but_we want to create it, add O_EXCL flag // to make sure no-one creates it (or a symlink) before us @@ -316,7 +319,7 @@ return OS_ERR; } } else { - // Lstat success, check if existing file is a link. + // lstat success, check if existing file is a link. if ((orig_st.st_mode & S_IFMT) == S_IFLNK) { // File is a symlink. errno = ELOOP; @@ -325,9 +328,9 @@ } if (use_mode == true) { - fd = open(path, oflag, mode); + RESTARTABLE(::open(path, oflag, mode), fd); } else { - fd = open(path, oflag); + RESTARTABLE(::open(path, oflag), fd); } if (fd == OS_ERR) { @@ -336,7 +339,8 @@ // Can't do inode checks on before/after if we created the file. if (create == false) { - if (fstat(fd, &new_st) != 0) { + RESTARTABLE(::fstat(fd, &new_st), result); + if (result == OS_ERR) { // Keep errno from fstat, in case close also fails. error = errno; ::close(fd); @@ -384,7 +388,7 @@ RESTARTABLE(::open(dirname, O_RDONLY|O_NOFOLLOW), result); #else // workaround (jdk6 coding) - RESTARTABLE(::open_o_nofollow(dirname, O_RDONLY), result); + result = open_o_nofollow(dirname, O_RDONLY); #endif if (result == OS_ERR) { @@ -888,7 +892,7 @@ RESTARTABLE(::open(filename, O_RDWR|O_CREAT|O_NOFOLLOW, S_IREAD|S_IWRITE), result); #else // workaround function (jdk6 code) - RESTARTABLE(::open_o_nofollow(filename, O_RDWR|O_CREAT, S_IREAD|S_IWRITE), result); + result = open_o_nofollow(filename, O_RDWR|O_CREAT, S_IREAD|S_IWRITE); #endif if (result == OS_ERR) { @@ -931,7 +935,7 @@ if (PrintMiscellaneous && Verbose) { warning("could not set shared memory file size: %s\n", strerror(errno)); } - RESTARTABLE(::close(fd), result); + ::close(fd); return -1; } @@ -951,7 +955,7 @@ #ifdef O_NOFOLLOW RESTARTABLE(::open(filename, oflags), result); #else - RESTARTABLE(::open_o_nofollow(filename, oflags), result); + open_o_nofollow(filename, oflags); #endif if (result == OS_ERR) { @@ -1006,8 +1010,7 @@ char* dirname = get_user_tmp_dir(user_name); char* filename = get_sharedmem_filename(dirname, vmid); - - // Get the short filename. + // get the short filename. char* short_filename = strrchr(filename, '/'); if (short_filename == NULL) { short_filename = filename; @@ -1033,9 +1036,7 @@ mapAddress = (char*)::mmap((char*)0, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); - // attempt to close the file - restart it if it was interrupted, - // but ignore other failures - RESTARTABLE(::close(fd), result); + result = ::close(fd); assert(result != OS_ERR, "could not close file"); if (mapAddress == MAP_FAILED) { @@ -1142,7 +1143,6 @@ // constructs for the file and the shared memory mapping. if (mode == PerfMemory::PERF_MODE_RO) { mmap_prot = PROT_READ; - // No O_NOFOLLOW defined at buildtime, and it is not documented for open. #ifdef O_NOFOLLOW file_flags = O_RDONLY | O_NOFOLLOW; @@ -1205,21 +1205,28 @@ FREE_C_HEAP_ARRAY(char, filename); // open the shared memory file for the give vmid - fd = open_sharedmem_file(rfilename, file_flags, CHECK); - assert(fd != OS_ERR, "unexpected value"); + fd = open_sharedmem_file(rfilename, file_flags, THREAD); + + if (fd == OS_ERR) { + return; + } + + if (HAS_PENDING_EXCEPTION) { + ::close(fd); + return; + } if (*sizep == 0) { size = sharedmem_filesize(fd, CHECK); - assert(size != 0, "unexpected size"); } else { size = *sizep; } + assert(size > 0, "unexpected size <= 0"); + mapAddress = (char*)::mmap((char*)0, size, mmap_prot, MAP_SHARED, fd, 0); - // attempt to close the file - restart if it gets interrupted, - // but ignore other failures - RESTARTABLE(::close(fd), result); + result = ::close(fd); assert(result != OS_ERR, "could not close file"); if (mapAddress == MAP_FAILED) { @@ -1230,7 +1237,7 @@ "Could not map PerfMemory"); } - // It does not go through os api, the operation has to record from here. + // it does not go through os api, the operation has to record from here. MemTracker::record_virtual_memory_reserve((address)mapAddress, size, CURRENT_PC, mtInternal); *addr = mapAddress; @@ -1238,7 +1245,7 @@ if (PerfTraceMemOps) { tty->print("mapped " SIZE_FORMAT " bytes for vmid %d at " - INTPTR_FORMAT "\n", size, vmid, (void*)mapAddress); + INTPTR_FORMAT "\n", size, vmid, p2i((void*)mapAddress)); } }
--- a/src/os/bsd/vm/os_bsd.cpp Thu Feb 25 14:59:44 2016 +0000 +++ b/src/os/bsd/vm/os_bsd.cpp Thu Mar 17 17:03:20 2016 +0000 @@ -32,6 +32,7 @@ #include "compiler/disassembler.hpp" #include "interpreter/interpreter.hpp" #include "jvm_bsd.h" +#include "logging/log.hpp" #include "memory/allocation.inline.hpp" #include "memory/filemap.hpp" #include "mutex_bsd.inline.hpp" @@ -681,6 +682,9 @@ osthread->set_thread_id(os::Bsd::gettid()); + log_info(os, thread)("Thread is alive (tid: " UINTX_FORMAT ", pthread id: " UINTX_FORMAT ").", + os::current_thread_id(), (uintx) pthread_self()); + #ifdef __APPLE__ uint64_t unique_thread_id = locate_unique_thread_id(osthread->thread_id()); guarantee(unique_thread_id != 0, "unique thread id was not found"); @@ -716,6 +720,9 @@ // call one more level start routine thread->run(); + log_info(os, thread)("Thread finished (tid: " UINTX_FORMAT ", pthread id: " UINTX_FORMAT ").", + os::current_thread_id(), (uintx) pthread_self()); + return 0; } @@ -776,12 +783,18 @@ pthread_t tid; int ret = pthread_create(&tid, &attr, (void* (*)(void*)) java_start, thread); + char buf[64]; + if (ret == 0) { + log_info(os, thread)("Thread started (pthread id: " UINTX_FORMAT ", attributes: %s). ", + (uintx) tid, os::Posix::describe_pthread_attr(buf, sizeof(buf), &attr)); + } else { + log_warning(os, thread)("Failed to start thread - pthread_create failed (%s) for attributes: %s.", + strerror(ret), os::Posix::describe_pthread_attr(buf, sizeof(buf), &attr)); + } + pthread_attr_destroy(&attr); if (ret != 0) { - if (PrintMiscellaneous && (Verbose || WizardMode)) { - perror("pthread_create()"); - } // Need to clean up stuff we've allocated so far thread->set_osthread(NULL); delete osthread; @@ -858,6 +871,9 @@ // and save the caller's signal mask os::Bsd::hotspot_sigmask(thread); + log_info(os, thread)("Thread attached (tid: " UINTX_FORMAT ", pthread id: " UINTX_FORMAT ").", + os::current_thread_id(), (uintx) pthread_self()); + return true; }
--- a/src/os/linux/vm/os_linux.cpp Thu Feb 25 14:59:44 2016 +0000 +++ b/src/os/linux/vm/os_linux.cpp Thu Mar 17 17:03:20 2016 +0000 @@ -144,6 +144,7 @@ int os::Linux::_page_size = -1; const int os::Linux::_vm_default_page_size = (8 * K); bool os::Linux::_supports_fast_thread_cpu_time = false; +uint32_t os::Linux::_os_version = 0; const char * os::Linux::_glibc_version = NULL; const char * os::Linux::_libpthread_version = NULL; pthread_condattr_t os::Linux::_condattr[1]; @@ -662,6 +663,9 @@ osthread->set_thread_id(os::current_thread_id()); + log_info(os, thread)("Thread is alive (tid: " UINTX_FORMAT ", pthread id: " UINTX_FORMAT ").", + os::current_thread_id(), (uintx) pthread_self()); + if (UseNUMA) { int lgrp_id = os::numa_get_group_id(); if (lgrp_id != -1) { @@ -691,6 +695,9 @@ // call one more level start routine thread->run(); + log_info(os, thread)("Thread finished (tid: " UINTX_FORMAT ", pthread id: " UINTX_FORMAT ").", + os::current_thread_id(), (uintx) pthread_self()); + return 0; } @@ -756,12 +763,18 @@ pthread_t tid; int ret = pthread_create(&tid, &attr, (void* (*)(void*)) java_start, thread); + char buf[64]; + if (ret == 0) { + log_info(os, thread)("Thread started (pthread id: " UINTX_FORMAT ", attributes: %s). ", + (uintx) tid, os::Posix::describe_pthread_attr(buf, sizeof(buf), &attr)); + } else { + log_warning(os, thread)("Failed to start thread - pthread_create failed (%s) for attributes: %s.", + strerror(ret), os::Posix::describe_pthread_attr(buf, sizeof(buf), &attr)); + } + pthread_attr_destroy(&attr); if (ret != 0) { - if (PrintMiscellaneous && (Verbose || WizardMode)) { - perror("pthread_create()"); - } // Need to clean up stuff we've allocated so far thread->set_osthread(NULL); delete osthread; @@ -858,6 +871,9 @@ // and save the caller's signal mask os::Linux::hotspot_sigmask(thread); + log_info(os, thread)("Thread attached (tid: " UINTX_FORMAT ", pthread id: " UINTX_FORMAT ").", + os::current_thread_id(), (uintx) pthread_self()); + return true; } @@ -4341,6 +4357,48 @@ return (tp.tv_sec * NANOSECS_PER_SEC) + tp.tv_nsec; } +void os::Linux::initialize_os_info() { + assert(_os_version == 0, "OS info already initialized"); + + struct utsname _uname; + + uint32_t major; + uint32_t minor; + uint32_t fix; + + int rc; + + // Kernel version is unknown if + // verification below fails. + _os_version = 0x01000000; + + rc = uname(&_uname); + if (rc != -1) { + + rc = sscanf(_uname.release,"%d.%d.%d", &major, &minor, &fix); + if (rc == 3) { + + if (major < 256 && minor < 256 && fix < 256) { + // Kernel version format is as expected, + // set it overriding unknown state. + _os_version = (major << 16) | + (minor << 8 ) | + (fix << 0 ) ; + } + } + } +} + +uint32_t os::Linux::os_version() { + assert(_os_version != 0, "not initialized"); + return _os_version & 0x00FFFFFF; +} + +bool os::Linux::os_version_is_known() { + assert(_os_version != 0, "not initialized"); + return _os_version & 0x01000000 ? false : true; +} + ///// // glibc on Linux platform uses non-documented flag // to indicate, that some special sort of signal @@ -4563,6 +4621,8 @@ Linux::initialize_system_info(); + Linux::initialize_os_info(); + // main_thread points to the aboriginal thread Linux::_main_thread = pthread_self();
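initialize_os_info() above condenses the uname(2) release string into a single uint32_t: major/minor/fix in the low three bytes, with 0x01000000 reserved as the "kernel version unknown" sentinel. A self-contained sketch of the same encoding and its inverse:

    #include <cassert>
    #include <cstdint>
    #include <cstdio>

    static const uint32_t KERNEL_VERSION_UNKNOWN = 0x01000000;  // mirrors the sentinel above

    static uint32_t pack_version(uint32_t major, uint32_t minor, uint32_t fix) {
      if (major > 255 || minor > 255 || fix > 255) return KERNEL_VERSION_UNKNOWN;
      return (major << 16) | (minor << 8) | fix;
    }

    int main() {
      uint32_t v = pack_version(4, 4, 21);  // e.g. a 4.4.21 kernel
      assert(v == 0x00040415);
      std::printf("%u.%u.%u\n", (v >> 16) & 0xFF, (v >> 8) & 0xFF, v & 0xFF);
      return 0;
    }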
--- a/src/os/linux/vm/os_linux.hpp Thu Feb 25 14:59:44 2016 +0000 +++ b/src/os/linux/vm/os_linux.hpp Thu Mar 17 17:03:20 2016 +0000 @@ -56,6 +56,15 @@ static GrowableArray<int>* _cpu_to_node; + // 0x00000000 = uninitialized, + // 0x01000000 = kernel version unknown, + // otherwise a 32-bit number: + // Ox00AABBCC + // AA, Major Version + // BB, Minor Version + // CC, Fix Version + static uint32_t _os_version; + protected: static julong _physical_memory; @@ -198,6 +207,10 @@ static jlong fast_thread_cpu_time(clockid_t clockid); + static void initialize_os_info(); + static bool os_version_is_known(); + static uint32_t os_version(); + // pthread_cond clock suppport private: static pthread_condattr_t _condattr[1];
--- a/src/os/posix/vm/os_posix.cpp Thu Feb 25 14:59:44 2016 +0000 +++ b/src/os/posix/vm/os_posix.cpp Thu Mar 17 17:03:20 2016 +0000 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1999, 2015, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1999, 2016, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -238,14 +238,12 @@ st->cr(); } -#ifndef PRODUCT bool os::get_host_name(char* buf, size_t buflen) { struct utsname name; uname(&name); jio_snprintf(buf, buflen, "%s", name.nodename); return true; } -#endif // PRODUCT bool os::has_allocatable_memory_limit(julong* limit) { struct rlimit rlim; @@ -1073,6 +1071,19 @@ #endif } +char* os::Posix::describe_pthread_attr(char* buf, size_t buflen, const pthread_attr_t* attr) { + size_t stack_size = 0; + size_t guard_size = 0; + int detachstate = 0; + pthread_attr_getstacksize(attr, &stack_size); + pthread_attr_getguardsize(attr, &guard_size); + pthread_attr_getdetachstate(attr, &detachstate); + jio_snprintf(buf, buflen, "stacksize: " SIZE_FORMAT "k, guardsize: " SIZE_FORMAT "k, %s", + stack_size / 1024, guard_size / 1024, + (detachstate == PTHREAD_CREATE_DETACHED ? "detached" : "joinable")); + return buf; +} + os::WatcherThreadCrashProtection::WatcherThreadCrashProtection() { assert(Thread::current()->is_Watcher_thread(), "Must be WatcherThread");
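The new os::Posix::describe_pthread_attr() helper is small enough to exercise standalone; here is a hedged copy with snprintf standing in for jio_snprintf:

    #include <cstddef>
    #include <cstdio>
    #include <pthread.h>

    static char* describe_pthread_attr(char* buf, size_t buflen, const pthread_attr_t* attr) {
      size_t stack_size = 0;
      size_t guard_size = 0;
      int detachstate = 0;
      pthread_attr_getstacksize(attr, &stack_size);
      pthread_attr_getguardsize(attr, &guard_size);
      pthread_attr_getdetachstate(attr, &detachstate);
      std::snprintf(buf, buflen, "stacksize: %zuk, guardsize: %zuk, %s",
                    stack_size / 1024, guard_size / 1024,
                    detachstate == PTHREAD_CREATE_DETACHED ? "detached" : "joinable");
      return buf;
    }

    int main() {
      pthread_attr_t attr;
      pthread_attr_init(&attr);
      pthread_attr_setstacksize(&attr, 512 * 1024);
      pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
      char buf[64];
      std::printf("%s\n", describe_pthread_attr(buf, sizeof(buf), &attr));
      pthread_attr_destroy(&attr);
      return 0;
    }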
--- a/src/os/posix/vm/os_posix.hpp Thu Feb 25 14:59:44 2016 +0000 +++ b/src/os/posix/vm/os_posix.hpp Thu Mar 17 17:03:20 2016 +0000 @@ -76,6 +76,11 @@ static address ucontext_get_pc(const ucontext_t* ctx); // Set PC into context. Needed for continuation after signal. static void ucontext_set_pc(ucontext_t* ctx, address pc); + + // Helper function; describes pthread attributes as short string. String is written + // to buf with len buflen; buf is returned. + static char* describe_pthread_attr(char* buf, size_t buflen, const pthread_attr_t* attr); + }; /*
--- a/src/os/solaris/vm/os_solaris.cpp Thu Feb 25 14:59:44 2016 +0000 +++ b/src/os/solaris/vm/os_solaris.cpp Thu Mar 17 17:03:20 2016 +0000 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -32,6 +32,7 @@ #include "compiler/disassembler.hpp" #include "interpreter/interpreter.hpp" #include "jvm_solaris.h" +#include "logging/log.hpp" #include "memory/allocation.inline.hpp" #include "memory/filemap.hpp" #include "mutex_solaris.inline.hpp" @@ -68,6 +69,7 @@ #include "utilities/defaultStream.hpp" #include "utilities/events.hpp" #include "utilities/growableArray.hpp" +#include "utilities/macros.hpp" #include "utilities/vmError.hpp" // put OS-includes here @@ -736,6 +738,9 @@ osthr->set_lwp_id(_lwp_self()); // Store lwp in case we are bound thread->_schedctl = (void *) schedctl_init(); + log_info(os, thread)("Thread is alive (tid: " UINTX_FORMAT ").", + os::current_thread_id()); + if (UseNUMA) { int lgrp_id = os::numa_get_group_id(); if (lgrp_id != -1) { @@ -781,6 +786,8 @@ Atomic::dec(&os::Solaris::_os_thread_count); } + log_info(os, thread)("Thread finished (tid: " UINTX_FORMAT ").", os::current_thread_id()); + if (UseDetachedThreads) { thr_exit(NULL); ShouldNotReachHere(); @@ -853,6 +860,9 @@ // and save the caller's signal mask os::Solaris::hotspot_sigmask(thread); + log_info(os, thread)("Thread attached (tid: " UINTX_FORMAT ").", + os::current_thread_id()); + return true; } @@ -879,6 +889,24 @@ return true; } +// Helper function to trace thread attributes, similar to os::Posix::describe_pthread_attr() +static char* describe_thr_create_attributes(char* buf, size_t buflen, + size_t stacksize, long flags) { + stringStream ss(buf, buflen); + ss.print("stacksize: " SIZE_FORMAT "k, ", stacksize / 1024); + ss.print("flags: "); + #define PRINT_FLAG(f) if (flags & f) ss.print( #f " "); + #define ALL(X) \ + X(THR_SUSPENDED) \ + X(THR_DETACHED) \ + X(THR_BOUND) \ + X(THR_NEW_LWP) \ + X(THR_DAEMON) + ALL(PRINT_FLAG) + #undef ALL + #undef PRINT_FLAG + return buf; +} bool os::create_thread(Thread* thread, ThreadType thr_type, size_t stack_size) { @@ -974,10 +1002,17 @@ osthread->set_thread_id(-1); status = thr_create(NULL, stack_size, java_start, thread, flags, &tid); + + char buf[64]; + if (status == 0) { + log_info(os, thread)("Thread started (tid: " UINTX_FORMAT ", attributes: %s). ", + (uintx) tid, describe_thr_create_attributes(buf, sizeof(buf), stack_size, flags)); + } else { + log_warning(os, thread)("Failed to start thread - thr_create failed (%s) for attributes: %s.", + strerror(status), describe_thr_create_attributes(buf, sizeof(buf), stack_size, flags)); + } + if (status != 0) { - if (PrintMiscellaneous && (Verbose || WizardMode)) { - perror("os::create_thread"); - } thread->set_osthread(NULL); // Need to clean up stuff we've allocated so far delete osthread;
--- a/src/os/windows/vm/os_windows.cpp Thu Feb 25 14:59:44 2016 +0000 +++ b/src/os/windows/vm/os_windows.cpp Thu Mar 17 17:03:20 2016 +0000 @@ -35,6 +35,7 @@ #include "compiler/disassembler.hpp" #include "interpreter/interpreter.hpp" #include "jvm_windows.h" +#include "logging/log.hpp" #include "memory/allocation.inline.hpp" #include "memory/filemap.hpp" #include "mutex_windows.inline.hpp" @@ -71,6 +72,7 @@ #include "utilities/defaultStream.hpp" #include "utilities/events.hpp" #include "utilities/growableArray.hpp" +#include "utilities/macros.hpp" #include "utilities/vmError.hpp" #ifdef _DEBUG @@ -436,6 +438,8 @@ res = 20115; // java thread } + log_info(os, thread)("Thread is alive (tid: " UINTX_FORMAT ").", os::current_thread_id()); + // Install a win32 structured exception handler around every thread created // by VM, so VM can generate error dump when an exception occurred in non- // Java thread (e.g. VM thread). @@ -446,6 +450,8 @@ // Nothing to do. } + log_info(os, thread)("Thread finished (tid: " UINTX_FORMAT ").", os::current_thread_id()); + // One less thread is executing // When the VMThread gets here, the main thread may have already exited // which frees the CodeHeap containing the Atomic::add code @@ -509,6 +515,10 @@ osthread->set_state(RUNNABLE); thread->set_osthread(osthread); + + log_info(os, thread)("Thread attached (tid: " UINTX_FORMAT ").", + os::current_thread_id()); + return true; } @@ -530,6 +540,27 @@ return true; } +// Helper function to trace _beginthreadex attributes, +// similar to os::Posix::describe_pthread_attr() +static char* describe_beginthreadex_attributes(char* buf, size_t buflen, + size_t stacksize, unsigned initflag) { + stringStream ss(buf, buflen); + if (stacksize == 0) { + ss.print("stacksize: default, "); + } else { + ss.print("stacksize: " SIZE_FORMAT "k, ", stacksize / 1024); + } + ss.print("flags: "); + #define PRINT_FLAG(f) if (initflag & f) ss.print( #f " "); + #define ALL(X) \ + X(CREATE_SUSPENDED) \ + X(STACK_SIZE_PARAM_IS_A_RESERVATION) + ALL(PRINT_FLAG) + #undef ALL + #undef PRINT_FLAG + return buf; +} + // Allocate and initialize a new OSThread bool os::create_thread(Thread* thread, ThreadType thr_type, size_t stack_size) { @@ -596,14 +627,24 @@ // document because JVM uses C runtime library. The good news is that the // flag appears to work with _beginthredex() as well. + const unsigned initflag = CREATE_SUSPENDED | STACK_SIZE_PARAM_IS_A_RESERVATION; HANDLE thread_handle = (HANDLE)_beginthreadex(NULL, (unsigned)stack_size, (unsigned (__stdcall *)(void*)) java_start, thread, - CREATE_SUSPENDED | STACK_SIZE_PARAM_IS_A_RESERVATION, + initflag, &thread_id); + char buf[64]; + if (thread_handle != NULL) { + log_info(os, thread)("Thread started (tid: %u, attributes: %s)", + thread_id, describe_beginthreadex_attributes(buf, sizeof(buf), stack_size, initflag)); + } else { + log_warning(os, thread)("Failed to start thread - _beginthreadex failed (%s) for attributes: %s.", + strerror(errno), describe_beginthreadex_attributes(buf, sizeof(buf), stack_size, initflag)); + } + if (thread_handle == NULL) { // Need to clean up stuff we've allocated so far CloseHandle(osthread->interrupt_event()); @@ -1531,12 +1572,10 @@ return result; } -#ifndef PRODUCT bool os::get_host_name(char* buf, size_t buflen) { DWORD size = (DWORD)buflen; return (GetComputerNameEx(ComputerNameDnsHostname, buf, &size) == TRUE); } -#endif // PRODUCT void os::get_summary_os_info(char* buf, size_t buflen) { stringStream sst(buf, buflen); @@ -1670,8 +1709,7 @@ if (is_workstation) { st->print("10"); } else { - // The server version name of Windows 10 is not known at this time - st->print("%d.%d", major_version, minor_version); + st->print("Server 2016"); } break;
--- a/src/os/windows/vm/perfMemory_windows.cpp Thu Feb 25 14:59:44 2016 +0000 +++ b/src/os/windows/vm/perfMemory_windows.cpp Thu Mar 17 17:03:20 2016 +0000 @@ -628,6 +628,7 @@ if (!is_directory_secure(dirname)) { // the directory is not secure, don't attempt any cleanup + os::closedir(dirp); return; } @@ -1445,6 +1446,8 @@ // check that the file system is secure - i.e. it supports ACLs. if (!is_filesystem_secure(dirname)) { + FREE_C_HEAP_ARRAY(char, dirname); + FREE_C_HEAP_ARRAY(char, user); return NULL; } @@ -1624,6 +1627,7 @@ // if (!is_directory_secure(dirname)) { FREE_C_HEAP_ARRAY(char, dirname); + if (luser != user) FREE_C_HEAP_ARRAY(char, luser); THROW_MSG(vmSymbols::java_lang_IllegalArgumentException(), "Process not found"); }
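All three hunks above plug early-return resource leaks (an open directory iterator and several C-heap strings). HotSpot releases these manually, as shown; in standalone C++ the same guarantee is usually expressed with a scope guard. A sketch of the idea only, not HotSpot style:

    #include <cstdio>
    #include <cstdlib>
    #include <cstring>

    // Owns a heap buffer; the destructor runs on every exit path, including
    // the early returns that the fixes above had to patch by hand.
    struct CHeapString {
      char* ptr;
      explicit CHeapString(const char* s)
          : ptr((char*)std::malloc(std::strlen(s) + 1)) {
        if (ptr) std::strcpy(ptr, s);
      }
      ~CHeapString() { std::free(ptr); }
    };

    static bool filesystem_secure(const char* /*dir*/) { return false; }  // illustrative stub

    static const char* mapping_address(const char* dir) {
      CHeapString dirname(dir);            // previously leaked on the early return
      if (!filesystem_secure(dirname.ptr)) {
        return NULL;                       // dirname is freed here automatically
      }
      return "mapped";
    }

    int main() {
      std::printf("%s\n", mapping_address("hsperfdata") ? "ok" : "rejected");
      return 0;
    }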
--- a/src/os_cpu/linux_aarch64/vm/copy_linux_aarch64.inline.hpp Thu Feb 25 14:59:44 2016 +0000 +++ b/src/os_cpu/linux_aarch64/vm/copy_linux_aarch64.inline.hpp Thu Mar 17 17:03:20 2016 +0000 @@ -26,44 +26,108 @@ #ifndef OS_CPU_LINUX_AARCH64_VM_COPY_LINUX_AARCH64_INLINE_HPP #define OS_CPU_LINUX_AARCH64_VM_COPY_LINUX_AARCH64_INLINE_HPP +#define COPY_SMALL(from, to, count) \ +{ \ + long tmp0, tmp1, tmp2, tmp3; \ + long tmp4, tmp5, tmp6, tmp7; \ + __asm volatile( \ +" adr %[t0], 0f;" \ +" add %[t0], %[t0], %[cnt], lsl #5;" \ +" br %[t0];" \ +" .align 5;" \ +"0:" \ +" b 1f;" \ +" .align 5;" \ +" ldr %[t0], [%[s], #0];" \ +" str %[t0], [%[d], #0];" \ +" b 1f;" \ +" .align 5;" \ +" ldp %[t0], %[t1], [%[s], #0];" \ +" stp %[t0], %[t1], [%[d], #0];" \ +" b 1f;" \ +" .align 5;" \ +" ldp %[t0], %[t1], [%[s], #0];" \ +" ldr %[t2], [%[s], #16];" \ +" stp %[t0], %[t1], [%[d], #0];" \ +" str %[t2], [%[d], #16];" \ +" b 1f;" \ +" .align 5;" \ +" ldp %[t0], %[t1], [%[s], #0];" \ +" ldp %[t2], %[t3], [%[s], #16];" \ +" stp %[t0], %[t1], [%[d], #0];" \ +" stp %[t2], %[t3], [%[d], #16];" \ +" b 1f;" \ +" .align 5;" \ +" ldp %[t0], %[t1], [%[s], #0];" \ +" ldp %[t2], %[t3], [%[s], #16];" \ +" ldr %[t4], [%[s], #32];" \ +" stp %[t0], %[t1], [%[d], #0];" \ +" stp %[t2], %[t3], [%[d], #16];" \ +" str %[t4], [%[d], #32];" \ +" b 1f;" \ +" .align 5;" \ +" ldp %[t0], %[t1], [%[s], #0];" \ +" ldp %[t2], %[t3], [%[s], #16];" \ +" ldp %[t4], %[t5], [%[s], #32];" \ +"2:" \ +" stp %[t0], %[t1], [%[d], #0];" \ +" stp %[t2], %[t3], [%[d], #16];" \ +" stp %[t4], %[t5], [%[d], #32];" \ +" b 1f;" \ +" .align 5;" \ +" ldr %[t6], [%[s], #0];" \ +" ldp %[t0], %[t1], [%[s], #8];" \ +" ldp %[t2], %[t3], [%[s], #24];" \ +" ldp %[t4], %[t5], [%[s], #40];" \ +" str %[t6], [%[d]], #8;" \ +" b 2b;" \ +" .align 5;" \ +" ldp %[t0], %[t1], [%[s], #0];" \ +" ldp %[t2], %[t3], [%[s], #16];" \ +" ldp %[t4], %[t5], [%[s], #32];" \ +" ldp %[t6], %[t7], [%[s], #48];" \ +" stp %[t0], %[t1], [%[d], #0];" \ +" stp %[t2], %[t3], [%[d], #16];" \ +" stp %[t4], %[t5], [%[d], #32];" \ +" stp %[t6], %[t7], [%[d], #48];" \ +"1:" \ + \ + : [s]"+r"(from), [d]"+r"(to), [cnt]"+r"(count), \ + [t0]"=&r"(tmp0), [t1]"=&r"(tmp1), [t2]"=&r"(tmp2), [t3]"=&r"(tmp3), \ + [t4]"=&r"(tmp4), [t5]"=&r"(tmp5), [t6]"=&r"(tmp6), [t7]"=&r"(tmp7) \ + : \ + : "memory", "cc"); \ +} + static void pd_conjoint_words(HeapWord* from, HeapWord* to, size_t count) { - (void)memmove(to, from, count * HeapWordSize); + __asm volatile( "prfm pldl1strm, [%[s], #0];" :: [s]"r"(from) : "memory"); + if (__builtin_expect(count <= 8, 1)) { + COPY_SMALL(from, to, count); + return; + } + _Copy_conjoint_words(from, to, count); } static void pd_disjoint_words(HeapWord* from, HeapWord* to, size_t count) { - switch (count) { - case 8: to[7] = from[7]; - case 7: to[6] = from[6]; - case 6: to[5] = from[5]; - case 5: to[4] = from[4]; - case 4: to[3] = from[3]; - case 3: to[2] = from[2]; - case 2: to[1] = from[1]; - case 1: to[0] = from[0]; - case 0: break; - default: - (void)memcpy(to, from, count * HeapWordSize); - break; + if (__builtin_constant_p(count)) { + memcpy(to, from, count * sizeof(HeapWord)); + return; } + __asm volatile( "prfm pldl1strm, [%[s], #0];" :: [s]"r"(from) : "memory"); + if (__builtin_expect(count <= 8, 1)) { + COPY_SMALL(from, to, count); + return; + } + _Copy_disjoint_words(from, to, count); } static void pd_disjoint_words_atomic(HeapWord* from, HeapWord* to, size_t count) { - switch (count) { - case 8: to[7] = from[7]; - case 7: to[6] = from[6]; - case 6: to[5] = from[5]; - case 5: to[4] = from[4]; - case 4: to[3] = from[3]; - case 3: to[2] = from[2]; - case 2: to[1] = from[1]; - case 1: to[0] = from[0]; - case 0: break; - default: - while (count-- > 0) { - *to++ = *from++; - } - break; + __asm volatile( "prfm pldl1strm, [%[s], #0];" :: [s]"r"(from) : "memory"); + if (__builtin_expect(count <= 8, 1)) { + COPY_SMALL(from, to, count); + return; } + _Copy_disjoint_words(from, to, count); } static void pd_aligned_conjoint_words(HeapWord* from, HeapWord* to, size_t count) {
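COPY_SMALL above dispatches on the word count with a computed branch: adr takes the address of the local label, add adds count << 5, and br jumps into a table of 32-byte-aligned (.align 5) load/store stubs, one per count from 0 to 8. Semantically it replaces the Duff-style switch the old code used; a portable sketch of that equivalent logic:

    #include <cstddef>
    #include <cstdint>

    typedef intptr_t HeapWord;  // stand-in for HotSpot's HeapWord

    // Equivalent of the 0..8-word fast path; the asm version reaches the
    // right case directly via the count-scaled branch instead of a switch.
    static void copy_small(const HeapWord* from, HeapWord* to, size_t count) {
      switch (count) {
        case 8: to[7] = from[7];  // intentional fall-through below
        case 7: to[6] = from[6];
        case 6: to[5] = from[5];
        case 5: to[4] = from[4];
        case 4: to[3] = from[3];
        case 3: to[2] = from[2];
        case 2: to[1] = from[1];
        case 1: to[0] = from[0];
        case 0: break;
      }
    }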
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/os_cpu/linux_aarch64/vm/copy_linux_aarch64.s Thu Mar 17 17:03:20 2016 +0000 @@ -0,0 +1,236 @@ +/* + * Copyright (c) 2016, Linaro Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + .global _Copy_conjoint_words + .global _Copy_disjoint_words + +s .req x0 +d .req x1 +count .req x2 +t0 .req x3 +t1 .req x4 +t2 .req x5 +t3 .req x6 +t4 .req x7 +t5 .req x8 +t6 .req x9 +t7 .req x10 + + .align 6 +_Copy_disjoint_words: + // Ensure 2 word aligned + tbz s, #3, fwd_copy_aligned + ldr t0, [s], #8 + str t0, [d], #8 + sub count, count, #1 + +fwd_copy_aligned: + // Bias s & d so we only pre index on the last copy + sub s, s, #16 + sub d, d, #16 + + ldp t0, t1, [s, #16] + ldp t2, t3, [s, #32] + ldp t4, t5, [s, #48] + ldp t6, t7, [s, #64]! + + subs count, count, #16 + blo fwd_copy_drain + +fwd_copy_again: + prfm pldl1keep, [s, #256] + stp t0, t1, [d, #16] + ldp t0, t1, [s, #16] + stp t2, t3, [d, #32] + ldp t2, t3, [s, #32] + stp t4, t5, [d, #48] + ldp t4, t5, [s, #48] + stp t6, t7, [d, #64]! + ldp t6, t7, [s, #64]! + subs count, count, #8 + bhs fwd_copy_again + +fwd_copy_drain: + stp t0, t1, [d, #16] + stp t2, t3, [d, #32] + stp t4, t5, [d, #48] + stp t6, t7, [d, #64]! + + // count is now -8..-1 for 0..7 words to copy + adr t0, 0f + add t0, t0, count, lsl #5 + br t0 + + .align 5 + ret // -8 == 0 words + .align 5 + ldr t0, [s, #16] // -7 == 1 word + str t0, [d, #16] + ret + .align 5 + ldp t0, t1, [s, #16] // -6 = 2 words + stp t0, t1, [d, #16] + ret + .align 5 + ldp t0, t1, [s, #16] // -5 = 3 words + ldr t2, [s, #32] + stp t0, t1, [d, #16] + str t2, [d, #32] + ret + .align 5 + ldp t0, t1, [s, #16] // -4 = 4 words + ldp t2, t3, [s, #32] + stp t0, t1, [d, #16] + stp t2, t3, [d, #32] + ret + .align 5 + ldp t0, t1, [s, #16] // -3 = 5 words + ldp t2, t3, [s, #32] + ldr t4, [s, #48] + stp t0, t1, [d, #16] + stp t2, t3, [d, #32] + str t4, [d, #48] + ret + .align 5 + ldp t0, t1, [s, #16] // -2 = 6 words + ldp t2, t3, [s, #32] + ldp t4, t5, [s, #48] + stp t0, t1, [d, #16] + stp t2, t3, [d, #32] + stp t4, t5, [d, #48] + ret + .align 5 + ldp t0, t1, [s, #16] // -1 = 7 words + ldp t2, t3, [s, #32] + ldp t4, t5, [s, #48] + ldr t6, [s, #64] + stp t0, t1, [d, #16] + stp t2, t3, [d, #32] + stp t4, t5, [d, #48] + str t6, [d, #64] + // Is always aligned here, code for 7 words is one instruction + // too large so it just falls through. + .align 5 +0: + ret + + .align 6 +_Copy_conjoint_words: + sub t0, d, s + cmp t0, count, lsl #3 + bhs _Copy_disjoint_words + + add s, s, count, lsl #3 + add d, d, count, lsl #3 + + // Ensure 2 word aligned + tbz s, #3, bwd_copy_aligned + ldr t0, [s, #-8]! + str t0, [d, #-8]! + sub count, count, #1 + +bwd_copy_aligned: + ldp t0, t1, [s, #-16] + ldp t2, t3, [s, #-32] + ldp t4, t5, [s, #-48] + ldp t6, t7, [s, #-64]! + + subs count, count, #16 + blo bwd_copy_drain + +bwd_copy_again: + prfm pldl1keep, [s, #-256] + stp t0, t1, [d, #-16] + ldp t0, t1, [s, #-16] + stp t2, t3, [d, #-32] + ldp t2, t3, [s, #-32] + stp t4, t5, [d, #-48] + ldp t4, t5, [s, #-48] + stp t6, t7, [d, #-64]! + ldp t6, t7, [s, #-64]! + subs count, count, #8 + bhs bwd_copy_again + +bwd_copy_drain: + stp t0, t1, [d, #-16] + stp t2, t3, [d, #-32] + stp t4, t5, [d, #-48] + stp t6, t7, [d, #-64]! + + // count is now -8..-1 for 0..7 words to copy + adr t0, 0f + add t0, t0, count, lsl #5 + br t0 + + .align 5 + ret // -8 == 0 words + .align 5 + ldr t0, [s, #-8] // -7 == 1 word + str t0, [d, #-8] + ret + .align 5 + ldp t0, t1, [s, #-16] // -6 = 2 words + stp t0, t1, [d, #-16] + ret + .align 5 + ldp t0, t1, [s, #-16] // -5 = 3 words + ldr t2, [s, #-24] + stp t0, t1, [d, #-16] + str t2, [d, #-24] + ret + .align 5 + ldp t0, t1, [s, #-16] // -4 = 4 words + ldp t2, t3, [s, #-32] + stp t0, t1, [d, #-16] + stp t2, t3, [d, #-32] + ret + .align 5 + ldp t0, t1, [s, #-16] // -3 = 5 words + ldp t2, t3, [s, #-32] + ldr t4, [s, #-40] + stp t0, t1, [d, #-16] + stp t2, t3, [d, #-32] + str t4, [d, #-40] + ret + .align 5 + ldp t0, t1, [s, #-16] // -2 = 6 words + ldp t2, t3, [s, #-32] + ldp t4, t5, [s, #-48] + stp t0, t1, [d, #-16] + stp t2, t3, [d, #-32] + stp t4, t5, [d, #-48] + ret + .align 5 + ldp t0, t1, [s, #-16] // -1 = 7 words + ldp t2, t3, [s, #-32] + ldp t4, t5, [s, #-48] + ldr t6, [s, #-56] + stp t0, t1, [d, #-16] + stp t2, t3, [d, #-32] + stp t4, t5, [d, #-48] + str t6, [d, #-56] + // Is always aligned here, code for 7 words is one instruction + // too large so it just falls through. + .align 5 +0: + ret
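The first three instructions of _Copy_conjoint_words choose the copy direction with a single unsigned comparison: if (d - s), interpreted as unsigned, is at least count*8 bytes, a forward copy cannot overwrite unread source words, so control transfers to _Copy_disjoint_words; otherwise both pointers are biased to the end and the copy runs backwards. The same test in portable C++, assuming 8-byte words:

    #include <cstddef>
    #include <cstdint>

    static void conjoint_words(const int64_t* s, int64_t* d, size_t count) {
      // Unsigned wrap-around makes one compare cover both "d below s" and
      // "d at least count words above s" -- exactly the asm's cmp/bhs pair.
      uintptr_t delta = (uintptr_t)d - (uintptr_t)s;
      if (delta >= count * sizeof(int64_t)) {
        for (size_t i = 0; i < count; i++) d[i] = s[i];          // forward
      } else {
        for (size_t i = count; i > 0; i--) d[i - 1] = s[i - 1];  // backward
      }
    }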
--- a/src/os_cpu/linux_zero/vm/os_linux_zero.cpp Thu Feb 25 14:59:44 2016 +0000 +++ b/src/os_cpu/linux_zero/vm/os_linux_zero.cpp Thu Mar 17 17:03:20 2016 +0000 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003, 2015, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2003, 2016, Oracle and/or its affiliates. All rights reserved. * Copyright 2007, 2008, 2009, 2010 Red Hat, Inc. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * @@ -65,6 +65,7 @@ frame os::get_sender_for_C_frame(frame* fr) { ShouldNotCallThis(); + return frame(NULL, NULL); // silence compile warning. } frame os::current_frame() { @@ -102,6 +103,7 @@ address os::Linux::ucontext_get_pc(const ucontext_t* uc) { ShouldNotCallThis(); + return NULL; // silence compile warnings } void os::Linux::ucontext_set_pc(ucontext_t * uc, address pc) { @@ -112,10 +114,12 @@ intptr_t** ret_sp, intptr_t** ret_fp) { ShouldNotCallThis(); + return NULL; // silence compile warnings } frame os::fetch_frame_from_context(const void* ucVoid) { ShouldNotCallThis(); + return frame(NULL, NULL); // silence compile warnings } extern "C" JNIEXPORT int @@ -262,11 +266,16 @@ } #endif // !PRODUCT - const char *fmt = "caught unhandled signal %d"; char buf[64]; - sprintf(buf, fmt, sig); + sprintf(buf, "caught unhandled signal %d", sig); + +// Silence -Wformat-security warning for fatal() +PRAGMA_DIAG_PUSH +PRAGMA_FORMAT_NONLITERAL_IGNORED fatal(buf); +PRAGMA_DIAG_POP + return true; // silence compiler warnings } void os::Linux::init_thread_fpu_state(void) { @@ -275,6 +284,7 @@ int os::Linux::get_fpu_control_word() { ShouldNotCallThis(); + return -1; // silence compile warnings } void os::Linux::set_fpu_control_word(int fpu) { @@ -419,6 +429,7 @@ extern "C" { int SpinPause() { + return -1; // silence compile warnings }
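The zero-port fixes above append dummy return statements after ShouldNotCallThis() so that compilers which cannot prove the guard never returns stop warning about missing return values. A sketch of the pattern (should_not_call_this is a hypothetical stand-in; HotSpot's macro is not uniformly marked noreturn, hence the explicit returns):

    #include <cstdio>
    #include <cstdlib>

    static void should_not_call_this() {
      std::fprintf(stderr, "ShouldNotCallThis()\n");
      std::abort();  // never returns, but not every compiler can see that
    }

    static int get_fpu_control_word() {
      should_not_call_this();
      return -1;  // unreachable; present only to silence -Wreturn-type
    }

    int main() {
      (void)&get_fpu_control_word;  // referenced, not called
      return 0;
    }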
--- a/src/os_cpu/linux_zero/vm/thread_linux_zero.hpp Thu Feb 25 14:59:44 2016 +0000 +++ b/src/os_cpu/linux_zero/vm/thread_linux_zero.hpp Thu Mar 17 17:03:20 2016 +0000 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2000, 2016, Oracle and/or its affiliates. All rights reserved. * Copyright 2007, 2008, 2009, 2010 Red Hat, Inc. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * @@ -110,6 +110,7 @@ void* ucontext, bool isInJava) { ShouldNotCallThis(); + return false; // silence compile warning } // These routines are only used on cpu architectures that
--- a/src/os_cpu/solaris_sparc/vm/vm_version_solaris_sparc.cpp Thu Feb 25 14:59:44 2016 +0000 +++ b/src/os_cpu/solaris_sparc/vm/vm_version_solaris_sparc.cpp Thu Mar 17 17:03:20 2016 +0000 @@ -264,6 +264,7 @@ // We need to keep these here as long as we have to build on Solaris // versions before 10. + #ifndef SI_ARCHITECTURE_32 #define SI_ARCHITECTURE_32 516 /* basic 32-bit SI_ARCHITECTURE */ #endif @@ -272,36 +273,87 @@ #define SI_ARCHITECTURE_64 517 /* basic 64-bit SI_ARCHITECTURE */ #endif -static void do_sysinfo(int si, const char* string, int* features, int mask) { - char tmp; - size_t bufsize = sysinfo(si, &tmp, 1); +#ifndef SI_CPUBRAND +#define SI_CPUBRAND 523 /* return cpu brand string */ +#endif - // All SI defines used below must be supported. - guarantee(bufsize != -1, "must be supported"); +class Sysinfo { + char* _string; +public: + Sysinfo(int si) : _string(NULL) { + char tmp; + size_t bufsize = sysinfo(si, &tmp, 1); - char* buf = (char*) os::malloc(bufsize, mtInternal); - - if (buf == NULL) - return; + if (bufsize != -1) { + char* buf = (char*) os::malloc(bufsize, mtInternal); + if (buf != NULL) { + if (sysinfo(si, buf, bufsize) == bufsize) { + _string = buf; + } else { + os::free(buf); + } + } + } + } - if (sysinfo(si, buf, bufsize) == bufsize) { - // Compare the string. - if (strcmp(buf, string) == 0) { - *features |= mask; + ~Sysinfo() { + if (_string != NULL) { + os::free(_string); } } - os::free(buf); -} + const char* value() const { + return _string; + } + + bool valid() const { + return _string != NULL; + } + + bool match(const char* s) const { + return valid() ? strcmp(_string, s) == 0 : false; + } + + bool match_substring(const char* s) const { + return valid() ? strstr(_string, s) != NULL : false; + } +}; + +class Sysconf { + int _value; +public: + Sysconf(int sc) : _value(-1) { + _value = sysconf(sc); + } + bool valid() const { + return _value != -1; + } + int value() const { + return _value; + } +}; + + +#ifndef _SC_DCACHE_LINESZ +#define _SC_DCACHE_LINESZ 508 /* Data cache line size */ +#endif + +#ifndef _SC_L2CACHE_LINESZ +#define _SC_L2CACHE_LINESZ 527 /* Size of L2 cache line */ +#endif int VM_Version::platform_features(int features) { assert(os::Solaris::supports_getisax(), "getisax() must be available"); // Check 32-bit architecture. - do_sysinfo(SI_ARCHITECTURE_32, "sparc", &features, v8_instructions_m); + if (Sysinfo(SI_ARCHITECTURE_32).match("sparc")) { + features |= v8_instructions_m; + } // Check 64-bit architecture. - do_sysinfo(SI_ARCHITECTURE_64, "sparcv9", &features, generic_v9_m); + if (Sysinfo(SI_ARCHITECTURE_64).match("sparcv9")) { + features |= generic_v9_m; + } // Extract valid instruction set extensions. uint_t avs[2]; @@ -388,67 +440,63 @@ if (av & AV_SPARC_SHA512) features |= sha512_instruction_m; // Determine the machine type. - do_sysinfo(SI_MACHINE, "sun4v", &features, sun4v_m); + if (Sysinfo(SI_MACHINE).match("sun4v")) { + features |= sun4v_m; + } - { - // Using kstat to determine the machine type. + bool use_solaris_12_api = false; + Sysinfo impl(SI_CPUBRAND); + if (impl.valid()) { + // If SI_CPUBRAND works, that means Solaris 12 API to get the cache line sizes + // is available to us as well + use_solaris_12_api = true; + features |= parse_features(impl.value()); + } else { + // Otherwise use kstat to determine the machine type. kstat_ctl_t* kc = kstat_open(); kstat_t* ksp = kstat_lookup(kc, (char*)"cpu_info", -1, NULL); - const char* implementation = "UNKNOWN"; + const char* implementation; + bool has_implementation = false; if (ksp != NULL) { if (kstat_read(kc, ksp, NULL) != -1 && ksp->ks_data != NULL) { kstat_named_t* knm = (kstat_named_t *)ksp->ks_data; for (int i = 0; i < ksp->ks_ndata; i++) { if (strcmp((const char*)&(knm[i].name),"implementation") == 0) { implementation = KSTAT_NAMED_STR_PTR(&knm[i]); + has_implementation = true; #ifndef PRODUCT if (PrintMiscellaneous && Verbose) { tty->print_cr("cpu_info.implementation: %s", implementation); } #endif - // Convert to UPPER case before compare. - char* impl = os::strdup_check_oom(implementation); - - for (int i = 0; impl[i] != 0; i++) - impl[i] = (char)toupper((uint)impl[i]); - - if (strstr(impl, "SPARC64") != NULL) { - features |= sparc64_family_m; - } else if (strstr(impl, "SPARC-M") != NULL) { - // M-series SPARC is based on T-series. - features |= (M_family_m | T_family_m); - } else if (strstr(impl, "SPARC-T") != NULL) { - features |= T_family_m; - if (strstr(impl, "SPARC-T1") != NULL) { - features |= T1_model_m; - } - } else { - if (strstr(impl, "SPARC") == NULL) { -#ifndef PRODUCT - // kstat on Solaris 8 virtual machines (branded zones) - // returns "(unsupported)" implementation. Solaris 8 is not - // supported anymore, but include this check to be on the - // safe side. - warning("kstat cpu_info implementation = '%s', assume generic SPARC", impl); -#endif - implementation = "SPARC"; - } - } - os::free((void*)impl); + features |= parse_features(implementation); break; } } // for( } } - assert(strcmp(implementation, "UNKNOWN") != 0, - "unknown cpu info (changed kstat interface?)"); + assert(has_implementation, "unknown cpu info (changed kstat interface?)"); kstat_close(kc); } - // Figure out cache line sizes using PICL - PICL picl((features & sparc64_family_m) != 0, (features & sun4v_m) != 0); - _L1_data_cache_line_size = picl.L1_data_cache_line_size(); - _L2_data_cache_line_size = picl.L2_data_cache_line_size(); + bool is_sun4v = (features & sun4v_m) != 0; + if (use_solaris_12_api && is_sun4v) { + // If Solaris 12 API is supported and it's sun4v use sysconf() to get the cache line sizes + Sysconf l1_dcache_line_size(_SC_DCACHE_LINESZ); + if (l1_dcache_line_size.valid()) { + _L1_data_cache_line_size = l1_dcache_line_size.value(); + } + Sysconf l2_dcache_line_size(_SC_L2CACHE_LINESZ); + if (l2_dcache_line_size.valid()) { + _L2_data_cache_line_size = l2_dcache_line_size.value(); + } + } else { + // Otherwise figure out the cache line sizes using PICL + bool is_fujitsu = (features & sparc64_family_m) != 0; + PICL picl(is_fujitsu, is_sun4v); + _L1_data_cache_line_size = picl.L1_data_cache_line_size(); + _L2_data_cache_line_size = picl.L2_data_cache_line_size(); + } return features; }
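The new Sysinfo class wraps sysinfo(2)'s query-size/allocate/re-query protocol in an RAII object, so feature checks collapse to one-liners as in the hunk above. A reduced standalone version (Solaris-only; plain malloc/free stand in for os::malloc and os::free):

    #include <sys/systeminfo.h>  // Solaris sysinfo(2)
    #include <cstdio>
    #include <cstdlib>
    #include <cstring>

    class Sysinfo {
      char* _string;
     public:
      explicit Sysinfo(int si) : _string(NULL) {
        char tmp;
        long bufsize = sysinfo(si, &tmp, 1);  // ask for the required size
        if (bufsize != -1) {
          char* buf = (char*)std::malloc((size_t)bufsize);
          if (buf != NULL && sysinfo(si, buf, bufsize) == bufsize) {
            _string = buf;
          } else {
            std::free(buf);  // free(NULL) is a no-op
          }
        }
      }
      ~Sysinfo() { std::free(_string); }  // released on every path
      bool valid() const { return _string != NULL; }
      bool match(const char* s) const { return valid() && std::strcmp(_string, s) == 0; }
    };

    int main() {
      // Same shape as the feature checks in the diff:
      if (Sysinfo(SI_MACHINE).match("sun4v")) std::printf("sun4v\n");
      return 0;
    }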
--- a/src/share/vm/c1/c1_Canonicalizer.cpp Thu Feb 25 14:59:44 2016 +0000 +++ b/src/share/vm/c1/c1_Canonicalizer.cpp Thu Mar 17 17:03:20 2016 +0000 @@ -257,7 +257,38 @@ } } -void Canonicalizer::do_LoadIndexed (LoadIndexed* x) {} +void Canonicalizer::do_LoadIndexed (LoadIndexed* x) { + StableArrayConstant* array = x->array()->type()->as_StableArrayConstant(); + IntConstant* index = x->index()->type()->as_IntConstant(); + + assert(array == NULL || FoldStableValues, "not enabled"); + + // Constant fold loads from stable arrays. + if (array != NULL && index != NULL) { + jint idx = index->value(); + if (idx < 0 || idx >= array->value()->length()) { + // Leave the load as is. The range check will handle it. + return; + } + + ciConstant field_val = array->value()->element_value(idx); + if (!field_val.is_null_or_zero()) { + jint dimension = array->dimension(); + assert(dimension <= array->value()->array_type()->dimension(), "inconsistent info"); + ValueType* value = NULL; + if (dimension > 1) { + // Preserve information about the dimension for the element. + assert(field_val.as_object()->is_array(), "not an array"); + value = new StableArrayConstant(field_val.as_object()->as_array(), dimension - 1); + } else { + assert(dimension == 1, "sanity"); + value = as_ValueType(field_val); + } + set_canonical(new Constant(value)); + } + } +} + void Canonicalizer::do_StoreIndexed (StoreIndexed* x) { // If a value is going to be stored into a field or array some of // the conversions emitted by javac are unneeded because the fields @@ -471,7 +502,7 @@ InstanceConstant* c = x->argument_at(0)->type()->as_InstanceConstant(); if (c != NULL && !c->value()->is_null_object()) { // ciInstance::java_mirror_type() returns non-NULL only for Java mirrors - ciType* t = c->value()->as_instance()->java_mirror_type(); + ciType* t = c->value()->java_mirror_type(); if (t->is_klass()) { // substitute cls.isInstance(obj) of a constant Class into // an InstantOf instruction @@ -487,6 +518,17 @@ } break; } + case vmIntrinsics::_isPrimitive : { + assert(x->number_of_arguments() == 1, "wrong type"); + + // Class.isPrimitive is known on constant classes: + InstanceConstant* c = x->argument_at(0)->type()->as_InstanceConstant(); + if (c != NULL && !c->value()->is_null_object()) { + ciType* t = c->value()->java_mirror_type(); + set_constant(t->is_primitive_type()); + } + break; + } } }
--- a/src/share/vm/c1/c1_Compiler.cpp Thu Feb 25 14:59:44 2016 +0000 +++ b/src/share/vm/c1/c1_Compiler.cpp Thu Mar 17 17:03:20 2016 +0000 @@ -148,6 +148,7 @@ case vmIntrinsics::_longBitsToDouble: case vmIntrinsics::_getClass: case vmIntrinsics::_isInstance: + case vmIntrinsics::_isPrimitive: case vmIntrinsics::_currentThread: case vmIntrinsics::_dabs: case vmIntrinsics::_dsqrt: @@ -228,8 +229,6 @@ case vmIntrinsics::_getCharStringU: case vmIntrinsics::_putCharStringU: #ifdef TRACE_HAVE_INTRINSICS - case vmIntrinsics::_classID: - case vmIntrinsics::_threadID: case vmIntrinsics::_counterTime: #endif break;
--- a/src/share/vm/c1/c1_GraphBuilder.cpp Thu Feb 25 14:59:44 2016 +0000 +++ b/src/share/vm/c1/c1_GraphBuilder.cpp Thu Mar 17 17:03:20 2016 +0000 @@ -1519,6 +1519,29 @@ append(new Return(x)); } +Value GraphBuilder::make_constant(ciConstant field_value, ciField* field) { + BasicType field_type = field_value.basic_type(); + ValueType* value = as_ValueType(field_value); + + // Attach dimension info to stable arrays. + if (FoldStableValues && + field->is_stable() && field_type == T_ARRAY && !field_value.is_null_or_zero()) { + ciArray* array = field_value.as_object()->as_array(); + jint dimension = field->type()->as_array_klass()->dimension(); + value = new StableArrayConstant(array, dimension); + } + + switch (field_type) { + case T_ARRAY: + case T_OBJECT: + if (field_value.as_object()->should_be_constant()) { + return new Constant(value); + } + return NULL; // Not a constant. + default: + return new Constant(value); + } +} void GraphBuilder::access_field(Bytecodes::Code code) { bool will_link; @@ -1563,22 +1586,13 @@ switch (code) { case Bytecodes::_getstatic: { // check for compile-time constants, i.e., initialized static final fields - Instruction* constant = NULL; + Value constant = NULL; if (field->is_constant() && !PatchALot) { - ciConstant field_val = field->constant_value(); - BasicType field_type = field_val.basic_type(); - switch (field_type) { - case T_ARRAY: - case T_OBJECT: - if (field_val.as_object()->should_be_constant()) { - constant = new Constant(as_ValueType(field_val)); - } - break; - - default: - constant = new Constant(as_ValueType(field_val)); - } + ciConstant field_value = field->constant_value(); // Stable static fields are checked for non-default values in ciField::initialize_from(). + assert(!field->is_stable() || !field_value.is_null_or_zero(), + "stable static w/ default value shouldn't be a constant"); + constant = make_constant(field_value, field); } if (constant != NULL) { push(type, append(constant)); @@ -1591,38 +1605,29 @@ } break; } - case Bytecodes::_putstatic: - { Value val = pop(type); - if (state_before == NULL) { - state_before = copy_state_for_exception(); - } - append(new StoreField(append(obj), offset, field, val, true, state_before, needs_patching)); + case Bytecodes::_putstatic: { + Value val = pop(type); + if (state_before == NULL) { + state_before = copy_state_for_exception(); } + append(new StoreField(append(obj), offset, field, val, true, state_before, needs_patching)); break; + } case Bytecodes::_getfield: { // Check for compile-time constants, i.e., trusted final non-static fields. - Instruction* constant = NULL; + Value constant = NULL; obj = apop(); ObjectType* obj_type = obj->type()->as_ObjectType(); if (obj_type->is_constant() && !PatchALot) { ciObject* const_oop = obj_type->constant_value(); if (!const_oop->is_null_object() && const_oop->is_loaded()) { if (field->is_constant()) { - ciConstant field_val = field->constant_value_of(const_oop); - BasicType field_type = field_val.basic_type(); - switch (field_type) { - case T_ARRAY: - case T_OBJECT: - if (field_val.as_object()->should_be_constant()) { - constant = new Constant(as_ValueType(field_val)); - } - break; - default: - constant = new Constant(as_ValueType(field_val)); - } - if (FoldStableValues && field->is_stable() && field_val.is_null_or_zero()) { + ciConstant field_value = field->constant_value_of(const_oop); + if (FoldStableValues && field->is_stable() && field_value.is_null_or_zero()) { // Stable field with default value can't be constant. constant = NULL; + } else { + constant = make_constant(field_value, field); } } else { // For CallSite objects treat the target field as a compile time constant. @@ -3942,7 +3947,7 @@ bool GraphBuilder::try_method_handle_inline(ciMethod* callee) { - ValueStack* state_before = state()->copy_for_parsing(); + ValueStack* state_before = copy_state_before(); vmIntrinsics::ID iid = callee->intrinsic_id(); switch (iid) { case vmIntrinsics::_invokeBasic: @@ -4032,7 +4037,7 @@ fatal("unexpected intrinsic %d: %s", iid, vmIntrinsics::name_at(iid)); break; } - set_state(state_before); + set_state(state_before->copy_for_parsing()); return false; }
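make_constant() above centralizes the rule that getstatic and getfield previously duplicated: primitive values always fold, object and array values fold only when should_be_constant(), and a stable field folds only once its value is no longer the default. A compact sketch of that decision (the struct fields are illustrative stand-ins for the ci* queries, not HotSpot types):

    #include <cstdio>

    struct FieldValue {
      bool is_object;           // T_OBJECT / T_ARRAY
      bool is_null_or_zero;     // still the default value
      bool should_be_constant;  // trusted object constant
    };

    static bool folds(const FieldValue& v, bool field_is_stable, bool fold_stable_values) {
      if (fold_stable_values && field_is_stable && v.is_null_or_zero)
        return false;                 // stable field at its default: not constant yet
      if (v.is_object)
        return v.should_be_constant;  // objects fold only when trusted
      return true;                    // primitives always fold
    }

    int main() {
      FieldValue stable_default = {true, true, true};
      std::printf("%d\n", (int)folds(stable_default, true, true));  // prints 0
      return 0;
    }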
--- a/src/share/vm/c1/c1_GraphBuilder.hpp Thu Feb 25 14:59:44 2016 +0000 +++ b/src/share/vm/c1/c1_GraphBuilder.hpp Thu Mar 17 17:03:20 2016 +0000 @@ -276,6 +276,7 @@ void iterate_all_blocks(bool start_in_current_block_for_inlining = false); Dependencies* dependency_recorder() const; // = compilation()->dependencies() bool direct_compare(ciKlass* k); + Value make_constant(ciConstant value, ciField* field); void kill_all();
--- a/src/share/vm/c1/c1_LIRGenerator.cpp	Thu Feb 25 14:59:44 2016 +0000
+++ b/src/share/vm/c1/c1_LIRGenerator.cpp	Thu Mar 17 17:03:20 2016 +0000
@@ -43,6 +43,9 @@
 #if INCLUDE_ALL_GCS
 #include "gc/g1/heapRegion.hpp"
 #endif // INCLUDE_ALL_GCS
+#ifdef TRACE_HAVE_INTRINSICS
+#include "trace/traceMacros.hpp"
+#endif
 
 #ifdef ASSERT
 #define __ gen()->lir(__FILE__, __LINE__)->
@@ -1293,6 +1296,25 @@
   __ move_wide(new LIR_Address(temp, in_bytes(Klass::java_mirror_offset()), T_OBJECT), result);
 }
 
+// java.lang.Class::isPrimitive()
+void LIRGenerator::do_isPrimitive(Intrinsic* x) {
+  assert(x->number_of_arguments() == 1, "wrong type");
+
+  LIRItem rcvr(x->argument_at(0), this);
+  rcvr.load_item();
+  LIR_Opr temp = new_register(T_METADATA);
+  LIR_Opr result = rlock_result(x);
+
+  CodeEmitInfo* info = NULL;
+  if (x->needs_null_check()) {
+    info = state_for(x);
+  }
+
+  __ move(new LIR_Address(rcvr.result(), java_lang_Class::klass_offset_in_bytes(), T_ADDRESS), temp, info);
+  __ cmp(lir_cond_notEqual, temp, LIR_OprFact::intConst(0));
+  __ cmove(lir_cond_notEqual, LIR_OprFact::intConst(0), LIR_OprFact::intConst(1), result, T_BOOLEAN);
+}
+
 // Example: Thread.currentThread()
 void LIRGenerator::do_currentThread(Intrinsic* x) {
@@ -3067,42 +3089,7 @@
   __ move(reg, result);
 }
 
-#ifdef TRACE_HAVE_INTRINSICS
-void LIRGenerator::do_ThreadIDIntrinsic(Intrinsic* x) {
-  LIR_Opr thread = getThreadPointer();
-  LIR_Opr osthread = new_pointer_register();
-  __ move(new LIR_Address(thread, in_bytes(JavaThread::osthread_offset()), osthread->type()), osthread);
-  size_t thread_id_size = OSThread::thread_id_size();
-  if (thread_id_size == (size_t) BytesPerLong) {
-    LIR_Opr id = new_register(T_LONG);
-    __ move(new LIR_Address(osthread, in_bytes(OSThread::thread_id_offset()), T_LONG), id);
-    __ convert(Bytecodes::_l2i, id, rlock_result(x));
-  } else if (thread_id_size == (size_t) BytesPerInt) {
-    __ move(new LIR_Address(osthread, in_bytes(OSThread::thread_id_offset()), T_INT), rlock_result(x));
-  } else {
-    ShouldNotReachHere();
-  }
-}
-
-void LIRGenerator::do_ClassIDIntrinsic(Intrinsic* x) {
-  CodeEmitInfo* info = state_for(x);
-  CodeEmitInfo* info2 = new CodeEmitInfo(info); // Clone for the second null check
-  BasicType klass_pointer_type = NOT_LP64(T_INT) LP64_ONLY(T_LONG);
-  assert(info != NULL, "must have info");
-  LIRItem arg(x->argument_at(1), this);
-  arg.load_item();
-  LIR_Opr klass = new_pointer_register();
-  __ move(new LIR_Address(arg.result(), java_lang_Class::klass_offset_in_bytes(), klass_pointer_type), klass, info);
-  LIR_Opr id = new_register(T_LONG);
-  ByteSize offset = TRACE_ID_OFFSET;
-  LIR_Address* trace_id_addr = new LIR_Address(klass, in_bytes(offset), T_LONG);
-  __ move(trace_id_addr, id);
-  __ logical_or(id, LIR_OprFact::longConst(0x01l), id);
-  __ store(id, trace_id_addr);
-  __ logical_and(id, LIR_OprFact::longConst(~0x3l), id);
-  __ move(id, rlock_result(x));
-}
-#endif
+
 
 void LIRGenerator::do_Intrinsic(Intrinsic* x) {
   switch (x->id()) {
@@ -3115,8 +3102,6 @@
   }
 
 #ifdef TRACE_HAVE_INTRINSICS
-  case vmIntrinsics::_threadID: do_ThreadIDIntrinsic(x); break;
-  case vmIntrinsics::_classID: do_ClassIDIntrinsic(x); break;
   case vmIntrinsics::_counterTime:
     do_RuntimeCall(CAST_FROM_FN_PTR(address, TRACE_TIME_METHOD), x);
     break;
@@ -3132,6 +3117,7 @@
   case vmIntrinsics::_Object_init:    do_RegisterFinalizer(x); break;
   case vmIntrinsics::_isInstance:     do_isInstance(x);    break;
+  case vmIntrinsics::_isPrimitive:    do_isPrimitive(x);   break;
   case vmIntrinsics::_getClass:       do_getClass(x);      break;
   case vmIntrinsics::_currentThread:  do_currentThread(x); break;
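The new do_isPrimitive lowering relies on a HotSpot invariant: the java.lang.Class mirror of a primitive type (int.class, void.class, and friends) has no associated Klass*, so the intrinsic loads the mirror's klass field and materializes klass == NULL as the boolean result, without a branch. A minimal standalone sketch of that decision, using hypothetical stand-in types rather than the real VM headers:

    #include <cstddef>

    struct Klass;            // stand-in for HotSpot's Klass
    struct ClassMirror {     // stand-in for a java.lang.Class oop
      Klass* _klass;         // NULL for primitive mirrors
    };

    // Models the LIR above: temp = mirror->klass;
    // result = (temp != 0) ? 0 : 1, i.e. a null test on the klass field.
    bool is_primitive(const ClassMirror* mirror) {
      return mirror->_klass == NULL;
    }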
--- a/src/share/vm/c1/c1_LIRGenerator.hpp	Thu Feb 25 14:59:44 2016 +0000
+++ b/src/share/vm/c1/c1_LIRGenerator.hpp	Thu Mar 17 17:03:20 2016 +0000
@@ -246,6 +246,7 @@
 
   void do_RegisterFinalizer(Intrinsic* x);
   void do_isInstance(Intrinsic* x);
+  void do_isPrimitive(Intrinsic* x);
   void do_getClass(Intrinsic* x);
   void do_currentThread(Intrinsic* x);
   void do_MathIntrinsic(Intrinsic* x);
@@ -440,10 +441,7 @@
   void do_SwitchRanges(SwitchRangeArray* x, LIR_Opr value, BlockBegin* default_sux);
 
   void do_RuntimeCall(address routine, Intrinsic* x);
-#ifdef TRACE_HAVE_INTRINSICS
-  void do_ThreadIDIntrinsic(Intrinsic* x);
-  void do_ClassIDIntrinsic(Intrinsic* x);
-#endif
+
   ciKlass* profile_type(ciMethodData* md, int md_first_offset, int md_offset, intptr_t profiled_k,
                         Value arg, LIR_Opr& mdp, bool not_null, ciKlass* signature_at_call_k,
                         ciKlass* callee_signature_k);
--- a/src/share/vm/c1/c1_Runtime1.cpp	Thu Feb 25 14:59:44 2016 +0000
+++ b/src/share/vm/c1/c1_Runtime1.cpp	Thu Mar 17 17:03:20 2016 +0000
@@ -335,6 +335,7 @@
   NOT_PRODUCT(_new_instance_slowcase_cnt++;)
 
   assert(klass->is_klass(), "not a class");
+  Handle holder(THREAD, klass->klass_holder()); // keep the klass alive
   instanceKlassHandle h(thread, klass);
   h->check_valid_for_instantiation(true, CHECK);
   // make sure klass is initialized
@@ -370,6 +371,7 @@
   // anymore after new_objArray() and no GC can happen before.
   // (This may have to change if this code changes!)
   assert(array_klass->is_klass(), "not a class");
+  Handle holder(THREAD, array_klass->klass_holder()); // keep the klass alive
   Klass* elem_klass = ObjArrayKlass::cast(array_klass)->element_klass();
   objArrayOop obj = oopFactory::new_objArray(elem_klass, length, CHECK);
   thread->set_vm_result(obj);
@@ -386,6 +388,7 @@
 
   assert(klass->is_klass(), "not a class");
   assert(rank >= 1, "rank must be nonzero");
+  Handle holder(THREAD, klass->klass_holder()); // keep the klass alive
   oop obj = ArrayKlass::cast(klass)->multi_allocate(rank, dims, CHECK);
   thread->set_vm_result(obj);
 JRT_END
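Each of the three slow paths above can reach a safepoint inside the allocation, and with class unloading a Klass* is only guaranteed to stay valid while its holder (the defining loader's oop, or the mirror for anonymous classes) is strongly reachable. Wrapping klass->klass_holder() in a Handle pins the holder for the scope of the call. A hedged sketch of the idiom using the usual HotSpot entry macros; this is not standalone code, and example_new_instance is a made-up name:

    JRT_ENTRY(void, example_new_instance(JavaThread* thread, Klass* klass))
      // Handles are scanned as GC roots, so pinning the holder prevents
      // the klass from being unloaded across any safepoint below.
      Handle holder(THREAD, klass->klass_holder()); // keep the klass alive
      instanceKlassHandle h(thread, klass);
      oop obj = h->allocate_instance(CHECK);        // may safepoint / GC
      thread->set_vm_result(obj);
    JRT_END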
--- a/src/share/vm/c1/c1_ValueType.hpp	Thu Feb 25 14:59:44 2016 +0000
+++ b/src/share/vm/c1/c1_ValueType.hpp	Thu Mar 17 17:03:20 2016 +0000
@@ -45,6 +45,7 @@
 class ObjectConstant;
 class ArrayType;
 class ArrayConstant;
+class StableArrayConstant;
 class InstanceType;
 class InstanceConstant;
 class MetadataType;
@@ -168,6 +169,7 @@
   virtual MethodConstant*      as_MethodConstant()     { return NULL; }
   virtual MethodDataConstant*  as_MethodDataConstant() { return NULL; }
   virtual ArrayConstant*       as_ArrayConstant()      { return NULL; }
+  virtual StableArrayConstant* as_StableArrayConstant() { return NULL; }
   virtual AddressConstant*     as_AddressConstant()    { return NULL; }
 
   // type operations
@@ -355,6 +357,20 @@
   virtual ciType* exact_type() const;
 };
 
+class StableArrayConstant: public ArrayConstant {
+ private:
+  jint _dimension;
+
+ public:
+  StableArrayConstant(ciArray* value, jint dimension) : ArrayConstant(value) {
+    assert(dimension > 0, "not a stable array");
+    _dimension = dimension;
+  }
+
+  jint dimension() const { return _dimension; }
+
+  virtual StableArrayConstant* as_StableArrayConstant() { return this; }
+};
 
 class InstanceType: public ObjectType {
  public:
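StableArrayConstant follows the ValueType hierarchy's existing as_XxxConstant() downcast convention, so C1 can ask any constant whether it came through a @Stable array and, if so, through how many dimensions folding is permitted. A hypothetical use of the new hook (the surrounding code is assumed, not from this changeset):

    // 'type' is some ValueType*; plain ArrayConstants answer NULL here.
    if (StableArrayConstant* sac = type->as_StableArrayConstant()) {
      jint foldable_dims = sac->dimension();  // > 0 by the constructor's assert
      // ... fold loads through up to 'foldable_dims' array dimensions ...
    }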
--- a/src/share/vm/ci/ciArray.cpp	Thu Feb 25 14:59:44 2016 +0000
+++ b/src/share/vm/ci/ciArray.cpp	Thu Mar 17 17:03:20 2016 +0000
@@ -107,8 +107,9 @@
   intptr_t header = arrayOopDesc::base_offset_in_bytes(elembt);
   intptr_t index = (element_offset - header) >> shift;
   intptr_t offset = header + ((intptr_t)index << shift);
-  if (offset != element_offset || index != (jint)index)
+  if (offset != element_offset || index != (jint)index || index < 0 || index >= length()) {
     return ciConstant();
+  }
   return element_value((jint) index);
 }
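The old guard only checked that element_offset was exactly element-aligned and that the index fit in a jint: an offset below the array header yields a negative index, and an aligned offset past the payload yields index >= length(), both previously accepted. A self-contained check mirroring the arithmetic, with header and shift values assumed purely for illustration (and an arithmetic right shift assumed for negative values):

    #include <cassert>
    #include <cstdint>

    // Mirrors ciArray's offset->index computation and the tightened guard.
    bool valid_element(intptr_t element_offset, intptr_t header, int shift,
                       int32_t length) {
      intptr_t index  = (element_offset - header) >> shift;
      intptr_t offset = header + (index << shift);
      return offset == element_offset &&
             index == (int32_t)index &&
             index >= 0 && index < length;   // the two new conditions
    }

    int main() {
      // int[] with a 16-byte header: offset 12 is aligned but maps to index -1.
      assert(!valid_element(12, 16, 2, 10));  // rejected only by the new guard
      assert( valid_element(20, 16, 2, 10));  // index 1, in range
      return 0;
    }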
--- a/src/share/vm/ci/ciMethodData.cpp	Thu Feb 25 14:59:44 2016 +0000
+++ b/src/share/vm/ci/ciMethodData.cpp	Thu Mar 17 17:03:20 2016 +0000
@@ -81,7 +81,7 @@
 
 void ciMethodData::load_extra_data() {
   MethodData* mdo = get_MethodData();
-  MutexLocker(mdo->extra_data_lock());
+  MutexLocker ml(mdo->extra_data_lock());
 
   // speculative trap entries also hold a pointer to a Method so need to be translated
   DataLayout* dp_src  = mdo->extra_data_base();
@@ -103,16 +103,13 @@
     switch(tag) {
     case DataLayout::speculative_trap_data_tag: {
-      ciSpeculativeTrapData* data_dst = new ciSpeculativeTrapData(dp_dst);
-      SpeculativeTrapData* data_src = new SpeculativeTrapData(dp_src);
-
-      data_dst->translate_from(data_src);
+      ciSpeculativeTrapData data_dst(dp_dst);
+      SpeculativeTrapData data_src(dp_src);
 
-#ifdef ASSERT
-      SpeculativeTrapData* data_src2 = new SpeculativeTrapData(dp_src);
-      assert(data_src2->method() == data_src->method() && data_src2->bci() == data_src->bci(), "entries changed while translating");
-#endif
-
+      { // During translation a safepoint can happen or VM lock can be taken (e.g., Compile_lock).
+        MutexUnlocker ml(mdo->extra_data_lock());
+        data_dst.translate_from(&data_src);
+      }
       break;
     }
     case DataLayout::bit_data_tag:
@@ -120,9 +117,11 @@
     case DataLayout::no_tag:
     case DataLayout::arg_info_data_tag:
       // An empty slot or ArgInfoData entry marks the end of the trap data
-      return;
+      {
+        return; // Need a block to avoid SS compiler bug
+      }
     default:
-      fatal("bad tag = %d", dp_dst->tag());
+      fatal("bad tag = %d", tag);
     }
   }
 }
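The one-token fix in the first hunk repairs a classic C++ pitfall: MutexLocker(mdo->extra_data_lock()) constructs an unnamed temporary that is destroyed at the end of the statement, so the lock was released immediately and the rest of load_extra_data() ran unsynchronized. The same bug, reproduced standalone with std::lock_guard:

    #include <mutex>

    std::mutex m;
    std::mutex& extra_data_lock() { return m; }

    void broken() {
      std::lock_guard<std::mutex>(extra_data_lock());  // temporary: unlocked at the ';'
      // ... runs without the lock held ...
    }

    void fixed() {
      std::lock_guard<std::mutex> ml(extra_data_lock());  // held until scope exit
      // ... protected region ...
    }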
--- a/src/share/vm/classfile/classFileParser.cpp	Thu Feb 25 14:59:44 2016 +0000
+++ b/src/share/vm/classfile/classFileParser.cpp	Thu Mar 17 17:03:20 2016 +0000
@@ -5380,7 +5380,7 @@
     }
   }
 
-  TRACE_INIT_ID(ik);
+  TRACE_INIT_KLASS_ID(ik);
 
   // If we reach here, all is well.
   // Now remove the InstanceKlass* from the _klass_to_deallocate field
--- a/src/share/vm/classfile/classLoader.cpp	Thu Feb 25 14:59:44 2016 +0000
+++ b/src/share/vm/classfile/classLoader.cpp	Thu Mar 17 17:03:20 2016 +0000
@@ -37,6 +37,7 @@
 #include "gc/shared/generation.hpp"
 #include "interpreter/bytecodeStream.hpp"
 #include "interpreter/oopMapCache.hpp"
+#include "logging/logTag.hpp"
 #include "memory/allocation.inline.hpp"
 #include "memory/filemap.hpp"
 #include "memory/oopFactory.hpp"
@@ -417,34 +418,30 @@
 #if INCLUDE_CDS
 void ClassLoader::exit_with_path_failure(const char* error, const char* message) {
   assert(DumpSharedSpaces, "only called at dump time");
-  tty->print_cr("Hint: enable -XX:+TraceClassPaths to diagnose the failure");
+  tty->print_cr("Hint: enable -Xlog:classpath=info to diagnose the failure");
   vm_exit_during_initialization(error, message);
 }
 #endif
 
-void ClassLoader::trace_class_path(outputStream* out, const char* msg, const char* name) {
-  if (!TraceClassPaths) {
-    return;
-  }
-
-  if (msg) {
-    out->print("%s", msg);
-  }
-  if (name) {
-    if (strlen(name) < 256) {
-      out->print("%s", name);
-    } else {
-      // For very long paths, we need to print each character separately,
-      // as print_cr() has a length limit
-      while (name[0] != '\0') {
-        out->print("%c", name[0]);
-        name++;
+void ClassLoader::trace_class_path(const char* msg, const char* name) {
+  if (log_is_enabled(Info, classpath)) {
+    ResourceMark rm;
+    outputStream* out = LogHandle(classpath)::info_stream();
+    if (msg) {
+      out->print("%s", msg);
+    }
+    if (name) {
+      if (strlen(name) < 256) {
+        out->print("%s", name);
+      } else {
+        // For very long paths, we need to print each character separately,
+        // as print_cr() has a length limit
+        while (name[0] != '\0') {
+          out->print("%c", name[0]);
+          name++;
+        }
       }
     }
-  }
-  if (msg && msg[0] == '[') {
-    out->print_cr("]");
-  } else {
     out->cr();
   }
 }
@@ -470,11 +467,13 @@
 void ClassLoader::setup_bootstrap_search_path() {
   assert(_first_entry == NULL, "should not setup bootstrap class search path twice");
   const char* sys_class_path = Arguments::get_sysclasspath();
+  const char* java_class_path = Arguments::get_appclasspath();
   if (PrintSharedArchiveAndExit) {
     // Don't print sys_class_path - this is the bootcp of this current VM process, not necessarily
     // the same as the bootcp of the shared archive.
   } else {
-    trace_class_path(tty, "[Bootstrap loader class path=", sys_class_path);
+    trace_class_path("bootstrap loader class path=", sys_class_path);
+    trace_class_path("classpath: ", java_class_path);
   }
 #if INCLUDE_CDS
   if (DumpSharedSpaces) {
@@ -578,9 +577,7 @@
       }
     }
   }
-  if (TraceClassPaths) {
-    tty->print_cr("[Opened %s]", path);
-  }
+  log_info(classpath)("opened: %s", path);
   log_info(classload)("opened: %s", path);
 } else {
   // Directory
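These hunks migrate the old -XX:+TraceClassPaths output to Unified Logging (JEP 158) under the classpath tag, as the rewritten hint text spells out. With this changeset applied, the same diagnostics would be requested roughly like this (jar and main class are placeholders):

    java -Xlog:classpath=info -cp app.jar Main    # formerly -XX:+TraceClassPaths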
--- a/src/share/vm/classfile/classLoader.hpp	Thu Feb 25 14:59:44 2016 +0000
+++ b/src/share/vm/classfile/classLoader.hpp	Thu Mar 17 17:03:20 2016 +0000
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -331,7 +331,7 @@
   static void exit_with_path_failure(const char* error, const char* message);
 #endif
 
-  static void trace_class_path(outputStream* out, const char* msg, const char* name = NULL);
+  static void trace_class_path(const char* msg, const char* name = NULL);
 
   // VM monitoring and management support
   static jlong classloader_time_ms();
--- a/src/share/vm/classfile/dictionary.cpp	Thu Feb 25 14:59:44 2016 +0000
+++ b/src/share/vm/classfile/dictionary.cpp	Thu Mar 17 17:03:20 2016 +0000
@@ -135,8 +135,10 @@
     // via a store to _pd_set.
     OrderAccess::release_store_ptr(&_pd_set, new_head);
   }
-  if (TraceProtectionDomainVerification && WizardMode) {
-    print();
+  if (log_is_enabled(Trace, protectiondomain)) {
+    ResourceMark rm;
+    outputStream* log = LogHandle(protectiondomain)::trace_stream();
+    print_count(log);
   }
 }
--- a/src/share/vm/classfile/dictionary.hpp	Thu Feb 25 14:59:44 2016 +0000
+++ b/src/share/vm/classfile/dictionary.hpp	Thu Mar 17 17:03:20 2016 +0000
@@ -29,6 +29,7 @@
 #include "oops/instanceKlass.hpp"
 #include "oops/oop.hpp"
 #include "utilities/hashtable.hpp"
+#include "utilities/ostream.hpp"
 
 class DictionaryEntry;
 class PSPromotionManager;
@@ -323,14 +324,14 @@
     return (klass->name() == class_name && _loader_data == loader_data);
   }
 
-  void print() {
+  void print_count(outputStream *st) {
     int count = 0;
     for (ProtectionDomainEntry* current = _pd_set; current != NULL; current = current->_next) {
       count++;
     }
-    tty->print_cr("pd set = #%d", count);
+    st->print_cr("pd set count = #%d", count);
   }
 };
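The dictionary.cpp and dictionary.hpp changes above apply the same Unified Logging migration to -XX:+TraceProtectionDomainVerification: the pd-set count moves to the protectiondomain tag at trace level (the old flag additionally required WizardMode for the count), while the package-access check in systemDictionary.cpp below logs at debug level. Presumably enabled with:

    java -Xlog:protectiondomain=trace ...    # formerly -XX:+TraceProtectionDomainVerification with WizardMode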
--- a/src/share/vm/classfile/klassFactory.cpp	Thu Feb 25 14:59:44 2016 +0000
+++ b/src/share/vm/classfile/klassFactory.cpp	Thu Mar 17 17:03:20 2016 +0000
@@ -1,5 +1,5 @@
 /*
-* Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+* Copyright (c) 2015, 2016, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@@ -29,6 +29,7 @@
 #include "classfile/klassFactory.hpp"
 #include "memory/resourceArea.hpp"
 #include "prims/jvmtiEnvBase.hpp"
+#include "trace/traceMacros.hpp"
 
 static ClassFileStream* prologue(ClassFileStream* stream,
                                  Symbol* name,
@@ -136,5 +137,7 @@
     result->set_cached_class_file(cached_class_file);
   }
 
+  TRACE_KLASS_CREATION(result, parser, THREAD);
+
   return result;
 }
--- a/src/share/vm/classfile/sharedPathsMiscInfo.cpp	Thu Feb 25 14:59:44 2016 +0000
+++ b/src/share/vm/classfile/sharedPathsMiscInfo.cpp	Thu Mar 17 17:03:20 2016 +0000
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, 2016, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -26,15 +26,15 @@
 #include "classfile/classLoader.hpp"
 #include "classfile/classLoaderData.inline.hpp"
 #include "classfile/sharedPathsMiscInfo.hpp"
+#include "logging/log.hpp"
 #include "memory/allocation.inline.hpp"
 #include "memory/metaspaceShared.hpp"
 #include "runtime/arguments.hpp"
+#include "utilities/ostream.hpp"
 
 void SharedPathsMiscInfo::add_path(const char* path, int type) {
-  if (TraceClassPaths) {
-    tty->print("[type=%s] ", type_name(type));
-    trace_class_path("[Add misc shared path ", path);
-  }
+  log_info(classpath)("type=%s ", type_name(type));
+  ClassLoader::trace_class_path("add misc shared path ", path);
   write(path, strlen(path) + 1);
   write_jint(jint(type));
 }
@@ -67,11 +67,29 @@
 }
 
 bool SharedPathsMiscInfo::fail(const char* msg, const char* name) {
-  ClassLoader::trace_class_path(tty, msg, name);
+  ClassLoader::trace_class_path(msg, name);
   MetaspaceShared::set_archive_loading_failed();
   return false;
 }
 
+void SharedPathsMiscInfo::print_path(int type, const char* path) {
+  ResourceMark rm;
+  outputStream* out = LogHandle(classpath)::info_stream();
+  switch (type) {
+  case BOOT:
+    out->print("Expecting -Dsun.boot.class.path=%s", path);
+    break;
+  case NON_EXIST:
+    out->print("Expecting that %s does not exist", path);
+    break;
+  case REQUIRED:
+    out->print("Expecting that file %s must exist and is not altered", path);
+    break;
+  default:
+    ShouldNotReachHere();
+  }
+}
+
 bool SharedPathsMiscInfo::check() {
   // The whole buffer must be 0 terminated so that we can use strlen and strcmp
   // without fear.
@@ -90,17 +108,14 @@
     if (!read_jint(&type)) {
       return fail("Corrupted archive file header");
     }
-    if (TraceClassPaths) {
-      tty->print("[type=%s ", type_name(type));
-      print_path(tty, type, path);
-      tty->print_cr("]");
-    }
+    log_info(classpath)("type=%s ", type_name(type));
+    print_path(type, path);
     if (!check(type, path)) {
       if (!PrintSharedArchiveAndExit) {
        return false;
      }
    } else {
-      trace_class_path("[ok");
+      ClassLoader::trace_class_path("ok");
    }
  }
--- a/src/share/vm/classfile/sharedPathsMiscInfo.hpp	Thu Feb 25 14:59:44 2016 +0000
+++ b/src/share/vm/classfile/sharedPathsMiscInfo.hpp	Thu Mar 17 17:03:20 2016 +0000
@@ -64,9 +64,6 @@
   void write(const void* ptr, size_t size);
   bool read(void* ptr, size_t size);
 
-  static void trace_class_path(const char* msg, const char* name = NULL) {
-    ClassLoader::trace_class_path(tty, msg, name);
-  }
 protected:
   static bool fail(const char* msg, const char* name = NULL);
   virtual bool check(jint type, const char* path);
@@ -144,21 +141,7 @@
     }
   }
 
-  virtual void print_path(outputStream* out, int type, const char* path) {
-    switch (type) {
-    case BOOT:
-      out->print("Expecting -Dsun.boot.class.path=%s", path);
-      break;
-    case NON_EXIST:
-      out->print("Expecting that %s does not exist", path);
-      break;
-    case REQUIRED:
-      out->print("Expecting that file %s must exist and is not altered", path);
-      break;
-    default:
-      ShouldNotReachHere();
-    }
-  }
+  virtual void print_path(int type, const char* path);
 
   bool check();
 
   bool read_jint(jint *ptr) {
--- a/src/share/vm/classfile/stringTable.cpp	Thu Feb 25 14:59:44 2016 +0000
+++ b/src/share/vm/classfile/stringTable.cpp	Thu Mar 17 17:03:20 2016 +0000
@@ -200,7 +200,6 @@
   return string;
 }
 
-
 oop StringTable::intern(Handle string_or_null, jchar* name,
                         int len, TRAPS) {
   oop found_string = lookup_shared(name, len);
@@ -214,7 +213,9 @@
 
   // Found
   if (found_string != NULL) {
-    ensure_string_alive(found_string);
+    if (found_string != string_or_null()) {
+      ensure_string_alive(found_string);
+    }
     return found_string;
   }
@@ -249,7 +250,9 @@
                                     hashValue, CHECK_NULL);
   }
 
-  ensure_string_alive(added_or_found);
+  if (added_or_found != string()) {
+    ensure_string_alive(added_or_found);
+  }
 
   return added_or_found;
 }
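ensure_string_alive() is the keep-alive barrier for concurrent collectors: an oop fetched from the shared StringTable must be reported live to the marker (G1's SATB discipline) or a concurrent cycle could treat it as dead between lookup and use. The two guards above skip the barrier in the one case where it is redundant; a sketch of the reasoning, reusing the HotSpot names from the hunk rather than standalone code:

    // Found an interned string in the table.
    if (found_string != NULL) {
      if (found_string != string_or_null()) {
        // A different, shared instance: mark it reachable for the
        // concurrent marker before returning it.
        ensure_string_alive(found_string);
      }
      // else: the table returned the caller's own string, which the
      // caller's Handle already keeps alive, so no barrier is needed.
      return found_string;
    }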
--- a/src/share/vm/classfile/systemDictionary.cpp	Thu Feb 25 14:59:44 2016 +0000
+++ b/src/share/vm/classfile/systemDictionary.cpp	Thu Mar 17 17:03:20 2016 +0000
@@ -430,12 +430,15 @@
   // Now we have to call back to java to check if the initating class has access
   JavaValue result(T_VOID);
-  if (TraceProtectionDomainVerification) {
+  if (log_is_enabled(Debug, protectiondomain)) {
+    ResourceMark rm;
     // Print out trace information
-    tty->print_cr("Checking package access");
-    tty->print(" - class loader: "); class_loader()->print_value_on(tty); tty->cr();
-    tty->print(" - protection domain: "); protection_domain()->print_value_on(tty); tty->cr();
-    tty->print(" - loading: "); klass()->print_value_on(tty); tty->cr();
+    outputStream* log = LogHandle(protectiondomain)::debug_stream();
+    log->print_cr("Checking package access");
+    log->print("class loader: "); class_loader()->print_value_on(log);
+    log->print(" protection domain: "); protection_domain()->print_value_on(log);
+    log->print(" loading: "); klass()->print_value_on(log);
+    log->cr();
   }
 
   KlassHandle system_loader(THREAD, SystemDictionary::ClassLoader_klass());
@@ -448,13 +451,10 @@
                          protection_domain,
                          THREAD);
 
-  if (TraceProtectionDomainVerification) {
-    if (HAS_PENDING_EXCEPTION) {
-      tty->print_cr(" -> DENIED !!!!!!!!!!!!!!!!!!!!!");
-    } else {
-      tty->print_cr(" -> granted");
-    }
-    tty->cr();
+  if (HAS_PENDING_EXCEPTION) {
+    log_debug(protectiondomain)("DENIED !!!!!!!!!!!