diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index e4c05acb684b7..d5958853701ca 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -384,6 +384,7 @@ jobs:
- build-windows-aarch64
- test-linux-x64
- test-macos-x64
+ - test-macos-aarch64
- test-windows-x64
steps:
diff --git a/SECURITY.md b/SECURITY.md
new file mode 100644
index 0000000000000..f4c5e7e67cb46
--- /dev/null
+++ b/SECURITY.md
@@ -0,0 +1,3 @@
+# JDK Vulnerabilities
+
+Please follow the process outlined in the [OpenJDK Vulnerability Policy](https://openjdk.org/groups/vulnerability/report) to disclose vulnerabilities in the JDK.
diff --git a/doc/building.html b/doc/building.html
index 707531553124b..c91d876246cde 100644
--- a/doc/building.html
+++ b/doc/building.html
@@ -614,10 +614,9 @@
clang
--with-toolchain-type=clang
.
Apple Xcode
The oldest supported version of Xcode is 13.0.
-You will need the Xcode command line developer tools to be able to
-build the JDK. (Actually, only the command line tools are
-needed, not the IDE.) The simplest way to install these is to run:
-xcode-select --install
+You will need to download Xcode, either from the App Store or from the
+Xcode Releases website, where specific versions can be easily located.
When updating Xcode, it is advisable to keep an older version for
building the JDK. To use a specific version of Xcode you have multiple
options:
diff --git a/doc/building.md b/doc/building.md
index 51ac0cad7d98b..47ad9e7c72b4c 100644
--- a/doc/building.md
+++ b/doc/building.md
@@ -422,13 +422,9 @@ To use clang instead of gcc on Linux, use `--with-toolchain-type=clang`.
The oldest supported version of Xcode is 13.0.
-You will need the Xcode command line developer tools to be able to build the
-JDK. (Actually, *only* the command line tools are needed, not the IDE.) The
-simplest way to install these is to run:
-
-```
-xcode-select --install
-```
+You will need to download Xcode, either from the App Store or from the
+[Xcode Releases](https://xcodereleases.com) website, where specific versions
+can be easily located.
When updating Xcode, it is advisable to keep an older version for building the
JDK. To use a specific version of Xcode you have multiple options:
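For orientation, here is a minimal sketch of two common ways to point the build at a specific Xcode installation once it is downloaded; the application path is only an example:

```
# Switch the system-wide default (affects all developer tools on the machine):
sudo xcode-select --switch /Applications/Xcode_13.0.app

# Or select it per shell, without changing the system default:
export DEVELOPER_DIR=/Applications/Xcode_13.0.app/Contents/Developer
bash configure
```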
diff --git a/make/Main.gmk b/make/Main.gmk
index 46150839f8029..4b3efaf651ec9 100644
--- a/make/Main.gmk
+++ b/make/Main.gmk
@@ -568,6 +568,10 @@ $(eval $(call SetupTarget, update-build-docs, \
MAKEFILE := UpdateBuildDocs, \
))
+$(eval $(call SetupTarget, update-sleef-source, \
+ MAKEFILE := UpdateSleefSource, \
+))
+
$(eval $(call SetupTarget, update-x11wrappers, \
MAKEFILE := UpdateX11Wrappers, \
DEPS := java.base-copy buildtools-jdk, \
diff --git a/make/UpdateSleefSource.gmk b/make/UpdateSleefSource.gmk
new file mode 100644
index 0000000000000..37a28abcb85c2
--- /dev/null
+++ b/make/UpdateSleefSource.gmk
@@ -0,0 +1,153 @@
+#
+# Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
+# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+#
+# This code is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License version 2 only, as
+# published by the Free Software Foundation. Oracle designates this
+# particular file as subject to the "Classpath" exception as provided
+# by Oracle in the LICENSE file that accompanied this code.
+#
+# This code is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+# version 2 for more details (a copy is included in the LICENSE file that
+# accompanied this code).
+#
+# You should have received a copy of the GNU General Public License version
+# 2 along with this work; if not, write to the Free Software Foundation,
+# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+#
+# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+# or visit www.oracle.com if you need additional information or have any
+# questions.
+#
+
+################################################################################
+
+default: all
+
+include $(SPEC)
+include MakeBase.gmk
+
+include CopyFiles.gmk
+include Execute.gmk
+
+################################################################################
+# This file is responsible for updating the generated sleef source code files
+# that are checked in to the JDK repo, and that are actually used when building.
+# This target needs to be re-run every time the source code of libsleef is
+# updated from upstream.
+################################################################################
+
+ifneq ($(COMPILE_TYPE), cross)
+ $(error Only cross-compilation of libsleef is currently supported)
+endif
+
+ifeq ($(CMAKE), )
+ $(error CMake not found. Please install cmake and rerun configure)
+endif
+
+ifneq ($(OPENJDK_BUILD_OS), linux)
+ $(error This target is only supported on linux)
+endif
+
+SLEEF_SUPPORT_DIR := $(MAKESUPPORT_OUTPUTDIR)/sleef
+SLEEF_SOURCE_BASE_DIR := $(TOPDIR)/src/jdk.incubator.vector/linux/native/libsleef
+SLEEF_SOURCE_DIR := $(SLEEF_SOURCE_BASE_DIR)/upstream
+SLEEF_TARGET_DIR := $(SLEEF_SOURCE_BASE_DIR)/generated
+SLEEF_NATIVE_BUILD_DIR := $(SLEEF_SUPPORT_DIR)/native
+SLEEF_CROSS_BUILD_DIR := $(SLEEF_SUPPORT_DIR)/cross
+
+ifeq ($(OPENJDK_TARGET_CPU), aarch64)
+ CROSS_COMPILATION_FILENAMES := sleefinline_advsimd.h sleefinline_sve.h
+ EXTRA_CROSS_OPTIONS := -DSLEEF_ENFORCE_SVE=TRUE
+else ifeq ($(OPENJDK_TARGET_CPU), riscv64)
+ CROSS_COMPILATION_FILENAMES := sleefinline_rvvm1.h
+ EXTRA_CROSS_OPTIONS := -DSLEEF_ENFORCE_RVVM1=TRUE
+else
+ $(error Unsupported platform)
+endif
+CROSS_COMPILATION_SRC_FILES := $(addprefix $(SLEEF_CROSS_BUILD_DIR)/include/, \
+ $(CROSS_COMPILATION_FILENAMES))
+
+ifeq ($(TOOLCHAIN_TYPE), clang)
+ SLEEF_TOOLCHAIN_TYPE := llvm
+else
+ SLEEF_TOOLCHAIN_TYPE := $(TOOLCHAIN_TYPE)
+endif
+
+SLEEF_CMAKE_FILE := toolchains/$(OPENJDK_TARGET_CPU)-$(SLEEF_TOOLCHAIN_TYPE).cmake
+
+# We need to run CMake twice: first to configure the build, and then to
+# actually build. We do this for two build variants: a native build, and the
+# cross-compilation build.
+
+$(eval $(call SetupExecute, sleef_native_config, \
+ INFO := Configuring native sleef build, \
+ OUTPUT_DIR := $(SLEEF_NATIVE_BUILD_DIR), \
+ COMMAND := cd $(SLEEF_SOURCE_DIR) && $(CMAKE) -S . -B \
+ $(SLEEF_NATIVE_BUILD_DIR), \
+))
+
+TARGETS := $(sleef_native_config)
+
+$(eval $(call SetupExecute, sleef_native_build, \
+ INFO := Building native sleef, \
+ DEPS := $(sleef_native_config), \
+ OUTPUT_DIR := $(SLEEF_NATIVE_BUILD_DIR), \
+ COMMAND := cd $(SLEEF_SOURCE_DIR) && $(CMAKE) --build \
+ $(SLEEF_NATIVE_BUILD_DIR) -j, \
+))
+
+TARGETS += $(sleef_native_build)
+
+$(eval $(call SetupExecute, sleef_cross_config, \
+ INFO := Configuring cross-compiling sleef build, \
+ DEPS := $(sleef_native_build), \
+ OUTPUT_DIR := $(SLEEF_CROSS_BUILD_DIR), \
+ COMMAND := cd $(SLEEF_SOURCE_DIR) && $(CMAKE) -S . -B \
+ $(SLEEF_CROSS_BUILD_DIR) \
+ -DCMAKE_C_COMPILER=$(CC) \
+ -DCMAKE_TOOLCHAIN_FILE=$(SLEEF_CMAKE_FILE) \
+ -DNATIVE_BUILD_DIR=$(SLEEF_NATIVE_BUILD_DIR) \
+ -DSLEEF_BUILD_INLINE_HEADERS=TRUE \
+ $(EXTRA_CROSS_OPTIONS), \
+))
+
+TARGETS += $(sleef_cross_config)
+
+$(eval $(call SetupExecute, sleef_cross_build, \
+ INFO := Building cross-compiling sleef, \
+ DEPS := $(sleef_cross_config), \
+  OUTPUT_DIR := $(SLEEF_CROSS_BUILD_DIR), \
+ COMMAND := cd $(SLEEF_SOURCE_DIR) && $(CMAKE) --build \
+ $(SLEEF_CROSS_BUILD_DIR) -j, \
+))
+
+TARGETS += $(sleef_cross_build)
+
+$(CROSS_COMPILATION_SRC_FILES): $(sleef_cross_build)
+
+# Finally, copy the generated files (and one needed static file) into our
+# target directory.
+
+$(eval $(call SetupCopyFiles, copy_static_sleef_source, \
+ FILES := $(SLEEF_SOURCE_DIR)/src/common/misc.h, \
+ DEST := $(SLEEF_TARGET_DIR), \
+))
+
+TARGETS += $(copy_static_sleef_source)
+
+$(eval $(call SetupCopyFiles, copy_generated_sleef_source, \
+ FILES := $(CROSS_COMPILATION_SRC_FILES), \
+ DEST := $(SLEEF_TARGET_DIR), \
+))
+
+TARGETS += $(copy_generated_sleef_source)
+
+################################################################################
+
+all: $(TARGETS)
+
+.PHONY: all default
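A rough sketch of how this new target is meant to be driven, assuming a Linux build machine with cmake installed and an aarch64 cross-compilation toolchain already configured (the exact configure flags depend on the local devkit and are illustrative only):

```
# Configure a cross build; cmake must be found by configure (see basic_tools.m4 below)
bash configure --openjdk-target=aarch64-linux-gnu

# Regenerate the checked-in sleef inline headers after an upstream source update
make update-sleef-source

# The headers are copied into src/jdk.incubator.vector/linux/native/libsleef/generated
```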
diff --git a/make/autoconf/basic_tools.m4 b/make/autoconf/basic_tools.m4
index 6bfaecb2e6900..eceb0ae6cc44f 100644
--- a/make/autoconf/basic_tools.m4
+++ b/make/autoconf/basic_tools.m4
@@ -99,6 +99,7 @@ AC_DEFUN_ONCE([BASIC_SETUP_TOOLS],
UTIL_REQUIRE_SPECIAL(FGREP, [AC_PROG_FGREP])
# Optional tools, we can do without them
+ UTIL_LOOKUP_PROGS(CMAKE, cmake)
UTIL_LOOKUP_PROGS(DF, df)
UTIL_LOOKUP_PROGS(GIT, git)
UTIL_LOOKUP_PROGS(NICE, nice)
diff --git a/make/autoconf/spec.gmk.template b/make/autoconf/spec.gmk.template
index d5e08cdb64050..20b1d00aa893f 100644
--- a/make/autoconf/spec.gmk.template
+++ b/make/autoconf/spec.gmk.template
@@ -719,6 +719,7 @@ CCACHE := @CCACHE@
# CD is going away, but remains to cater for legacy makefiles.
CD := cd
CHMOD := @CHMOD@
+CMAKE := @CMAKE@
CODESIGN := @CODESIGN@
CP := @CP@
CUT := @CUT@
diff --git a/make/common/modules/LauncherCommon.gmk b/make/common/modules/LauncherCommon.gmk
index ef65f9e431f00..7c2cef58835c2 100644
--- a/make/common/modules/LauncherCommon.gmk
+++ b/make/common/modules/LauncherCommon.gmk
@@ -74,7 +74,7 @@ define SetupBuildLauncherBody
endif
ifneq ($$($1_MAIN_CLASS), )
- $1_JAVA_ARGS += -ms8m
+ $1_JAVA_ARGS += -Xms8m
$1_LAUNCHER_CLASS := -m $$($1_MAIN_MODULE)/$$($1_MAIN_CLASS)
endif
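For reference, `-ms` is an old undocumented alias while `-Xms` is the supported spelling of the initial-heap-size option, so the launcher arguments now use the documented form; a quick sanity check on any recent JDK:

```
java -Xms8m -version
```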
diff --git a/make/devkit/createAutoconfBundle.sh b/make/devkit/createAutoconfBundle.sh
index 861a0a47242c0..7363b9cd8a71a 100644
--- a/make/devkit/createAutoconfBundle.sh
+++ b/make/devkit/createAutoconfBundle.sh
@@ -1,6 +1,6 @@
#!/bin/bash -e
#
-# Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2018, 2024, Oracle and/or its affiliates. All rights reserved.
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
#
# This code is free software; you can redistribute it and/or modify it
@@ -25,50 +25,70 @@
#
# Create a bundle in the current directory, containing what's needed to run
-# the 'autoconf' program by the OpenJDK build.
+# the 'autoconf' program by the OpenJDK build. To override TARGET_PLATFORM
+# just set the variable before running this script.
# Autoconf depends on m4, so download and build that first.
AUTOCONF_VERSION=2.69
M4_VERSION=1.4.18
PACKAGE_VERSION=1.0.1
-TARGET_PLATFORM=linux_x86
+case `uname -s` in
+ Darwin)
+ os=macosx
+ ;;
+ Linux)
+ os=linux
+ ;;
+ CYGWIN*)
+ os=cygwin
+ ;;
+esac
+case `uname -m` in
+ arm64|aarch64)
+ arch=aarch64
+ ;;
+ amd64|x86_64|x64)
+ arch=x64
+ ;;
+esac
+TARGET_PLATFORM=${TARGET_PLATFORM:="${os}_${arch}"}
+
MODULE_NAME=autoconf-$TARGET_PLATFORM-$AUTOCONF_VERSION+$PACKAGE_VERSION
BUNDLE_NAME=$MODULE_NAME.tar.gz
-TMPDIR=`mktemp -d -t autoconfbundle-XXXX`
-trap "rm -rf \"$TMPDIR\"" EXIT
+SCRIPT_DIR="$(cd "$(dirname $0)" > /dev/null && pwd)"
+OUTPUT_ROOT="${SCRIPT_DIR}/../../build/autoconf"
-ORIG_DIR=`pwd`
-cd $TMPDIR
-OUTPUT_DIR=$TMPDIR/$MODULE_NAME
-mkdir -p $OUTPUT_DIR/usr
+cd $OUTPUT_ROOT
+IMAGE_DIR=$OUTPUT_ROOT/$MODULE_NAME
+mkdir -p $IMAGE_DIR/usr
# Download and build m4
if test "x$TARGET_PLATFORM" = xcygwin_x64; then
# On cygwin 64-bit, just copy the cygwin .exe file
- mkdir -p $OUTPUT_DIR/usr/bin
- cp /usr/bin/m4 $OUTPUT_DIR/usr/bin
+ mkdir -p $IMAGE_DIR/usr/bin
+ cp /usr/bin/m4 $IMAGE_DIR/usr/bin
elif test "x$TARGET_PLATFORM" = xcygwin_x86; then
# On cygwin 32-bit, just copy the cygwin .exe file
- mkdir -p $OUTPUT_DIR/usr/bin
- cp /usr/bin/m4 $OUTPUT_DIR/usr/bin
+ mkdir -p $IMAGE_DIR/usr/bin
+ cp /usr/bin/m4 $IMAGE_DIR/usr/bin
elif test "x$TARGET_PLATFORM" = xlinux_x64; then
M4_VERSION=1.4.13-5
wget http://yum.oracle.com/repo/OracleLinux/OL6/latest/x86_64/getPackage/m4-$M4_VERSION.el6.x86_64.rpm
- cd $OUTPUT_DIR
- rpm2cpio ../m4-$M4_VERSION.el6.x86_64.rpm | cpio -d -i
+ cd $IMAGE_DIR
+ rpm2cpio $OUTPUT_ROOT/m4-$M4_VERSION.el6.x86_64.rpm | cpio -d -i
elif test "x$TARGET_PLATFORM" = xlinux_x86; then
M4_VERSION=1.4.13-5
wget http://yum.oracle.com/repo/OracleLinux/OL6/latest/i386/getPackage/m4-$M4_VERSION.el6.i686.rpm
- cd $OUTPUT_DIR
- rpm2cpio ../m4-$M4_VERSION.el6.i686.rpm | cpio -d -i
+ cd $IMAGE_DIR
+ rpm2cpio $OUTPUT_ROOT/m4-$M4_VERSION.el6.i686.rpm | cpio -d -i
else
wget https://ftp.gnu.org/gnu/m4/m4-$M4_VERSION.tar.gz
tar xzf m4-$M4_VERSION.tar.gz
cd m4-$M4_VERSION
- ./configure --prefix=$OUTPUT_DIR/usr
+ ./configure --prefix=$IMAGE_DIR/usr CFLAGS="-w -Wno-everything"
make
make install
cd ..
@@ -79,15 +99,14 @@ fi
wget https://ftp.gnu.org/gnu/autoconf/autoconf-$AUTOCONF_VERSION.tar.gz
tar xzf autoconf-$AUTOCONF_VERSION.tar.gz
cd autoconf-$AUTOCONF_VERSION
-./configure --prefix=$OUTPUT_DIR/usr M4=$OUTPUT_DIR/usr/bin/m4
+./configure --prefix=$IMAGE_DIR/usr M4=$IMAGE_DIR/usr/bin/m4
make
make install
cd ..
-perl -pi -e "s!$OUTPUT_DIR/!./!" $OUTPUT_DIR/usr/bin/auto* $OUTPUT_DIR/usr/share/autoconf/autom4te.cfg
-cp $OUTPUT_DIR/usr/share/autoconf/autom4te.cfg $OUTPUT_DIR/autom4te.cfg
+perl -pi -e "s!$IMAGE_DIR/!./!" $IMAGE_DIR/usr/bin/auto* $IMAGE_DIR/usr/share/autoconf/autom4te.cfg
-cat > $OUTPUT_DIR/autoconf << EOF
+cat > $IMAGE_DIR/autoconf << EOF
#!/bin/bash
# Get an absolute path to this script
this_script_dir=\`dirname \$0\`
@@ -100,17 +119,10 @@ export AUTOHEADER="\$this_script_dir/usr/bin/autoheader"
export AC_MACRODIR="\$this_script_dir/usr/share/autoconf"
export autom4te_perllibdir="\$this_script_dir/usr/share/autoconf"
-autom4te_cfg=\$this_script_dir/usr/share/autoconf/autom4te.cfg
-cp \$this_script_dir/autom4te.cfg \$autom4te_cfg
-
-echo 'begin-language: "M4sugar"' >> \$autom4te_cfg
-echo "args: --prepend-include '"\$this_script_dir/usr/share/autoconf"'" >> \$autom4te_cfg
-echo 'end-language: "M4sugar"' >> \$autom4te_cfg
+PREPEND_INCLUDE="--prepend-include \$this_script_dir/usr/share/autoconf"
-exec \$this_script_dir/usr/bin/autoconf "\$@"
+exec \$this_script_dir/usr/bin/autoconf \$PREPEND_INCLUDE "\$@"
EOF
-chmod +x $OUTPUT_DIR/autoconf
-cd $OUTPUT_DIR
-tar -cvzf ../$BUNDLE_NAME *
-cd ..
-cp $BUNDLE_NAME "$ORIG_DIR"
+chmod +x $IMAGE_DIR/autoconf
+cd $IMAGE_DIR
+tar -cvzf $OUTPUT_ROOT/$BUNDLE_NAME *
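A usage sketch for the reworked script: the platform string is now auto-detected from `uname`, and per the new comment it can still be overridden by setting TARGET_PLATFORM up front (version numbers follow from the variables above):

```
# Run from the top of the JDK tree; the bundle lands under build/autoconf/
bash make/devkit/createAutoconfBundle.sh

# Or force a specific platform string:
TARGET_PLATFORM=linux_x64 bash make/devkit/createAutoconfBundle.sh

# Produces autoconf-<TARGET_PLATFORM>-2.69+1.0.1.tar.gz
```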
diff --git a/make/hotspot/gensrc/GensrcAdlc.gmk b/make/hotspot/gensrc/GensrcAdlc.gmk
index 8dada3cec0a1d..ddb2c3e33e513 100644
--- a/make/hotspot/gensrc/GensrcAdlc.gmk
+++ b/make/hotspot/gensrc/GensrcAdlc.gmk
@@ -200,6 +200,13 @@ ifeq ($(call check-jvm-feature, compiler2), true)
)))
endif
+ ifeq ($(call check-jvm-feature, g1gc), true)
+ AD_SRC_FILES += $(call uniq, $(wildcard $(foreach d, $(AD_SRC_ROOTS), \
+ $d/cpu/$(HOTSPOT_TARGET_CPU_ARCH)/gc/g1/g1_$(HOTSPOT_TARGET_CPU).ad \
+ $d/cpu/$(HOTSPOT_TARGET_CPU_ARCH)/gc/g1/g1_$(HOTSPOT_TARGET_CPU_ARCH).ad \
+ )))
+ endif
+
SINGLE_AD_SRCFILE := $(ADLC_SUPPORT_DIR)/all-ad-src.ad
INSERT_FILENAME_AWK_SCRIPT := \
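As a concrete reading of the new wildcard: with HOTSPOT_TARGET_CPU and HOTSPOT_TARGET_CPU_ARCH both set to aarch64, the two patterns collapse to the same G1-specific AD file, which is included only if it exists (path derived from the variables above, shown for illustration):

```
ls src/hotspot/cpu/aarch64/gc/g1/g1_aarch64.ad
```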
diff --git a/make/jdk/src/classes/build/tools/cldrconverter/CLDRConverter.java b/make/jdk/src/classes/build/tools/cldrconverter/CLDRConverter.java
index 0ca2a226a960c..8865e3908ae6b 100644
--- a/make/jdk/src/classes/build/tools/cldrconverter/CLDRConverter.java
+++ b/make/jdk/src/classes/build/tools/cldrconverter/CLDRConverter.java
@@ -786,7 +786,10 @@ private static Map<String, Object> extractZoneNames(Map<String, Object> map, Str
String tzKey = Optional.ofNullable((String)handlerSupplMeta.get(tzid))
.orElse(tzid);
// Follow link, if needed
- var tzLink = tzdbLinks.get(tzKey);
+ String tzLink = null;
+ for (var k = tzKey; tzdbLinks.containsKey(k);) {
+ k = tzLink = tzdbLinks.get(k);
+ }
if (tzLink == null && tzdbLinks.containsValue(tzKey)) {
// reverse link search
// this is needed as in tzdb, "America/Buenos_Aires" links to
@@ -1214,7 +1217,7 @@ private static void generateZoneName() throws Exception {
private static Set<String> getAvailableZoneIds() {
assert handlerMetaZones != null;
if (AVAILABLE_TZIDS == null) {
- AVAILABLE_TZIDS = new HashSet<>(ZoneId.getAvailableZoneIds());
+ AVAILABLE_TZIDS = new HashSet<>(Arrays.asList(TimeZone.getAvailableIDs()));
AVAILABLE_TZIDS.addAll(handlerMetaZones.keySet());
AVAILABLE_TZIDS.remove(MetaZonesParseHandler.NO_METAZONE_KEY);
}
@@ -1490,13 +1493,14 @@ private static void fillTZDBShortNames(String tzid, String[] names) {
/*
* Convert TZDB offsets to JDK's offsets, eg, "-08" to "GMT-08:00".
* If it cannot recognize the pattern, return the argument as is.
+ * Returning null results in generating the GMT format at runtime.
*/
private static String convertGMTName(String f) {
try {
- // Should pre-fill GMT format once COMPAT is gone.
- // Till then, fall back to GMT format at runtime, after COMPAT short
- // names are populated
- ZoneOffset.of(f);
+ if (!f.equals("%z")) {
+ // Validate if the format is an offset
+ ZoneOffset.of(f);
+ }
return null;
} catch (DateTimeException dte) {
// textual representation. return as is
diff --git a/make/jdk/src/classes/build/tools/tzdb/TzdbZoneRulesCompiler.java b/make/jdk/src/classes/build/tools/tzdb/TzdbZoneRulesCompiler.java
index 630d3a390d18a..426d0bb10ede1 100644
--- a/make/jdk/src/classes/build/tools/tzdb/TzdbZoneRulesCompiler.java
+++ b/make/jdk/src/classes/build/tools/tzdb/TzdbZoneRulesCompiler.java
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2012, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, 2024, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -273,7 +273,7 @@ private void outputFile(Path dstFile, String version,
// link version-region-rules
out.writeShort(builtZones.size());
for (Map.Entry<String, ZoneRules> entry : builtZones.entrySet()) {
- int regionIndex = Arrays.binarySearch(regionArray, entry.getKey());
+ int regionIndex = findRegionIndex(regionArray, entry.getKey());
int rulesIndex = rulesList.indexOf(entry.getValue());
out.writeShort(regionIndex);
out.writeShort(rulesIndex);
@@ -281,8 +281,8 @@ private void outputFile(Path dstFile, String version,
// alias-region
out.writeShort(links.size());
for (Map.Entry<String, String> entry : links.entrySet()) {
- int aliasIndex = Arrays.binarySearch(regionArray, entry.getKey());
- int regionIndex = Arrays.binarySearch(regionArray, entry.getValue());
+ int aliasIndex = findRegionIndex(regionArray, entry.getKey());
+ int regionIndex = findRegionIndex(regionArray, entry.getValue());
out.writeShort(aliasIndex);
out.writeShort(regionIndex);
}
@@ -294,6 +294,14 @@ private void outputFile(Path dstFile, String version,
}
}
+ private static int findRegionIndex(String[] regionArray, String region) {
+ int index = Arrays.binarySearch(regionArray, region);
+ if (index < 0) {
+ throw new IllegalArgumentException("Unknown region: " + region);
+ }
+ return index;
+ }
+
/** Whether to output verbose messages. */
private boolean verbose;
diff --git a/make/test/JtregNativeHotspot.gmk b/make/test/JtregNativeHotspot.gmk
index 8916eee90d7bc..97f2f12cb7639 100644
--- a/make/test/JtregNativeHotspot.gmk
+++ b/make/test/JtregNativeHotspot.gmk
@@ -885,7 +885,7 @@ BUILD_HOTSPOT_JTREG_EXECUTABLES_JDK_LIBS_exedaemonDestroy := java.base:libjvm
ifeq ($(call isTargetOs, windows), true)
BUILD_HOTSPOT_JTREG_EXECUTABLES_CFLAGS_exeFPRegs := -MT
- BUILD_HOTSPOT_JTREG_EXCLUDE += exesigtest.c libterminatedThread.c libTestJNI.c libCompleteExit.c libMonitorWithDeadObjectTest.c libTestPsig.c exeGetCreatedJavaVMs.c
+ BUILD_HOTSPOT_JTREG_EXCLUDE += exesigtest.c libterminatedThread.c libTestJNI.c libCompleteExit.c libMonitorWithDeadObjectTest.c libTestPsig.c exeGetCreatedJavaVMs.c libTestUnloadedClass.cpp
BUILD_HOTSPOT_JTREG_LIBRARIES_JDK_LIBS_libnativeStack := java.base:libjvm
BUILD_HOTSPOT_JTREG_LIBRARIES_JDK_LIBS_libVThreadEventTest := java.base:libjvm
else
@@ -1526,6 +1526,7 @@ else
BUILD_HOTSPOT_JTREG_LIBRARIES_LIBS_libCompleteExit += -lpthread
BUILD_HOTSPOT_JTREG_LIBRARIES_LIBS_libMonitorWithDeadObjectTest += -lpthread
BUILD_HOTSPOT_JTREG_LIBRARIES_LIBS_libnativeStack += -lpthread
+ BUILD_HOTSPOT_JTREG_LIBRARIES_LIBS_libTestUnloadedClass += -lpthread
BUILD_HOTSPOT_JTREG_LIBRARIES_JDK_LIBS_libVThreadEventTest := java.base:libjvm
BUILD_HOTSPOT_JTREG_EXECUTABLES_LIBS_exeGetCreatedJavaVMs := -lpthread
BUILD_HOTSPOT_JTREG_EXECUTABLES_JDK_LIBS_exeGetCreatedJavaVMs := java.base:libjvm
diff --git a/src/hotspot/cpu/aarch64/aarch64.ad b/src/hotspot/cpu/aarch64/aarch64.ad
index fced9cfc35e57..7d2a35cefd86a 100644
--- a/src/hotspot/cpu/aarch64/aarch64.ad
+++ b/src/hotspot/cpu/aarch64/aarch64.ad
@@ -1244,7 +1244,7 @@ source %{
// r27 is not allocatable when compressed oops is on and heapbase is not
// zero, compressed klass pointers doesn't use r27 after JDK-8234794
- if (UseCompressedOops && (CompressedOops::ptrs_base() != nullptr)) {
+ if (UseCompressedOops && (CompressedOops::base() != nullptr)) {
_NO_SPECIAL_REG32_mask.Remove(OptoReg::as_OptoReg(r27->as_VMReg()));
_NO_SPECIAL_REG_mask.Remove(OptoReg::as_OptoReg(r27->as_VMReg()));
_NO_SPECIAL_PTR_REG_mask.Remove(OptoReg::as_OptoReg(r27->as_VMReg()));
@@ -2620,7 +2620,8 @@ static bool is_vector_bitwise_not_pattern(Node* n, Node* m) {
bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) {
if (is_vshift_con_pattern(n, m) ||
is_vector_bitwise_not_pattern(n, m) ||
- is_valid_sve_arith_imm_pattern(n, m)) {
+ is_valid_sve_arith_imm_pattern(n, m) ||
+ is_encode_and_store_pattern(n, m)) {
mstack.push(m, Visit);
return true;
}
@@ -6410,7 +6411,7 @@ instruct loadP(iRegPNoSp dst, memory mem)
instruct loadN(iRegNNoSp dst, memory mem)
%{
match(Set dst (LoadN mem));
- predicate(!needs_acquiring_load(n));
+ predicate(!needs_acquiring_load(n) && n->as_Load()->barrier_data() == 0);
ins_cost(4 * INSN_COST);
format %{ "ldrw $dst, $mem\t# compressed ptr" %}
@@ -6839,7 +6840,7 @@ instruct storeimmP0(immP0 zero, memory mem)
instruct storeN(iRegN src, memory mem)
%{
match(Set mem (StoreN mem src));
- predicate(!needs_releasing_store(n));
+ predicate(!needs_releasing_store(n) && n->as_Store()->barrier_data() == 0);
ins_cost(INSN_COST);
format %{ "strw $src, $mem\t# compressed ptr" %}
@@ -6852,7 +6853,7 @@ instruct storeN(iRegN src, memory mem)
instruct storeImmN0(immN0 zero, memory mem)
%{
match(Set mem (StoreN mem zero));
- predicate(!needs_releasing_store(n));
+ predicate(!needs_releasing_store(n) && n->as_Store()->barrier_data() == 0);
ins_cost(INSN_COST);
format %{ "strw zr, $mem\t# compressed ptr" %}
@@ -7086,6 +7087,7 @@ instruct loadP_volatile(iRegPNoSp dst, /* sync_memory*/indirect mem)
instruct loadN_volatile(iRegNNoSp dst, /* sync_memory*/indirect mem)
%{
match(Set dst (LoadN mem));
+ predicate(n->as_Load()->barrier_data() == 0);
ins_cost(VOLATILE_REF_COST);
format %{ "ldarw $dst, $mem\t# compressed ptr" %}
@@ -7253,6 +7255,7 @@ instruct storeimmP0_volatile(immP0 zero, /* sync_memory*/indirect mem)
instruct storeN_volatile(iRegN src, /* sync_memory*/indirect mem)
%{
match(Set mem (StoreN mem src));
+ predicate(n->as_Store()->barrier_data() == 0);
ins_cost(VOLATILE_REF_COST);
format %{ "stlrw $src, $mem\t# compressed ptr" %}
@@ -7265,6 +7268,7 @@ instruct storeN_volatile(iRegN src, /* sync_memory*/indirect mem)
instruct storeimmN0_volatile(immN0 zero, /* sync_memory*/indirect mem)
%{
match(Set mem (StoreN mem zero));
+ predicate(n->as_Store()->barrier_data() == 0);
ins_cost(VOLATILE_REF_COST);
format %{ "stlrw zr, $mem\t# compressed ptr" %}
@@ -8061,6 +8065,7 @@ instruct compareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval
instruct compareAndSwapN(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegNNoSp newval, rFlagsReg cr) %{
match(Set res (CompareAndSwapN mem (Binary oldval newval)));
+ predicate(n->as_LoadStore()->barrier_data() == 0);
ins_cost(2 * VOLATILE_REF_COST);
effect(KILL cr);
@@ -8175,7 +8180,7 @@ instruct compareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP new
instruct compareAndSwapNAcq(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegNNoSp newval, rFlagsReg cr) %{
- predicate(needs_acquiring_load_exclusive(n));
+ predicate(needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() == 0);
match(Set res (CompareAndSwapN mem (Binary oldval newval)));
ins_cost(VOLATILE_REF_COST);
@@ -8280,6 +8285,7 @@ instruct compareAndExchangeL(iRegLNoSp res, indirect mem, iRegL oldval, iRegL ne
// This pattern is generated automatically from cas.m4.
// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
instruct compareAndExchangeN(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval, rFlagsReg cr) %{
+ predicate(n->as_LoadStore()->barrier_data() == 0);
match(Set res (CompareAndExchangeN mem (Binary oldval newval)));
ins_cost(2 * VOLATILE_REF_COST);
effect(TEMP_DEF res, KILL cr);
@@ -8389,7 +8395,7 @@ instruct compareAndExchangeLAcq(iRegLNoSp res, indirect mem, iRegL oldval, iRegL
// This pattern is generated automatically from cas.m4.
// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
instruct compareAndExchangeNAcq(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval, rFlagsReg cr) %{
- predicate(needs_acquiring_load_exclusive(n));
+ predicate(needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() == 0);
match(Set res (CompareAndExchangeN mem (Binary oldval newval)));
ins_cost(VOLATILE_REF_COST);
effect(TEMP_DEF res, KILL cr);
@@ -8501,6 +8507,7 @@ instruct weakCompareAndSwapL(iRegINoSp res, indirect mem, iRegL oldval, iRegL ne
// This pattern is generated automatically from cas.m4.
// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
instruct weakCompareAndSwapN(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, rFlagsReg cr) %{
+ predicate(n->as_LoadStore()->barrier_data() == 0);
match(Set res (WeakCompareAndSwapN mem (Binary oldval newval)));
ins_cost(2 * VOLATILE_REF_COST);
effect(KILL cr);
@@ -8620,7 +8627,7 @@ instruct weakCompareAndSwapLAcq(iRegINoSp res, indirect mem, iRegL oldval, iRegL
// This pattern is generated automatically from cas.m4.
// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
instruct weakCompareAndSwapNAcq(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, rFlagsReg cr) %{
- predicate(needs_acquiring_load_exclusive(n));
+ predicate(needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() == 0);
match(Set res (WeakCompareAndSwapN mem (Binary oldval newval)));
ins_cost(VOLATILE_REF_COST);
effect(KILL cr);
@@ -8681,6 +8688,7 @@ instruct get_and_setL(indirect mem, iRegL newv, iRegLNoSp prev) %{
%}
instruct get_and_setN(indirect mem, iRegN newv, iRegINoSp prev) %{
+ predicate(n->as_LoadStore()->barrier_data() == 0);
match(Set prev (GetAndSetN mem newv));
ins_cost(2 * VOLATILE_REF_COST);
format %{ "atomic_xchgw $prev, $newv, [$mem]" %}
@@ -8724,7 +8732,7 @@ instruct get_and_setLAcq(indirect mem, iRegL newv, iRegLNoSp prev) %{
%}
instruct get_and_setNAcq(indirect mem, iRegN newv, iRegINoSp prev) %{
- predicate(needs_acquiring_load_exclusive(n));
+ predicate(needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() == 0);
match(Set prev (GetAndSetN mem newv));
ins_cost(VOLATILE_REF_COST);
format %{ "atomic_xchgw_acq $prev, $newv, [$mem]" %}
diff --git a/src/hotspot/cpu/aarch64/c1_CodeStubs_aarch64.cpp b/src/hotspot/cpu/aarch64/c1_CodeStubs_aarch64.cpp
index ca175fe1c47b1..89a97a4984fc8 100644
--- a/src/hotspot/cpu/aarch64/c1_CodeStubs_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/c1_CodeStubs_aarch64.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1999, 2023, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2024, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2014, Red Hat Inc. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@@ -57,7 +57,7 @@ void CounterOverflowStub::emit_code(LIR_Assembler* ce) {
__ mov_metadata(rscratch1, m);
ce->store_parameter(rscratch1, 1);
ce->store_parameter(_bci, 0);
- __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::counter_overflow_id)));
+ __ far_call(RuntimeAddress(Runtime1::entry_for(C1StubId::counter_overflow_id)));
ce->add_call_info_here(_info);
ce->verify_oop_map(_info);
__ b(_continuation);
@@ -66,7 +66,7 @@ void CounterOverflowStub::emit_code(LIR_Assembler* ce) {
void RangeCheckStub::emit_code(LIR_Assembler* ce) {
__ bind(_entry);
if (_info->deoptimize_on_exception()) {
- address a = Runtime1::entry_for(Runtime1::predicate_failed_trap_id);
+ address a = Runtime1::entry_for(C1StubId::predicate_failed_trap_id);
__ far_call(RuntimeAddress(a));
ce->add_call_info_here(_info);
ce->verify_oop_map(_info);
@@ -79,13 +79,13 @@ void RangeCheckStub::emit_code(LIR_Assembler* ce) {
} else {
__ mov(rscratch1, _index->as_jint());
}
- Runtime1::StubID stub_id;
+ C1StubId stub_id;
if (_throw_index_out_of_bounds_exception) {
- stub_id = Runtime1::throw_index_exception_id;
+ stub_id = C1StubId::throw_index_exception_id;
} else {
assert(_array != LIR_Opr::nullOpr(), "sanity");
__ mov(rscratch2, _array->as_pointer_register());
- stub_id = Runtime1::throw_range_check_failed_id;
+ stub_id = C1StubId::throw_range_check_failed_id;
}
__ lea(lr, RuntimeAddress(Runtime1::entry_for(stub_id)));
__ blr(lr);
@@ -100,7 +100,7 @@ PredicateFailedStub::PredicateFailedStub(CodeEmitInfo* info) {
void PredicateFailedStub::emit_code(LIR_Assembler* ce) {
__ bind(_entry);
- address a = Runtime1::entry_for(Runtime1::predicate_failed_trap_id);
+ address a = Runtime1::entry_for(C1StubId::predicate_failed_trap_id);
__ far_call(RuntimeAddress(a));
ce->add_call_info_here(_info);
ce->verify_oop_map(_info);
@@ -112,7 +112,7 @@ void DivByZeroStub::emit_code(LIR_Assembler* ce) {
ce->compilation()->implicit_exception_table()->append(_offset, __ offset());
}
__ bind(_entry);
- __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::throw_div0_exception_id)));
+ __ far_call(RuntimeAddress(Runtime1::entry_for(C1StubId::throw_div0_exception_id)));
ce->add_call_info_here(_info);
ce->verify_oop_map(_info);
#ifdef ASSERT
@@ -124,14 +124,14 @@ void DivByZeroStub::emit_code(LIR_Assembler* ce) {
// Implementation of NewInstanceStub
-NewInstanceStub::NewInstanceStub(LIR_Opr klass_reg, LIR_Opr result, ciInstanceKlass* klass, CodeEmitInfo* info, Runtime1::StubID stub_id) {
+NewInstanceStub::NewInstanceStub(LIR_Opr klass_reg, LIR_Opr result, ciInstanceKlass* klass, CodeEmitInfo* info, C1StubId stub_id) {
_result = result;
_klass = klass;
_klass_reg = klass_reg;
_info = new CodeEmitInfo(info);
- assert(stub_id == Runtime1::new_instance_id ||
- stub_id == Runtime1::fast_new_instance_id ||
- stub_id == Runtime1::fast_new_instance_init_check_id,
+ assert(stub_id == C1StubId::new_instance_id ||
+ stub_id == C1StubId::fast_new_instance_id ||
+ stub_id == C1StubId::fast_new_instance_init_check_id,
"need new_instance id");
_stub_id = stub_id;
}
@@ -167,7 +167,7 @@ void NewTypeArrayStub::emit_code(LIR_Assembler* ce) {
__ bind(_entry);
assert(_length->as_register() == r19, "length must in r19,");
assert(_klass_reg->as_register() == r3, "klass_reg must in r3");
- __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::new_type_array_id)));
+ __ far_call(RuntimeAddress(Runtime1::entry_for(C1StubId::new_type_array_id)));
ce->add_call_info_here(_info);
ce->verify_oop_map(_info);
assert(_result->as_register() == r0, "result must in r0");
@@ -190,7 +190,7 @@ void NewObjectArrayStub::emit_code(LIR_Assembler* ce) {
__ bind(_entry);
assert(_length->as_register() == r19, "length must in r19,");
assert(_klass_reg->as_register() == r3, "klass_reg must in r3");
- __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::new_object_array_id)));
+ __ far_call(RuntimeAddress(Runtime1::entry_for(C1StubId::new_object_array_id)));
ce->add_call_info_here(_info);
ce->verify_oop_map(_info);
assert(_result->as_register() == r0, "result must in r0");
@@ -202,11 +202,11 @@ void MonitorEnterStub::emit_code(LIR_Assembler* ce) {
__ bind(_entry);
ce->store_parameter(_obj_reg->as_register(), 1);
ce->store_parameter(_lock_reg->as_register(), 0);
- Runtime1::StubID enter_id;
+ C1StubId enter_id;
if (ce->compilation()->has_fpu_code()) {
- enter_id = Runtime1::monitorenter_id;
+ enter_id = C1StubId::monitorenter_id;
} else {
- enter_id = Runtime1::monitorenter_nofpu_id;
+ enter_id = C1StubId::monitorenter_nofpu_id;
}
__ far_call(RuntimeAddress(Runtime1::entry_for(enter_id)));
ce->add_call_info_here(_info);
@@ -223,11 +223,11 @@ void MonitorExitStub::emit_code(LIR_Assembler* ce) {
}
ce->store_parameter(_lock_reg->as_register(), 0);
// note: non-blocking leaf routine => no call info needed
- Runtime1::StubID exit_id;
+ C1StubId exit_id;
if (ce->compilation()->has_fpu_code()) {
- exit_id = Runtime1::monitorexit_id;
+ exit_id = C1StubId::monitorexit_id;
} else {
- exit_id = Runtime1::monitorexit_nofpu_id;
+ exit_id = C1StubId::monitorexit_nofpu_id;
}
__ adr(lr, _continuation);
__ far_jump(RuntimeAddress(Runtime1::entry_for(exit_id)));
@@ -255,7 +255,7 @@ void PatchingStub::emit_code(LIR_Assembler* ce) {
void DeoptimizeStub::emit_code(LIR_Assembler* ce) {
__ bind(_entry);
ce->store_parameter(_trap_request, 0);
- __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::deoptimize_id)));
+ __ far_call(RuntimeAddress(Runtime1::entry_for(C1StubId::deoptimize_id)));
ce->add_call_info_here(_info);
DEBUG_ONLY(__ should_not_reach_here());
}
@@ -265,9 +265,9 @@ void ImplicitNullCheckStub::emit_code(LIR_Assembler* ce) {
address a;
if (_info->deoptimize_on_exception()) {
// Deoptimize, do not throw the exception, because it is probably wrong to do it here.
- a = Runtime1::entry_for(Runtime1::predicate_failed_trap_id);
+ a = Runtime1::entry_for(C1StubId::predicate_failed_trap_id);
} else {
- a = Runtime1::entry_for(Runtime1::throw_null_pointer_exception_id);
+ a = Runtime1::entry_for(C1StubId::throw_null_pointer_exception_id);
}
ce->compilation()->implicit_exception_table()->append(_offset, __ offset());
diff --git a/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp
index 91430be5835b5..5e116d82761ac 100644
--- a/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp
@@ -321,19 +321,19 @@ void LIR_Assembler::deoptimize_trap(CodeEmitInfo *info) {
switch (patching_id(info)) {
case PatchingStub::access_field_id:
- target = Runtime1::entry_for(Runtime1::access_field_patching_id);
+ target = Runtime1::entry_for(C1StubId::access_field_patching_id);
reloc_type = relocInfo::section_word_type;
break;
case PatchingStub::load_klass_id:
- target = Runtime1::entry_for(Runtime1::load_klass_patching_id);
+ target = Runtime1::entry_for(C1StubId::load_klass_patching_id);
reloc_type = relocInfo::metadata_type;
break;
case PatchingStub::load_mirror_id:
- target = Runtime1::entry_for(Runtime1::load_mirror_patching_id);
+ target = Runtime1::entry_for(C1StubId::load_mirror_patching_id);
reloc_type = relocInfo::oop_type;
break;
case PatchingStub::load_appendix_id:
- target = Runtime1::entry_for(Runtime1::load_appendix_patching_id);
+ target = Runtime1::entry_for(C1StubId::load_appendix_patching_id);
reloc_type = relocInfo::oop_type;
break;
default: ShouldNotReachHere();
@@ -375,7 +375,7 @@ int LIR_Assembler::emit_exception_handler() {
__ verify_not_null_oop(r0);
// search an exception handler (r0: exception oop, r3: throwing pc)
- __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::handle_exception_from_callee_id)));
+ __ far_call(RuntimeAddress(Runtime1::entry_for(C1StubId::handle_exception_from_callee_id)));
__ should_not_reach_here();
guarantee(code_offset() - offset <= exception_handler_size(), "overflow");
__ end_a_stub();
@@ -432,7 +432,7 @@ int LIR_Assembler::emit_unwind_handler() {
// remove the activation and dispatch to the unwind handler
__ block_comment("remove_frame and dispatch to the unwind handler");
__ remove_frame(initial_frame_size_in_bytes());
- __ far_jump(RuntimeAddress(Runtime1::entry_for(Runtime1::unwind_exception_id)));
+ __ far_jump(RuntimeAddress(Runtime1::entry_for(C1StubId::unwind_exception_id)));
// Emit the slow path assembly
if (stub != nullptr) {
@@ -875,19 +875,19 @@ void LIR_Assembler::klass2reg_with_patching(Register reg, CodeEmitInfo* info) {
switch (patching_id(info)) {
case PatchingStub::access_field_id:
- target = Runtime1::entry_for(Runtime1::access_field_patching_id);
+ target = Runtime1::entry_for(C1StubId::access_field_patching_id);
reloc_type = relocInfo::section_word_type;
break;
case PatchingStub::load_klass_id:
- target = Runtime1::entry_for(Runtime1::load_klass_patching_id);
+ target = Runtime1::entry_for(C1StubId::load_klass_patching_id);
reloc_type = relocInfo::metadata_type;
break;
case PatchingStub::load_mirror_id:
- target = Runtime1::entry_for(Runtime1::load_mirror_patching_id);
+ target = Runtime1::entry_for(C1StubId::load_mirror_patching_id);
reloc_type = relocInfo::oop_type;
break;
case PatchingStub::load_appendix_id:
- target = Runtime1::entry_for(Runtime1::load_appendix_patching_id);
+ target = Runtime1::entry_for(C1StubId::load_appendix_patching_id);
reloc_type = relocInfo::oop_type;
break;
default: ShouldNotReachHere();
@@ -1356,7 +1356,7 @@ void LIR_Assembler::emit_typecheck_helper(LIR_OpTypeCheck *op, Label* success, L
__ br(Assembler::EQ, *success_target);
__ stp(klass_RInfo, k_RInfo, Address(__ pre(sp, -2 * wordSize)));
- __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::slow_subtype_check_id)));
+ __ far_call(RuntimeAddress(Runtime1::entry_for(C1StubId::slow_subtype_check_id)));
__ ldr(klass_RInfo, Address(__ post(sp, 2 * wordSize)));
// result is a boolean
__ cbzw(klass_RInfo, *failure_target);
@@ -1367,7 +1367,7 @@ void LIR_Assembler::emit_typecheck_helper(LIR_OpTypeCheck *op, Label* success, L
__ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, success_target, failure_target, nullptr);
// call out-of-line instance of __ check_klass_subtype_slow_path(...):
__ stp(klass_RInfo, k_RInfo, Address(__ pre(sp, -2 * wordSize)));
- __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::slow_subtype_check_id)));
+ __ far_call(RuntimeAddress(Runtime1::entry_for(C1StubId::slow_subtype_check_id)));
__ ldp(k_RInfo, klass_RInfo, Address(__ post(sp, 2 * wordSize)));
// result is a boolean
__ cbz(k_RInfo, *failure_target);
@@ -1446,7 +1446,7 @@ void LIR_Assembler::emit_opTypeCheck(LIR_OpTypeCheck* op) {
__ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, success_target, failure_target, nullptr);
// call out-of-line instance of __ check_klass_subtype_slow_path(...):
__ stp(klass_RInfo, k_RInfo, Address(__ pre(sp, -2 * wordSize)));
- __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::slow_subtype_check_id)));
+ __ far_call(RuntimeAddress(Runtime1::entry_for(C1StubId::slow_subtype_check_id)));
__ ldp(k_RInfo, klass_RInfo, Address(__ post(sp, 2 * wordSize)));
// result is a boolean
__ cbzw(k_RInfo, *failure_target);
@@ -2035,7 +2035,7 @@ void LIR_Assembler::throw_op(LIR_Opr exceptionPC, LIR_Opr exceptionOop, CodeEmit
// exception object is not added to oop map by LinearScan
// (LinearScan assumes that no oops are in fixed registers)
info->add_register_oop(exceptionOop);
- Runtime1::StubID unwind_id;
+ C1StubId unwind_id;
// get current pc information
// pc is only needed if the method has an exception handler, the unwind code does not need it.
@@ -2054,9 +2054,9 @@ void LIR_Assembler::throw_op(LIR_Opr exceptionPC, LIR_Opr exceptionOop, CodeEmit
__ verify_not_null_oop(r0);
// search an exception handler (r0: exception oop, r3: throwing pc)
if (compilation()->has_fpu_code()) {
- unwind_id = Runtime1::handle_exception_id;
+ unwind_id = C1StubId::handle_exception_id;
} else {
- unwind_id = Runtime1::handle_exception_nofpu_id;
+ unwind_id = C1StubId::handle_exception_nofpu_id;
}
__ far_call(RuntimeAddress(Runtime1::entry_for(unwind_id)));
@@ -2337,7 +2337,7 @@ void LIR_Assembler::emit_arraycopy(LIR_OpArrayCopy* op) {
__ check_klass_subtype_fast_path(src, dst, tmp, &cont, &slow, nullptr);
__ PUSH(src, dst);
- __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::slow_subtype_check_id)));
+ __ far_call(RuntimeAddress(Runtime1::entry_for(C1StubId::slow_subtype_check_id)));
__ POP(src, dst);
__ cbnz(src, cont);
diff --git a/src/hotspot/cpu/aarch64/c1_LIRGenerator_aarch64.cpp b/src/hotspot/cpu/aarch64/c1_LIRGenerator_aarch64.cpp
index 8f1260feba3ea..4acac65ad5bab 100644
--- a/src/hotspot/cpu/aarch64/c1_LIRGenerator_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/c1_LIRGenerator_aarch64.cpp
@@ -1246,7 +1246,7 @@ void LIRGenerator::do_NewMultiArray(NewMultiArray* x) {
args->append(rank);
args->append(varargs);
LIR_Opr reg = result_register_for(x->type());
- __ call_runtime(Runtime1::entry_for(Runtime1::new_multi_array_id),
+ __ call_runtime(Runtime1::entry_for(C1StubId::new_multi_array_id),
LIR_OprFact::illegalOpr,
reg, args, info);
@@ -1277,14 +1277,14 @@ void LIRGenerator::do_CheckCast(CheckCast* x) {
CodeStub* stub;
if (x->is_incompatible_class_change_check()) {
assert(patching_info == nullptr, "can't patch this");
- stub = new SimpleExceptionStub(Runtime1::throw_incompatible_class_change_error_id, LIR_OprFact::illegalOpr, info_for_exception);
+ stub = new SimpleExceptionStub(C1StubId::throw_incompatible_class_change_error_id, LIR_OprFact::illegalOpr, info_for_exception);
} else if (x->is_invokespecial_receiver_check()) {
assert(patching_info == nullptr, "can't patch this");
stub = new DeoptimizeStub(info_for_exception,
Deoptimization::Reason_class_check,
Deoptimization::Action_none);
} else {
- stub = new SimpleExceptionStub(Runtime1::throw_class_cast_exception_id, obj.result(), info_for_exception);
+ stub = new SimpleExceptionStub(C1StubId::throw_class_cast_exception_id, obj.result(), info_for_exception);
}
LIR_Opr reg = rlock_result(x);
LIR_Opr tmp3 = LIR_OprFact::illegalOpr;
diff --git a/src/hotspot/cpu/aarch64/c1_MacroAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/c1_MacroAssembler_aarch64.cpp
index d0d11d437e83e..8d1b3902ce42e 100644
--- a/src/hotspot/cpu/aarch64/c1_MacroAssembler_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/c1_MacroAssembler_aarch64.cpp
@@ -267,7 +267,7 @@ void C1_MacroAssembler::initialize_object(Register obj, Register klass, Register
if (CURRENT_ENV->dtrace_alloc_probes()) {
assert(obj == r0, "must be");
- far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::dtrace_object_alloc_id)));
+ far_call(RuntimeAddress(Runtime1::entry_for(C1StubId::dtrace_object_alloc_id)));
}
verify_oop(obj);
@@ -308,7 +308,7 @@ void C1_MacroAssembler::allocate_array(Register obj, Register len, Register t1,
if (CURRENT_ENV->dtrace_alloc_probes()) {
assert(obj == r0, "must be");
- far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::dtrace_object_alloc_id)));
+ far_call(RuntimeAddress(Runtime1::entry_for(C1StubId::dtrace_object_alloc_id)));
}
verify_oop(obj);
diff --git a/src/hotspot/cpu/aarch64/c1_Runtime1_aarch64.cpp b/src/hotspot/cpu/aarch64/c1_Runtime1_aarch64.cpp
index cb9eb03c580d2..0b9acc0f3a885 100644
--- a/src/hotspot/cpu/aarch64/c1_Runtime1_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/c1_Runtime1_aarch64.cpp
@@ -100,10 +100,10 @@ int StubAssembler::call_RT(Register oop_result1, Register metadata_result, addre
if (frame_size() == no_frame_size) {
leave();
far_jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
- } else if (_stub_id == Runtime1::forward_exception_id) {
+ } else if (_stub_id == (int)C1StubId::forward_exception_id) {
should_not_reach_here();
} else {
- far_jump(RuntimeAddress(Runtime1::entry_for(Runtime1::forward_exception_id)));
+ far_jump(RuntimeAddress(Runtime1::entry_for(C1StubId::forward_exception_id)));
}
bind(L);
}
@@ -358,7 +358,7 @@ OopMapSet* Runtime1::generate_exception_throw(StubAssembler* sasm, address targe
}
-OopMapSet* Runtime1::generate_handle_exception(StubID id, StubAssembler *sasm) {
+OopMapSet* Runtime1::generate_handle_exception(C1StubId id, StubAssembler *sasm) {
__ block_comment("generate_handle_exception");
// incoming parameters
@@ -370,7 +370,7 @@ OopMapSet* Runtime1::generate_handle_exception(StubID id, StubAssembler *sasm) {
OopMapSet* oop_maps = new OopMapSet();
OopMap* oop_map = nullptr;
switch (id) {
- case forward_exception_id:
+ case C1StubId::forward_exception_id:
// We're handling an exception in the context of a compiled frame.
// The registers have been saved in the standard places. Perform
// an exception lookup in the caller and dispatch to the handler
@@ -390,12 +390,12 @@ OopMapSet* Runtime1::generate_handle_exception(StubID id, StubAssembler *sasm) {
__ str(zr, Address(rthread, JavaThread::vm_result_offset()));
__ str(zr, Address(rthread, JavaThread::vm_result_2_offset()));
break;
- case handle_exception_nofpu_id:
- case handle_exception_id:
+ case C1StubId::handle_exception_nofpu_id:
+ case C1StubId::handle_exception_id:
// At this point all registers MAY be live.
- oop_map = save_live_registers(sasm, id != handle_exception_nofpu_id);
+ oop_map = save_live_registers(sasm, id != C1StubId::handle_exception_nofpu_id);
break;
- case handle_exception_from_callee_id: {
+ case C1StubId::handle_exception_from_callee_id: {
// At this point all registers except exception oop (r0) and
// exception pc (lr) are dead.
const int frame_size = 2 /*fp, return address*/;
@@ -453,13 +453,13 @@ OopMapSet* Runtime1::generate_handle_exception(StubID id, StubAssembler *sasm) {
__ str(r0, Address(rfp, 1*BytesPerWord));
switch (id) {
- case forward_exception_id:
- case handle_exception_nofpu_id:
- case handle_exception_id:
+ case C1StubId::forward_exception_id:
+ case C1StubId::handle_exception_nofpu_id:
+ case C1StubId::handle_exception_id:
// Restore the registers that were saved at the beginning.
- restore_live_registers(sasm, id != handle_exception_nofpu_id);
+ restore_live_registers(sasm, id != C1StubId::handle_exception_nofpu_id);
break;
- case handle_exception_from_callee_id:
+ case C1StubId::handle_exception_from_callee_id:
break;
default: ShouldNotReachHere();
}
@@ -611,7 +611,7 @@ OopMapSet* Runtime1::generate_patching(StubAssembler* sasm, address target) {
}
-OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
+OopMapSet* Runtime1::generate_code_for(C1StubId id, StubAssembler* sasm) {
const Register exception_oop = r0;
const Register exception_pc = r3;
@@ -628,7 +628,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
OopMap* oop_map = nullptr;
switch (id) {
{
- case forward_exception_id:
+ case C1StubId::forward_exception_id:
{
oop_maps = generate_handle_exception(id, sasm);
__ leave();
@@ -636,31 +636,31 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
}
break;
- case throw_div0_exception_id:
+ case C1StubId::throw_div0_exception_id:
{ StubFrame f(sasm, "throw_div0_exception", dont_gc_arguments, does_not_return);
oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_div0_exception), false);
}
break;
- case throw_null_pointer_exception_id:
+ case C1StubId::throw_null_pointer_exception_id:
{ StubFrame f(sasm, "throw_null_pointer_exception", dont_gc_arguments, does_not_return);
oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_null_pointer_exception), false);
}
break;
- case new_instance_id:
- case fast_new_instance_id:
- case fast_new_instance_init_check_id:
+ case C1StubId::new_instance_id:
+ case C1StubId::fast_new_instance_id:
+ case C1StubId::fast_new_instance_init_check_id:
{
Register klass = r3; // Incoming
Register obj = r0; // Result
- if (id == new_instance_id) {
+ if (id == C1StubId::new_instance_id) {
__ set_info("new_instance", dont_gc_arguments);
- } else if (id == fast_new_instance_id) {
+ } else if (id == C1StubId::fast_new_instance_id) {
__ set_info("fast new_instance", dont_gc_arguments);
} else {
- assert(id == fast_new_instance_init_check_id, "bad StubID");
+ assert(id == C1StubId::fast_new_instance_init_check_id, "bad C1StubId");
__ set_info("fast new_instance init check", dont_gc_arguments);
}
@@ -679,7 +679,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
break;
- case counter_overflow_id:
+ case C1StubId::counter_overflow_id:
{
Register bci = r0, method = r1;
__ enter();
@@ -697,14 +697,14 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
}
break;
- case new_type_array_id:
- case new_object_array_id:
+ case C1StubId::new_type_array_id:
+ case C1StubId::new_object_array_id:
{
Register length = r19; // Incoming
Register klass = r3; // Incoming
Register obj = r0; // Result
- if (id == new_type_array_id) {
+ if (id == C1StubId::new_type_array_id) {
__ set_info("new_type_array", dont_gc_arguments);
} else {
__ set_info("new_object_array", dont_gc_arguments);
@@ -717,7 +717,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
Register t0 = obj;
__ ldrw(t0, Address(klass, Klass::layout_helper_offset()));
__ asrw(t0, t0, Klass::_lh_array_tag_shift);
- int tag = ((id == new_type_array_id)
+ int tag = ((id == C1StubId::new_type_array_id)
? Klass::_lh_array_tag_type_value
: Klass::_lh_array_tag_obj_value);
__ mov(rscratch1, tag);
@@ -732,7 +732,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
__ enter();
OopMap* map = save_live_registers(sasm);
int call_offset;
- if (id == new_type_array_id) {
+ if (id == C1StubId::new_type_array_id) {
call_offset = __ call_RT(obj, noreg, CAST_FROM_FN_PTR(address, new_type_array), klass, length);
} else {
call_offset = __ call_RT(obj, noreg, CAST_FROM_FN_PTR(address, new_object_array), klass, length);
@@ -750,7 +750,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
}
break;
- case new_multi_array_id:
+ case C1StubId::new_multi_array_id:
{ StubFrame f(sasm, "new_multi_array", dont_gc_arguments);
// r0,: klass
// r19,: rank
@@ -770,7 +770,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
}
break;
- case register_finalizer_id:
+ case C1StubId::register_finalizer_id:
{
__ set_info("register_finalizer", dont_gc_arguments);
@@ -802,19 +802,19 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
}
break;
- case throw_class_cast_exception_id:
+ case C1StubId::throw_class_cast_exception_id:
{ StubFrame f(sasm, "throw_class_cast_exception", dont_gc_arguments, does_not_return);
oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_class_cast_exception), true);
}
break;
- case throw_incompatible_class_change_error_id:
+ case C1StubId::throw_incompatible_class_change_error_id:
{ StubFrame f(sasm, "throw_incompatible_class_cast_exception", dont_gc_arguments, does_not_return);
oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_incompatible_class_change_error), false);
}
break;
- case slow_subtype_check_id:
+ case C1StubId::slow_subtype_check_id:
{
// Typical calling sequence:
// __ push(klass_RInfo); // object klass or other subclass
@@ -857,10 +857,10 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
}
break;
- case monitorenter_nofpu_id:
+ case C1StubId::monitorenter_nofpu_id:
save_fpu_registers = false;
// fall through
- case monitorenter_id:
+ case C1StubId::monitorenter_id:
{
StubFrame f(sasm, "monitorenter", dont_gc_arguments);
OopMap* map = save_live_registers(sasm, save_fpu_registers);
@@ -878,10 +878,10 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
}
break;
- case monitorexit_nofpu_id:
+ case C1StubId::monitorexit_nofpu_id:
save_fpu_registers = false;
// fall through
- case monitorexit_id:
+ case C1StubId::monitorexit_id:
{
StubFrame f(sasm, "monitorexit", dont_gc_arguments);
OopMap* map = save_live_registers(sasm, save_fpu_registers);
@@ -901,7 +901,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
}
break;
- case deoptimize_id:
+ case C1StubId::deoptimize_id:
{
StubFrame f(sasm, "deoptimize", dont_gc_arguments, does_not_return);
OopMap* oop_map = save_live_registers(sasm);
@@ -918,13 +918,13 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
}
break;
- case throw_range_check_failed_id:
+ case C1StubId::throw_range_check_failed_id:
{ StubFrame f(sasm, "range_check_failed", dont_gc_arguments, does_not_return);
oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_range_check_exception), true);
}
break;
- case unwind_exception_id:
+ case C1StubId::unwind_exception_id:
{ __ set_info("unwind_exception", dont_gc_arguments);
// note: no stubframe since we are about to leave the current
// activation and we are calling a leaf VM function only.
@@ -932,54 +932,54 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
}
break;
- case access_field_patching_id:
+ case C1StubId::access_field_patching_id:
{ StubFrame f(sasm, "access_field_patching", dont_gc_arguments, does_not_return);
// we should set up register map
oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, access_field_patching));
}
break;
- case load_klass_patching_id:
+ case C1StubId::load_klass_patching_id:
{ StubFrame f(sasm, "load_klass_patching", dont_gc_arguments, does_not_return);
// we should set up register map
oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_klass_patching));
}
break;
- case load_mirror_patching_id:
+ case C1StubId::load_mirror_patching_id:
{ StubFrame f(sasm, "load_mirror_patching", dont_gc_arguments, does_not_return);
// we should set up register map
oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_mirror_patching));
}
break;
- case load_appendix_patching_id:
+ case C1StubId::load_appendix_patching_id:
{ StubFrame f(sasm, "load_appendix_patching", dont_gc_arguments, does_not_return);
// we should set up register map
oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_appendix_patching));
}
break;
- case handle_exception_nofpu_id:
- case handle_exception_id:
+ case C1StubId::handle_exception_nofpu_id:
+ case C1StubId::handle_exception_id:
{ StubFrame f(sasm, "handle_exception", dont_gc_arguments);
oop_maps = generate_handle_exception(id, sasm);
}
break;
- case handle_exception_from_callee_id:
+ case C1StubId::handle_exception_from_callee_id:
{ StubFrame f(sasm, "handle_exception_from_callee", dont_gc_arguments);
oop_maps = generate_handle_exception(id, sasm);
}
break;
- case throw_index_exception_id:
+ case C1StubId::throw_index_exception_id:
{ StubFrame f(sasm, "index_range_check_failed", dont_gc_arguments, does_not_return);
oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_index_exception), true);
}
break;
- case throw_array_store_exception_id:
+ case C1StubId::throw_array_store_exception_id:
{ StubFrame f(sasm, "throw_array_store_exception", dont_gc_arguments, does_not_return);
// tos + 0: link
// + 1: return address
@@ -987,7 +987,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
}
break;
- case predicate_failed_trap_id:
+ case C1StubId::predicate_failed_trap_id:
{
StubFrame f(sasm, "predicate_failed_trap", dont_gc_arguments, does_not_return);
@@ -1005,7 +1005,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
}
break;
- case dtrace_object_alloc_id:
+ case C1StubId::dtrace_object_alloc_id:
{ // c_rarg0: object
StubFrame f(sasm, "dtrace_object_alloc", dont_gc_arguments);
save_live_registers(sasm);
diff --git a/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp
index b4c12ecd4a849..62831ee72ba05 100644
--- a/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp
@@ -150,10 +150,12 @@ void C2_MacroAssembler::fast_unlock(Register objectReg, Register boxReg, Registe
Register oop = objectReg;
Register box = boxReg;
Register disp_hdr = tmpReg;
+ Register owner_addr = tmpReg;
Register tmp = tmp2Reg;
Label cont;
Label object_has_monitor;
Label count, no_count;
+ Label unlocked;
assert(LockingMode != LM_LIGHTWEIGHT, "lightweight locking should use fast_unlock_lightweight");
assert_different_registers(oop, box, tmp, disp_hdr);
@@ -204,14 +206,40 @@ void C2_MacroAssembler::fast_unlock(Register objectReg, Register boxReg, Registe
b(cont);
bind(notRecursive);
+
+ // Compute owner address.
+ lea(owner_addr, Address(tmp, ObjectMonitor::owner_offset()));
+
+ // Set owner to null.
+ // Release to satisfy the JMM
+ stlr(zr, owner_addr);
+ // We need a full fence after clearing owner to avoid stranding.
+ // StoreLoad achieves this.
+ membar(StoreLoad);
+
+ // Check if the entry lists are empty.
ldr(rscratch1, Address(tmp, ObjectMonitor::EntryList_offset()));
- ldr(disp_hdr, Address(tmp, ObjectMonitor::cxq_offset()));
- orr(rscratch1, rscratch1, disp_hdr); // Will be 0 if both are 0.
- cmp(rscratch1, zr); // Sets flags for result
- cbnz(rscratch1, cont);
- // need a release store here
- lea(tmp, Address(tmp, ObjectMonitor::owner_offset()));
- stlr(zr, tmp); // set unowned
+ ldr(tmpReg, Address(tmp, ObjectMonitor::cxq_offset()));
+ orr(rscratch1, rscratch1, tmpReg);
+ cmp(rscratch1, zr);
+ br(Assembler::EQ, cont); // If so we are done.
+
+ // Check if there is a successor.
+ ldr(rscratch1, Address(tmp, ObjectMonitor::succ_offset()));
+ cmp(rscratch1, zr);
+ br(Assembler::NE, unlocked); // If so we are done.
+
+ // Save the monitor pointer in the current thread, so we can try to
+ // reacquire the lock in SharedRuntime::monitor_exit_helper().
+ str(tmp, Address(rthread, JavaThread::unlocked_inflated_monitor_offset()));
+
+ cmp(zr, rthread); // Set Flag to NE => slow path
+ b(cont);
+
+ bind(unlocked);
+ cmp(zr, zr); // Set Flag to EQ => fast path
+
+ // Intentional fall-through
bind(cont);
// flag == EQ indicates success
@@ -498,33 +526,41 @@ void C2_MacroAssembler::fast_unlock_lightweight(Register obj, Register box, Regi
bind(not_recursive);
- Label release;
const Register t2_owner_addr = t2;
// Compute owner address.
lea(t2_owner_addr, Address(t1_monitor, ObjectMonitor::owner_offset()));
+ // Set owner to null.
+ // Release to satisfy the JMM
+ stlr(zr, t2_owner_addr);
+ // We need a full fence after clearing owner to avoid stranding.
+ // StoreLoad achieves this.
+ membar(StoreLoad);
+
// Check if the entry lists are empty.
ldr(rscratch1, Address(t1_monitor, ObjectMonitor::EntryList_offset()));
ldr(t3_t, Address(t1_monitor, ObjectMonitor::cxq_offset()));
orr(rscratch1, rscratch1, t3_t);
cmp(rscratch1, zr);
- br(Assembler::EQ, release);
+ br(Assembler::EQ, unlocked); // If so we are done.
- // The owner may be anonymous and we removed the last obj entry in
- // the lock-stack. This loses the information about the owner.
- // Write the thread to the owner field so the runtime knows the owner.
- str(rthread, Address(t2_owner_addr));
- b(slow_path);
+ // Check if there is a successor.
+ ldr(rscratch1, Address(t1_monitor, ObjectMonitor::succ_offset()));
+ cmp(rscratch1, zr);
+ br(Assembler::NE, unlocked); // If so we are done.
- bind(release);
- // Set owner to null.
- // Release to satisfy the JMM
- stlr(zr, t2_owner_addr);
+ // Save the monitor pointer in the current thread, so we can try to
+ // reacquire the lock in SharedRuntime::monitor_exit_helper().
+ str(t1_monitor, Address(rthread, JavaThread::unlocked_inflated_monitor_offset()));
+
+ cmp(zr, rthread); // Set Flag to NE => slow path
+ b(slow_path);
}
bind(unlocked);
decrement(Address(rthread, JavaThread::held_monitor_count_offset()));
+ cmp(zr, zr); // Set Flags to EQ => fast path
#ifdef ASSERT
// Check that unlocked label is reached with Flags == EQ.
diff --git a/src/hotspot/cpu/aarch64/cas.m4 b/src/hotspot/cpu/aarch64/cas.m4
index f8aac0c4939fa..7e13e153db18a 100644
--- a/src/hotspot/cpu/aarch64/cas.m4
+++ b/src/hotspot/cpu/aarch64/cas.m4
@@ -45,7 +45,9 @@ define(`CAS_INSN',
// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
instruct compareAndExchange$1$6(iReg$2NoSp res, indirect mem, iReg$2 oldval, iReg$2 newval, rFlagsReg cr) %{
ifelse($1$6,PAcq,INDENT(predicate(needs_acquiring_load_exclusive(n) && (n->as_LoadStore()->barrier_data() == 0));),
+ $1$6,NAcq,INDENT(predicate(needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() == 0);),
$1,P,INDENT(predicate(n->as_LoadStore()->barrier_data() == 0);),
+ $1,N,INDENT(predicate(n->as_LoadStore()->barrier_data() == 0);),
$6,Acq,INDENT(predicate(needs_acquiring_load_exclusive(n));),
`dnl')
match(Set res (CompareAndExchange$1 mem (Binary oldval newval)));
@@ -122,7 +124,9 @@ define(`CAS_INSN3',
// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
instruct weakCompareAndSwap$1$6(iRegINoSp res, indirect mem, iReg$2 oldval, iReg$2 newval, rFlagsReg cr) %{
ifelse($1$6,PAcq,INDENT(predicate(needs_acquiring_load_exclusive(n) && (n->as_LoadStore()->barrier_data() == 0));),
+ $1$6,NAcq,INDENT(predicate(needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() == 0);),
$1,P,INDENT(predicate(n->as_LoadStore()->barrier_data() == 0);),
+ $1,N,INDENT(predicate(n->as_LoadStore()->barrier_data() == 0);),
$6,Acq,INDENT(predicate(needs_acquiring_load_exclusive(n));),
`dnl')
match(Set res (WeakCompareAndSwap$1 mem (Binary oldval newval)));
diff --git a/src/hotspot/cpu/aarch64/compressedKlass_aarch64.cpp b/src/hotspot/cpu/aarch64/compressedKlass_aarch64.cpp
index 54af69ffaba6c..fc78813c161fc 100644
--- a/src/hotspot/cpu/aarch64/compressedKlass_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/compressedKlass_aarch64.cpp
@@ -129,5 +129,7 @@ void CompressedKlassPointers::initialize(address addr, size_t len) {
address const end = addr + len;
_base = (end <= (address)unscaled_max) ? nullptr : addr;
- _range = end - _base;
+ // Remember the Klass range:
+ _klass_range_start = addr;
+ _klass_range_end = addr + len;
}
diff --git a/src/hotspot/cpu/aarch64/gc/g1/g1BarrierSetAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/gc/g1/g1BarrierSetAssembler_aarch64.cpp
index d02038b6e9193..b978c350ce131 100644
--- a/src/hotspot/cpu/aarch64/gc/g1/g1BarrierSetAssembler_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/gc/g1/g1BarrierSetAssembler_aarch64.cpp
@@ -38,7 +38,10 @@
#include "c1/c1_LIRAssembler.hpp"
#include "c1/c1_MacroAssembler.hpp"
#include "gc/g1/c1/g1BarrierSetC1.hpp"
-#endif
+#endif // COMPILER1
+#ifdef COMPILER2
+#include "gc/g1/c2/g1BarrierSetC2.hpp"
+#endif // COMPILER2
#define __ masm->
@@ -95,6 +98,54 @@ void G1BarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* mas
__ pop(saved_regs, sp);
}
+static void generate_queue_test_and_insertion(MacroAssembler* masm, ByteSize index_offset, ByteSize buffer_offset, Label& runtime,
+ const Register thread, const Register value, const Register temp1, const Register temp2) {
+ // Can we store a value in the given thread's buffer?
+ // (The index field is typed as size_t.)
+ __ ldr(temp1, Address(thread, in_bytes(index_offset))); // temp1 := *(index address)
+ __ cbz(temp1, runtime); // jump to runtime if index == 0 (full buffer)
+ // The buffer is not full, store value into it.
+ __ sub(temp1, temp1, wordSize); // temp1 := next index
+ __ str(temp1, Address(thread, in_bytes(index_offset))); // *(index address) := next index
+ __ ldr(temp2, Address(thread, in_bytes(buffer_offset))); // temp2 := buffer address
+ __ str(value, Address(temp2, temp1)); // *(buffer address + next index) := value
+}
+
+static void generate_pre_barrier_fast_path(MacroAssembler* masm,
+ const Register thread,
+ const Register tmp1) {
+ Address in_progress(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset()));
+ // Is marking active?
+ if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) {
+ __ ldrw(tmp1, in_progress);
+ } else {
+ assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption");
+ __ ldrb(tmp1, in_progress);
+ }
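+  // tmp1 now holds the SATB "marking active" flag; callers branch on it (cbzw/cbnzw).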
+}
+
+static void generate_pre_barrier_slow_path(MacroAssembler* masm,
+ const Register obj,
+ const Register pre_val,
+ const Register thread,
+ const Register tmp1,
+ const Register tmp2,
+ Label& done,
+ Label& runtime) {
+ // Do we need to load the previous value?
+ if (obj != noreg) {
+ __ load_heap_oop(pre_val, Address(obj, 0), noreg, noreg, AS_RAW);
+ }
+ // Is the previous value null?
+ __ cbz(pre_val, done);
+ generate_queue_test_and_insertion(masm,
+ G1ThreadLocalData::satb_mark_queue_index_offset(),
+ G1ThreadLocalData::satb_mark_queue_buffer_offset(),
+ runtime,
+ thread, pre_val, tmp1, tmp2);
+ __ b(done);
+}
+
void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm,
Register obj,
Register pre_val,
@@ -115,43 +166,10 @@ void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm,
assert_different_registers(obj, pre_val, tmp1, tmp2);
assert(pre_val != noreg && tmp1 != noreg && tmp2 != noreg, "expecting a register");
- Address in_progress(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset()));
- Address index(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset()));
- Address buffer(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset()));
-
- // Is marking active?
- if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) {
- __ ldrw(tmp1, in_progress);
- } else {
- assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption");
- __ ldrb(tmp1, in_progress);
- }
+ generate_pre_barrier_fast_path(masm, thread, tmp1);
+ // If marking is not active (*(mark queue active address) == 0), jump to done
__ cbzw(tmp1, done);
-
- // Do we need to load the previous value?
- if (obj != noreg) {
- __ load_heap_oop(pre_val, Address(obj, 0), noreg, noreg, AS_RAW);
- }
-
- // Is the previous value null?
- __ cbz(pre_val, done);
-
- // Can we store original value in the thread's buffer?
- // Is index == 0?
- // (The index field is typed as size_t.)
-
- __ ldr(tmp1, index); // tmp := *index_adr
- __ cbz(tmp1, runtime); // tmp == 0?
- // If yes, goto runtime
-
- __ sub(tmp1, tmp1, wordSize); // tmp := tmp - wordSize
- __ str(tmp1, index); // *index_adr := tmp
- __ ldr(tmp2, buffer);
- __ add(tmp1, tmp1, tmp2); // tmp := tmp + *buffer_adr
-
- // Record the previous value
- __ str(pre_val, Address(tmp1, 0));
- __ b(done);
+ generate_pre_barrier_slow_path(masm, obj, pre_val, thread, tmp1, tmp2, done, runtime);
__ bind(runtime);
@@ -182,6 +200,50 @@ void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm,
}
+static void generate_post_barrier_fast_path(MacroAssembler* masm,
+ const Register store_addr,
+ const Register new_val,
+ const Register tmp1,
+ const Register tmp2,
+ Label& done,
+ bool new_val_may_be_null) {
+ // Does store cross heap regions?
+ __ eor(tmp1, store_addr, new_val); // tmp1 := store address ^ new value
+ __ lsr(tmp1, tmp1, G1HeapRegion::LogOfHRGrainBytes); // tmp1 := ((store address ^ new value) >> LogOfHRGrainBytes)
+ __ cbz(tmp1, done);
+ // Crosses regions, storing null?
+ if (new_val_may_be_null) {
+ __ cbz(new_val, done);
+ }
+ // Storing region crossing non-null, is card young?
+ __ lsr(tmp1, store_addr, CardTable::card_shift()); // tmp1 := card address relative to card table base
+ __ load_byte_map_base(tmp2); // tmp2 := card table base address
+ __ add(tmp1, tmp1, tmp2); // tmp1 := card address
+ __ ldrb(tmp2, Address(tmp1)); // tmp2 := card
+ __ cmpw(tmp2, (int)G1CardTable::g1_young_card_val()); // tmp2 := card == young_card_val?
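+  // Flags now hold the result of the young-card check; callers branch on EQ (young) or NE (not young).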
+}
+
+static void generate_post_barrier_slow_path(MacroAssembler* masm,
+ const Register thread,
+ const Register tmp1,
+ const Register tmp2,
+ Label& done,
+ Label& runtime) {
+ __ membar(Assembler::StoreLoad); // StoreLoad membar
+ __ ldrb(tmp2, Address(tmp1)); // tmp2 := card
+ __ cbzw(tmp2, done);
+ // Storing a region crossing, non-null oop, card is clean.
+ // Dirty card and log.
+ STATIC_ASSERT(CardTable::dirty_card_val() == 0);
+ __ strb(zr, Address(tmp1)); // *(card address) := dirty_card_val
+ generate_queue_test_and_insertion(masm,
+ G1ThreadLocalData::dirty_card_queue_index_offset(),
+ G1ThreadLocalData::dirty_card_queue_buffer_offset(),
+ runtime,
+ thread, tmp1, tmp2, rscratch1);
+ __ b(done);
+}
+
void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm,
Register store_addr,
Register new_val,
@@ -194,70 +256,116 @@ void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm,
assert(store_addr != noreg && new_val != noreg && tmp1 != noreg
&& tmp2 != noreg, "expecting a register");
- Address queue_index(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset()));
- Address buffer(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset()));
-
- BarrierSet* bs = BarrierSet::barrier_set();
- CardTableBarrierSet* ctbs = barrier_set_cast<CardTableBarrierSet*>(bs);
- CardTable* ct = ctbs->card_table();
-
Label done;
Label runtime;
- // Does store cross heap regions?
+ generate_post_barrier_fast_path(masm, store_addr, new_val, tmp1, tmp2, done, true /* new_val_may_be_null */);
+ // If card is young, jump to done
+ __ br(Assembler::EQ, done);
+ generate_post_barrier_slow_path(masm, thread, tmp1, tmp2, done, runtime);
- __ eor(tmp1, store_addr, new_val);
- __ lsr(tmp1, tmp1, G1HeapRegion::LogOfHRGrainBytes);
- __ cbz(tmp1, done);
+ __ bind(runtime);
+ // save the live input values
+ RegSet saved = RegSet::of(store_addr);
+ __ push(saved, sp);
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), tmp1, thread);
+ __ pop(saved, sp);
- // crosses regions, storing null?
+ __ bind(done);
+}
- __ cbz(new_val, done);
+#if defined(COMPILER2)
- // storing region crossing non-null, is card already dirty?
+static void generate_c2_barrier_runtime_call(MacroAssembler* masm, G1BarrierStubC2* stub, const Register arg, const address runtime_path) {
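+  // Save the live registers recorded by the stub across the runtime call.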
+ SaveLiveRegisters save_registers(masm, stub);
+ if (c_rarg0 != arg) {
+ __ mov(c_rarg0, arg);
+ }
+ __ mov(c_rarg1, rthread);
+ __ mov(rscratch1, runtime_path);
+ __ blr(rscratch1);
+}
- const Register card_addr = tmp1;
+void G1BarrierSetAssembler::g1_write_barrier_pre_c2(MacroAssembler* masm,
+ Register obj,
+ Register pre_val,
+ Register thread,
+ Register tmp1,
+ Register tmp2,
+ G1PreBarrierStubC2* stub) {
+ assert(thread == rthread, "must be");
+ assert_different_registers(obj, pre_val, tmp1, tmp2);
+ assert(pre_val != noreg && tmp1 != noreg && tmp2 != noreg, "expecting a register");
- __ lsr(card_addr, store_addr, CardTable::card_shift());
+ stub->initialize_registers(obj, pre_val, thread, tmp1, tmp2);
- // get the address of the card
- __ load_byte_map_base(tmp2);
- __ add(card_addr, card_addr, tmp2);
- __ ldrb(tmp2, Address(card_addr));
- __ cmpw(tmp2, (int)G1CardTable::g1_young_card_val());
- __ br(Assembler::EQ, done);
+ generate_pre_barrier_fast_path(masm, thread, tmp1);
+ // If marking is active (*(mark queue active address) != 0), jump to stub (slow path)
+ __ cbnzw(tmp1, *stub->entry());
- assert((int)CardTable::dirty_card_val() == 0, "must be 0");
+ __ bind(*stub->continuation());
+}
- __ membar(Assembler::StoreLoad);
+void G1BarrierSetAssembler::generate_c2_pre_barrier_stub(MacroAssembler* masm,
+ G1PreBarrierStubC2* stub) const {
+ Assembler::InlineSkippedInstructionsCounter skip_counter(masm);
+ Label runtime;
+ Register obj = stub->obj();
+ Register pre_val = stub->pre_val();
+ Register thread = stub->thread();
+ Register tmp1 = stub->tmp1();
+ Register tmp2 = stub->tmp2();
- __ ldrb(tmp2, Address(card_addr));
- __ cbzw(tmp2, done);
+ __ bind(*stub->entry());
+ generate_pre_barrier_slow_path(masm, obj, pre_val, thread, tmp1, tmp2, *stub->continuation(), runtime);
- // storing a region crossing, non-null oop, card is clean.
- // dirty card and log.
+ __ bind(runtime);
+ generate_c2_barrier_runtime_call(masm, stub, pre_val, CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry));
+ __ b(*stub->continuation());
+}
- __ strb(zr, Address(card_addr));
+void G1BarrierSetAssembler::g1_write_barrier_post_c2(MacroAssembler* masm,
+ Register store_addr,
+ Register new_val,
+ Register thread,
+ Register tmp1,
+ Register tmp2,
+ G1PostBarrierStubC2* stub) {
+ assert(thread == rthread, "must be");
+ assert_different_registers(store_addr, new_val, thread, tmp1, tmp2,
+ rscratch1);
+ assert(store_addr != noreg && new_val != noreg && tmp1 != noreg
+ && tmp2 != noreg, "expecting a register");
- __ ldr(rscratch1, queue_index);
- __ cbz(rscratch1, runtime);
- __ sub(rscratch1, rscratch1, wordSize);
- __ str(rscratch1, queue_index);
+ stub->initialize_registers(thread, tmp1, tmp2);
- __ ldr(tmp2, buffer);
- __ str(card_addr, Address(tmp2, rscratch1));
- __ b(done);
+ bool new_val_may_be_null = (stub->barrier_data() & G1C2BarrierPostNotNull) == 0;
+ generate_post_barrier_fast_path(masm, store_addr, new_val, tmp1, tmp2, *stub->continuation(), new_val_may_be_null);
+ // If card is not young, jump to stub (slow path)
+ __ br(Assembler::NE, *stub->entry());
- __ bind(runtime);
- // save the live input values
- RegSet saved = RegSet::of(store_addr);
- __ push(saved, sp);
- __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), card_addr, thread);
- __ pop(saved, sp);
+ __ bind(*stub->continuation());
+}
- __ bind(done);
+void G1BarrierSetAssembler::generate_c2_post_barrier_stub(MacroAssembler* masm,
+ G1PostBarrierStubC2* stub) const {
+ Assembler::InlineSkippedInstructionsCounter skip_counter(masm);
+ Label runtime;
+ Register thread = stub->thread();
+ Register tmp1 = stub->tmp1(); // tmp1 holds the card address.
+ Register tmp2 = stub->tmp2();
+ assert(stub->tmp3() == noreg, "not needed in this platform");
+
+ __ bind(*stub->entry());
+ generate_post_barrier_slow_path(masm, thread, tmp1, tmp2, *stub->continuation(), runtime);
+
+ __ bind(runtime);
+ generate_c2_barrier_runtime_call(masm, stub, tmp1, CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry));
+ __ b(*stub->continuation());
}
+#endif // COMPILER2
+
void G1BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
Register dst, Address src, Register tmp1, Register tmp2) {
bool on_oop = is_reference_type(type);
diff --git a/src/hotspot/cpu/aarch64/gc/g1/g1BarrierSetAssembler_aarch64.hpp b/src/hotspot/cpu/aarch64/gc/g1/g1BarrierSetAssembler_aarch64.hpp
index 7b4bc8cdc49de..4baa18cb94544 100644
--- a/src/hotspot/cpu/aarch64/gc/g1/g1BarrierSetAssembler_aarch64.hpp
+++ b/src/hotspot/cpu/aarch64/gc/g1/g1BarrierSetAssembler_aarch64.hpp
@@ -33,6 +33,8 @@ class LIR_Assembler;
class StubAssembler;
class G1PreBarrierStub;
class G1PostBarrierStub;
+class G1PreBarrierStubC2;
+class G1PostBarrierStubC2;
class G1BarrierSetAssembler: public ModRefBarrierSetAssembler {
protected:
@@ -69,6 +71,27 @@ class G1BarrierSetAssembler: public ModRefBarrierSetAssembler {
void generate_c1_post_barrier_runtime_stub(StubAssembler* sasm);
#endif
+#ifdef COMPILER2
+ void g1_write_barrier_pre_c2(MacroAssembler* masm,
+ Register obj,
+ Register pre_val,
+ Register thread,
+ Register tmp1,
+ Register tmp2,
+ G1PreBarrierStubC2* c2_stub);
+ void generate_c2_pre_barrier_stub(MacroAssembler* masm,
+ G1PreBarrierStubC2* stub) const;
+ void g1_write_barrier_post_c2(MacroAssembler* masm,
+ Register store_addr,
+ Register new_val,
+ Register thread,
+ Register tmp1,
+ Register tmp2,
+ G1PostBarrierStubC2* c2_stub);
+ void generate_c2_post_barrier_stub(MacroAssembler* masm,
+ G1PostBarrierStubC2* stub) const;
+#endif
+
void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
Register dst, Address src, Register tmp1, Register tmp2);
};
diff --git a/src/hotspot/cpu/aarch64/gc/g1/g1_aarch64.ad b/src/hotspot/cpu/aarch64/gc/g1/g1_aarch64.ad
new file mode 100644
index 0000000000000..081a67d68807b
--- /dev/null
+++ b/src/hotspot/cpu/aarch64/gc/g1/g1_aarch64.ad
@@ -0,0 +1,680 @@
+//
+// Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+//
+// This code is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License version 2 only, as
+// published by the Free Software Foundation.
+//
+// This code is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+// version 2 for more details (a copy is included in the LICENSE file that
+// accompanied this code).
+//
+// You should have received a copy of the GNU General Public License version
+// 2 along with this work; if not, write to the Free Software Foundation,
+// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+//
+// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+// or visit www.oracle.com if you need additional information or have any
+// questions.
+//
+
+source_hpp %{
+
+#include "gc/g1/c2/g1BarrierSetC2.hpp"
+#include "gc/shared/gc_globals.hpp"
+
+%}
+
+source %{
+
+#include "gc/g1/g1BarrierSetAssembler_aarch64.hpp"
+#include "gc/g1/g1BarrierSetRuntime.hpp"
+
+static void write_barrier_pre(MacroAssembler* masm,
+ const MachNode* node,
+ Register obj,
+ Register pre_val,
+ Register tmp1,
+ Register tmp2,
+ RegSet preserve = RegSet(),
+ RegSet no_preserve = RegSet()) {
+ if (!G1PreBarrierStubC2::needs_barrier(node)) {
+ return;
+ }
+ Assembler::InlineSkippedInstructionsCounter skip_counter(masm);
+ G1BarrierSetAssembler* g1_asm = static_cast<G1BarrierSetAssembler*>(BarrierSet::barrier_set()->barrier_set_assembler());
+ G1PreBarrierStubC2* const stub = G1PreBarrierStubC2::create(node);
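+  // Registers in "preserve" must survive the stub's runtime call; those in "no_preserve" may be clobbered.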
+ for (RegSetIterator<Register> reg = preserve.begin(); *reg != noreg; ++reg) {
+ stub->preserve(*reg);
+ }
+ for (RegSetIterator<Register> reg = no_preserve.begin(); *reg != noreg; ++reg) {
+ stub->dont_preserve(*reg);
+ }
+ g1_asm->g1_write_barrier_pre_c2(masm, obj, pre_val, rthread, tmp1, tmp2, stub);
+}
+
+static void write_barrier_post(MacroAssembler* masm,
+ const MachNode* node,
+ Register store_addr,
+ Register new_val,
+ Register tmp1,
+ Register tmp2) {
+ if (!G1PostBarrierStubC2::needs_barrier(node)) {
+ return;
+ }
+ Assembler::InlineSkippedInstructionsCounter skip_counter(masm);
+ G1BarrierSetAssembler* g1_asm = static_cast<G1BarrierSetAssembler*>(BarrierSet::barrier_set()->barrier_set_assembler());
+ G1PostBarrierStubC2* const stub = G1PostBarrierStubC2::create(node);
+ g1_asm->g1_write_barrier_post_c2(masm, store_addr, new_val, rthread, tmp1, tmp2, stub);
+}
+
+%}
+
+// BEGIN This section of the file is automatically generated. Do not edit --------------
+
+// This section is generated from g1_aarch64.m4
+
+
+// This pattern is generated automatically from g1_aarch64.m4.
+// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
+instruct g1StoreP(indirect mem, iRegP src, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp tmp3, rFlagsReg cr)
+%{
+ predicate(UseG1GC && !needs_releasing_store(n) && n->as_Store()->barrier_data() != 0);
+ match(Set mem (StoreP mem src));
+ effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);
+ ins_cost(INSN_COST);
+ format %{ "str $src, $mem\t# ptr" %}
+ ins_encode %{
+ write_barrier_pre(masm, this,
+ $mem$$Register /* obj */,
+ $tmp1$$Register /* pre_val */,
+ $tmp2$$Register /* tmp1 */,
+ $tmp3$$Register /* tmp2 */,
+ RegSet::of($mem$$Register, $src$$Register) /* preserve */);
+ __ str($src$$Register, $mem$$Register);
+ write_barrier_post(masm, this,
+ $mem$$Register /* store_addr */,
+ $src$$Register /* new_val */,
+ $tmp2$$Register /* tmp1 */,
+ $tmp3$$Register /* tmp2 */);
+ %}
+ ins_pipe(istore_reg_mem);
+%}
+
+// This pattern is generated automatically from g1_aarch64.m4.
+// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
+instruct g1StorePVolatile(indirect mem, iRegP src, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp tmp3, rFlagsReg cr)
+%{
+ predicate(UseG1GC && needs_releasing_store(n) && n->as_Store()->barrier_data() != 0);
+ match(Set mem (StoreP mem src));
+ effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);
+ ins_cost(VOLATILE_REF_COST);
+ format %{ "stlr $src, $mem\t# ptr" %}
+ ins_encode %{
+ write_barrier_pre(masm, this,
+ $mem$$Register /* obj */,
+ $tmp1$$Register /* pre_val */,
+ $tmp2$$Register /* tmp1 */,
+ $tmp3$$Register /* tmp2 */,
+ RegSet::of($mem$$Register, $src$$Register) /* preserve */);
+ __ stlr($src$$Register, $mem$$Register);
+ write_barrier_post(masm, this,
+ $mem$$Register /* store_addr */,
+ $src$$Register /* new_val */,
+ $tmp2$$Register /* tmp1 */,
+ $tmp3$$Register /* tmp2 */);
+ %}
+ ins_pipe(pipe_class_memory);
+%}
+
+// This pattern is generated automatically from g1_aarch64.m4.
+// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
+instruct g1StoreN(indirect mem, iRegN src, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp tmp3, rFlagsReg cr)
+%{
+ predicate(UseG1GC && !needs_releasing_store(n) && n->as_Store()->barrier_data() != 0);
+ match(Set mem (StoreN mem src));
+ effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);
+ ins_cost(INSN_COST);
+ format %{ "strw $src, $mem\t# compressed ptr" %}
+ ins_encode %{
+ write_barrier_pre(masm, this,
+ $mem$$Register /* obj */,
+ $tmp1$$Register /* pre_val */,
+ $tmp2$$Register /* tmp1 */,
+ $tmp3$$Register /* tmp2 */,
+ RegSet::of($mem$$Register, $src$$Register) /* preserve */);
+ __ strw($src$$Register, $mem$$Register);
+ if ((barrier_data() & G1C2BarrierPost) != 0) {
+ if ((barrier_data() & G1C2BarrierPostNotNull) == 0) {
+ __ decode_heap_oop($tmp1$$Register, $src$$Register);
+ } else {
+ __ decode_heap_oop_not_null($tmp1$$Register, $src$$Register);
+ }
+ }
+ write_barrier_post(masm, this,
+ $mem$$Register /* store_addr */,
+ $tmp1$$Register /* new_val */,
+ $tmp2$$Register /* tmp1 */,
+ $tmp3$$Register /* tmp2 */);
+ %}
+ ins_pipe(istore_reg_mem);
+%}
+
+// This pattern is generated automatically from g1_aarch64.m4.
+// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
+instruct g1StoreNVolatile(indirect mem, iRegN src, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp tmp3, rFlagsReg cr)
+%{
+ predicate(UseG1GC && needs_releasing_store(n) && n->as_Store()->barrier_data() != 0);
+ match(Set mem (StoreN mem src));
+ effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);
+ ins_cost(VOLATILE_REF_COST);
+ format %{ "stlrw $src, $mem\t# compressed ptr" %}
+ ins_encode %{
+ write_barrier_pre(masm, this,
+ $mem$$Register /* obj */,
+ $tmp1$$Register /* pre_val */,
+ $tmp2$$Register /* tmp1 */,
+ $tmp3$$Register /* tmp2 */,
+ RegSet::of($mem$$Register, $src$$Register) /* preserve */);
+ __ stlrw($src$$Register, $mem$$Register);
+ if ((barrier_data() & G1C2BarrierPost) != 0) {
+ if ((barrier_data() & G1C2BarrierPostNotNull) == 0) {
+ __ decode_heap_oop($tmp1$$Register, $src$$Register);
+ } else {
+ __ decode_heap_oop_not_null($tmp1$$Register, $src$$Register);
+ }
+ }
+ write_barrier_post(masm, this,
+ $mem$$Register /* store_addr */,
+ $tmp1$$Register /* new_val */,
+ $tmp2$$Register /* tmp1 */,
+ $tmp3$$Register /* tmp2 */);
+ %}
+ ins_pipe(pipe_class_memory);
+%}
+
+// This pattern is generated automatically from g1_aarch64.m4.
+// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
+instruct g1EncodePAndStoreN(indirect mem, iRegP src, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp tmp3, rFlagsReg cr)
+%{
+ predicate(UseG1GC && !needs_releasing_store(n) && n->as_Store()->barrier_data() != 0);
+ match(Set mem (StoreN mem (EncodeP src)));
+ effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);
+ ins_cost(INSN_COST);
+ format %{ "encode_heap_oop $tmp1, $src\n\t"
+ "strw $tmp1, $mem\t# compressed ptr" %}
+ ins_encode %{
+ write_barrier_pre(masm, this,
+ $mem$$Register /* obj */,
+ $tmp1$$Register /* pre_val */,
+ $tmp2$$Register /* tmp1 */,
+ $tmp3$$Register /* tmp2 */,
+ RegSet::of($mem$$Register, $src$$Register) /* preserve */);
+ if ((barrier_data() & G1C2BarrierPostNotNull) == 0) {
+ __ encode_heap_oop($tmp1$$Register, $src$$Register);
+ } else {
+ __ encode_heap_oop_not_null($tmp1$$Register, $src$$Register);
+ }
+ __ strw($tmp1$$Register, $mem$$Register);
+ write_barrier_post(masm, this,
+ $mem$$Register /* store_addr */,
+ $src$$Register /* new_val */,
+ $tmp2$$Register /* tmp1 */,
+ $tmp3$$Register /* tmp2 */);
+ %}
+ ins_pipe(istore_reg_mem);
+%}
+
+// This pattern is generated automatically from g1_aarch64.m4.
+// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
+instruct g1EncodePAndStoreNVolatile(indirect mem, iRegP src, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp tmp3, rFlagsReg cr)
+%{
+ predicate(UseG1GC && needs_releasing_store(n) && n->as_Store()->barrier_data() != 0);
+ match(Set mem (StoreN mem (EncodeP src)));
+ effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);
+ ins_cost(VOLATILE_REF_COST);
+ format %{ "encode_heap_oop $tmp1, $src\n\t"
+ "stlrw $tmp1, $mem\t# compressed ptr" %}
+ ins_encode %{
+ write_barrier_pre(masm, this,
+ $mem$$Register /* obj */,
+ $tmp1$$Register /* pre_val */,
+ $tmp2$$Register /* tmp1 */,
+ $tmp3$$Register /* tmp2 */,
+ RegSet::of($mem$$Register, $src$$Register) /* preserve */);
+ if ((barrier_data() & G1C2BarrierPostNotNull) == 0) {
+ __ encode_heap_oop($tmp1$$Register, $src$$Register);
+ } else {
+ __ encode_heap_oop_not_null($tmp1$$Register, $src$$Register);
+ }
+ __ stlrw($tmp1$$Register, $mem$$Register);
+ write_barrier_post(masm, this,
+ $mem$$Register /* store_addr */,
+ $src$$Register /* new_val */,
+ $tmp2$$Register /* tmp1 */,
+ $tmp3$$Register /* tmp2 */);
+ %}
+ ins_pipe(pipe_class_memory);
+%}
+
+// This pattern is generated automatically from g1_aarch64.m4.
+// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
+instruct g1CompareAndExchangeP(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp1, iRegPNoSp tmp2, rFlagsReg cr)
+%{
+ predicate(UseG1GC && !needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() != 0);
+ match(Set res (CompareAndExchangeP mem (Binary oldval newval)));
+ effect(TEMP res, TEMP tmp1, TEMP tmp2, KILL cr);
+ ins_cost(2 * VOLATILE_REF_COST);
+ format %{ "cmpxchg $res = $mem, $oldval, $newval\t# ptr" %}
+ ins_encode %{
+ assert_different_registers($oldval$$Register, $mem$$Register);
+ assert_different_registers($newval$$Register, $mem$$Register);
+ // Pass $oldval to the pre-barrier (instead of loading from $mem), because
+ // $oldval is the only value that can be overwritten.
+ // The same holds for g1CompareAndSwapP and its Acq variant.
+ write_barrier_pre(masm, this,
+ noreg /* obj */,
+ $oldval$$Register /* pre_val */,
+ $tmp1$$Register /* tmp1 */,
+ $tmp2$$Register /* tmp2 */,
+ RegSet::of($mem$$Register, $oldval$$Register, $newval$$Register) /* preserve */,
+ RegSet::of($res$$Register) /* no_preserve */);
+ __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::xword,
+ false /* acquire */, true /* release */, false /* weak */, $res$$Register);
+ write_barrier_post(masm, this,
+ $mem$$Register /* store_addr */,
+ $newval$$Register /* new_val */,
+ $tmp1$$Register /* tmp1 */,
+ $tmp2$$Register /* tmp2 */);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+// This pattern is generated automatically from g1_aarch64.m4.
+// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
+instruct g1CompareAndExchangePAcq(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp1, iRegPNoSp tmp2, rFlagsReg cr)
+%{
+ predicate(UseG1GC && needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() != 0);
+ match(Set res (CompareAndExchangeP mem (Binary oldval newval)));
+ effect(TEMP res, TEMP tmp1, TEMP tmp2, KILL cr);
+ ins_cost(VOLATILE_REF_COST);
+ format %{ "cmpxchg_acq $res = $mem, $oldval, $newval\t# ptr" %}
+ ins_encode %{
+ assert_different_registers($oldval$$Register, $mem$$Register);
+ assert_different_registers($newval$$Register, $mem$$Register);
+ // Pass $oldval to the pre-barrier (instead of loading from $mem), because
+ // $oldval is the only value that can be overwritten.
+ // The same holds for g1CompareAndSwapP and its Acq variant.
+ write_barrier_pre(masm, this,
+ noreg /* obj */,
+ $oldval$$Register /* pre_val */,
+ $tmp1$$Register /* tmp1 */,
+ $tmp2$$Register /* tmp2 */,
+ RegSet::of($mem$$Register, $oldval$$Register, $newval$$Register) /* preserve */,
+ RegSet::of($res$$Register) /* no_preserve */);
+ __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::xword,
+ true /* acquire */, true /* release */, false /* weak */, $res$$Register);
+ write_barrier_post(masm, this,
+ $mem$$Register /* store_addr */,
+ $newval$$Register /* new_val */,
+ $tmp1$$Register /* tmp1 */,
+ $tmp2$$Register /* tmp2 */);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+// This pattern is generated automatically from g1_aarch64.m4.
+// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
+instruct g1CompareAndExchangeN(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp tmp3, rFlagsReg cr)
+%{
+ predicate(UseG1GC && !needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() != 0);
+ match(Set res (CompareAndExchangeN mem (Binary oldval newval)));
+ effect(TEMP res, TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);
+ ins_cost(2 * VOLATILE_REF_COST);
+ format %{ "cmpxchg $res = $mem, $oldval, $newval\t# narrow oop" %}
+ ins_encode %{
+ assert_different_registers($oldval$$Register, $mem$$Register);
+ assert_different_registers($newval$$Register, $mem$$Register);
+ write_barrier_pre(masm, this,
+ $mem$$Register /* obj */,
+ $tmp1$$Register /* pre_val */,
+ $tmp2$$Register /* tmp1 */,
+ $tmp3$$Register /* tmp2 */,
+ RegSet::of($mem$$Register, $oldval$$Register, $newval$$Register) /* preserve */,
+ RegSet::of($res$$Register) /* no_preserve */);
+ __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::word,
+ false /* acquire */, true /* release */, false /* weak */, $res$$Register);
+ __ decode_heap_oop($tmp1$$Register, $newval$$Register);
+ write_barrier_post(masm, this,
+ $mem$$Register /* store_addr */,
+ $tmp1$$Register /* new_val */,
+ $tmp2$$Register /* tmp1 */,
+ $tmp3$$Register /* tmp2 */);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+// This pattern is generated automatically from g1_aarch64.m4.
+// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
+instruct g1CompareAndExchangeNAcq(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp tmp3, rFlagsReg cr)
+%{
+ predicate(UseG1GC && needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() != 0);
+ match(Set res (CompareAndExchangeN mem (Binary oldval newval)));
+ effect(TEMP res, TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);
+ ins_cost(VOLATILE_REF_COST);
+ format %{ "cmpxchg_acq $res = $mem, $oldval, $newval\t# narrow oop" %}
+ ins_encode %{
+ assert_different_registers($oldval$$Register, $mem$$Register);
+ assert_different_registers($newval$$Register, $mem$$Register);
+ write_barrier_pre(masm, this,
+ $mem$$Register /* obj */,
+ $tmp1$$Register /* pre_val */,
+ $tmp2$$Register /* tmp1 */,
+ $tmp3$$Register /* tmp2 */,
+ RegSet::of($mem$$Register, $oldval$$Register, $newval$$Register) /* preserve */,
+ RegSet::of($res$$Register) /* no_preserve */);
+ __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::word,
+ true /* acquire */, true /* release */, false /* weak */, $res$$Register);
+ __ decode_heap_oop($tmp1$$Register, $newval$$Register);
+ write_barrier_post(masm, this,
+ $mem$$Register /* store_addr */,
+ $tmp1$$Register /* new_val */,
+ $tmp2$$Register /* tmp1 */,
+ $tmp3$$Register /* tmp2 */);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+// This pattern is generated automatically from g1_aarch64.m4.
+// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
+instruct g1CompareAndSwapP(iRegINoSp res, indirect mem, iRegP newval, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegP oldval, rFlagsReg cr)
+%{
+ predicate(UseG1GC && !needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() != 0);
+ match(Set res (CompareAndSwapP mem (Binary oldval newval)));
+ match(Set res (WeakCompareAndSwapP mem (Binary oldval newval)));
+ effect(TEMP res, TEMP tmp1, TEMP tmp2, KILL cr);
+ ins_cost(2 * VOLATILE_REF_COST);
+ format %{ "cmpxchg $mem, $oldval, $newval\t# (ptr)\n\t"
+ "cset $res, EQ" %}
+ ins_encode %{
+ assert_different_registers($oldval$$Register, $mem$$Register);
+ assert_different_registers($newval$$Register, $mem$$Register);
+ write_barrier_pre(masm, this,
+ noreg /* obj */,
+ $oldval$$Register /* pre_val */,
+ $tmp1$$Register /* tmp1 */,
+ $tmp2$$Register /* tmp2 */,
+ RegSet::of($mem$$Register, $oldval$$Register, $newval$$Register) /* preserve */,
+ RegSet::of($res$$Register) /* no_preserve */);
+ __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::xword,
+ false /* acquire */, true /* release */, false /* weak */, noreg);
+ __ cset($res$$Register, Assembler::EQ);
+ write_barrier_post(masm, this,
+ $mem$$Register /* store_addr */,
+ $newval$$Register /* new_val */,
+ $tmp1$$Register /* tmp1 */,
+ $tmp2$$Register /* tmp2 */);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+// This pattern is generated automatically from g1_aarch64.m4.
+// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
+instruct g1CompareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP newval, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegP oldval, rFlagsReg cr)
+%{
+ predicate(UseG1GC && needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() != 0);
+ match(Set res (CompareAndSwapP mem (Binary oldval newval)));
+ match(Set res (WeakCompareAndSwapP mem (Binary oldval newval)));
+ effect(TEMP res, TEMP tmp1, TEMP tmp2, KILL cr);
+ ins_cost(VOLATILE_REF_COST);
+ format %{ "cmpxchg_acq $mem, $oldval, $newval\t# (ptr)\n\t"
+ "cset $res, EQ" %}
+ ins_encode %{
+ assert_different_registers($oldval$$Register, $mem$$Register);
+ assert_different_registers($newval$$Register, $mem$$Register);
+ write_barrier_pre(masm, this,
+ noreg /* obj */,
+ $oldval$$Register /* pre_val */,
+ $tmp1$$Register /* tmp1 */,
+ $tmp2$$Register /* tmp2 */,
+ RegSet::of($mem$$Register, $oldval$$Register, $newval$$Register) /* preserve */,
+ RegSet::of($res$$Register) /* no_preserve */);
+ __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::xword,
+ true /* acquire */, true /* release */, false /* weak */, noreg);
+ __ cset($res$$Register, Assembler::EQ);
+ write_barrier_post(masm, this,
+ $mem$$Register /* store_addr */,
+ $newval$$Register /* new_val */,
+ $tmp1$$Register /* tmp1 */,
+ $tmp2$$Register /* tmp2 */);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+// This pattern is generated automatically from g1_aarch64.m4.
+// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
+instruct g1CompareAndSwapN(iRegINoSp res, indirect mem, iRegN newval, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp tmp3, iRegN oldval, rFlagsReg cr)
+%{
+ predicate(UseG1GC && !needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() != 0);
+ match(Set res (CompareAndSwapN mem (Binary oldval newval)));
+ match(Set res (WeakCompareAndSwapN mem (Binary oldval newval)));
+ effect(TEMP res, TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);
+ ins_cost(2 * VOLATILE_REF_COST);
+ format %{ "cmpxchg $mem, $oldval, $newval\t# (narrow oop)\n\t"
+ "cset $res, EQ" %}
+ ins_encode %{
+ assert_different_registers($oldval$$Register, $mem$$Register);
+ assert_different_registers($newval$$Register, $mem$$Register);
+ write_barrier_pre(masm, this,
+ $mem$$Register /* obj */,
+ $tmp1$$Register /* pre_val */,
+ $tmp2$$Register /* tmp1 */,
+ $tmp3$$Register /* tmp2 */,
+ RegSet::of($mem$$Register, $oldval$$Register, $newval$$Register) /* preserve */,
+ RegSet::of($res$$Register) /* no_preserve */);
+ __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::word,
+ false /* acquire */, true /* release */, false /* weak */, noreg);
+ __ cset($res$$Register, Assembler::EQ);
+ __ decode_heap_oop($tmp1$$Register, $newval$$Register);
+ write_barrier_post(masm, this,
+ $mem$$Register /* store_addr */,
+ $tmp1$$Register /* new_val */,
+ $tmp2$$Register /* tmp1 */,
+ $tmp3$$Register /* tmp2 */);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+// This pattern is generated automatically from g1_aarch64.m4.
+// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
+instruct g1CompareAndSwapNAcq(iRegINoSp res, indirect mem, iRegN newval, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp tmp3, iRegN oldval, rFlagsReg cr)
+%{
+ predicate(UseG1GC && needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() != 0);
+ match(Set res (CompareAndSwapN mem (Binary oldval newval)));
+ match(Set res (WeakCompareAndSwapN mem (Binary oldval newval)));
+ effect(TEMP res, TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);
+ ins_cost(VOLATILE_REF_COST);
+ format %{ "cmpxchg_acq $mem, $oldval, $newval\t# (narrow oop)\n\t"
+ "cset $res, EQ" %}
+ ins_encode %{
+ assert_different_registers($oldval$$Register, $mem$$Register);
+ assert_different_registers($newval$$Register, $mem$$Register);
+ write_barrier_pre(masm, this,
+ $mem$$Register /* obj */,
+ $tmp1$$Register /* pre_val */,
+ $tmp2$$Register /* tmp1 */,
+ $tmp3$$Register /* tmp2 */,
+ RegSet::of($mem$$Register, $oldval$$Register, $newval$$Register) /* preserve */,
+ RegSet::of($res$$Register) /* no_preserve */);
+ __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::word,
+ true /* acquire */, true /* release */, false /* weak */, noreg);
+ __ cset($res$$Register, Assembler::EQ);
+ __ decode_heap_oop($tmp1$$Register, $newval$$Register);
+ write_barrier_post(masm, this,
+ $mem$$Register /* store_addr */,
+ $tmp1$$Register /* new_val */,
+ $tmp2$$Register /* tmp1 */,
+ $tmp3$$Register /* tmp2 */);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+// This pattern is generated automatically from g1_aarch64.m4.
+// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
+instruct g1GetAndSetP(indirect mem, iRegP newval, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp preval, rFlagsReg cr)
+%{
+ predicate(UseG1GC && !needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() != 0);
+ match(Set preval (GetAndSetP mem newval));
+ effect(TEMP preval, TEMP tmp1, TEMP tmp2, KILL cr);
+ ins_cost(2 * VOLATILE_REF_COST);
+ format %{ "atomic_xchg $preval, $newval, [$mem]" %}
+ ins_encode %{
+ assert_different_registers($mem$$Register, $newval$$Register);
+ write_barrier_pre(masm, this,
+ $mem$$Register /* obj */,
+ $preval$$Register /* pre_val (as a temporary register) */,
+ $tmp1$$Register /* tmp1 */,
+ $tmp2$$Register /* tmp2 */,
+ RegSet::of($mem$$Register, $preval$$Register, $newval$$Register) /* preserve */);
+ __ atomic_xchg($preval$$Register, $newval$$Register, $mem$$Register);
+ write_barrier_post(masm, this,
+ $mem$$Register /* store_addr */,
+ $newval$$Register /* new_val */,
+ $tmp1$$Register /* tmp1 */,
+ $tmp2$$Register /* tmp2 */);
+ %}
+ ins_pipe(pipe_serial);
+%}
+
+// This pattern is generated automatically from g1_aarch64.m4.
+// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
+instruct g1GetAndSetPAcq(indirect mem, iRegP newval, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp preval, rFlagsReg cr)
+%{
+ predicate(UseG1GC && needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() != 0);
+ match(Set preval (GetAndSetP mem newval));
+ effect(TEMP preval, TEMP tmp1, TEMP tmp2, KILL cr);
+ ins_cost(VOLATILE_REF_COST);
+ format %{ "atomic_xchg_acq $preval, $newval, [$mem]" %}
+ ins_encode %{
+ assert_different_registers($mem$$Register, $newval$$Register);
+ write_barrier_pre(masm, this,
+ $mem$$Register /* obj */,
+ $preval$$Register /* pre_val (as a temporary register) */,
+ $tmp1$$Register /* tmp1 */,
+ $tmp2$$Register /* tmp2 */,
+ RegSet::of($mem$$Register, $preval$$Register, $newval$$Register) /* preserve */);
+ __ atomic_xchgal($preval$$Register, $newval$$Register, $mem$$Register);
+ write_barrier_post(masm, this,
+ $mem$$Register /* store_addr */,
+ $newval$$Register /* new_val */,
+ $tmp1$$Register /* tmp1 */,
+ $tmp2$$Register /* tmp2 */);
+ %}
+ ins_pipe(pipe_serial);
+%}
+
+// This pattern is generated automatically from g1_aarch64.m4.
+// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
+instruct g1GetAndSetN(indirect mem, iRegN newval, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp tmp3, iRegNNoSp preval, rFlagsReg cr)
+%{
+ predicate(UseG1GC && !needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() != 0);
+ match(Set preval (GetAndSetN mem newval));
+ effect(TEMP preval, TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);
+ ins_cost(2 * VOLATILE_REF_COST);
+ format %{ "atomic_xchgw $preval, $newval, [$mem]" %}
+ ins_encode %{
+ assert_different_registers($mem$$Register, $newval$$Register);
+ write_barrier_pre(masm, this,
+ $mem$$Register /* obj */,
+ $tmp1$$Register /* pre_val */,
+ $tmp2$$Register /* tmp1 */,
+ $tmp3$$Register /* tmp2 */,
+ RegSet::of($mem$$Register, $preval$$Register, $newval$$Register) /* preserve */);
+ __ atomic_xchgw($preval$$Register, $newval$$Register, $mem$$Register);
+ __ decode_heap_oop($tmp1$$Register, $newval$$Register);
+ write_barrier_post(masm, this,
+ $mem$$Register /* store_addr */,
+ $tmp1$$Register /* new_val */,
+ $tmp2$$Register /* tmp1 */,
+ $tmp3$$Register /* tmp2 */);
+ %}
+ ins_pipe(pipe_serial);
+%}
+
+// This pattern is generated automatically from g1_aarch64.m4.
+// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
+instruct g1GetAndSetNAcq(indirect mem, iRegN newval, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp tmp3, iRegNNoSp preval, rFlagsReg cr)
+%{
+ predicate(UseG1GC && needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() != 0);
+ match(Set preval (GetAndSetN mem newval));
+ effect(TEMP preval, TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);
+ ins_cost(VOLATILE_REF_COST);
+ format %{ "atomic_xchgw_acq $preval, $newval, [$mem]" %}
+ ins_encode %{
+ assert_different_registers($mem$$Register, $newval$$Register);
+ write_barrier_pre(masm, this,
+ $mem$$Register /* obj */,
+ $tmp1$$Register /* pre_val */,
+ $tmp2$$Register /* tmp1 */,
+ $tmp3$$Register /* tmp2 */,
+ RegSet::of($mem$$Register, $preval$$Register, $newval$$Register) /* preserve */);
+ __ atomic_xchgalw($preval$$Register, $newval$$Register, $mem$$Register);
+ __ decode_heap_oop($tmp1$$Register, $newval$$Register);
+ write_barrier_post(masm, this,
+ $mem$$Register /* store_addr */,
+ $tmp1$$Register /* new_val */,
+ $tmp2$$Register /* tmp1 */,
+ $tmp3$$Register /* tmp2 */);
+ %}
+ ins_pipe(pipe_serial);
+%}
+
+// This pattern is generated automatically from g1_aarch64.m4.
+// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
+instruct g1LoadP(iRegPNoSp dst, indirect mem, iRegPNoSp tmp1, iRegPNoSp tmp2, rFlagsReg cr)
+%{
+ // This instruction does not need an acquiring counterpart because it is only
+ // used for reference loading (Reference::get()). The same holds for g1LoadN.
+ predicate(UseG1GC && !needs_acquiring_load(n) && n->as_Load()->barrier_data() != 0);
+ match(Set dst (LoadP mem));
+ effect(TEMP dst, TEMP tmp1, TEMP tmp2, KILL cr);
+ ins_cost(4 * INSN_COST);
+ format %{ "ldr $dst, $mem\t# ptr" %}
+ ins_encode %{
+ __ ldr($dst$$Register, $mem$$Register);
+ write_barrier_pre(masm, this,
+ noreg /* obj */,
+ $dst$$Register /* pre_val */,
+ $tmp1$$Register /* tmp1 */,
+ $tmp2$$Register /* tmp2 */);
+ %}
+ ins_pipe(iload_reg_mem);
+%}
+
+// This pattern is generated automatically from g1_aarch64.m4.
+// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
+instruct g1LoadN(iRegNNoSp dst, indirect mem, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp tmp3, rFlagsReg cr)
+%{
+ predicate(UseG1GC && !needs_acquiring_load(n) && n->as_Load()->barrier_data() != 0);
+ match(Set dst (LoadN mem));
+ effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);
+ ins_cost(4 * INSN_COST);
+ format %{ "ldrw $dst, $mem\t# compressed ptr" %}
+ ins_encode %{
+ __ ldrw($dst$$Register, $mem$$Register);
+ if ((barrier_data() & G1C2BarrierPre) != 0) {
+ __ decode_heap_oop($tmp1$$Register, $dst$$Register);
+ write_barrier_pre(masm, this,
+ noreg /* obj */,
+ $tmp1$$Register /* pre_val */,
+ $tmp2$$Register /* tmp1 */,
+ $tmp3$$Register /* tmp2 */);
+ }
+ %}
+ ins_pipe(iload_reg_mem);
+%}
+
+// END This section of the file is automatically generated. Do not edit --------------
diff --git a/src/hotspot/cpu/aarch64/gc/g1/g1_aarch64.m4 b/src/hotspot/cpu/aarch64/gc/g1/g1_aarch64.m4
new file mode 100644
index 0000000000000..8fb1f7e8e428b
--- /dev/null
+++ b/src/hotspot/cpu/aarch64/gc/g1/g1_aarch64.m4
@@ -0,0 +1,384 @@
+dnl Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
+dnl DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+dnl
+dnl This code is free software; you can redistribute it and/or modify it
+dnl under the terms of the GNU General Public License version 2 only, as
+dnl published by the Free Software Foundation.
+dnl
+dnl This code is distributed in the hope that it will be useful, but WITHOUT
+dnl ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+dnl FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl version 2 for more details (a copy is included in the LICENSE file that
+dnl accompanied this code).
+dnl
+dnl You should have received a copy of the GNU General Public License version
+dnl 2 along with this work; if not, write to the Free Software Foundation,
+dnl Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+dnl
+dnl Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+dnl or visit www.oracle.com if you need additional information or have any
+dnl questions.
+dnl
+// BEGIN This section of the file is automatically generated. Do not edit --------------
+
+// This section is generated from g1_aarch64.m4
+
+define(`STOREP_INSN',
+`
+// This pattern is generated automatically from g1_aarch64.m4.
+// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
+instruct g1StoreP$1(indirect mem, iRegP src, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp tmp3, rFlagsReg cr)
+%{
+ predicate(UseG1GC && ifelse($1,Volatile,'needs_releasing_store(n)`,'!needs_releasing_store(n)`) && n->as_Store()->barrier_data() != 0);
+ match(Set mem (StoreP mem src));
+ effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);
+ ins_cost(ifelse($1,Volatile,VOLATILE_REF_COST,INSN_COST));
+ format %{ "$2 $src, $mem\t# ptr" %}
+ ins_encode %{
+ write_barrier_pre(masm, this,
+ $mem$$Register /* obj */,
+ $tmp1$$Register /* pre_val */,
+ $tmp2$$Register /* tmp1 */,
+ $tmp3$$Register /* tmp2 */,
+ RegSet::of($mem$$Register, $src$$Register) /* preserve */);
+ __ $2($src$$Register, $mem$$Register);
+ write_barrier_post(masm, this,
+ $mem$$Register /* store_addr */,
+ $src$$Register /* new_val */,
+ $tmp2$$Register /* tmp1 */,
+ $tmp3$$Register /* tmp2 */);
+ %}
+ ins_pipe(ifelse($1,Volatile,pipe_class_memory,istore_reg_mem));
+%}')dnl
+STOREP_INSN(,str)
+STOREP_INSN(Volatile,stlr)
+dnl
+define(`STOREN_INSN',
+`
+// This pattern is generated automatically from g1_aarch64.m4.
+// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
+instruct g1StoreN$1(indirect mem, iRegN src, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp tmp3, rFlagsReg cr)
+%{
+ predicate(UseG1GC && ifelse($1,Volatile,'needs_releasing_store(n)`,'!needs_releasing_store(n)`) && n->as_Store()->barrier_data() != 0);
+ match(Set mem (StoreN mem src));
+ effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);
+ ins_cost(ifelse($1,Volatile,VOLATILE_REF_COST,INSN_COST));
+ format %{ "$2 $src, $mem\t# compressed ptr" %}
+ ins_encode %{
+ write_barrier_pre(masm, this,
+ $mem$$Register /* obj */,
+ $tmp1$$Register /* pre_val */,
+ $tmp2$$Register /* tmp1 */,
+ $tmp3$$Register /* tmp2 */,
+ RegSet::of($mem$$Register, $src$$Register) /* preserve */);
+ __ $2($src$$Register, $mem$$Register);
+ if ((barrier_data() & G1C2BarrierPost) != 0) {
+ if ((barrier_data() & G1C2BarrierPostNotNull) == 0) {
+ __ decode_heap_oop($tmp1$$Register, $src$$Register);
+ } else {
+ __ decode_heap_oop_not_null($tmp1$$Register, $src$$Register);
+ }
+ }
+ write_barrier_post(masm, this,
+ $mem$$Register /* store_addr */,
+ $tmp1$$Register /* new_val */,
+ $tmp2$$Register /* tmp1 */,
+ $tmp3$$Register /* tmp2 */);
+ %}
+ ins_pipe(ifelse($1,Volatile,pipe_class_memory,istore_reg_mem));
+%}')dnl
+STOREN_INSN(,strw)
+STOREN_INSN(Volatile,stlrw)
+dnl
+define(`ENCODESTOREN_INSN',
+`
+// This pattern is generated automatically from g1_aarch64.m4.
+// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
+instruct g1EncodePAndStoreN$1(indirect mem, iRegP src, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp tmp3, rFlagsReg cr)
+%{
+ predicate(UseG1GC && ifelse($1,Volatile,'needs_releasing_store(n)`,'!needs_releasing_store(n)`) && n->as_Store()->barrier_data() != 0);
+ match(Set mem (StoreN mem (EncodeP src)));
+ effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);
+ ins_cost(ifelse($1,Volatile,VOLATILE_REF_COST,INSN_COST));
+ format %{ "encode_heap_oop $tmp1, $src\n\t"
+ "$2 $tmp1, $mem\t# compressed ptr" %}
+ ins_encode %{
+ write_barrier_pre(masm, this,
+ $mem$$Register /* obj */,
+ $tmp1$$Register /* pre_val */,
+ $tmp2$$Register /* tmp1 */,
+ $tmp3$$Register /* tmp2 */,
+ RegSet::of($mem$$Register, $src$$Register) /* preserve */);
+ if ((barrier_data() & G1C2BarrierPostNotNull) == 0) {
+ __ encode_heap_oop($tmp1$$Register, $src$$Register);
+ } else {
+ __ encode_heap_oop_not_null($tmp1$$Register, $src$$Register);
+ }
+ __ $2($tmp1$$Register, $mem$$Register);
+ write_barrier_post(masm, this,
+ $mem$$Register /* store_addr */,
+ $src$$Register /* new_val */,
+ $tmp2$$Register /* tmp1 */,
+ $tmp3$$Register /* tmp2 */);
+ %}
+ ins_pipe(ifelse($1,Volatile,pipe_class_memory,istore_reg_mem));
+%}')dnl
+ENCODESTOREN_INSN(,strw)
+ENCODESTOREN_INSN(Volatile,stlrw)
+dnl
+define(`CAEP_INSN',
+`
+// This pattern is generated automatically from g1_aarch64.m4.
+// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
+instruct g1CompareAndExchangeP$1(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp1, iRegPNoSp tmp2, rFlagsReg cr)
+%{
+ predicate(UseG1GC && ifelse($1,Acq,'needs_acquiring_load_exclusive(n)`,'!needs_acquiring_load_exclusive(n)`) && n->as_LoadStore()->barrier_data() != 0);
+ match(Set res (CompareAndExchangeP mem (Binary oldval newval)));
+ effect(TEMP res, TEMP tmp1, TEMP tmp2, KILL cr);
+ ins_cost(ifelse($1,Acq,VOLATILE_REF_COST,2 * VOLATILE_REF_COST));
+ format %{ "cmpxchg$2 $res = $mem, $oldval, $newval\t# ptr" %}
+ ins_encode %{
+ assert_different_registers($oldval$$Register, $mem$$Register);
+ assert_different_registers($newval$$Register, $mem$$Register);
+ // Pass $oldval to the pre-barrier (instead of loading from $mem), because
+ // $oldval is the only value that can be overwritten.
+ // The same holds for g1CompareAndSwapP and its Acq variant.
+ write_barrier_pre(masm, this,
+ noreg /* obj */,
+ $oldval$$Register /* pre_val */,
+ $tmp1$$Register /* tmp1 */,
+ $tmp2$$Register /* tmp2 */,
+ RegSet::of($mem$$Register, $oldval$$Register, $newval$$Register) /* preserve */,
+ RegSet::of($res$$Register) /* no_preserve */);
+ __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::xword,
+ $3 /* acquire */, true /* release */, false /* weak */, $res$$Register);
+ write_barrier_post(masm, this,
+ $mem$$Register /* store_addr */,
+ $newval$$Register /* new_val */,
+ $tmp1$$Register /* tmp1 */,
+ $tmp2$$Register /* tmp2 */);
+ %}
+ ins_pipe(pipe_slow);
+%}')dnl
+CAEP_INSN(,,false)
+CAEP_INSN(Acq,_acq,true)
+dnl
+define(`CAEN_INSN',
+`
+// This pattern is generated automatically from g1_aarch64.m4.
+// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
+instruct g1CompareAndExchangeN$1(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp tmp3, rFlagsReg cr)
+%{
+ predicate(UseG1GC && ifelse($1,Acq,'needs_acquiring_load_exclusive(n)`,'!needs_acquiring_load_exclusive(n)`) && n->as_LoadStore()->barrier_data() != 0);
+ match(Set res (CompareAndExchangeN mem (Binary oldval newval)));
+ effect(TEMP res, TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);
+ ins_cost(ifelse($1,Acq,VOLATILE_REF_COST,2 * VOLATILE_REF_COST));
+ format %{ "cmpxchg$2 $res = $mem, $oldval, $newval\t# narrow oop" %}
+ ins_encode %{
+ assert_different_registers($oldval$$Register, $mem$$Register);
+ assert_different_registers($newval$$Register, $mem$$Register);
+ write_barrier_pre(masm, this,
+ $mem$$Register /* obj */,
+ $tmp1$$Register /* pre_val */,
+ $tmp2$$Register /* tmp1 */,
+ $tmp3$$Register /* tmp2 */,
+ RegSet::of($mem$$Register, $oldval$$Register, $newval$$Register) /* preserve */,
+ RegSet::of($res$$Register) /* no_preserve */);
+ __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::word,
+ $3 /* acquire */, true /* release */, false /* weak */, $res$$Register);
+ __ decode_heap_oop($tmp1$$Register, $newval$$Register);
+ write_barrier_post(masm, this,
+ $mem$$Register /* store_addr */,
+ $tmp1$$Register /* new_val */,
+ $tmp2$$Register /* tmp1 */,
+ $tmp3$$Register /* tmp2 */);
+ %}
+ ins_pipe(pipe_slow);
+%}')dnl
+CAEN_INSN(,,false)
+CAEN_INSN(Acq,_acq,true)
+dnl
+define(`CASP_INSN',
+`
+// This pattern is generated automatically from g1_aarch64.m4.
+// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
+instruct g1CompareAndSwapP$1(iRegINoSp res, indirect mem, iRegP newval, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegP oldval, rFlagsReg cr)
+%{
+ predicate(UseG1GC && ifelse($1,Acq,'needs_acquiring_load_exclusive(n)`,'!needs_acquiring_load_exclusive(n)`) && n->as_LoadStore()->barrier_data() != 0);
+ match(Set res (CompareAndSwapP mem (Binary oldval newval)));
+ match(Set res (WeakCompareAndSwapP mem (Binary oldval newval)));
+ effect(TEMP res, TEMP tmp1, TEMP tmp2, KILL cr);
+ ins_cost(ifelse($1,Acq,VOLATILE_REF_COST,2 * VOLATILE_REF_COST));
+ format %{ "cmpxchg$2 $mem, $oldval, $newval\t# (ptr)\n\t"
+ "cset $res, EQ" %}
+ ins_encode %{
+ assert_different_registers($oldval$$Register, $mem$$Register);
+ assert_different_registers($newval$$Register, $mem$$Register);
+ write_barrier_pre(masm, this,
+ noreg /* obj */,
+ $oldval$$Register /* pre_val */,
+ $tmp1$$Register /* tmp1 */,
+ $tmp2$$Register /* tmp2 */,
+ RegSet::of($mem$$Register, $oldval$$Register, $newval$$Register) /* preserve */,
+ RegSet::of($res$$Register) /* no_preserve */);
+ __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::xword,
+ $3 /* acquire */, true /* release */, false /* weak */, noreg);
+ __ cset($res$$Register, Assembler::EQ);
+ write_barrier_post(masm, this,
+ $mem$$Register /* store_addr */,
+ $newval$$Register /* new_val */,
+ $tmp1$$Register /* tmp1 */,
+ $tmp2$$Register /* tmp2 */);
+ %}
+ ins_pipe(pipe_slow);
+%}')dnl
+CASP_INSN(,,false)
+CASP_INSN(Acq,_acq,true)
+dnl
+define(`CASN_INSN',
+`
+// This pattern is generated automatically from g1_aarch64.m4.
+// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
+instruct g1CompareAndSwapN$1(iRegINoSp res, indirect mem, iRegN newval, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp tmp3, iRegN oldval, rFlagsReg cr)
+%{
+ predicate(UseG1GC && ifelse($1,Acq,'needs_acquiring_load_exclusive(n)`,'!needs_acquiring_load_exclusive(n)`) && n->as_LoadStore()->barrier_data() != 0);
+ match(Set res (CompareAndSwapN mem (Binary oldval newval)));
+ match(Set res (WeakCompareAndSwapN mem (Binary oldval newval)));
+ effect(TEMP res, TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);
+ ins_cost(ifelse($1,Acq,VOLATILE_REF_COST,2 * VOLATILE_REF_COST));
+ format %{ "cmpxchg$2 $mem, $oldval, $newval\t# (narrow oop)\n\t"
+ "cset $res, EQ" %}
+ ins_encode %{
+ assert_different_registers($oldval$$Register, $mem$$Register);
+ assert_different_registers($newval$$Register, $mem$$Register);
+ write_barrier_pre(masm, this,
+ $mem$$Register /* obj */,
+ $tmp1$$Register /* pre_val */,
+ $tmp2$$Register /* tmp1 */,
+ $tmp3$$Register /* tmp2 */,
+ RegSet::of($mem$$Register, $oldval$$Register, $newval$$Register) /* preserve */,
+ RegSet::of($res$$Register) /* no_preserve */);
+ __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::word,
+ $3 /* acquire */, true /* release */, false /* weak */, noreg);
+ __ cset($res$$Register, Assembler::EQ);
+ __ decode_heap_oop($tmp1$$Register, $newval$$Register);
+ write_barrier_post(masm, this,
+ $mem$$Register /* store_addr */,
+ $tmp1$$Register /* new_val */,
+ $tmp2$$Register /* tmp1 */,
+ $tmp3$$Register /* tmp2 */);
+ %}
+ ins_pipe(pipe_slow);
+%}')dnl
+CASN_INSN(,,false)
+CASN_INSN(Acq,_acq,true)
+dnl
+define(`XCHGP_INSN',
+`
+// This pattern is generated automatically from g1_aarch64.m4.
+// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
+instruct g1GetAndSetP$1(indirect mem, iRegP newval, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp preval, rFlagsReg cr)
+%{
+ predicate(UseG1GC && ifelse($1,Acq,'needs_acquiring_load_exclusive(n)`,'!needs_acquiring_load_exclusive(n)`) && n->as_LoadStore()->barrier_data() != 0);
+ match(Set preval (GetAndSetP mem newval));
+ effect(TEMP preval, TEMP tmp1, TEMP tmp2, KILL cr);
+ ins_cost(ifelse($1,Acq,VOLATILE_REF_COST,2 * VOLATILE_REF_COST));
+ format %{ "atomic_xchg$2 $preval, $newval, [$mem]" %}
+ ins_encode %{
+ assert_different_registers($mem$$Register, $newval$$Register);
+ write_barrier_pre(masm, this,
+ $mem$$Register /* obj */,
+ $preval$$Register /* pre_val (as a temporary register) */,
+ $tmp1$$Register /* tmp1 */,
+ $tmp2$$Register /* tmp2 */,
+ RegSet::of($mem$$Register, $preval$$Register, $newval$$Register) /* preserve */);
+ __ $3($preval$$Register, $newval$$Register, $mem$$Register);
+ write_barrier_post(masm, this,
+ $mem$$Register /* store_addr */,
+ $newval$$Register /* new_val */,
+ $tmp1$$Register /* tmp1 */,
+ $tmp2$$Register /* tmp2 */);
+ %}
+ ins_pipe(pipe_serial);
+%}')dnl
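+dnl Arguments: $1 = instruct name suffix, $2 = format suffix, $3 = MacroAssembler exchange routine (plain or acquiring).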
+XCHGP_INSN(,,atomic_xchg)
+XCHGP_INSN(Acq,_acq,atomic_xchgal)
+dnl
+define(`XCHGN_INSN',
+`
+// This pattern is generated automatically from g1_aarch64.m4.
+// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
+instruct g1GetAndSetN$1(indirect mem, iRegN newval, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp tmp3, iRegNNoSp preval, rFlagsReg cr)
+%{
+ predicate(UseG1GC && ifelse($1,Acq,'needs_acquiring_load_exclusive(n)`,'!needs_acquiring_load_exclusive(n)`) && n->as_LoadStore()->barrier_data() != 0);
+ match(Set preval (GetAndSetN mem newval));
+ effect(TEMP preval, TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);
+ ins_cost(ifelse($1,Acq,VOLATILE_REF_COST,2 * VOLATILE_REF_COST));
+ format %{ "$2 $preval, $newval, [$mem]" %}
+ ins_encode %{
+ assert_different_registers($mem$$Register, $newval$$Register);
+ write_barrier_pre(masm, this,
+ $mem$$Register /* obj */,
+ $tmp1$$Register /* pre_val */,
+ $tmp2$$Register /* tmp1 */,
+ $tmp3$$Register /* tmp2 */,
+ RegSet::of($mem$$Register, $preval$$Register, $newval$$Register) /* preserve */);
+ __ $3($preval$$Register, $newval$$Register, $mem$$Register);
+ __ decode_heap_oop($tmp1$$Register, $newval$$Register);
+ write_barrier_post(masm, this,
+ $mem$$Register /* store_addr */,
+ $tmp1$$Register /* new_val */,
+ $tmp2$$Register /* tmp1 */,
+ $tmp3$$Register /* tmp2 */);
+ %}
+ ins_pipe(pipe_serial);
+%}')dnl
+XCHGN_INSN(,atomic_xchgw,atomic_xchgw)
+XCHGN_INSN(Acq,atomic_xchgw_acq,atomic_xchgalw)
+
+// This pattern is generated automatically from g1_aarch64.m4.
+// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
+instruct g1LoadP(iRegPNoSp dst, indirect mem, iRegPNoSp tmp1, iRegPNoSp tmp2, rFlagsReg cr)
+%{
+ // This instruction does not need an acquiring counterpart because it is only
+ // used for reference loading (Reference::get()). The same holds for g1LoadN.
+ predicate(UseG1GC && !needs_acquiring_load(n) && n->as_Load()->barrier_data() != 0);
+ match(Set dst (LoadP mem));
+ effect(TEMP dst, TEMP tmp1, TEMP tmp2, KILL cr);
+ ins_cost(4 * INSN_COST);
+ format %{ "ldr $dst, $mem\t# ptr" %}
+ ins_encode %{
+ __ ldr($dst$$Register, $mem$$Register);
+ write_barrier_pre(masm, this,
+ noreg /* obj */,
+ $dst$$Register /* pre_val */,
+ $tmp1$$Register /* tmp1 */,
+ $tmp2$$Register /* tmp2 */);
+ %}
+ ins_pipe(iload_reg_mem);
+%}
+
+// This pattern is generated automatically from g1_aarch64.m4.
+// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
+instruct g1LoadN(iRegNNoSp dst, indirect mem, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp tmp3, rFlagsReg cr)
+%{
+ predicate(UseG1GC && !needs_acquiring_load(n) && n->as_Load()->barrier_data() != 0);
+ match(Set dst (LoadN mem));
+ effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);
+ ins_cost(4 * INSN_COST);
+ format %{ "ldrw $dst, $mem\t# compressed ptr" %}
+ ins_encode %{
+ __ ldrw($dst$$Register, $mem$$Register);
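+ // The SATB pre barrier works on uncompressed oops, so the loaded narrow oop is decoded only when a pre barrier is actually required.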
+ if ((barrier_data() & G1C2BarrierPre) != 0) {
+ __ decode_heap_oop($tmp1$$Register, $dst$$Register);
+ write_barrier_pre(masm, this,
+ noreg /* obj */,
+ $tmp1$$Register /* pre_val */,
+ $tmp2$$Register /* tmp1 */,
+ $tmp3$$Register /* tmp2 */);
+ }
+ %}
+ ins_pipe(iload_reg_mem);
+%}
+
+// END This section of the file is automatically generated. Do not edit --------------
diff --git a/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.cpp
index 06f4382015603..84d06dbcc7bfd 100644
--- a/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.cpp
@@ -67,9 +67,9 @@ void ShenandoahBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, Dec
__ push(saved_regs, sp);
if (UseCompressedOops) {
- __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::arraycopy_barrier_narrow_oop_entry), src, dst, count);
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::arraycopy_barrier_narrow_oop), src, dst, count);
} else {
- __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::arraycopy_barrier_oop_entry), src, dst, count);
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::arraycopy_barrier_oop), src, dst, count);
}
__ pop(saved_regs, sp);
__ bind(done);
@@ -164,9 +164,9 @@ void ShenandoahBarrierSetAssembler::satb_write_barrier_pre(MacroAssembler* masm,
if (expand_call) {
assert(pre_val != c_rarg1, "smashed arg");
- __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), pre_val, thread);
+ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre), pre_val, thread);
} else {
- __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), pre_val, thread);
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre), pre_val, thread);
}
__ pop(saved, sp);
@@ -698,7 +698,7 @@ void ShenandoahBarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAss
__ bind(runtime);
__ push_call_clobbered_registers();
__ load_parameter(0, pre_val);
- __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), pre_val, thread);
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre), pre_val, thread);
__ pop_call_clobbered_registers();
__ bind(done);
diff --git a/src/hotspot/cpu/aarch64/gc/z/zAddress_aarch64.cpp b/src/hotspot/cpu/aarch64/gc/z/zAddress_aarch64.cpp
index cd834969e1a4f..fcec3ae64fde8 100644
--- a/src/hotspot/cpu/aarch64/gc/z/zAddress_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/gc/z/zAddress_aarch64.cpp
@@ -93,7 +93,7 @@ static size_t probe_valid_max_address_bit() {
}
size_t ZPlatformAddressOffsetBits() {
- const static size_t valid_max_address_offset_bits = probe_valid_max_address_bit() + 1;
+ static const size_t valid_max_address_offset_bits = probe_valid_max_address_bit() + 1;
const size_t max_address_offset_bits = valid_max_address_offset_bits - 3;
const size_t min_address_offset_bits = max_address_offset_bits - 2;
const size_t address_offset = round_up_power_of_2(MaxHeapSize * ZVirtualToPhysicalRatio);
diff --git a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp
index d09ef26cef995..c5c02619d446e 100644
--- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp
@@ -2967,7 +2967,7 @@ void MacroAssembler::verify_heapbase(const char* msg) {
if (CheckCompressedOops) {
Label ok;
push(1 << rscratch1->encoding(), sp); // cmpptr trashes rscratch1
- cmpptr(rheapbase, ExternalAddress(CompressedOops::ptrs_base_addr()));
+ cmpptr(rheapbase, ExternalAddress(CompressedOops::base_addr()));
br(Assembler::EQ, ok);
stop(msg);
bind(ok);
@@ -3133,9 +3133,9 @@ void MacroAssembler::reinit_heapbase()
{
if (UseCompressedOops) {
if (Universe::is_fully_initialized()) {
- mov(rheapbase, CompressedOops::ptrs_base());
+ mov(rheapbase, CompressedOops::base());
} else {
- lea(rheapbase, ExternalAddress(CompressedOops::ptrs_base_addr()));
+ lea(rheapbase, ExternalAddress(CompressedOops::base_addr()));
ldr(rheapbase, Address(rheapbase));
}
}
@@ -5082,8 +5082,8 @@ MacroAssembler::KlassDecodeMode MacroAssembler::klass_decode_mode() {
if (operand_valid_for_logical_immediate(
/*is32*/false, (uint64_t)CompressedKlassPointers::base())) {
- const uint64_t range_mask =
- (1ULL << log2i(CompressedKlassPointers::range())) - 1;
+ const size_t range = CompressedKlassPointers::klass_range_end() - CompressedKlassPointers::base();
+ const uint64_t range_mask = (1ULL << log2i(range)) - 1;
if (((uint64_t)CompressedKlassPointers::base() & range_mask) == 0) {
return (_klass_decode_mode = KlassDecodeXor);
}
diff --git a/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp b/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp
index 3117c75149854..52996f4c4a503 100644
--- a/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp
@@ -49,6 +49,7 @@
#include "runtime/sharedRuntime.hpp"
#include "runtime/signature.hpp"
#include "runtime/stubRoutines.hpp"
+#include "runtime/timerTrace.hpp"
#include "runtime/vframeArray.hpp"
#include "utilities/align.hpp"
#include "utilities/formatBuffer.hpp"
@@ -2233,7 +2234,7 @@ void SharedRuntime::generate_deopt_blob() {
int reexecute_offset = __ pc() - start;
#if INCLUDE_JVMCI && !defined(COMPILER1)
- if (EnableJVMCI && UseJVMCICompiler) {
+ if (UseJVMCICompiler) {
// JVMCI does not use this kind of deoptimization
__ should_not_reach_here();
}
diff --git a/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp b/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp
index b3513a586de35..31116e006f025 100644
--- a/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp
@@ -3417,15 +3417,15 @@ class StubGenerator: public StubCodeGenerator {
Register rscratch3 = r10;
Register rscratch4 = r11;
- __ andw(rscratch3, r2, r4);
- __ bicw(rscratch4, r3, r4);
reg_cache.extract_u32(rscratch1, k);
__ movw(rscratch2, t);
- __ orrw(rscratch3, rscratch3, rscratch4);
__ addw(rscratch4, r1, rscratch2);
__ addw(rscratch4, rscratch4, rscratch1);
- __ addw(rscratch3, rscratch3, rscratch4);
- __ rorw(rscratch2, rscratch3, 32 - s);
+ __ bicw(rscratch2, r3, r4);
+ __ andw(rscratch3, r2, r4);
+ __ addw(rscratch2, rscratch2, rscratch4);
+ __ addw(rscratch2, rscratch2, rscratch3);
+ __ rorw(rscratch2, rscratch2, 32 - s);
__ addw(r1, rscratch2, r2);
}
@@ -7320,6 +7320,28 @@ class StubGenerator: public StubCodeGenerator {
return start;
}
+ // load Method* target of MethodHandle
+ // j_rarg0 = jobject receiver
+ // rmethod = result
+ address generate_upcall_stub_load_target() {
+ StubCodeMark mark(this, "StubRoutines", "upcall_stub_load_target");
+ address start = __ pc();
+
+ __ resolve_global_jobject(j_rarg0, rscratch1, rscratch2);
+ // Load target method from receiver
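+ // receiver.form.vmentry.method.vmtarget holds the target Method*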
+ __ load_heap_oop(rmethod, Address(j_rarg0, java_lang_invoke_MethodHandle::form_offset()), rscratch1, rscratch2);
+ __ load_heap_oop(rmethod, Address(rmethod, java_lang_invoke_LambdaForm::vmentry_offset()), rscratch1, rscratch2);
+ __ load_heap_oop(rmethod, Address(rmethod, java_lang_invoke_MemberName::method_offset()), rscratch1, rscratch2);
+ __ access_load_at(T_ADDRESS, IN_HEAP, rmethod,
+ Address(rmethod, java_lang_invoke_ResolvedMethodName::vmtarget_offset()),
+ noreg, noreg);
+ __ str(rmethod, Address(rthread, JavaThread::callee_target_offset())); // just in case callee is deoptimized
+
+ __ ret(lr);
+
+ return start;
+ }
+
#undef __
#define __ masm->
@@ -8241,6 +8263,7 @@ class StubGenerator: public StubCodeGenerator {
#endif
StubRoutines::_upcall_stub_exception_handler = generate_upcall_stub_exception_handler();
+ StubRoutines::_upcall_stub_load_target = generate_upcall_stub_load_target();
StubRoutines::aarch64::set_completed(); // Indicate that arraycopy and zero_blocks stubs are generated
}
diff --git a/src/hotspot/cpu/aarch64/templateInterpreterGenerator_aarch64.cpp b/src/hotspot/cpu/aarch64/templateInterpreterGenerator_aarch64.cpp
index 38d48b86f23b0..3210789bbbdfa 100644
--- a/src/hotspot/cpu/aarch64/templateInterpreterGenerator_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/templateInterpreterGenerator_aarch64.cpp
@@ -69,12 +69,6 @@ int TemplateInterpreter::InterpreterCodeSize = 200 * 1024;
#define __ _masm->
-//-----------------------------------------------------------------------------
-
-extern "C" void entry(CodeBuffer*);
-
-//-----------------------------------------------------------------------------
-
address TemplateInterpreterGenerator::generate_slow_signature_handler() {
address entry = __ pc();
diff --git a/src/hotspot/cpu/aarch64/upcallLinker_aarch64.cpp b/src/hotspot/cpu/aarch64/upcallLinker_aarch64.cpp
index 28ec07815be5c..517fccb2d1aa5 100644
--- a/src/hotspot/cpu/aarch64/upcallLinker_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/upcallLinker_aarch64.cpp
@@ -24,6 +24,7 @@
#include "precompiled.hpp"
#include "asm/macroAssembler.hpp"
+#include "classfile/javaClasses.hpp"
#include "logging/logStream.hpp"
#include "memory/resourceArea.hpp"
#include "prims/upcallLinker.hpp"
@@ -117,7 +118,7 @@ static void restore_callee_saved_registers(MacroAssembler* _masm, const ABIDescr
static const int upcall_stub_code_base_size = 1024;
static const int upcall_stub_size_per_arg = 16;
-address UpcallLinker::make_upcall_stub(jobject receiver, Method* entry,
+address UpcallLinker::make_upcall_stub(jobject receiver, Symbol* signature,
BasicType* out_sig_bt, int total_out_args,
BasicType ret_type,
jobject jabi, jobject jconv,
@@ -222,7 +223,6 @@ address UpcallLinker::make_upcall_stub(jobject receiver, Method* entry,
__ block_comment("{ on_entry");
__ lea(c_rarg0, Address(sp, frame_data_offset));
- __ movptr(c_rarg1, (intptr_t)receiver);
__ movptr(rscratch1, CAST_FROM_FN_PTR(uint64_t, UpcallLinker::on_entry));
__ blr(rscratch1);
__ mov(rthread, r0);
@@ -238,12 +238,10 @@ address UpcallLinker::make_upcall_stub(jobject receiver, Method* entry,
arg_shuffle.generate(_masm, as_VMStorage(shuffle_reg), abi._shadow_space_bytes, 0);
__ block_comment("} argument shuffle");
- __ block_comment("{ receiver ");
- __ get_vm_result(j_rarg0, rthread);
- __ block_comment("} receiver ");
-
- __ mov_metadata(rmethod, entry);
- __ str(rmethod, Address(rthread, JavaThread::callee_target_offset())); // just in case callee is deoptimized
+ __ block_comment("{ load target ");
+ __ movptr(j_rarg0, (intptr_t)receiver);
+ __ far_call(RuntimeAddress(StubRoutines::upcall_stub_load_target()), rscratch1); // puts target Method* in rmethod
+ __ block_comment("} load target ");
__ push_cont_fastpath(rthread);
@@ -318,7 +316,7 @@ address UpcallLinker::make_upcall_stub(jobject receiver, Method* entry,
#ifndef PRODUCT
stringStream ss;
- ss.print("upcall_stub_%s", entry->signature()->as_C_string());
+ ss.print("upcall_stub_%s", signature->as_C_string());
const char* name = _masm->code_string(ss.as_string());
#else // PRODUCT
const char* name = "upcall_stub";
diff --git a/src/hotspot/cpu/arm/arm.ad b/src/hotspot/cpu/arm/arm.ad
index 2c7de0a58a204..716f6d87230e1 100644
--- a/src/hotspot/cpu/arm/arm.ad
+++ b/src/hotspot/cpu/arm/arm.ad
@@ -3890,6 +3890,7 @@ instruct loadRange(iRegI dst, memoryI mem) %{
instruct loadP(iRegP dst, memoryP mem) %{
+ predicate(!(UseG1GC && n->as_Load()->barrier_data() != 0));
match(Set dst (LoadP mem));
ins_cost(MEMORY_REF_COST);
size(4);
@@ -4356,6 +4357,7 @@ instruct movSP(store_ptr_RegP dst, SPRegP src) %{
instruct storeP(memoryP mem, store_ptr_RegP src) %{
+ predicate(!(UseG1GC && n->as_Store()->barrier_data() != 0));
match(Set mem (StoreP mem src));
ins_cost(MEMORY_REF_COST);
size(4);
@@ -5390,6 +5392,7 @@ instruct compareAndSwapI_bool(memoryex mem, iRegI oldval, iRegI newval, iRegI re
%}
instruct compareAndSwapP_bool(memoryex mem, iRegP oldval, iRegP newval, iRegI res, iRegI tmp, flagsReg ccr ) %{
+ predicate(!(UseG1GC && n->as_LoadStore()->barrier_data() != 0));
match(Set res (CompareAndSwapP mem (Binary oldval newval)));
effect( KILL ccr, TEMP tmp);
size(28);
@@ -5659,6 +5662,7 @@ instruct xchgL(memoryex mem, iRegLd newval, iRegLd res, iRegI tmp, flagsReg ccr)
%}
instruct xchgP(memoryex mem, iRegP newval, iRegP res, iRegI tmp, flagsReg ccr) %{
+ predicate(!(UseG1GC && n->as_LoadStore()->barrier_data() != 0));
match(Set res (GetAndSetP mem newval));
effect(KILL ccr, TEMP tmp, TEMP res);
size(16);
diff --git a/src/hotspot/cpu/arm/assembler_arm_32.hpp b/src/hotspot/cpu/arm/assembler_arm_32.hpp
index dd04ad1ab3a3c..e53eefac097ef 100644
--- a/src/hotspot/cpu/arm/assembler_arm_32.hpp
+++ b/src/hotspot/cpu/arm/assembler_arm_32.hpp
@@ -119,8 +119,9 @@ class RegisterSet {
}
friend RegisterSet operator | (const RegisterSet set1, const RegisterSet set2) {
- assert((set1._encoding & set2._encoding) == 0,
- "encoding constraint");
+// Why is this constraint so strong?
+// assert((set1._encoding & set2._encoding) == 0,
+// "encoding constraint");
return RegisterSet(set1._encoding | set2._encoding);
}
@@ -142,6 +143,11 @@ class RegisterSet {
}
return count;
}
+
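+ // Build a RegisterSet from a RegSet; both use the same bit-per-register encoding, so bits() can be passed through directly.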
+ static RegisterSet from(RegSet set) {
+ assert(set.size(), "RegSet must not be empty");
+ return RegisterSet(set.bits());
+ }
};
#if R9_IS_SCRATCHED
@@ -157,6 +163,10 @@ class FloatRegisterSet {
public:
+ FloatRegisterSet() {
+ _encoding = 0;
+ }
+
FloatRegisterSet(FloatRegister reg) {
if (reg->hi_bit() == 0) {
_encoding = reg->hi_bits() << 12 | reg->lo_bit() << 22 | 1;
@@ -185,6 +195,15 @@ class FloatRegisterSet {
return (_encoding & 0xFFFFFF00) | ((_encoding & 0xFF) << 1);
}
+ static FloatRegisterSet from(FloatRegSet set) {
+ assert(set.size(), "FloatRegSet must not be empty");
+ // the vector load/store instructions operate on a set of consecutive registers.
+ // for the sake of simplicity, write all registers between the first and last in the set
+ size_t range = (*set.rbegin())->encoding() - (*set.begin())->encoding() + 1;
+ // push_float stores float registers by pairs
+ return FloatRegisterSet(*set.begin(), (range+1)/2);
+ }
+
};
diff --git a/src/hotspot/cpu/arm/c1_CodeStubs_arm.cpp b/src/hotspot/cpu/arm/c1_CodeStubs_arm.cpp
index 3d8dbc38071ed..8e85fa88a8749 100644
--- a/src/hotspot/cpu/arm/c1_CodeStubs_arm.cpp
+++ b/src/hotspot/cpu/arm/c1_CodeStubs_arm.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2008, 2023, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2008, 2024, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -46,7 +46,7 @@ void CounterOverflowStub::emit_code(LIR_Assembler* ce) {
__ bind(_entry);
ce->store_parameter(_bci, 0);
ce->store_parameter(_method->as_constant_ptr()->as_metadata(), 1);
- __ call(Runtime1::entry_for(Runtime1::counter_overflow_id), relocInfo::runtime_call_type);
+ __ call(Runtime1::entry_for(C1StubId::counter_overflow_id), relocInfo::runtime_call_type);
ce->add_call_info_here(_info);
ce->verify_oop_map(_info);
@@ -57,7 +57,7 @@ void RangeCheckStub::emit_code(LIR_Assembler* ce) {
__ bind(_entry);
if (_info->deoptimize_on_exception()) {
- __ call(Runtime1::entry_for(Runtime1::predicate_failed_trap_id), relocInfo::runtime_call_type);
+ __ call(Runtime1::entry_for(C1StubId::predicate_failed_trap_id), relocInfo::runtime_call_type);
ce->add_call_info_here(_info);
ce->verify_oop_map(_info);
debug_only(__ should_not_reach_here());
@@ -73,10 +73,10 @@ void RangeCheckStub::emit_code(LIR_Assembler* ce) {
}
if (_throw_index_out_of_bounds_exception) {
- __ call(Runtime1::entry_for(Runtime1::throw_index_exception_id), relocInfo::runtime_call_type);
+ __ call(Runtime1::entry_for(C1StubId::throw_index_exception_id), relocInfo::runtime_call_type);
} else {
__ str(_array->as_pointer_register(), Address(SP, BytesPerWord)); // ??? Correct offset? Correct instruction?
- __ call(Runtime1::entry_for(Runtime1::throw_range_check_failed_id), relocInfo::runtime_call_type);
+ __ call(Runtime1::entry_for(C1StubId::throw_range_check_failed_id), relocInfo::runtime_call_type);
}
ce->add_call_info_here(_info);
ce->verify_oop_map(_info);
@@ -89,7 +89,7 @@ PredicateFailedStub::PredicateFailedStub(CodeEmitInfo* info) {
void PredicateFailedStub::emit_code(LIR_Assembler* ce) {
__ bind(_entry);
- __ call(Runtime1::entry_for(Runtime1::predicate_failed_trap_id), relocInfo::runtime_call_type);
+ __ call(Runtime1::entry_for(C1StubId::predicate_failed_trap_id), relocInfo::runtime_call_type);
ce->add_call_info_here(_info);
ce->verify_oop_map(_info);
debug_only(__ should_not_reach_here());
@@ -100,7 +100,7 @@ void DivByZeroStub::emit_code(LIR_Assembler* ce) {
ce->compilation()->implicit_exception_table()->append(_offset, __ offset());
}
__ bind(_entry);
- __ call(Runtime1::entry_for(Runtime1::throw_div0_exception_id),
+ __ call(Runtime1::entry_for(C1StubId::throw_div0_exception_id),
relocInfo::runtime_call_type);
ce->add_call_info_here(_info);
DEBUG_ONLY(STOP("DivByZero");)
@@ -109,14 +109,14 @@ void DivByZeroStub::emit_code(LIR_Assembler* ce) {
// Implementation of NewInstanceStub
-NewInstanceStub::NewInstanceStub(LIR_Opr klass_reg, LIR_Opr result, ciInstanceKlass* klass, CodeEmitInfo* info, Runtime1::StubID stub_id) {
+NewInstanceStub::NewInstanceStub(LIR_Opr klass_reg, LIR_Opr result, ciInstanceKlass* klass, CodeEmitInfo* info, C1StubId stub_id) {
_result = result;
_klass = klass;
_klass_reg = klass_reg;
_info = new CodeEmitInfo(info);
- assert(stub_id == Runtime1::new_instance_id ||
- stub_id == Runtime1::fast_new_instance_id ||
- stub_id == Runtime1::fast_new_instance_init_check_id,
+ assert(stub_id == C1StubId::new_instance_id ||
+ stub_id == C1StubId::fast_new_instance_id ||
+ stub_id == C1StubId::fast_new_instance_init_check_id,
"need new_instance id");
_stub_id = stub_id;
}
@@ -148,7 +148,7 @@ void NewTypeArrayStub::emit_code(LIR_Assembler* ce) {
assert(_klass_reg->as_register() == R1, "runtime call setup");
assert(_length->as_register() == R2, "runtime call setup");
__ bind(_entry);
- __ call(Runtime1::entry_for(Runtime1::new_type_array_id), relocInfo::runtime_call_type);
+ __ call(Runtime1::entry_for(C1StubId::new_type_array_id), relocInfo::runtime_call_type);
ce->add_call_info_here(_info);
ce->verify_oop_map(_info);
__ b(_continuation);
@@ -170,7 +170,7 @@ void NewObjectArrayStub::emit_code(LIR_Assembler* ce) {
assert(_klass_reg->as_register() == R1, "runtime call setup");
assert(_length->as_register() == R2, "runtime call setup");
__ bind(_entry);
- __ call(Runtime1::entry_for(Runtime1::new_object_array_id), relocInfo::runtime_call_type);
+ __ call(Runtime1::entry_for(C1StubId::new_object_array_id), relocInfo::runtime_call_type);
ce->add_call_info_here(_info);
ce->verify_oop_map(_info);
__ b(_continuation);
@@ -189,9 +189,9 @@ void MonitorEnterStub::emit_code(LIR_Assembler* ce) {
__ str(lock_reg, Address(SP, BytesPerWord));
}
- Runtime1::StubID enter_id = ce->compilation()->has_fpu_code() ?
- Runtime1::monitorenter_id :
- Runtime1::monitorenter_nofpu_id;
+ C1StubId enter_id = ce->compilation()->has_fpu_code() ?
+ C1StubId::monitorenter_id :
+ C1StubId::monitorenter_nofpu_id;
__ call(Runtime1::entry_for(enter_id), relocInfo::runtime_call_type);
ce->add_call_info_here(_info);
ce->verify_oop_map(_info);
@@ -210,9 +210,9 @@ void MonitorExitStub::emit_code(LIR_Assembler* ce) {
__ str(lock_reg, Address(SP));
// Non-blocking leaf routine - no call info needed
- Runtime1::StubID exit_id = ce->compilation()->has_fpu_code() ?
- Runtime1::monitorexit_id :
- Runtime1::monitorexit_nofpu_id;
+ C1StubId exit_id = ce->compilation()->has_fpu_code() ?
+ C1StubId::monitorexit_id :
+ C1StubId::monitorexit_nofpu_id;
__ call(Runtime1::entry_for(exit_id), relocInfo::runtime_call_type);
__ b(_continuation);
}
@@ -322,10 +322,10 @@ void PatchingStub::emit_code(LIR_Assembler* ce) {
address target = nullptr;
relocInfo::relocType reloc_type = relocInfo::none;
switch (_id) {
- case access_field_id: target = Runtime1::entry_for(Runtime1::access_field_patching_id); break;
- case load_klass_id: target = Runtime1::entry_for(Runtime1::load_klass_patching_id); reloc_type = relocInfo::metadata_type; break;
- case load_mirror_id: target = Runtime1::entry_for(Runtime1::load_mirror_patching_id); reloc_type = relocInfo::oop_type; break;
- case load_appendix_id: target = Runtime1::entry_for(Runtime1::load_appendix_patching_id); reloc_type = relocInfo::oop_type; break;
+ case access_field_id: target = Runtime1::entry_for(C1StubId::access_field_patching_id); break;
+ case load_klass_id: target = Runtime1::entry_for(C1StubId::load_klass_patching_id); reloc_type = relocInfo::metadata_type; break;
+ case load_mirror_id: target = Runtime1::entry_for(C1StubId::load_mirror_patching_id); reloc_type = relocInfo::oop_type; break;
+ case load_appendix_id: target = Runtime1::entry_for(C1StubId::load_appendix_patching_id); reloc_type = relocInfo::oop_type; break;
default: ShouldNotReachHere();
}
__ bind(call_patch);
@@ -351,7 +351,7 @@ void DeoptimizeStub::emit_code(LIR_Assembler* ce) {
__ mov_slow(Rtemp, _trap_request);
ce->verify_reserved_argument_area_size(1);
__ str(Rtemp, Address(SP));
- __ call(Runtime1::entry_for(Runtime1::deoptimize_id), relocInfo::runtime_call_type);
+ __ call(Runtime1::entry_for(C1StubId::deoptimize_id), relocInfo::runtime_call_type);
ce->add_call_info_here(_info);
DEBUG_ONLY(__ should_not_reach_here());
}
@@ -362,9 +362,9 @@ void ImplicitNullCheckStub::emit_code(LIR_Assembler* ce) {
if (_info->deoptimize_on_exception()) {
// Deoptimize, do not throw the exception, because it is
// probably wrong to do it here.
- a = Runtime1::entry_for(Runtime1::predicate_failed_trap_id);
+ a = Runtime1::entry_for(C1StubId::predicate_failed_trap_id);
} else {
- a = Runtime1::entry_for(Runtime1::throw_null_pointer_exception_id);
+ a = Runtime1::entry_for(C1StubId::throw_null_pointer_exception_id);
}
ce->compilation()->implicit_exception_table()->append(_offset, __ offset());
__ bind(_entry);
diff --git a/src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp b/src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp
index 999f8fe590472..bb6a93e6f8da7 100644
--- a/src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp
+++ b/src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp
@@ -213,7 +213,7 @@ int LIR_Assembler::emit_exception_handler() {
// check that there is really an exception
__ verify_not_null_oop(Rexception_obj);
- __ call(Runtime1::entry_for(Runtime1::handle_exception_from_callee_id), relocInfo::runtime_call_type);
+ __ call(Runtime1::entry_for(C1StubId::handle_exception_from_callee_id), relocInfo::runtime_call_type);
__ should_not_reach_here();
assert(code_offset() - offset <= exception_handler_size(), "overflow");
@@ -253,7 +253,7 @@ int LIR_Assembler::emit_unwind_handler() {
// remove the activation and dispatch to the unwind handler
__ remove_frame(initial_frame_size_in_bytes()); // restores FP and LR
- __ jump(Runtime1::entry_for(Runtime1::unwind_exception_id), relocInfo::runtime_call_type, Rtemp);
+ __ jump(Runtime1::entry_for(C1StubId::unwind_exception_id), relocInfo::runtime_call_type, Rtemp);
// Emit the slow path assembly
if (stub != nullptr) {
@@ -1136,7 +1136,7 @@ void LIR_Assembler::emit_opTypeCheck(LIR_OpTypeCheck* op) {
__ b(*failure_target, ne);
// slow case
assert(klass_RInfo == R0 && k_RInfo == R1, "runtime call setup");
- __ call(Runtime1::entry_for(Runtime1::slow_subtype_check_id), relocInfo::runtime_call_type);
+ __ call(Runtime1::entry_for(C1StubId::slow_subtype_check_id), relocInfo::runtime_call_type);
__ cbz(R0, *failure_target);
if (op->should_profile()) {
Register mdo = klass_RInfo, recv = k_RInfo, tmp1 = Rtemp;
@@ -1210,7 +1210,7 @@ void LIR_Assembler::emit_opTypeCheck(LIR_OpTypeCheck* op) {
__ cmp(Rtemp, k_RInfo, ne);
__ b(*success_target, eq);
assert(klass_RInfo == R0 && k_RInfo == R1, "runtime call setup");
- __ call(Runtime1::entry_for(Runtime1::slow_subtype_check_id), relocInfo::runtime_call_type);
+ __ call(Runtime1::entry_for(C1StubId::slow_subtype_check_id), relocInfo::runtime_call_type);
__ cbz(R0, *failure_target);
}
} else {
@@ -1227,7 +1227,7 @@ void LIR_Assembler::emit_opTypeCheck(LIR_OpTypeCheck* op) {
__ b(*failure_target, ne);
// slow case
assert(klass_RInfo == R0 && k_RInfo == R1, "runtime call setup");
- __ call(Runtime1::entry_for(Runtime1::slow_subtype_check_id), relocInfo::runtime_call_type);
+ __ call(Runtime1::entry_for(C1StubId::slow_subtype_check_id), relocInfo::runtime_call_type);
__ cbz(R0, *failure_target);
}
@@ -1303,7 +1303,7 @@ void LIR_Assembler::emit_opTypeCheck(LIR_OpTypeCheck* op) {
}
__ b(*success_target, eq);
assert(klass_RInfo == R0 && k_RInfo == R1, "runtime call setup");
- __ call(Runtime1::entry_for(Runtime1::slow_subtype_check_id), relocInfo::runtime_call_type);
+ __ call(Runtime1::entry_for(C1StubId::slow_subtype_check_id), relocInfo::runtime_call_type);
if (!op->should_profile()) {
move_regs(R0, res);
} else {
@@ -1334,7 +1334,7 @@ void LIR_Assembler::emit_opTypeCheck(LIR_OpTypeCheck* op) {
__ b(*failure_target, ne);
// slow case
assert(klass_RInfo == R0 && k_RInfo == R1, "runtime call setup");
- __ call(Runtime1::entry_for(Runtime1::slow_subtype_check_id), relocInfo::runtime_call_type);
+ __ call(Runtime1::entry_for(C1StubId::slow_subtype_check_id), relocInfo::runtime_call_type);
if (!op->should_profile()) {
move_regs(R0, res);
}
@@ -1981,9 +1981,9 @@ void LIR_Assembler::throw_op(LIR_Opr exceptionPC, LIR_Opr exceptionOop, CodeEmit
assert(exceptionPC->as_register() == Rexception_pc, "must match");
info->add_register_oop(exceptionOop);
- Runtime1::StubID handle_id = compilation()->has_fpu_code() ?
- Runtime1::handle_exception_id :
- Runtime1::handle_exception_nofpu_id;
+ C1StubId handle_id = compilation()->has_fpu_code() ?
+ C1StubId::handle_exception_id :
+ C1StubId::handle_exception_nofpu_id;
Label return_address;
__ adr(Rexception_pc, return_address);
__ call(Runtime1::entry_for(handle_id), relocInfo::runtime_call_type);
@@ -2260,7 +2260,7 @@ void LIR_Assembler::emit_arraycopy(LIR_OpArrayCopy* op) {
__ mov(altFP_7_11, R1);
__ mov(R0, tmp);
__ mov(R1, tmp2);
- __ call(Runtime1::entry_for(Runtime1::slow_subtype_check_id), relocInfo::runtime_call_type); // does not blow any registers except R0, LR and Rtemp
+ __ call(Runtime1::entry_for(C1StubId::slow_subtype_check_id), relocInfo::runtime_call_type); // does not blow any registers except R0, LR and Rtemp
__ cmp_32(R0, 0);
__ mov(R0, R6);
__ mov(R1, altFP_7_11);
diff --git a/src/hotspot/cpu/arm/c1_LIRGenerator_arm.cpp b/src/hotspot/cpu/arm/c1_LIRGenerator_arm.cpp
index f4e3812d77cff..adda0c1c290db 100644
--- a/src/hotspot/cpu/arm/c1_LIRGenerator_arm.cpp
+++ b/src/hotspot/cpu/arm/c1_LIRGenerator_arm.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2008, 2023, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2008, 2024, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -1054,7 +1054,7 @@ void LIRGenerator::do_NewMultiArray(NewMultiArray* x) {
args->append(rank);
args->append(varargs);
LIR_Opr reg = result_register_for(x->type());
- __ call_runtime(Runtime1::entry_for(Runtime1::new_multi_array_id),
+ __ call_runtime(Runtime1::entry_for(C1StubId::new_multi_array_id),
LIR_OprFact::illegalOpr, reg, args, info);
LIR_Opr result = rlock_result(x);
@@ -1083,7 +1083,7 @@ void LIRGenerator::do_CheckCast(CheckCast* x) {
CodeStub* stub;
if (x->is_incompatible_class_change_check()) {
assert(patching_info == nullptr, "can't patch this");
- stub = new SimpleExceptionStub(Runtime1::throw_incompatible_class_change_error_id,
+ stub = new SimpleExceptionStub(C1StubId::throw_incompatible_class_change_error_id,
LIR_OprFact::illegalOpr, info_for_exception);
} else if (x->is_invokespecial_receiver_check()) {
assert(patching_info == nullptr, "can't patch this");
@@ -1091,7 +1091,7 @@ void LIRGenerator::do_CheckCast(CheckCast* x) {
Deoptimization::Reason_class_check,
Deoptimization::Action_none);
} else {
- stub = new SimpleExceptionStub(Runtime1::throw_class_cast_exception_id,
+ stub = new SimpleExceptionStub(C1StubId::throw_class_cast_exception_id,
LIR_OprFact::illegalOpr, info_for_exception);
}
diff --git a/src/hotspot/cpu/arm/c1_Runtime1_arm.cpp b/src/hotspot/cpu/arm/c1_Runtime1_arm.cpp
index 335baf5f16638..b5117dedc424e 100644
--- a/src/hotspot/cpu/arm/c1_Runtime1_arm.cpp
+++ b/src/hotspot/cpu/arm/c1_Runtime1_arm.cpp
@@ -65,7 +65,7 @@ int StubAssembler::call_RT(Register oop_result1, Register metadata_result, addre
reset_last_Java_frame(Rtemp);
assert(frame_size() != no_frame_size, "frame must be fixed");
- if (_stub_id != Runtime1::forward_exception_id) {
+ if (_stub_id != (int)C1StubId::forward_exception_id) {
ldr(R3, Address(Rthread, Thread::pending_exception_offset()));
}
@@ -81,10 +81,10 @@ int StubAssembler::call_RT(Register oop_result1, Register metadata_result, addre
// Check for pending exception
// unpack_with_exception_in_tls path is taken through
// Runtime1::exception_handler_for_pc
- if (_stub_id != Runtime1::forward_exception_id) {
+ if (_stub_id != (int)C1StubId::forward_exception_id) {
assert(frame_size() != no_frame_size, "cannot directly call forward_exception_id");
cmp(R3, 0);
- jump(Runtime1::entry_for(Runtime1::forward_exception_id), relocInfo::runtime_call_type, Rtemp, ne);
+ jump(Runtime1::entry_for(C1StubId::forward_exception_id), relocInfo::runtime_call_type, Rtemp, ne);
} else {
#ifdef ASSERT
// Should not have pending exception in forward_exception stub
@@ -280,7 +280,7 @@ static void restore_sp_for_method_handle(StubAssembler* sasm) {
}
-OopMapSet* Runtime1::generate_handle_exception(StubID id, StubAssembler* sasm) {
+OopMapSet* Runtime1::generate_handle_exception(C1StubId id, StubAssembler* sasm) {
__ block_comment("generate_handle_exception");
bool save_fpu_registers = false;
@@ -290,7 +290,7 @@ OopMapSet* Runtime1::generate_handle_exception(StubID id, StubAssembler* sasm) {
OopMap* oop_map = nullptr;
switch (id) {
- case forward_exception_id: {
+ case C1StubId::forward_exception_id: {
save_fpu_registers = HaveVFP;
oop_map = generate_oop_map(sasm);
__ ldr(Rexception_obj, Address(Rthread, Thread::pending_exception_offset()));
@@ -299,14 +299,14 @@ OopMapSet* Runtime1::generate_handle_exception(StubID id, StubAssembler* sasm) {
__ str(zero, Address(Rthread, Thread::pending_exception_offset()));
break;
}
- case handle_exception_id:
+ case C1StubId::handle_exception_id:
save_fpu_registers = HaveVFP;
// fall-through
- case handle_exception_nofpu_id:
+ case C1StubId::handle_exception_nofpu_id:
// At this point all registers MAY be live.
oop_map = save_live_registers(sasm, save_fpu_registers);
break;
- case handle_exception_from_callee_id:
+ case C1StubId::handle_exception_from_callee_id:
// At this point all registers except exception oop (R4/R19) and
// exception pc (R5/R20) are dead.
oop_map = save_live_registers(sasm); // TODO it's not required to save all registers
@@ -328,13 +328,13 @@ OopMapSet* Runtime1::generate_handle_exception(StubID id, StubAssembler* sasm) {
// Restore the registers that were saved at the beginning, remove
// frame and jump to the exception handler.
switch (id) {
- case forward_exception_id:
- case handle_exception_nofpu_id:
- case handle_exception_id:
+ case C1StubId::forward_exception_id:
+ case C1StubId::handle_exception_nofpu_id:
+ case C1StubId::handle_exception_id:
restore_live_registers(sasm, save_fpu_registers);
// Note: the restore live registers includes the jump to LR (patched to R0)
break;
- case handle_exception_from_callee_id:
+ case C1StubId::handle_exception_from_callee_id:
restore_live_registers_without_return(sasm); // must not jump immediately to handler
restore_sp_for_method_handle(sasm);
__ ret();
@@ -403,7 +403,7 @@ OopMapSet* Runtime1::generate_patching(StubAssembler* sasm, address target) {
}
-OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
+OopMapSet* Runtime1::generate_code_for(C1StubId id, StubAssembler* sasm) {
const bool must_gc_arguments = true;
const bool dont_gc_arguments = false;
@@ -411,16 +411,16 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
bool save_fpu_registers = HaveVFP;
switch (id) {
- case forward_exception_id:
+ case C1StubId::forward_exception_id:
{
oop_maps = generate_handle_exception(id, sasm);
// does not return on ARM
}
break;
- case new_instance_id:
- case fast_new_instance_id:
- case fast_new_instance_init_check_id:
+ case C1StubId::new_instance_id:
+ case C1StubId::fast_new_instance_id:
+ case C1StubId::fast_new_instance_init_check_id:
{
const Register result = R0;
const Register klass = R1;
@@ -436,7 +436,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
}
break;
- case counter_overflow_id:
+ case C1StubId::counter_overflow_id:
{
OopMap* oop_map = save_live_registers(sasm);
__ ldr(R1, Address(SP, arg1_offset));
@@ -448,10 +448,10 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
}
break;
- case new_type_array_id:
- case new_object_array_id:
+ case C1StubId::new_type_array_id:
+ case C1StubId::new_object_array_id:
{
- if (id == new_type_array_id) {
+ if (id == C1StubId::new_type_array_id) {
__ set_info("new_type_array", dont_gc_arguments);
} else {
__ set_info("new_object_array", dont_gc_arguments);
@@ -463,7 +463,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
OopMap* map = save_live_registers(sasm);
int call_offset;
- if (id == new_type_array_id) {
+ if (id == C1StubId::new_type_array_id) {
call_offset = __ call_RT(result, noreg, CAST_FROM_FN_PTR(address, new_type_array), klass, length);
} else {
call_offset = __ call_RT(result, noreg, CAST_FROM_FN_PTR(address, new_object_array), klass, length);
@@ -477,7 +477,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
}
break;
- case new_multi_array_id:
+ case C1StubId::new_multi_array_id:
{
__ set_info("new_multi_array", dont_gc_arguments);
@@ -500,7 +500,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
}
break;
- case register_finalizer_id:
+ case C1StubId::register_finalizer_id:
{
__ set_info("register_finalizer", dont_gc_arguments);
@@ -521,78 +521,78 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
}
break;
- case throw_range_check_failed_id:
+ case C1StubId::throw_range_check_failed_id:
{
__ set_info("range_check_failed", dont_gc_arguments);
oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_range_check_exception), true);
}
break;
- case throw_index_exception_id:
+ case C1StubId::throw_index_exception_id:
{
__ set_info("index_range_check_failed", dont_gc_arguments);
oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_index_exception), true);
}
break;
- case throw_div0_exception_id:
+ case C1StubId::throw_div0_exception_id:
{
__ set_info("throw_div0_exception", dont_gc_arguments);
oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_div0_exception), false);
}
break;
- case throw_null_pointer_exception_id:
+ case C1StubId::throw_null_pointer_exception_id:
{
__ set_info("throw_null_pointer_exception", dont_gc_arguments);
oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_null_pointer_exception), false);
}
break;
- case handle_exception_nofpu_id:
- case handle_exception_id:
+ case C1StubId::handle_exception_nofpu_id:
+ case C1StubId::handle_exception_id:
{
__ set_info("handle_exception", dont_gc_arguments);
oop_maps = generate_handle_exception(id, sasm);
}
break;
- case handle_exception_from_callee_id:
+ case C1StubId::handle_exception_from_callee_id:
{
__ set_info("handle_exception_from_callee", dont_gc_arguments);
oop_maps = generate_handle_exception(id, sasm);
}
break;
- case unwind_exception_id:
+ case C1StubId::unwind_exception_id:
{
__ set_info("unwind_exception", dont_gc_arguments);
generate_unwind_exception(sasm);
}
break;
- case throw_array_store_exception_id:
+ case C1StubId::throw_array_store_exception_id:
{
__ set_info("throw_array_store_exception", dont_gc_arguments);
oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_array_store_exception), true);
}
break;
- case throw_class_cast_exception_id:
+ case C1StubId::throw_class_cast_exception_id:
{
__ set_info("throw_class_cast_exception", dont_gc_arguments);
oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_class_cast_exception), true);
}
break;
- case throw_incompatible_class_change_error_id:
+ case C1StubId::throw_incompatible_class_change_error_id:
{
__ set_info("throw_incompatible_class_cast_exception", dont_gc_arguments);
oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_incompatible_class_change_error), false);
}
break;
- case slow_subtype_check_id:
+ case C1StubId::slow_subtype_check_id:
{
// (in) R0 - sub, destroyed,
// (in) R1 - super, not changed
@@ -625,10 +625,10 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
}
break;
- case monitorenter_nofpu_id:
+ case C1StubId::monitorenter_nofpu_id:
save_fpu_registers = false;
// fall through
- case monitorenter_id:
+ case C1StubId::monitorenter_id:
{
__ set_info("monitorenter", dont_gc_arguments);
const Register obj = R1;
@@ -643,10 +643,10 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
}
break;
- case monitorexit_nofpu_id:
+ case C1StubId::monitorexit_nofpu_id:
save_fpu_registers = false;
// fall through
- case monitorexit_id:
+ case C1StubId::monitorexit_id:
{
__ set_info("monitorexit", dont_gc_arguments);
const Register lock = R1;
@@ -659,7 +659,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
}
break;
- case deoptimize_id:
+ case C1StubId::deoptimize_id:
{
__ set_info("deoptimize", dont_gc_arguments);
OopMap* oop_map = save_live_registers(sasm);
@@ -675,35 +675,35 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
}
break;
- case access_field_patching_id:
+ case C1StubId::access_field_patching_id:
{
__ set_info("access_field_patching", dont_gc_arguments);
oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, access_field_patching));
}
break;
- case load_klass_patching_id:
+ case C1StubId::load_klass_patching_id:
{
__ set_info("load_klass_patching", dont_gc_arguments);
oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_klass_patching));
}
break;
- case load_appendix_patching_id:
+ case C1StubId::load_appendix_patching_id:
{
__ set_info("load_appendix_patching", dont_gc_arguments);
oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_appendix_patching));
}
break;
- case load_mirror_patching_id:
+ case C1StubId::load_mirror_patching_id:
{
__ set_info("load_mirror_patching", dont_gc_arguments);
oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_mirror_patching));
}
break;
- case predicate_failed_trap_id:
+ case C1StubId::predicate_failed_trap_id:
{
__ set_info("predicate_failed_trap", dont_gc_arguments);
diff --git a/src/hotspot/cpu/arm/gc/g1/g1BarrierSetAssembler_arm.cpp b/src/hotspot/cpu/arm/gc/g1/g1BarrierSetAssembler_arm.cpp
index 3c5e29aa8710f..56ae7707fbf38 100644
--- a/src/hotspot/cpu/arm/gc/g1/g1BarrierSetAssembler_arm.cpp
+++ b/src/hotspot/cpu/arm/gc/g1/g1BarrierSetAssembler_arm.cpp
@@ -39,8 +39,10 @@
#include "c1/c1_LIRAssembler.hpp"
#include "c1/c1_MacroAssembler.hpp"
#include "gc/g1/c1/g1BarrierSetC1.hpp"
-#endif
-
+#endif // COMPILER1
+#ifdef COMPILER2
+#include "gc/g1/c2/g1BarrierSetC2.hpp"
+#endif // COMPILER2
#define __ masm->
#ifdef PRODUCT
@@ -106,70 +108,87 @@ void G1BarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* mas
#endif // !R9_IS_SCRATCHED
}
-// G1 pre-barrier.
-// Blows all volatile registers R0-R3, Rtemp, LR).
-// If store_addr != noreg, then previous value is loaded from [store_addr];
-// in such case store_addr and new_val registers are preserved;
-// otherwise pre_val register is preserved.
-void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm,
- Register store_addr,
- Register new_val,
- Register pre_val,
- Register tmp1,
- Register tmp2) {
- Label done;
- Label runtime;
-
- if (store_addr != noreg) {
- assert_different_registers(store_addr, new_val, pre_val, tmp1, tmp2, noreg);
- } else {
- assert (new_val == noreg, "should be");
- assert_different_registers(pre_val, tmp1, tmp2, noreg);
- }
-
- Address in_progress(Rthread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset()));
- Address index(Rthread, in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset()));
- Address buffer(Rthread, in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset()));
+static void generate_queue_test_and_insertion(MacroAssembler* masm, ByteSize index_offset, ByteSize buffer_offset, Label& runtime,
+ const Register thread, const Register value, const Register temp1, const Register temp2) {
+ assert_different_registers(value, temp1, temp2);
+ // Can we store original value in the thread's buffer?
+ // (The index field is typed as size_t.)
+ __ ldr(temp1, Address(thread, in_bytes(index_offset))); // temp1 := *(index address)
+ __ cbz(temp1, runtime); // jump to runtime if index == 0 (full buffer)
+ // The buffer is not full, store value into it.
+ __ sub(temp1, temp1, wordSize); // temp1 := next index
+ __ str(temp1, Address(thread, in_bytes(index_offset))); // *(index address) := next index
+ __ ldr(temp2, Address(thread, in_bytes(buffer_offset))); // temp2 := buffer address
+ // Record the previous value
+ __ str(value, Address(temp2, temp1)); // *(buffer address + next index) := value
+ }
+static void generate_pre_barrier_fast_path(MacroAssembler* masm,
+ const Register thread,
+ const Register tmp1) {
+ Address in_progress(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset()));
// Is marking active?
assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "adjust this code");
__ ldrb(tmp1, in_progress);
- __ cbz(tmp1, done);
+}
+static void generate_pre_barrier_slow_path(MacroAssembler* masm,
+ const Register obj,
+ const Register pre_val,
+ const Register thread,
+ const Register tmp1,
+ const Register tmp2,
+ Label& done,
+ Label& runtime) {
// Do we need to load the previous value?
- if (store_addr != noreg) {
- __ load_heap_oop(pre_val, Address(store_addr, 0));
+ if (obj != noreg) {
+ __ load_heap_oop(pre_val, Address(obj, 0));
}
// Is the previous value null?
__ cbz(pre_val, done);
- // Can we store original value in the thread's buffer?
- // Is index == 0?
- // (The index field is typed as size_t.)
+ generate_queue_test_and_insertion(masm,
+ G1ThreadLocalData::satb_mark_queue_index_offset(),
+ G1ThreadLocalData::satb_mark_queue_buffer_offset(),
+ runtime,
+ thread, pre_val, tmp1, tmp2);
+ __ b(done);
+}
- __ ldr(tmp1, index); // tmp1 := *index_adr
- __ ldr(tmp2, buffer);
+// G1 pre-barrier.
+// Blows all volatile registers (R0-R3, LR).
+// If obj != noreg, then the previous value is loaded from [obj];
+// in that case the obj and pre_val registers are preserved;
+// otherwise the pre_val register is preserved.
+void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm,
+ Register obj,
+ Register pre_val,
+ Register tmp1,
+ Register tmp2) {
+ Label done;
+ Label runtime;
- __ subs(tmp1, tmp1, wordSize); // tmp1 := tmp1 - wordSize
- __ b(runtime, lt); // If negative, goto runtime
+ assert_different_registers(obj, pre_val, tmp1, tmp2, noreg);
- __ str(tmp1, index); // *index_adr := tmp1
+ generate_pre_barrier_fast_path(masm, Rthread, tmp1);
+ // If marking is not active (*(mark queue active address) == 0), jump to done
+ __ cbz(tmp1, done);
- // Record the previous value
- __ str(pre_val, Address(tmp2, tmp1));
- __ b(done);
+ generate_pre_barrier_slow_path(masm, obj, pre_val, Rthread, tmp1, tmp2, done, runtime);
__ bind(runtime);
// save the live input values
- if (store_addr != noreg) {
- // avoid raw_push to support any ordering of store_addr and new_val
- __ push(RegisterSet(store_addr) | RegisterSet(new_val));
- } else {
- __ push(pre_val);
+ RegisterSet set = RegisterSet(pre_val) | RegisterSet(R0, R3) | RegisterSet(R12);
+ if (obj != noreg) {
+ // avoid raw_push to support any ordering of store_addr and pre_val
+ set = set | RegisterSet(obj);
}
+ __ push(set);
+
if (pre_val != R0) {
__ mov(R0, pre_val);
}
@@ -177,33 +196,17 @@ void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm,
__ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), R0, R1);
- if (store_addr != noreg) {
- __ pop(RegisterSet(store_addr) | RegisterSet(new_val));
- } else {
- __ pop(pre_val);
- }
-
+ __ pop(set);
__ bind(done);
}
-// G1 post-barrier.
-// Blows all volatile registers R0-R3, Rtemp, LR).
-void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm,
- Register store_addr,
- Register new_val,
- Register tmp1,
- Register tmp2,
- Register tmp3) {
-
- Address queue_index(Rthread, in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset()));
- Address buffer(Rthread, in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset()));
-
- BarrierSet* bs = BarrierSet::barrier_set();
- CardTableBarrierSet* ctbs = barrier_set_cast<CardTableBarrierSet>(bs);
- CardTable* ct = ctbs->card_table();
- Label done;
- Label runtime;
-
+static void generate_post_barrier_fast_path(MacroAssembler* masm,
+ const Register store_addr,
+ const Register new_val,
+ const Register tmp1,
+ const Register tmp2,
+ Label& done,
+ bool new_val_may_be_null) {
// Does store cross heap regions?
__ eor(tmp1, store_addr, new_val);
@@ -211,22 +214,31 @@ void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm,
__ b(done, eq);
// crosses regions, storing null?
-
- __ cbz(new_val, done);
-
+ if (new_val_may_be_null) {
+ __ cbz(new_val, done);
+ }
// storing region crossing non-null, is card already dirty?
const Register card_addr = tmp1;
- __ mov_address(tmp2, (address)ct->byte_map_base());
+ CardTableBarrierSet* ct = barrier_set_cast<CardTableBarrierSet>(BarrierSet::barrier_set());
+ __ mov_address(tmp2, (address)ct->card_table()->byte_map_base());
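+ // card_addr = byte_map_base + (store_addr >> card_shift)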
__ add(card_addr, tmp2, AsmOperand(store_addr, lsr, CardTable::card_shift()));
__ ldrb(tmp2, Address(card_addr));
__ cmp(tmp2, (int)G1CardTable::g1_young_card_val());
- __ b(done, eq);
+}
+static void generate_post_barrier_slow_path(MacroAssembler* masm,
+ const Register thread,
+ const Register tmp1,
+ const Register tmp2,
+ const Register tmp3,
+ Label& done,
+ Label& runtime) {
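+ // StoreLoad barrier: make the earlier reference store visible before the card is re-examined.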
__ membar(MacroAssembler::Membar_mask_bits(MacroAssembler::StoreLoad), tmp2);
-
assert(CardTable::dirty_card_val() == 0, "adjust this code");
+ // card_addr is loaded by generate_post_barrier_fast_path
+ const Register card_addr = tmp1;
__ ldrb(tmp2, Address(card_addr));
__ cbz(tmp2, done);
@@ -234,29 +246,139 @@ void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm,
// dirty card and log.
__ strb(__ zero_register(tmp2), Address(card_addr));
+ generate_queue_test_and_insertion(masm,
+ G1ThreadLocalData::dirty_card_queue_index_offset(),
+ G1ThreadLocalData::dirty_card_queue_buffer_offset(),
+ runtime,
+ thread, card_addr, tmp2, tmp3);
+ __ b(done);
+}
- __ ldr(tmp2, queue_index);
- __ ldr(tmp3, buffer);
- __ subs(tmp2, tmp2, wordSize);
- __ b(runtime, lt); // go to runtime if now negative
-
- __ str(tmp2, queue_index);
+// G1 post-barrier.
+// Blows all volatile registers (R0-R3, LR).
+void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm,
+ Register store_addr,
+ Register new_val,
+ Register tmp1,
+ Register tmp2,
+ Register tmp3) {
+ Label done;
+ Label runtime;
- __ str(card_addr, Address(tmp3, tmp2));
- __ b(done);
+ generate_post_barrier_fast_path(masm, store_addr, new_val, tmp1, tmp2, done, true /* new_val_may_be_null */);
+ // If card is young, jump to done
+ // card_addr and card are loaded by generate_post_barrier_fast_path
+ const Register card = tmp2;
+ const Register card_addr = tmp1;
+ __ b(done, eq);
+ generate_post_barrier_slow_path(masm, Rthread, card_addr, tmp2, tmp3, done, runtime);
__ bind(runtime);
+ RegisterSet set = RegisterSet(store_addr) | RegisterSet(R0, R3) | RegisterSet(R12);
+ __ push(set);
+
if (card_addr != R0) {
__ mov(R0, card_addr);
}
__ mov(R1, Rthread);
__ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), R0, R1);
+ __ pop(set);
+
__ bind(done);
}
+#if defined(COMPILER2)
+
+static void generate_c2_barrier_runtime_call(MacroAssembler* masm, G1BarrierStubC2* stub, const Register arg, const address runtime_path, Register tmp1) {
+ SaveLiveRegisters save_registers(masm, stub);
+ if (c_rarg0 != arg) {
+ __ mov(c_rarg0, arg);
+ }
+ __ mov(c_rarg1, Rthread);
+ __ call_VM_leaf(runtime_path, R0, R1);
+}
+
+void G1BarrierSetAssembler::g1_write_barrier_pre_c2(MacroAssembler* masm,
+ Register obj,
+ Register pre_val,
+ Register thread,
+ Register tmp1,
+ Register tmp2,
+ G1PreBarrierStubC2* stub) {
+ assert(thread == Rthread, "must be");
+ assert_different_registers(obj, pre_val, tmp1, tmp2);
+ assert(pre_val != noreg && tmp1 != noreg && tmp2 != noreg, "expecting a register");
+
+ stub->initialize_registers(obj, pre_val, thread, tmp1, tmp2);
+
+ generate_pre_barrier_fast_path(masm, thread, tmp1);
+ // If marking is active (*(mark queue active address) != 0), jump to stub (slow path)
+ __ cbnz(tmp1, *stub->entry());
+
+ __ bind(*stub->continuation());
+}
+
+void G1BarrierSetAssembler::generate_c2_pre_barrier_stub(MacroAssembler* masm,
+ G1PreBarrierStubC2* stub) const {
+ Assembler::InlineSkippedInstructionsCounter skip_counter(masm);
+ Label runtime;
+ Register obj = stub->obj();
+ Register pre_val = stub->pre_val();
+ Register thread = stub->thread();
+ Register tmp1 = stub->tmp1();
+ Register tmp2 = stub->tmp2();
+
+ __ bind(*stub->entry());
+ generate_pre_barrier_slow_path(masm, obj, pre_val, thread, tmp1, tmp2, *stub->continuation(), runtime);
+
+ __ bind(runtime);
+ generate_c2_barrier_runtime_call(masm, stub, pre_val, CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), tmp1);
+ __ b(*stub->continuation());
+}
+
+void G1BarrierSetAssembler::g1_write_barrier_post_c2(MacroAssembler* masm,
+ Register store_addr,
+ Register new_val,
+ Register thread,
+ Register tmp1,
+ Register tmp2,
+ Register tmp3,
+ G1PostBarrierStubC2* stub) {
+ assert(thread == Rthread, "must be");
+ assert_different_registers(store_addr, new_val, thread, tmp1, tmp2, noreg);
+
+ stub->initialize_registers(thread, tmp1, tmp2, tmp3);
+
+ bool new_val_may_be_null = (stub->barrier_data() & G1C2BarrierPostNotNull) == 0;
+ generate_post_barrier_fast_path(masm, store_addr, new_val, tmp1, tmp2, *stub->continuation(), new_val_may_be_null);
+ // If card is not young, jump to stub (slow path)
+ __ b(*stub->entry(), ne);
+
+ __ bind(*stub->continuation());
+}
+
+void G1BarrierSetAssembler::generate_c2_post_barrier_stub(MacroAssembler* masm,
+ G1PostBarrierStubC2* stub) const {
+ Assembler::InlineSkippedInstructionsCounter skip_counter(masm);
+ Label runtime;
+ Register thread = stub->thread();
+ Register tmp1 = stub->tmp1(); // tmp1 holds the card address.
+ Register tmp2 = stub->tmp2();
+ Register tmp3 = stub->tmp3();
+
+ __ bind(*stub->entry());
+ generate_post_barrier_slow_path(masm, thread, tmp1, tmp2, tmp3, *stub->continuation(), runtime);
+
+ __ bind(runtime);
+ generate_c2_barrier_runtime_call(masm, stub, tmp1, CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), tmp2);
+ __ b(*stub->continuation());
+}
+
+#endif // COMPILER2
+
void G1BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
Register dst, Address src, Register tmp1, Register tmp2, Register tmp3) {
bool on_oop = type == T_OBJECT || type == T_ARRAY;
@@ -268,7 +390,7 @@ void G1BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorator
if (on_oop && on_reference) {
// Generate the G1 pre-barrier code to log the value of
// the referent field in an SATB buffer.
- g1_write_barrier_pre(masm, noreg, noreg, dst, tmp1, tmp2);
+ g1_write_barrier_pre(masm, noreg, dst, tmp1, tmp2);
}
}
@@ -295,7 +417,7 @@ void G1BarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet deco
}
if (needs_pre_barrier) {
- g1_write_barrier_pre(masm, store_addr, new_val, tmp1, tmp2, tmp3);
+ g1_write_barrier_pre(masm, store_addr, tmp3 /*pre_val*/, tmp1, tmp2);
}
if (is_null) {
diff --git a/src/hotspot/cpu/arm/gc/g1/g1BarrierSetAssembler_arm.hpp b/src/hotspot/cpu/arm/gc/g1/g1BarrierSetAssembler_arm.hpp
index 52932faa3e4de..aefde19142e40 100644
--- a/src/hotspot/cpu/arm/gc/g1/g1BarrierSetAssembler_arm.hpp
+++ b/src/hotspot/cpu/arm/gc/g1/g1BarrierSetAssembler_arm.hpp
@@ -33,6 +33,8 @@ class LIR_Assembler;
class StubAssembler;
class G1PreBarrierStub;
class G1PostBarrierStub;
+class G1PreBarrierStubC2;
+class G1PostBarrierStubC2;
class G1BarrierSetAssembler: public ModRefBarrierSetAssembler {
protected:
@@ -43,7 +45,6 @@ class G1BarrierSetAssembler: public ModRefBarrierSetAssembler {
void g1_write_barrier_pre(MacroAssembler* masm,
Register store_addr,
- Register new_val,
Register pre_val,
Register tmp1,
Register tmp2);
@@ -70,6 +71,29 @@ class G1BarrierSetAssembler: public ModRefBarrierSetAssembler {
void generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm);
void generate_c1_post_barrier_runtime_stub(StubAssembler* sasm);
#endif
+
+#ifdef COMPILER2
+ void g1_write_barrier_pre_c2(MacroAssembler* masm,
+ Register obj,
+ Register pre_val,
+ Register thread,
+ Register tmp1,
+ Register tmp2,
+ G1PreBarrierStubC2* c2_stub);
+ void generate_c2_pre_barrier_stub(MacroAssembler* masm,
+ G1PreBarrierStubC2* stub) const;
+ void g1_write_barrier_post_c2(MacroAssembler* masm,
+ Register store_addr,
+ Register new_val,
+ Register thread,
+ Register tmp1,
+ Register tmp2,
+ Register tmp3,
+ G1PostBarrierStubC2* c2_stub);
+ void generate_c2_post_barrier_stub(MacroAssembler* masm,
+ G1PostBarrierStubC2* stub) const;
+#endif
+
};
#endif // CPU_ARM_GC_G1_G1BARRIERSETASSEMBLER_ARM_HPP
diff --git a/src/hotspot/cpu/arm/gc/g1/g1_arm.ad b/src/hotspot/cpu/arm/gc/g1/g1_arm.ad
new file mode 100644
index 0000000000000..8a0a9e1aa531a
--- /dev/null
+++ b/src/hotspot/cpu/arm/gc/g1/g1_arm.ad
@@ -0,0 +1,201 @@
+//
+// Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+//
+// This code is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License version 2 only, as
+// published by the Free Software Foundation.
+//
+// This code is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+// version 2 for more details (a copy is included in the LICENSE file that
+// accompanied this code).
+//
+// You should have received a copy of the GNU General Public License version
+// 2 along with this work; if not, write to the Free Software Foundation,
+// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+//
+// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+// or visit www.oracle.com if you need additional information or have any
+// questions.
+//
+
+source_hpp %{
+
+#include "gc/g1/c2/g1BarrierSetC2.hpp"
+#include "gc/shared/gc_globals.hpp"
+
+%}
+
+source %{
+
+#include "gc/g1/g1BarrierSetAssembler_arm.hpp"
+#include "gc/g1/g1BarrierSetRuntime.hpp"
+
+static void write_barrier_pre(MacroAssembler* masm,
+ const MachNode* node,
+ Register obj,
+ Register pre_val,
+ Register tmp1,
+ Register tmp2,
+ RegSet preserve = RegSet(),
+ RegSet no_preserve = RegSet()) {
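+ // Nothing to do if C2 determined that no pre barrier is needed for this node.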
+ if (!G1PreBarrierStubC2::needs_barrier(node)) {
+ return;
+ }
+ Assembler::InlineSkippedInstructionsCounter skip_counter(masm);
+ G1BarrierSetAssembler* g1_asm = static_cast<G1BarrierSetAssembler*>(BarrierSet::barrier_set()->barrier_set_assembler());
+ G1PreBarrierStubC2* const stub = G1PreBarrierStubC2::create(node);
+ for (RegSetIterator<Register> reg = preserve.begin(); *reg != noreg; ++reg) {
+ stub->preserve(*reg);
+ }
+ for (RegSetIterator<Register> reg = no_preserve.begin(); *reg != noreg; ++reg) {
+ stub->dont_preserve(*reg);
+ }
+ g1_asm->g1_write_barrier_pre_c2(masm, obj, pre_val, Rthread, tmp1, tmp2, stub);
+}
+
+static void write_barrier_post(MacroAssembler* masm,
+ const MachNode* node,
+ Register store_addr,
+ Register new_val,
+ Register tmp1,
+ Register tmp2,
+ Register tmp3) {
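+ // Nothing to do if C2 determined that no post barrier is needed for this node.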
+ if (!G1PostBarrierStubC2::needs_barrier(node)) {
+ return;
+ }
+ Assembler::InlineSkippedInstructionsCounter skip_counter(masm);
+ G1BarrierSetAssembler* g1_asm = static_cast<G1BarrierSetAssembler*>(BarrierSet::barrier_set()->barrier_set_assembler());
+ G1PostBarrierStubC2* const stub = G1PostBarrierStubC2::create(node);
+ g1_asm->g1_write_barrier_post_c2(masm, store_addr, new_val, Rthread, tmp1, tmp2, tmp3, stub);
+}
+
+%}
+
+instruct g1StoreP(indirect mem, iRegP src, iRegP tmp1, iRegP tmp2, iRegP tmp3, flagsReg icc)
+%{
+ predicate(UseG1GC && n->as_Store()->barrier_data() != 0);
+ match(Set mem (StoreP mem src));
+ effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL icc);
+ ins_cost(2 * (MEMORY_REF_COST + BRANCH_COST));
+ format %{ "sd $src, $mem\t# ptr" %}
+ ins_encode %{
+ guarantee($mem$$disp == 0, "impossible encoding");
+ write_barrier_pre(masm, this,
+ $mem$$Register /* obj */,
+ $tmp1$$Register /* pre_val */,
+ $tmp2$$Register /* tmp1 */,
+ $tmp3$$Register /* tmp2 */,
+ RegSet::of($mem$$Register, $src$$Register) /* preserve */);
+ __ str($src$$Register, Address($mem$$Register));
+ write_barrier_post(masm, this,
+ $mem$$Register /* store_addr */,
+ $src$$Register /* new_val */,
+ $tmp1$$Register /* tmp1 */,
+ $tmp2$$Register /* tmp2 */,
+ $tmp3$$Register /* tmp3 */);
+ %}
+ ins_pipe(istore_mem_reg);
+%}
+
+instruct g1CompareAndSwapP(iRegI res, indirect mem, iRegP newval, iRegP tmp1, iRegP tmp2, iRegP tmp3, iRegP oldval, flagsReg ccr )
+%{
+ predicate(UseG1GC && n->as_LoadStore()->barrier_data() != 0);
+ match(Set res (CompareAndSwapP mem (Binary oldval newval)));
+ effect(KILL ccr, TEMP res, TEMP tmp1, TEMP tmp2, TEMP tmp3);
+ ins_cost(4 * (MEMORY_REF_COST + BRANCH_COST));
+ format %{ "loop: \n\t"
+ "LDREX $tmp1, $mem\t! If $oldval==[$mem] Then store $newval into [$mem]\n\t"
+ "CMP $tmp1, $oldval\n\t"
+ "STREX.eq $tmp1, $newval, $mem\n\t"
+ "MOV.ne $tmp1, 0 \n\t"
+ "EORS.eq $tmp1,$tmp1, 1 \n\t"
+ "B.eq loop \n\t"
+ "MOV $res, $tmp1" %}
+ ins_encode %{
+ guarantee($mem$$disp == 0, "impossible encoding");
+ assert_different_registers($oldval$$Register, $mem$$Register);
+ assert_different_registers($newval$$Register, $mem$$Register);
+ write_barrier_pre(masm, this,
+ noreg /* obj */,
+ $oldval$$Register /* pre_val */,
+ $tmp2$$Register /* tmp1 */,
+ $tmp3$$Register /* tmp2 */,
+ RegSet::of($mem$$Register, $oldval$$Register, $newval$$Register) /* preserve */,
+ RegSet::of($res$$Register) /* no_preserve */);
+ Label loop;
+ __ bind(loop);
+ __ ldrex($tmp1$$Register,$mem$$Address);
+ __ cmp($tmp1$$Register, $oldval$$Register);
+ __ strex($tmp1$$Register, $newval$$Register, $mem$$Address, eq);
+ __ mov($tmp1$$Register, 0, ne);
+ __ eors($tmp1$$Register, $tmp1$$Register, 1, eq);
+ __ b(loop, eq);
+ __ mov($res$$Register, $tmp1$$Register);
+ write_barrier_post(masm, this,
+ $mem$$Register /* store_addr */,
+ $newval$$Register /* new_val */,
+ $tmp1$$Register /* tmp1 */,
+ $tmp2$$Register /* tmp2 */,
+ $tmp3$$Register /* tmp3 */);
+ %}
+ ins_pipe(long_memory_op);
+%}
+
+
+instruct g1GetAndSetP(indirect mem, iRegP newval, iRegP tmp1, iRegP tmp2, iRegP tmp3, iRegP preval, flagsReg ccr)
+%{
+ predicate(UseG1GC && n->as_LoadStore()->barrier_data() != 0);
+ match(Set preval (GetAndSetP mem newval));
+ effect(KILL ccr, TEMP preval, TEMP tmp1, TEMP tmp2, TEMP tmp3);
+ ins_cost(4 * (MEMORY_REF_COST + BRANCH_COST));
+ format %{ "loop: \n\t"
+ "LDREX $preval, $mem\n\t"
+ "STREX $tmp1, $newval, $mem\n\t"
+ "CMP $tmp1, 0 \n\t"
+ "B.ne loop \n\t" %}
+ ins_encode %{
+ guarantee($mem$$disp == 0, "impossible encoding");
+ assert_different_registers($mem$$Register, $newval$$Register);
+ write_barrier_pre(masm, this,
+ $mem$$Register /* obj */,
+ $preval$$Register /* pre_val (as a temporary register) */,
+ $tmp1$$Register /* tmp1 */,
+ $tmp2$$Register /* tmp2 */,
+ RegSet::of($mem$$Register, $preval$$Register, $newval$$Register) /* preserve */);
+ Label loop;
+ __ bind(loop);
+ __ ldrex($preval$$Register,$mem$$Address);
+ __ strex($tmp1$$Register, $newval$$Register, $mem$$Address);
+ __ cmp($tmp1$$Register, 0);
+ __ b(loop, ne);
+ write_barrier_post(masm, this,
+ $mem$$Register /* store_addr */,
+ $newval$$Register /* new_val */,
+ $tmp1$$Register /* tmp1 */,
+ $tmp2$$Register /* tmp2 */,
+ $tmp3$$Register /* tmp3 */);
+ %}
+ ins_pipe(long_memory_op);
+%}
+
+instruct g1LoadP(iRegP dst, indirect mem, iRegP tmp1, iRegP tmp2, flagsReg icc)
+%{
+ predicate(UseG1GC && n->as_Load()->barrier_data() != 0);
+ match(Set dst (LoadP mem));
+ effect(TEMP dst, TEMP tmp1, TEMP tmp2, KILL icc);
+ ins_cost(MEMORY_REF_COST + BRANCH_COST);
+ format %{ "ld $dst, $mem\t# ptr" %}
+ ins_encode %{
+ guarantee($mem$$disp == 0, "impossible encoding");
+ __ ldr($dst$$Register, Address($mem$$Register));
+ write_barrier_pre(masm, this,
+ noreg /* obj */,
+ $dst$$Register /* pre_val */,
+ $tmp1$$Register /* tmp1 */,
+ $tmp2$$Register /* tmp2 */);
+ %}
+ ins_pipe(iload_mem);
+%}
diff --git a/src/hotspot/cpu/arm/gc/shared/barrierSetAssembler_arm.cpp b/src/hotspot/cpu/arm/gc/shared/barrierSetAssembler_arm.cpp
index ea19730673cb6..c13a259a1b960 100644
--- a/src/hotspot/cpu/arm/gc/shared/barrierSetAssembler_arm.cpp
+++ b/src/hotspot/cpu/arm/gc/shared/barrierSetAssembler_arm.cpp
@@ -31,6 +31,10 @@
#include "runtime/javaThread.hpp"
#include "runtime/stubRoutines.hpp"
+#ifdef COMPILER2
+#include "gc/shared/c2/barrierSetC2.hpp"
+#endif // COMPILER2
+
#define __ masm->
void BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
@@ -206,7 +210,57 @@ void BarrierSetAssembler::nmethod_entry_barrier(MacroAssembler* masm) {
#ifdef COMPILER2
OptoReg::Name BarrierSetAssembler::refine_register(const Node* node, OptoReg::Name opto_reg) {
- Unimplemented(); // This must be implemented to support late barrier expansion.
+ if (!OptoReg::is_reg(opto_reg)) {
+ return OptoReg::Bad;
+ }
+
+ const VMReg vm_reg = OptoReg::as_VMReg(opto_reg);
+ if (!vm_reg->is_valid()){
+ // skip APSR and FPSCR
+ return OptoReg::Bad;
+ }
+
+ return opto_reg;
}
+void SaveLiveRegisters::initialize(BarrierStubC2* stub) {
+ // Record registers that needs to be saved/restored
+ RegMaskIterator rmi(stub->preserve_set());
+ while (rmi.has_next()) {
+ const OptoReg::Name opto_reg = rmi.next();
+ if (OptoReg::is_reg(opto_reg)) {
+ const VMReg vm_reg = OptoReg::as_VMReg(opto_reg);
+ if (vm_reg->is_Register()) {
+ gp_regs += RegSet::of(vm_reg->as_Register());
+ } else if (vm_reg->is_FloatRegister()) {
+ fp_regs += FloatRegSet::of(vm_reg->as_FloatRegister());
+ } else {
+ fatal("Unknown register type");
+ }
+ }
+ }
+ // Remove C-ABI SOE registers that will be updated
+ gp_regs -= RegSet::range(R4, R11) + RegSet::of(R13, R15);
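+ // On ARM, R13 is SP and R15 is PC; they must never be pushed/popped here.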
+
+ // Remove C-ABI SOE fp registers
+ fp_regs -= FloatRegSet::range(S16, S31);
+}
+
+SaveLiveRegisters::SaveLiveRegisters(MacroAssembler* masm, BarrierStubC2* stub)
+ : masm(masm),
+ gp_regs(),
+ fp_regs() {
+ // Figure out what registers to save/restore
+ initialize(stub);
+
+ // Save registers
+ if (gp_regs.size() > 0) __ push(RegisterSet::from(gp_regs));
+ if (fp_regs.size() > 0) __ fpush(FloatRegisterSet::from(fp_regs));
+}
+
+SaveLiveRegisters::~SaveLiveRegisters() {
+ // Restore registers
+ if (fp_regs.size() > 0) __ fpop(FloatRegisterSet::from(fp_regs));
+ if (gp_regs.size() > 0) __ pop(RegisterSet::from(gp_regs));
+}
#endif // COMPILER2
diff --git a/src/hotspot/cpu/arm/gc/shared/barrierSetAssembler_arm.hpp b/src/hotspot/cpu/arm/gc/shared/barrierSetAssembler_arm.hpp
index 60021390ea26f..054d172f46340 100644
--- a/src/hotspot/cpu/arm/gc/shared/barrierSetAssembler_arm.hpp
+++ b/src/hotspot/cpu/arm/gc/shared/barrierSetAssembler_arm.hpp
@@ -31,7 +31,9 @@
#ifdef COMPILER2
#include "code/vmreg.hpp"
#include "opto/optoreg.hpp"
+#include "opto/regmask.hpp"
+class BarrierStubC2;
class Node;
#endif // COMPILER2
@@ -69,4 +71,26 @@ class BarrierSetAssembler: public CHeapObj<mtGC> {
#endif // COMPILER2
};
+#ifdef COMPILER2
+// This class saves and restores the registers that need to be preserved across
+// the runtime call represented by a given C2 barrier stub. Use as follows:
+// {
+// SaveLiveRegisters save(masm, stub);
+// ..
+// __ bl(...);
+// ..
+// }
+class SaveLiveRegisters {
+private:
+ MacroAssembler* const masm;
+ RegSet gp_regs;
+ FloatRegSet fp_regs;
+
+public:
+ void initialize(BarrierStubC2* stub);
+ SaveLiveRegisters(MacroAssembler* masm, BarrierStubC2* stub);
+ ~SaveLiveRegisters();
+};
+
+#endif // COMPILER2
#endif // CPU_ARM_GC_SHARED_BARRIERSETASSEMBLER_ARM_HPP
diff --git a/src/hotspot/cpu/arm/register_arm.hpp b/src/hotspot/cpu/arm/register_arm.hpp
index 9f486d2a62586..d8961fd293578 100644
--- a/src/hotspot/cpu/arm/register_arm.hpp
+++ b/src/hotspot/cpu/arm/register_arm.hpp
@@ -303,6 +303,31 @@ class ConcreteRegisterImpl : public AbstractRegisterImpl {
static const int max_fpr;
};
+typedef AbstractRegSet<Register> RegSet;
+typedef AbstractRegSet<FloatRegister> FloatRegSet;
+
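+// These specializations return the lowest (first) and highest (last) register in a set, or noreg/fnoreg when the set is empty.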
+template <>
+inline Register AbstractRegSet<Register>::first() {
+ if (_bitset == 0) { return noreg; }
+ return as_Register(count_trailing_zeros(_bitset));
+}
+
+
+template <>
+inline FloatRegister AbstractRegSet<FloatRegister>::first() {
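+ // Isolating the lowest set bit (x & -x) yields the first register in the set.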
+ uint32_t first = _bitset & -_bitset;
+ return first ? as_FloatRegister(exact_log2(first)) : fnoreg;
+}
+
+template <>
+inline FloatRegister AbstractRegSet<FloatRegister>::last() {
+ if (_bitset == 0) { return fnoreg; }
+ int last = max_size() - 1 - count_leading_zeros(_bitset);
+ return as_FloatRegister(last);
+}
+
+
+
class VFPSystemRegisterImpl;
typedef VFPSystemRegisterImpl* VFPSystemRegister;
class VFPSystemRegisterImpl : public AbstractRegisterImpl {
diff --git a/src/hotspot/cpu/arm/sharedRuntime_arm.cpp b/src/hotspot/cpu/arm/sharedRuntime_arm.cpp
index 7648e5c5d9260..7c1f3aafe7d52 100644
--- a/src/hotspot/cpu/arm/sharedRuntime_arm.cpp
+++ b/src/hotspot/cpu/arm/sharedRuntime_arm.cpp
@@ -38,6 +38,7 @@
#include "runtime/sharedRuntime.hpp"
#include "runtime/safepointMechanism.hpp"
#include "runtime/stubRoutines.hpp"
+#include "runtime/timerTrace.hpp"
#include "runtime/vframeArray.hpp"
#include "utilities/align.hpp"
#include "utilities/powerOfTwo.hpp"
diff --git a/src/hotspot/cpu/arm/templateInterpreterGenerator_arm.cpp b/src/hotspot/cpu/arm/templateInterpreterGenerator_arm.cpp
index 679f07a028e2c..ec9d237e50da0 100644
--- a/src/hotspot/cpu/arm/templateInterpreterGenerator_arm.cpp
+++ b/src/hotspot/cpu/arm/templateInterpreterGenerator_arm.cpp
@@ -175,6 +175,7 @@ address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::M
break;
case Interpreter::java_lang_math_fmaD:
case Interpreter::java_lang_math_fmaF:
+ case Interpreter::java_lang_math_tanh:
// TODO: Implement intrinsic
break;
default:
diff --git a/src/hotspot/cpu/arm/upcallLinker_arm.cpp b/src/hotspot/cpu/arm/upcallLinker_arm.cpp
index c7645f4a03351..696b2001e6b7b 100644
--- a/src/hotspot/cpu/arm/upcallLinker_arm.cpp
+++ b/src/hotspot/cpu/arm/upcallLinker_arm.cpp
@@ -25,7 +25,7 @@
#include "prims/upcallLinker.hpp"
#include "utilities/debug.hpp"
-address UpcallLinker::make_upcall_stub(jobject receiver, Method* entry,
+address UpcallLinker::make_upcall_stub(jobject receiver, Symbol* signature,
BasicType* out_sig_bt, int total_out_args,
BasicType ret_type,
jobject jabi, jobject jconv,
diff --git a/src/hotspot/cpu/ppc/c1_CodeStubs_ppc.cpp b/src/hotspot/cpu/ppc/c1_CodeStubs_ppc.cpp
index dc70c73d4b330..451f3b7e9cd6b 100644
--- a/src/hotspot/cpu/ppc/c1_CodeStubs_ppc.cpp
+++ b/src/hotspot/cpu/ppc/c1_CodeStubs_ppc.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1999, 2023, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2024, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2021 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@@ -68,7 +68,7 @@ void RangeCheckStub::emit_code(LIR_Assembler* ce) {
__ bind(_entry);
if (_info->deoptimize_on_exception()) {
- address a = Runtime1::entry_for(Runtime1::predicate_failed_trap_id);
+ address a = Runtime1::entry_for(C1StubId::predicate_failed_trap_id);
//__ load_const_optimized(R0, a);
__ add_const_optimized(R0, R29_TOC, MacroAssembler::offset_to_global_toc(a));
__ mtctr(R0);
@@ -79,8 +79,8 @@ void RangeCheckStub::emit_code(LIR_Assembler* ce) {
return;
}
- address stub = _throw_index_out_of_bounds_exception ? Runtime1::entry_for(Runtime1::throw_index_exception_id)
- : Runtime1::entry_for(Runtime1::throw_range_check_failed_id);
+ address stub = _throw_index_out_of_bounds_exception ? Runtime1::entry_for(C1StubId::throw_index_exception_id)
+ : Runtime1::entry_for(C1StubId::throw_range_check_failed_id);
//__ load_const_optimized(R0, stub);
__ add_const_optimized(R0, R29_TOC, MacroAssembler::offset_to_global_toc(stub));
__ mtctr(R0);
@@ -109,7 +109,7 @@ PredicateFailedStub::PredicateFailedStub(CodeEmitInfo* info) {
void PredicateFailedStub::emit_code(LIR_Assembler* ce) {
__ bind(_entry);
- address a = Runtime1::entry_for(Runtime1::predicate_failed_trap_id);
+ address a = Runtime1::entry_for(C1StubId::predicate_failed_trap_id);
//__ load_const_optimized(R0, a);
__ add_const_optimized(R0, R29_TOC, MacroAssembler::offset_to_global_toc(a));
__ mtctr(R0);
@@ -133,7 +133,7 @@ void CounterOverflowStub::emit_code(LIR_Assembler* ce) {
__ load_const_optimized(R0, md.value());
__ std(R0, -8, R1_SP);
- address a = Runtime1::entry_for(Runtime1::counter_overflow_id);
+ address a = Runtime1::entry_for(C1StubId::counter_overflow_id);
//__ load_const_optimized(R0, a);
__ add_const_optimized(R0, R29_TOC, MacroAssembler::offset_to_global_toc(a));
__ mtctr(R0);
@@ -150,7 +150,7 @@ void DivByZeroStub::emit_code(LIR_Assembler* ce) {
ce->compilation()->implicit_exception_table()->append(_offset, __ offset());
}
__ bind(_entry);
- address stub = Runtime1::entry_for(Runtime1::throw_div0_exception_id);
+ address stub = Runtime1::entry_for(C1StubId::throw_div0_exception_id);
//__ load_const_optimized(R0, stub);
__ add_const_optimized(R0, R29_TOC, MacroAssembler::offset_to_global_toc(stub));
__ mtctr(R0);
@@ -165,9 +165,9 @@ void ImplicitNullCheckStub::emit_code(LIR_Assembler* ce) {
address a;
if (_info->deoptimize_on_exception()) {
// Deoptimize, do not throw the exception, because it is probably wrong to do it here.
- a = Runtime1::entry_for(Runtime1::predicate_failed_trap_id);
+ a = Runtime1::entry_for(C1StubId::predicate_failed_trap_id);
} else {
- a = Runtime1::entry_for(Runtime1::throw_null_pointer_exception_id);
+ a = Runtime1::entry_for(C1StubId::throw_null_pointer_exception_id);
}
if (ImplicitNullChecks || TrapBasedNullChecks) {
@@ -199,14 +199,14 @@ void SimpleExceptionStub::emit_code(LIR_Assembler* ce) {
// Implementation of NewInstanceStub
-NewInstanceStub::NewInstanceStub(LIR_Opr klass_reg, LIR_Opr result, ciInstanceKlass* klass, CodeEmitInfo* info, Runtime1::StubID stub_id) {
+NewInstanceStub::NewInstanceStub(LIR_Opr klass_reg, LIR_Opr result, ciInstanceKlass* klass, CodeEmitInfo* info, C1StubId stub_id) {
_result = result;
_klass = klass;
_klass_reg = klass_reg;
_info = new CodeEmitInfo(info);
- assert(stub_id == Runtime1::new_instance_id ||
- stub_id == Runtime1::fast_new_instance_id ||
- stub_id == Runtime1::fast_new_instance_init_check_id,
+ assert(stub_id == C1StubId::new_instance_id ||
+ stub_id == C1StubId::fast_new_instance_id ||
+ stub_id == C1StubId::fast_new_instance_init_check_id,
"need new_instance id");
_stub_id = stub_id;
}
@@ -236,7 +236,7 @@ NewTypeArrayStub::NewTypeArrayStub(LIR_Opr klass_reg, LIR_Opr length, LIR_Opr re
void NewTypeArrayStub::emit_code(LIR_Assembler* ce) {
__ bind(_entry);
- address entry = Runtime1::entry_for(Runtime1::new_type_array_id);
+ address entry = Runtime1::entry_for(C1StubId::new_type_array_id);
//__ load_const_optimized(R0, entry);
__ add_const_optimized(R0, R29_TOC, MacroAssembler::offset_to_global_toc(entry));
__ mr_if_needed(/*op->tmp1()->as_register()*/ R5_ARG3, _length->as_register()); // already sign-extended
@@ -259,7 +259,7 @@ NewObjectArrayStub::NewObjectArrayStub(LIR_Opr klass_reg, LIR_Opr length, LIR_Op
void NewObjectArrayStub::emit_code(LIR_Assembler* ce) {
__ bind(_entry);
- address entry = Runtime1::entry_for(Runtime1::new_object_array_id);
+ address entry = Runtime1::entry_for(C1StubId::new_object_array_id);
//__ load_const_optimized(R0, entry);
__ add_const_optimized(R0, R29_TOC, MacroAssembler::offset_to_global_toc(entry));
__ mr_if_needed(/*op->tmp1()->as_register()*/ R5_ARG3, _length->as_register()); // already sign-extended
@@ -272,7 +272,7 @@ void NewObjectArrayStub::emit_code(LIR_Assembler* ce) {
void MonitorEnterStub::emit_code(LIR_Assembler* ce) {
__ bind(_entry);
- address stub = Runtime1::entry_for(ce->compilation()->has_fpu_code() ? Runtime1::monitorenter_id : Runtime1::monitorenter_nofpu_id);
+ address stub = Runtime1::entry_for(ce->compilation()->has_fpu_code() ? C1StubId::monitorenter_id : C1StubId::monitorenter_nofpu_id);
//__ load_const_optimized(R0, stub);
__ add_const_optimized(R0, R29_TOC, MacroAssembler::offset_to_global_toc(stub));
__ mr_if_needed(/*scratch_opr()->as_register()*/ R4_ARG2, _obj_reg->as_register());
@@ -289,7 +289,7 @@ void MonitorExitStub::emit_code(LIR_Assembler* ce) {
if (_compute_lock) {
ce->monitor_address(_monitor_ix, _lock_reg);
}
- address stub = Runtime1::entry_for(ce->compilation()->has_fpu_code() ? Runtime1::monitorexit_id : Runtime1::monitorexit_nofpu_id);
+ address stub = Runtime1::entry_for(ce->compilation()->has_fpu_code() ? C1StubId::monitorexit_id : C1StubId::monitorexit_nofpu_id);
//__ load_const_optimized(R0, stub);
__ add_const_optimized(R0, R29_TOC, MacroAssembler::offset_to_global_toc(stub));
assert(_lock_reg->as_register() == R4_ARG2, "");
@@ -403,12 +403,12 @@ void PatchingStub::emit_code(LIR_Assembler* ce) {
address target = nullptr;
relocInfo::relocType reloc_type = relocInfo::none;
switch (_id) {
- case access_field_id: target = Runtime1::entry_for(Runtime1::access_field_patching_id); break;
- case load_klass_id: target = Runtime1::entry_for(Runtime1::load_klass_patching_id);
+ case access_field_id: target = Runtime1::entry_for(C1StubId::access_field_patching_id); break;
+ case load_klass_id: target = Runtime1::entry_for(C1StubId::load_klass_patching_id);
reloc_type = relocInfo::metadata_type; break;
- case load_mirror_id: target = Runtime1::entry_for(Runtime1::load_mirror_patching_id);
+ case load_mirror_id: target = Runtime1::entry_for(C1StubId::load_mirror_patching_id);
reloc_type = relocInfo::oop_type; break;
- case load_appendix_id: target = Runtime1::entry_for(Runtime1::load_appendix_patching_id);
+ case load_appendix_id: target = Runtime1::entry_for(C1StubId::load_appendix_patching_id);
reloc_type = relocInfo::oop_type; break;
default: ShouldNotReachHere();
}
@@ -434,7 +434,7 @@ void PatchingStub::emit_code(LIR_Assembler* ce) {
void DeoptimizeStub::emit_code(LIR_Assembler* ce) {
__ bind(_entry);
- address stub = Runtime1::entry_for(Runtime1::deoptimize_id);
+ address stub = Runtime1::entry_for(C1StubId::deoptimize_id);
//__ load_const_optimized(R0, stub);
__ add_const_optimized(R0, R29_TOC, MacroAssembler::offset_to_global_toc(stub));
__ mtctr(R0);
diff --git a/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp b/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp
index 2191b894f6e16..42934dc7c3179 100644
--- a/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp
+++ b/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp
@@ -176,7 +176,7 @@ int LIR_Assembler::emit_exception_handler() {
}
int offset = code_offset();
- address entry_point = CAST_FROM_FN_PTR(address, Runtime1::entry_for(Runtime1::handle_exception_from_callee_id));
+ address entry_point = CAST_FROM_FN_PTR(address, Runtime1::entry_for(C1StubId::handle_exception_from_callee_id));
//__ load_const_optimized(R0, entry_point);
__ add_const_optimized(R0, R29_TOC, MacroAssembler::offset_to_global_toc(entry_point));
__ mtctr(R0);
@@ -222,7 +222,7 @@ int LIR_Assembler::emit_unwind_handler() {
}
// Dispatch to the unwind logic.
- address unwind_stub = Runtime1::entry_for(Runtime1::unwind_exception_id);
+ address unwind_stub = Runtime1::entry_for(C1StubId::unwind_exception_id);
//__ load_const_optimized(R0, unwind_stub);
__ add_const_optimized(R0, R29_TOC, MacroAssembler::offset_to_global_toc(unwind_stub));
if (preserve_exception) { __ mr(Rexception, Rexception_save); }
@@ -1800,8 +1800,8 @@ void LIR_Assembler::throw_op(LIR_Opr exceptionPC, LIR_Opr exceptionOop, CodeEmit
__ calculate_address_from_global_toc(exceptionPC->as_register(), pc_for_athrow, true, true, /*add_relocation*/ true);
add_call_info(pc_for_athrow_offset, info); // for exception handler
- address stub = Runtime1::entry_for(compilation()->has_fpu_code() ? Runtime1::handle_exception_id
- : Runtime1::handle_exception_nofpu_id);
+ address stub = Runtime1::entry_for(compilation()->has_fpu_code() ? C1StubId::handle_exception_id
+ : C1StubId::handle_exception_nofpu_id);
//__ load_const_optimized(R0, stub);
__ add_const_optimized(R0, R29_TOC, MacroAssembler::offset_to_global_toc(stub));
__ mtctr(R0);
@@ -2001,7 +2001,7 @@ void LIR_Assembler::emit_arraycopy(LIR_OpArrayCopy* op) {
__ check_klass_subtype_fast_path(sub_klass, super_klass, tmp, tmp2,
&cont, copyfunc_addr != nullptr ? ©func : &slow, nullptr);
- address slow_stc = Runtime1::entry_for(Runtime1::slow_subtype_check_id);
+ address slow_stc = Runtime1::entry_for(C1StubId::slow_subtype_check_id);
//__ load_const_optimized(tmp, slow_stc, tmp2);
__ calculate_address_from_global_toc(tmp, slow_stc, true, true, false);
__ mtctr(tmp);
@@ -2452,7 +2452,7 @@ void LIR_Assembler::emit_typecheck_helper(LIR_OpTypeCheck *op, Label* success, L
__ b(*success);
} else {
// Call out-of-line instance of __ check_klass_subtype_slow_path(...):
- address entry = Runtime1::entry_for(Runtime1::slow_subtype_check_id);
+ address entry = Runtime1::entry_for(C1StubId::slow_subtype_check_id);
// Stub needs fixed registers (tmp1-3).
Register original_k_RInfo = op->tmp1()->as_register();
Register original_klass_RInfo = op->tmp2()->as_register();
@@ -2543,7 +2543,7 @@ void LIR_Assembler::emit_opTypeCheck(LIR_OpTypeCheck* op) {
__ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, R0, &done, &failure, nullptr);
// Call out-of-line instance of __ check_klass_subtype_slow_path(...):
- const address slow_path = Runtime1::entry_for(Runtime1::slow_subtype_check_id);
+ const address slow_path = Runtime1::entry_for(C1StubId::slow_subtype_check_id);
//__ load_const_optimized(R0, slow_path);
__ add_const_optimized(R0, R29_TOC, MacroAssembler::offset_to_global_toc(slow_path));
__ mtctr(R0);
@@ -2850,8 +2850,8 @@ void LIR_Assembler::negate(LIR_Opr left, LIR_Opr dest, LIR_Opr tmp) {
void LIR_Assembler::rt_call(LIR_Opr result, address dest,
const LIR_OprList* args, LIR_Opr tmp, CodeEmitInfo* info) {
// Stubs: Called via rt_call, but dest is a stub address (no function descriptor).
- if (dest == Runtime1::entry_for(Runtime1::register_finalizer_id) ||
- dest == Runtime1::entry_for(Runtime1::new_multi_array_id )) {
+ if (dest == Runtime1::entry_for(C1StubId::register_finalizer_id) ||
+ dest == Runtime1::entry_for(C1StubId::new_multi_array_id )) {
//__ load_const_optimized(R0, dest);
__ add_const_optimized(R0, R29_TOC, MacroAssembler::offset_to_global_toc(dest));
__ mtctr(R0);
diff --git a/src/hotspot/cpu/ppc/c1_LIRGenerator_ppc.cpp b/src/hotspot/cpu/ppc/c1_LIRGenerator_ppc.cpp
index 04762a22c6110..7973e9d05459e 100644
--- a/src/hotspot/cpu/ppc/c1_LIRGenerator_ppc.cpp
+++ b/src/hotspot/cpu/ppc/c1_LIRGenerator_ppc.cpp
@@ -1032,7 +1032,7 @@ void LIRGenerator::do_NewMultiArray(NewMultiArray* x) {
args->append(rank);
args->append(varargs);
const LIR_Opr reg = result_register_for(x->type());
- __ call_runtime(Runtime1::entry_for(Runtime1::new_multi_array_id),
+ __ call_runtime(Runtime1::entry_for(C1StubId::new_multi_array_id),
LIR_OprFact::illegalOpr,
reg, args, info);
@@ -1067,7 +1067,7 @@ void LIRGenerator::do_CheckCast(CheckCast* x) {
if (x->is_incompatible_class_change_check()) {
assert(patching_info == nullptr, "can't patch this");
- stub = new SimpleExceptionStub(Runtime1::throw_incompatible_class_change_error_id,
+ stub = new SimpleExceptionStub(C1StubId::throw_incompatible_class_change_error_id,
LIR_OprFact::illegalOpr, info_for_exception);
} else if (x->is_invokespecial_receiver_check()) {
assert(patching_info == nullptr, "can't patch this");
@@ -1075,7 +1075,7 @@ void LIRGenerator::do_CheckCast(CheckCast* x) {
Deoptimization::Reason_class_check,
Deoptimization::Action_none);
} else {
- stub = new SimpleExceptionStub(Runtime1::throw_class_cast_exception_id, obj.result(), info_for_exception);
+ stub = new SimpleExceptionStub(C1StubId::throw_class_cast_exception_id, obj.result(), info_for_exception);
}
// Following registers are used by slow_subtype_check:
LIR_Opr tmp1 = FrameMap::R4_oop_opr; // super_klass
diff --git a/src/hotspot/cpu/ppc/c1_MacroAssembler_ppc.cpp b/src/hotspot/cpu/ppc/c1_MacroAssembler_ppc.cpp
index 059bb2eae0c3a..83fad376d292a 100644
--- a/src/hotspot/cpu/ppc/c1_MacroAssembler_ppc.cpp
+++ b/src/hotspot/cpu/ppc/c1_MacroAssembler_ppc.cpp
@@ -92,7 +92,7 @@ void C1_MacroAssembler::lock_object(Register Rmark, Register Roop, Register Rbox
}
if (LockingMode == LM_LIGHTWEIGHT) {
- lightweight_lock(Roop, Rmark, Rscratch, slow_int);
+ lightweight_lock(Rbox, Roop, Rmark, Rscratch, slow_int);
} else if (LockingMode == LM_LEGACY) {
// ... and mark it unlocked.
ori(Rmark, Rmark, markWord::unlocked_value);
@@ -293,7 +293,7 @@ void C1_MacroAssembler::initialize_object(
if (CURRENT_ENV->dtrace_alloc_probes()) {
Unimplemented();
// assert(obj == O0, "must be");
-// call(CAST_FROM_FN_PTR(address, Runtime1::entry_for(Runtime1::dtrace_object_alloc_id)),
+// call(CAST_FROM_FN_PTR(address, Runtime1::entry_for(C1StubId::dtrace_object_alloc_id)),
// relocInfo::runtime_call_type);
}
@@ -369,7 +369,7 @@ void C1_MacroAssembler::allocate_array(
if (CURRENT_ENV->dtrace_alloc_probes()) {
Unimplemented();
//assert(obj == O0, "must be");
- //call(CAST_FROM_FN_PTR(address, Runtime1::entry_for(Runtime1::dtrace_object_alloc_id)),
+ //call(CAST_FROM_FN_PTR(address, Runtime1::entry_for(C1StubId::dtrace_object_alloc_id)),
// relocInfo::runtime_call_type);
}
@@ -398,7 +398,7 @@ void C1_MacroAssembler::null_check(Register r, Label* Lnull) {
if (TrapBasedNullChecks) { // SIGTRAP based
trap_null_check(r);
} else { // explicit
- //const address exception_entry = Runtime1::entry_for(Runtime1::throw_null_pointer_exception_id);
+ //const address exception_entry = Runtime1::entry_for(C1StubId::throw_null_pointer_exception_id);
assert(Lnull != nullptr, "must have Label for explicit check");
cmpdi(CCR0, r, 0);
bc_far_optimized(Assembler::bcondCRbiIs1, bi0(CCR0, Assembler::equal), *Lnull);
diff --git a/src/hotspot/cpu/ppc/c1_Runtime1_ppc.cpp b/src/hotspot/cpu/ppc/c1_Runtime1_ppc.cpp
index adddfda4ee74f..654626d66d812 100644
--- a/src/hotspot/cpu/ppc/c1_Runtime1_ppc.cpp
+++ b/src/hotspot/cpu/ppc/c1_Runtime1_ppc.cpp
@@ -97,12 +97,12 @@ int StubAssembler::call_RT(Register oop_result1, Register metadata_result,
//load_const_optimized(R0, StubRoutines::forward_exception_entry());
//mtctr(R0);
//bctr();
- } else if (_stub_id == Runtime1::forward_exception_id) {
+ } else if (_stub_id == (int)C1StubId::forward_exception_id) {
should_not_reach_here();
} else {
// keep stub frame for next call_RT
- //load_const_optimized(R0, Runtime1::entry_for(Runtime1::forward_exception_id));
- add_const_optimized(R0, R29_TOC, MacroAssembler::offset_to_global_toc(Runtime1::entry_for(Runtime1::forward_exception_id)));
+ //load_const_optimized(R0, Runtime1::entry_for(C1StubId::forward_exception_id));
+ add_const_optimized(R0, R29_TOC, MacroAssembler::offset_to_global_toc(Runtime1::entry_for(C1StubId::forward_exception_id)));
mtctr(R0);
bctr();
}
@@ -388,7 +388,7 @@ OopMapSet* Runtime1::generate_patching(StubAssembler* sasm, address target) {
return oop_maps;
}
-OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
+OopMapSet* Runtime1::generate_code_for(C1StubId id, StubAssembler* sasm) {
OopMapSet* oop_maps = nullptr;
// For better readability.
@@ -397,22 +397,22 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
// Stub code & info for the different stubs.
switch (id) {
- case forward_exception_id:
+ case C1StubId::forward_exception_id:
{
oop_maps = generate_handle_exception(id, sasm);
}
break;
- case new_instance_id:
- case fast_new_instance_id:
- case fast_new_instance_init_check_id:
+ case C1StubId::new_instance_id:
+ case C1StubId::fast_new_instance_id:
+ case C1StubId::fast_new_instance_init_check_id:
{
- if (id == new_instance_id) {
+ if (id == C1StubId::new_instance_id) {
__ set_info("new_instance", dont_gc_arguments);
- } else if (id == fast_new_instance_id) {
+ } else if (id == C1StubId::fast_new_instance_id) {
__ set_info("fast new_instance", dont_gc_arguments);
} else {
- assert(id == fast_new_instance_init_check_id, "bad StubID");
+ assert(id == C1StubId::fast_new_instance_init_check_id, "bad C1StubId");
__ set_info("fast new_instance init check", dont_gc_arguments);
}
@@ -422,15 +422,15 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
}
break;
- case counter_overflow_id:
+ case C1StubId::counter_overflow_id:
// Bci and method are on stack.
oop_maps = stub_call_with_stack_parms(sasm, noreg, CAST_FROM_FN_PTR(address, counter_overflow), 2);
break;
- case new_type_array_id:
- case new_object_array_id:
+ case C1StubId::new_type_array_id:
+ case C1StubId::new_object_array_id:
{
- if (id == new_type_array_id) {
+ if (id == C1StubId::new_type_array_id) {
__ set_info("new_type_array", dont_gc_arguments);
} else {
__ set_info("new_object_array", dont_gc_arguments);
@@ -439,7 +439,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
#ifdef ASSERT
// Assert object type is really an array of the proper kind.
{
- int tag = (id == new_type_array_id) ? Klass::_lh_array_tag_type_value : Klass::_lh_array_tag_obj_value;
+ int tag = (id == C1StubId::new_type_array_id) ? Klass::_lh_array_tag_type_value : Klass::_lh_array_tag_obj_value;
Label ok;
__ lwz(R0, in_bytes(Klass::layout_helper_offset()), R4_ARG2);
__ srawi(R0, R0, Klass::_lh_array_tag_shift);
@@ -453,7 +453,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
// We don't support eden allocation.
- if (id == new_type_array_id) {
+ if (id == C1StubId::new_type_array_id) {
oop_maps = generate_stub_call(sasm, R3_RET, CAST_FROM_FN_PTR(address, new_type_array), R4_ARG2, R5_ARG3);
} else {
oop_maps = generate_stub_call(sasm, R3_RET, CAST_FROM_FN_PTR(address, new_object_array), R4_ARG2, R5_ARG3);
@@ -461,7 +461,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
}
break;
- case new_multi_array_id:
+ case C1StubId::new_multi_array_id:
{
// R4: klass
// R5: rank
@@ -471,7 +471,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
}
break;
- case register_finalizer_id:
+ case C1StubId::register_finalizer_id:
{
__ set_info("register_finalizer", dont_gc_arguments);
// This code is called via rt_call. Hence, caller-save registers have been saved.
@@ -501,50 +501,50 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
}
break;
- case throw_range_check_failed_id:
+ case C1StubId::throw_range_check_failed_id:
{
__ set_info("range_check_failed", dont_gc_arguments); // Arguments will be discarded.
oop_maps = generate_exception_throw_with_stack_parms(sasm, CAST_FROM_FN_PTR(address, throw_range_check_exception), 2);
}
break;
- case throw_index_exception_id:
+ case C1StubId::throw_index_exception_id:
{
__ set_info("index_range_check_failed", dont_gc_arguments); // Arguments will be discarded.
oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_index_exception), true);
}
break;
- case throw_div0_exception_id:
+ case C1StubId::throw_div0_exception_id:
{
__ set_info("throw_div0_exception", dont_gc_arguments);
oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_div0_exception), false);
}
break;
- case throw_null_pointer_exception_id:
+ case C1StubId::throw_null_pointer_exception_id:
{
__ set_info("throw_null_pointer_exception", dont_gc_arguments);
oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_null_pointer_exception), false);
}
break;
- case handle_exception_nofpu_id:
- case handle_exception_id:
+ case C1StubId::handle_exception_nofpu_id:
+ case C1StubId::handle_exception_id:
{
__ set_info("handle_exception", dont_gc_arguments);
oop_maps = generate_handle_exception(id, sasm);
}
break;
- case handle_exception_from_callee_id:
+ case C1StubId::handle_exception_from_callee_id:
{
__ set_info("handle_exception_from_callee", dont_gc_arguments);
oop_maps = generate_handle_exception(id, sasm);
}
break;
- case unwind_exception_id:
+ case C1StubId::unwind_exception_id:
{
const Register Rexception = R3 /*LIRGenerator::exceptionOopOpr()*/,
Rexception_pc = R4 /*LIRGenerator::exceptionPcOpr()*/,
@@ -572,28 +572,28 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
}
break;
- case throw_array_store_exception_id:
+ case C1StubId::throw_array_store_exception_id:
{
__ set_info("throw_array_store_exception", dont_gc_arguments);
oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_array_store_exception), true);
}
break;
- case throw_class_cast_exception_id:
+ case C1StubId::throw_class_cast_exception_id:
{
__ set_info("throw_class_cast_exception", dont_gc_arguments);
oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_class_cast_exception), true);
}
break;
- case throw_incompatible_class_change_error_id:
+ case C1StubId::throw_incompatible_class_change_error_id:
{
__ set_info("throw_incompatible_class_cast_exception", dont_gc_arguments);
oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_incompatible_class_change_error), false);
}
break;
- case slow_subtype_check_id:
+ case C1StubId::slow_subtype_check_id:
{ // Support for uint StubRoutine::partial_subtype_check( Klass sub, Klass super );
const Register sub_klass = R5,
super_klass = R4,
@@ -605,12 +605,12 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
}
break;
- case monitorenter_nofpu_id:
- case monitorenter_id:
+ case C1StubId::monitorenter_nofpu_id:
+ case C1StubId::monitorenter_id:
{
__ set_info("monitorenter", dont_gc_arguments);
- int save_fpu_registers = (id == monitorenter_id);
+ int save_fpu_registers = (id == C1StubId::monitorenter_id);
// Make a frame and preserve the caller's caller-save registers.
OopMap* oop_map = save_live_registers(sasm, save_fpu_registers);
@@ -624,15 +624,15 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
}
break;
- case monitorexit_nofpu_id:
- case monitorexit_id:
+ case C1StubId::monitorexit_nofpu_id:
+ case C1StubId::monitorexit_id:
{
// note: Really a leaf routine but must setup last java sp
// => use call_RT for now (speed can be improved by
// doing last java sp setup manually).
__ set_info("monitorexit", dont_gc_arguments);
- int save_fpu_registers = (id == monitorexit_id);
+ int save_fpu_registers = (id == C1StubId::monitorexit_id);
// Make a frame and preserve the caller's caller-save registers.
OopMap* oop_map = save_live_registers(sasm, save_fpu_registers);
@@ -646,7 +646,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
}
break;
- case deoptimize_id:
+ case C1StubId::deoptimize_id:
{
__ set_info("deoptimize", dont_gc_arguments);
__ std(R0, -8, R1_SP); // Pass trap_request on stack.
@@ -662,35 +662,35 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
}
break;
- case access_field_patching_id:
+ case C1StubId::access_field_patching_id:
{
__ set_info("access_field_patching", dont_gc_arguments);
oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, access_field_patching));
}
break;
- case load_klass_patching_id:
+ case C1StubId::load_klass_patching_id:
{
__ set_info("load_klass_patching", dont_gc_arguments);
oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_klass_patching));
}
break;
- case load_mirror_patching_id:
+ case C1StubId::load_mirror_patching_id:
{
__ set_info("load_mirror_patching", dont_gc_arguments);
oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_mirror_patching));
}
break;
- case load_appendix_patching_id:
+ case C1StubId::load_appendix_patching_id:
{
__ set_info("load_appendix_patching", dont_gc_arguments);
oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_appendix_patching));
}
break;
- case dtrace_object_alloc_id:
+ case C1StubId::dtrace_object_alloc_id:
{ // O0: object
__ unimplemented("stub dtrace_object_alloc_id");
__ set_info("dtrace_object_alloc", dont_gc_arguments);
@@ -710,7 +710,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
}
break;
- case predicate_failed_trap_id:
+ case C1StubId::predicate_failed_trap_id:
{
__ set_info("predicate_failed_trap", dont_gc_arguments);
OopMap* oop_map = save_live_registers(sasm);
@@ -754,7 +754,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
}
-OopMapSet* Runtime1::generate_handle_exception(StubID id, StubAssembler* sasm) {
+OopMapSet* Runtime1::generate_handle_exception(C1StubId id, StubAssembler* sasm) {
__ block_comment("generate_handle_exception");
// Save registers, if required.
@@ -764,7 +764,7 @@ OopMapSet* Runtime1::generate_handle_exception(StubID id, StubAssembler* sasm) {
Rexception_pc = R4 /*LIRGenerator::exceptionPcOpr()*/;
switch (id) {
- case forward_exception_id:
+ case C1StubId::forward_exception_id:
// We're handling an exception in the context of a compiled frame.
// The registers have been saved in the standard places. Perform
// an exception lookup in the caller and dispatch to the handler
@@ -780,12 +780,12 @@ OopMapSet* Runtime1::generate_handle_exception(StubID id, StubAssembler* sasm) {
__ ld(Rexception_pc, _abi0(lr), Rexception_pc);
__ std(R0, in_bytes(JavaThread::pending_exception_offset()), R16_thread);
break;
- case handle_exception_nofpu_id:
- case handle_exception_id:
+ case C1StubId::handle_exception_nofpu_id:
+ case C1StubId::handle_exception_id:
// At this point all registers MAY be live.
- oop_map = save_live_registers(sasm, id != handle_exception_nofpu_id, Rexception_pc);
+ oop_map = save_live_registers(sasm, id != C1StubId::handle_exception_nofpu_id, Rexception_pc);
break;
- case handle_exception_from_callee_id:
+ case C1StubId::handle_exception_from_callee_id:
// At this point all registers except exception oop and exception pc are dead.
oop_map = new OopMap(frame_size_in_bytes / sizeof(jint), 0);
sasm->set_frame_size(frame_size_in_bytes / BytesPerWord);
@@ -824,13 +824,13 @@ OopMapSet* Runtime1::generate_handle_exception(StubID id, StubAssembler* sasm) {
// Restore the registers that were saved at the beginning, remove
// the frame and jump to the exception handler.
switch (id) {
- case forward_exception_id:
- case handle_exception_nofpu_id:
- case handle_exception_id:
- restore_live_registers(sasm, noreg, noreg, id != handle_exception_nofpu_id);
+ case C1StubId::forward_exception_id:
+ case C1StubId::handle_exception_nofpu_id:
+ case C1StubId::handle_exception_id:
+ restore_live_registers(sasm, noreg, noreg, id != C1StubId::handle_exception_nofpu_id);
__ bctr();
break;
- case handle_exception_from_callee_id: {
+ case C1StubId::handle_exception_from_callee_id: {
__ pop_frame();
__ ld(Rexception_pc, _abi0(lr), R1_SP);
__ mtlr(Rexception_pc);
diff --git a/src/hotspot/cpu/ppc/c2_MacroAssembler_ppc.cpp b/src/hotspot/cpu/ppc/c2_MacroAssembler_ppc.cpp
index cc69c0abe361f..1147c3b42b25f 100644
--- a/src/hotspot/cpu/ppc/c2_MacroAssembler_ppc.cpp
+++ b/src/hotspot/cpu/ppc/c2_MacroAssembler_ppc.cpp
@@ -39,12 +39,12 @@
void C2_MacroAssembler::fast_lock_lightweight(ConditionRegister flag, Register obj, Register box,
Register tmp1, Register tmp2, Register tmp3) {
- compiler_fast_lock_lightweight_object(flag, obj, tmp1, tmp2, tmp3);
+ compiler_fast_lock_lightweight_object(flag, obj, box, tmp1, tmp2, tmp3);
}
void C2_MacroAssembler::fast_unlock_lightweight(ConditionRegister flag, Register obj, Register box,
Register tmp1, Register tmp2, Register tmp3) {
- compiler_fast_unlock_lightweight_object(flag, obj, tmp1, tmp2, tmp3);
+ compiler_fast_unlock_lightweight_object(flag, obj, box, tmp1, tmp2, tmp3);
}
// Intrinsics for CompactStrings
diff --git a/src/hotspot/cpu/ppc/gc/g1/g1BarrierSetAssembler_ppc.cpp b/src/hotspot/cpu/ppc/gc/g1/g1BarrierSetAssembler_ppc.cpp
index 7d230d301c22b..39693bdf925bf 100644
--- a/src/hotspot/cpu/ppc/gc/g1/g1BarrierSetAssembler_ppc.cpp
+++ b/src/hotspot/cpu/ppc/gc/g1/g1BarrierSetAssembler_ppc.cpp
@@ -41,10 +41,20 @@
#include "c1/c1_LIRAssembler.hpp"
#include "c1/c1_MacroAssembler.hpp"
#include "gc/g1/c1/g1BarrierSetC1.hpp"
-#endif
+#endif // COMPILER1
+#ifdef COMPILER2
+#include "gc/g1/c2/g1BarrierSetC2.hpp"
+#endif // COMPILER2
#define __ masm->
+static void generate_marking_inactive_test(MacroAssembler* masm) {
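+ // Sets CCR0 so that "equal" means SATB marking is inactive for the current thread.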
+ int active_offset = in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset());
+ assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption");
+ __ lbz(R0, active_offset, R16_thread); // R0 := *(mark queue active address)
+ __ cmpwi(CCR0, R0, 0);
+}
+
void G1BarrierSetAssembler::gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators,
Register from, Register to, Register count,
Register preserve1, Register preserve2) {
@@ -58,13 +68,7 @@ void G1BarrierSetAssembler::gen_write_ref_array_pre_barrier(MacroAssembler* masm
Label filtered;
// Is marking active?
- if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) {
- __ lwz(R0, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset()), R16_thread);
- } else {
- guarantee(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption");
- __ lbz(R0, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset()), R16_thread);
- }
- __ cmpdi(CCR0, R0, 0);
+ generate_marking_inactive_test(masm);
__ beq(CCR0, filtered);
__ save_LR(R0);
@@ -109,35 +113,48 @@ void G1BarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* mas
__ restore_LR(R0);
}
+static void generate_queue_insertion(MacroAssembler* masm, ByteSize index_offset, ByteSize buffer_offset, Label& runtime,
+ const Register value, const Register temp) {
+ assert_different_registers(value, temp);
+ // Can we store a value in the given thread's buffer?
+ // (The index field is typed as size_t.)
+ __ ld(temp, in_bytes(index_offset), R16_thread); // temp := *(index address)
+ __ cmpdi(CCR0, temp, 0); // jump to runtime if index == 0 (full buffer)
+ __ beq(CCR0, runtime);
+ // The buffer is not full, store value into it.
+ __ ld(R0, in_bytes(buffer_offset), R16_thread); // R0 := buffer address
+ __ addi(temp, temp, -wordSize); // temp := next index
+ __ std(temp, in_bytes(index_offset), R16_thread); // *(index address) := next index
+ __ stdx(value, temp, R0); // *(buffer address + next index) := value
+}
+
void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm, DecoratorSet decorators,
Register obj, RegisterOrConstant ind_or_offs, Register pre_val,
Register tmp1, Register tmp2,
MacroAssembler::PreservationLevel preservation_level) {
+ assert_different_registers(pre_val, tmp1, tmp2);
+
bool not_null = (decorators & IS_NOT_NULL) != 0,
preloaded = obj == noreg;
Register nv_save = noreg;
- if (preloaded) {
+ // Determine necessary runtime invocation preservation measures
+ const bool needs_frame = preservation_level >= MacroAssembler::PRESERVATION_FRAME_LR;
+ const bool preserve_gp_registers = preservation_level >= MacroAssembler::PRESERVATION_FRAME_LR_GP_REGS;
+ const bool preserve_fp_registers = preservation_level >= MacroAssembler::PRESERVATION_FRAME_LR_GP_FP_REGS;
+ int nbytes_save = 0;
+
+ if (pre_val->is_volatile() && preloaded && !preserve_gp_registers) {
// We are not loading the previous value so make
// sure that we don't trash the value in pre_val
// with the code below.
- assert_different_registers(pre_val, tmp1, tmp2);
- if (pre_val->is_volatile()) {
- nv_save = !tmp1->is_volatile() ? tmp1 : tmp2;
- assert(!nv_save->is_volatile(), "need one nv temp register if pre_val lives in volatile register");
- }
+ nv_save = !tmp1->is_volatile() ? tmp1 : tmp2;
+ assert(!nv_save->is_volatile(), "need one nv temp register if pre_val lives in volatile register");
}
Label runtime, filtered;
- // Is marking active?
- if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) {
- __ lwz(tmp1, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset()), R16_thread);
- } else {
- guarantee(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption");
- __ lbz(tmp1, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset()), R16_thread);
- }
- __ cmpdi(CCR0, tmp1, 0);
+ generate_marking_inactive_test(masm);
__ beq(CCR0, filtered);
// Do we need to load the previous value?
@@ -175,28 +192,12 @@ void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm, Decorator
// Can we store original value in the thread's buffer?
// Is index == 0?
// (The index field is typed as size_t.)
- const Register Rbuffer = tmp1, Rindex = tmp2;
-
- __ ld(Rindex, in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset()), R16_thread);
- __ cmpdi(CCR0, Rindex, 0);
- __ beq(CCR0, runtime); // If index == 0, goto runtime.
- __ ld(Rbuffer, in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset()), R16_thread);
-
- __ addi(Rindex, Rindex, -wordSize); // Decrement index.
- __ std(Rindex, in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset()), R16_thread);
-
- // Record the previous value.
- __ stdx(pre_val, Rbuffer, Rindex);
+ generate_queue_insertion(masm, G1ThreadLocalData::satb_mark_queue_index_offset(), G1ThreadLocalData::satb_mark_queue_buffer_offset(),
+ runtime, pre_val, tmp1);
__ b(filtered);
__ bind(runtime);
- // Determine necessary runtime invocation preservation measures
- const bool needs_frame = preservation_level >= MacroAssembler::PRESERVATION_FRAME_LR;
- const bool preserve_gp_registers = preservation_level >= MacroAssembler::PRESERVATION_FRAME_LR_GP_REGS;
- const bool preserve_fp_registers = preservation_level >= MacroAssembler::PRESERVATION_FRAME_LR_GP_FP_REGS;
- int nbytes_save = 0;
-
// May need to preserve LR. Also needed if current frame is not compatible with C calling convention.
if (needs_frame) {
if (preserve_gp_registers) {
@@ -210,11 +211,11 @@ void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm, Decorator
__ push_frame_reg_args(nbytes_save, tmp2);
}
- if (pre_val->is_volatile() && preloaded && !preserve_gp_registers) {
+ if (nv_save != noreg) {
__ mr(nv_save, pre_val); // Save pre_val across C call if it was preloaded.
}
__ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), pre_val, R16_thread);
- if (pre_val->is_volatile() && preloaded && !preserve_gp_registers) {
+ if (nv_save != noreg) {
__ mr(pre_val, nv_save); // restore
}
@@ -230,6 +231,26 @@ void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm, Decorator
__ bind(filtered);
}
+static void generate_region_crossing_test(MacroAssembler* masm, const Register store_addr, const Register new_val) {
+ __ xorr(R0, store_addr, new_val); // R0 := store address ^ new value
+ __ srdi_(R0, R0, G1HeapRegion::LogOfHRGrainBytes); // R0 := ((store address ^ new value) >> LogOfHRGrainBytes); record form sets CCR0
+}
+
+static Address generate_card_young_test(MacroAssembler* masm, const Register store_addr, const Register tmp1, const Register tmp2) {
+ CardTableBarrierSet* ct = barrier_set_cast<CardTableBarrierSet*>(BarrierSet::barrier_set());
+ __ load_const_optimized(tmp1, (address)(ct->card_table()->byte_map_base()), tmp2);
+ __ srdi(tmp2, store_addr, CardTable::card_shift()); // tmp2 := card offset relative to card table base
+ __ lbzx(R0, tmp1, tmp2); // R0 := card value
+ __ cmpwi(CCR0, R0, (int)G1CardTable::g1_young_card_val());
+ return Address(tmp1, tmp2); // return card address
+}
+
+static void generate_card_dirty_test(MacroAssembler* masm, Address card_addr) {
+ __ membar(Assembler::StoreLoad); // Must reload after StoreLoad membar due to concurrent refinement
+ __ lbzx(R0, card_addr.base(), card_addr.index()); // R0 := card value (reloaded)
+ __ cmpwi(CCR0, R0, (int)G1CardTable::dirty_card_val()); // card value == dirty_card_val?
+}
+
void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm, DecoratorSet decorators,
Register store_addr, Register new_val,
Register tmp1, Register tmp2, Register tmp3,
@@ -241,9 +262,7 @@ void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm, Decorato
CardTableBarrierSet* ct = barrier_set_cast<CardTableBarrierSet*>(BarrierSet::barrier_set());
- // Does store cross heap regions?
- __ xorr(tmp1, store_addr, new_val);
- __ srdi_(tmp1, tmp1, G1HeapRegion::LogOfHRGrainBytes);
+ generate_region_crossing_test(masm, store_addr, new_val);
__ beq(CCR0, filtered);
// Crosses regions, storing null?
@@ -257,43 +276,22 @@ void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm, Decorato
__ beq(CCR0, filtered);
}
- // Storing region crossing non-null, is card already dirty?
- const Register Rcard_addr = tmp1;
- Register Rbase = tmp2;
- __ load_const_optimized(Rbase, (address)(ct->card_table()->byte_map_base()), /*temp*/ tmp3);
-
- __ srdi(Rcard_addr, store_addr, CardTable::card_shift());
-
- // Get the address of the card.
- __ lbzx(/*card value*/ tmp3, Rbase, Rcard_addr);
- __ cmpwi(CCR0, tmp3, (int)G1CardTable::g1_young_card_val());
+ Address card_addr = generate_card_young_test(masm, store_addr, tmp1, tmp2);
__ beq(CCR0, filtered);
- __ membar(Assembler::StoreLoad);
- __ lbzx(/*card value*/ tmp3, Rbase, Rcard_addr); // Reload after membar.
- __ cmpwi(CCR0, tmp3 /* card value */, (int)G1CardTable::dirty_card_val());
+ generate_card_dirty_test(masm, card_addr);
__ beq(CCR0, filtered);
- // Storing a region crossing, non-null oop, card is clean.
- // Dirty card and log.
- __ li(tmp3, (int)G1CardTable::dirty_card_val());
- //release(); // G1: oops are allowed to get visible after dirty marking.
- __ stbx(tmp3, Rbase, Rcard_addr);
-
- __ add(Rcard_addr, Rbase, Rcard_addr); // This is the address which needs to get enqueued.
- Rbase = noreg; // end of lifetime
+ __ li(R0, (int)G1CardTable::dirty_card_val());
+ __ stbx(R0, card_addr.base(), card_addr.index()); // *(card address) := dirty_card_val
- const Register Rqueue_index = tmp2,
- Rqueue_buf = tmp3;
- __ ld(Rqueue_index, in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset()), R16_thread);
- __ cmpdi(CCR0, Rqueue_index, 0);
- __ beq(CCR0, runtime); // index == 0 then jump to runtime
- __ ld(Rqueue_buf, in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset()), R16_thread);
+ Register Rcard_addr = tmp3;
+ __ add(Rcard_addr, card_addr.base(), card_addr.index()); // This is the address which needs to get enqueued.
- __ addi(Rqueue_index, Rqueue_index, -wordSize); // decrement index
- __ std(Rqueue_index, in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset()), R16_thread);
-
- __ stdx(Rcard_addr, Rqueue_buf, Rqueue_index); // store card
+ generate_queue_insertion(masm,
+ G1ThreadLocalData::dirty_card_queue_index_offset(),
+ G1ThreadLocalData::dirty_card_queue_buffer_offset(),
+ runtime, Rcard_addr, tmp1);
__ b(filtered);
__ bind(runtime);
@@ -392,6 +390,142 @@ void G1BarrierSetAssembler::resolve_jobject(MacroAssembler* masm, Register value
__ bind(done);
}
+#ifdef COMPILER2
+
+static void generate_c2_barrier_runtime_call(MacroAssembler* masm, G1BarrierStubC2* stub, const Register arg, const address runtime_path) {
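+ // Spill registers live across the stub, then call the runtime entry with (arg, R16_thread).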
+ SaveLiveRegisters save_registers(masm, stub);
+ __ call_VM_leaf(runtime_path, arg, R16_thread);
+}
+
+void G1BarrierSetAssembler::g1_write_barrier_pre_c2(MacroAssembler* masm,
+ Register obj,
+ Register pre_val,
+ Register tmp1,
+ Register tmp2,
+ G1PreBarrierStubC2* stub) {
+ assert_different_registers(obj, tmp1, tmp2, R0);
+ assert_different_registers(pre_val, tmp1, R0);
+ assert(!UseCompressedOops || tmp2 != noreg, "tmp2 needed with CompressedOops");
+
+ stub->initialize_registers(obj, pre_val, R16_thread, tmp1, tmp2);
+
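+ // Fast path: the SATB pre-barrier is only needed while concurrent marking is active.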
+ generate_marking_inactive_test(masm);
+ __ bc_far_optimized(Assembler::bcondCRbiIs0, __ bi0(CCR0, Assembler::equal), *stub->entry());
+
+ __ bind(*stub->continuation());
+}
+
+void G1BarrierSetAssembler::generate_c2_pre_barrier_stub(MacroAssembler* masm,
+ G1PreBarrierStubC2* stub) const {
+ Assembler::InlineSkippedInstructionsCounter skip_counter(masm);
+ Label runtime;
+ Register obj = stub->obj();
+ Register pre_val = stub->pre_val();
+ Register tmp1 = stub->tmp1();
+
+ __ bind(*stub->entry());
+
+ if (obj != noreg) {
+ // Note: C2 currently doesn't use implicit null checks with barriers.
+ // Otherwise, obj could be null and the following instruction would raise a SIGSEGV.
+ if (UseCompressedOops) {
+ __ lwz(pre_val, 0, obj);
+ } else {
+ __ ld(pre_val, 0, obj);
+ }
+ }
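+ // A null previous value never needs to be recorded in the SATB queue.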
+ __ cmpdi(CCR0, pre_val, 0);
+ __ bc_far_optimized(Assembler::bcondCRbiIs1, __ bi0(CCR0, Assembler::equal), *stub->continuation());
+
+ Register pre_val_decoded = pre_val;
+ if (UseCompressedOops) {
+ pre_val_decoded = __ decode_heap_oop_not_null(stub->tmp2(), pre_val);
+ }
+
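+ // Try to enqueue the previous value in the thread-local SATB queue;
+ // branch to the runtime path below when the queue is full (index == 0).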
+ generate_queue_insertion(masm,
+ G1ThreadLocalData::satb_mark_queue_index_offset(),
+ G1ThreadLocalData::satb_mark_queue_buffer_offset(),
+ runtime, pre_val_decoded, tmp1);
+ __ b(*stub->continuation());
+
+ __ bind(runtime);
+ generate_c2_barrier_runtime_call(masm, stub, pre_val_decoded, CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry));
+ __ b(*stub->continuation());
+}
+
+void G1BarrierSetAssembler::g1_write_barrier_post_c2(MacroAssembler* masm,
+ Register store_addr,
+ Register new_val,
+ Register tmp1,
+ Register tmp2,
+ G1PostBarrierStubC2* stub,
+ bool decode_new_val) {
+ assert_different_registers(store_addr, new_val, tmp1, R0);
+ assert_different_registers(store_addr, tmp1, tmp2, R0);
+
+ stub->initialize_registers(R16_thread, tmp1, tmp2);
+
+ bool null_check_required = (stub->barrier_data() & G1C2BarrierPostNotNull) == 0;
+ Register new_val_decoded = new_val;
+
+ if (decode_new_val) {
+ assert(UseCompressedOops, "or should not be here");
+ if (null_check_required && CompressedOops::base() != nullptr) {
+ // We prefer doing the null check after the region crossing check.
+ // Only compressed oop modes with base != null require a null check here.
+ __ cmpwi(CCR0, new_val, 0);
+ __ beq(CCR0, *stub->continuation());
+ null_check_required = false;
+ }
+ new_val_decoded = __ decode_heap_oop_not_null(tmp2, new_val);
+ }
+
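+ // Filter: a store whose target and new value lie in the same region needs no post barrier.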
+ generate_region_crossing_test(masm, store_addr, new_val_decoded);
+ __ beq(CCR0, *stub->continuation());
+
+ // crosses regions, storing null?
+ if (null_check_required) {
+ __ cmpdi(CCR0, new_val_decoded, 0);
+ __ beq(CCR0, *stub->continuation());
+ }
+
+ Address card_addr = generate_card_young_test(masm, store_addr, tmp1, tmp2);
+ assert(card_addr.base() == tmp1 && card_addr.index() == tmp2, "needed by post barrier stub");
+ __ bc_far_optimized(Assembler::bcondCRbiIs0, __ bi0(CCR0, Assembler::equal), *stub->entry());
+
+ __ bind(*stub->continuation());
+}
+
+void G1BarrierSetAssembler::generate_c2_post_barrier_stub(MacroAssembler* masm,
+ G1PostBarrierStubC2* stub) const {
+ Assembler::InlineSkippedInstructionsCounter skip_counter(masm);
+ Label runtime;
+ Address card_addr(stub->tmp1(), stub->tmp2()); // See above.
+
+ __ bind(*stub->entry());
+
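+ // Nothing to do if the card has already been dirtied by a concurrent store.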
+ generate_card_dirty_test(masm, card_addr);
+ __ bc_far_optimized(Assembler::bcondCRbiIs1, __ bi0(CCR0, Assembler::equal), *stub->continuation());
+
+ __ li(R0, (int)G1CardTable::dirty_card_val());
+ __ stbx(R0, card_addr.base(), card_addr.index()); // *(card address) := dirty_card_val
+
+ Register Rcard_addr = stub->tmp1();
+ __ add(Rcard_addr, card_addr.base(), card_addr.index()); // This is the address which needs to get enqueued.
+
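+ // Try to enqueue the card address in the thread-local dirty card queue;
+ // branch to the runtime path below when the queue is full (index == 0).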
+ generate_queue_insertion(masm,
+ G1ThreadLocalData::dirty_card_queue_index_offset(),
+ G1ThreadLocalData::dirty_card_queue_buffer_offset(),
+ runtime, Rcard_addr, stub->tmp2());
+ __ b(*stub->continuation());
+
+ __ bind(runtime);
+ generate_c2_barrier_runtime_call(masm, stub, Rcard_addr, CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry));
+ __ b(*stub->continuation());
+}
+
+#endif // COMPILER2
+
#ifdef COMPILER1
#undef __
@@ -470,13 +604,7 @@ void G1BarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler*
__ std(tmp2, -24, R1_SP);
// Is marking still active?
- if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) {
- __ lwz(tmp, satb_q_active_byte_offset, R16_thread);
- } else {
- assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption");
- __ lbz(tmp, satb_q_active_byte_offset, R16_thread);
- }
- __ cmpdi(CCR0, tmp, 0);
+ generate_marking_inactive_test(sasm);
__ beq(CCR0, marking_not_active);
__ bind(restart);
diff --git a/src/hotspot/cpu/ppc/gc/g1/g1BarrierSetAssembler_ppc.hpp b/src/hotspot/cpu/ppc/gc/g1/g1BarrierSetAssembler_ppc.hpp
index d9a252ff6eaee..1c9fe8a5d106f 100644
--- a/src/hotspot/cpu/ppc/gc/g1/g1BarrierSetAssembler_ppc.hpp
+++ b/src/hotspot/cpu/ppc/gc/g1/g1BarrierSetAssembler_ppc.hpp
@@ -30,10 +30,16 @@
#include "gc/shared/modRefBarrierSetAssembler.hpp"
#include "utilities/macros.hpp"
+#ifdef COMPILER2
+#include "gc/g1/c2/g1BarrierSetC2.hpp"
+#endif
+
class LIR_Assembler;
class StubAssembler;
class G1PreBarrierStub;
class G1PostBarrierStub;
+class G1PreBarrierStubC2;
+class G1PostBarrierStubC2;
class G1BarrierSetAssembler: public ModRefBarrierSetAssembler {
protected:
@@ -59,6 +65,25 @@ class G1BarrierSetAssembler: public ModRefBarrierSetAssembler {
MacroAssembler::PreservationLevel preservation_level);
public:
+#ifdef COMPILER2
+ void g1_write_barrier_pre_c2(MacroAssembler* masm,
+ Register obj,
+ Register pre_val,
+ Register tmp1,
+ Register tmp2,
+ G1PreBarrierStubC2* c2_stub);
+ void generate_c2_pre_barrier_stub(MacroAssembler* masm,
+ G1PreBarrierStubC2* stub) const;
+ void g1_write_barrier_post_c2(MacroAssembler* masm,
+ Register store_addr,
+ Register new_val,
+ Register tmp1,
+ Register tmp2,
+ G1PostBarrierStubC2* c2_stub,
+ bool decode_new_val);
+ void generate_c2_post_barrier_stub(MacroAssembler* masm,
+ G1PostBarrierStubC2* stub) const;
+#endif
#ifdef COMPILER1
void gen_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrierStub* stub);
void gen_post_barrier_stub(LIR_Assembler* ce, G1PostBarrierStub* stub);
diff --git a/src/hotspot/cpu/ppc/gc/g1/g1_ppc.ad b/src/hotspot/cpu/ppc/gc/g1/g1_ppc.ad
new file mode 100644
index 0000000000000..f4163242cad7b
--- /dev/null
+++ b/src/hotspot/cpu/ppc/gc/g1/g1_ppc.ad
@@ -0,0 +1,684 @@
+//
+// Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
+// Copyright (c) 2024 SAP SE. All rights reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+//
+// This code is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License version 2 only, as
+// published by the Free Software Foundation.
+//
+// This code is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+// version 2 for more details (a copy is included in the LICENSE file that
+// accompanied this code).
+//
+// You should have received a copy of the GNU General Public License version
+// 2 along with this work; if not, write to the Free Software Foundation,
+// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+//
+// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+// or visit www.oracle.com if you need additional information or have any
+// questions.
+//
+
+source_hpp %{
+
+#include "gc/g1/c2/g1BarrierSetC2.hpp"
+#include "gc/shared/gc_globals.hpp"
+
+%}
+
+source %{
+
+#include "gc/g1/g1BarrierSetAssembler_ppc.hpp"
+#include "gc/g1/g1BarrierSetRuntime.hpp"
+
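+// Expands the inline fast path of the G1 pre-barrier and creates the out-of-line
+// stub. Registers in 'preserve' are kept live across the stub's runtime call;
+// registers in 'no_preserve' are explicitly excluded from saving.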
+static void pre_write_barrier(MacroAssembler* masm,
+ const MachNode* node,
+ Register obj,
+ Register pre_val,
+ Register tmp1,
+ Register tmp2 = noreg, // only needed with CompressedOops when pre_val needs to be preserved
+ RegSet preserve = RegSet(),
+ RegSet no_preserve = RegSet()) {
+ if (!G1PreBarrierStubC2::needs_barrier(node)) {
+ return;
+ }
+ Assembler::InlineSkippedInstructionsCounter skip_counter(masm);
+ G1BarrierSetAssembler* g1_asm = static_cast<G1BarrierSetAssembler*>(BarrierSet::barrier_set()->barrier_set_assembler());
+ G1PreBarrierStubC2* const stub = G1PreBarrierStubC2::create(node);
+ for (RegSetIterator<Register> reg = preserve.begin(); *reg != noreg; ++reg) {
+ stub->preserve(*reg);
+ }
+ for (RegSetIterator<Register> reg = no_preserve.begin(); *reg != noreg; ++reg) {
+ stub->dont_preserve(*reg);
+ }
+ g1_asm->g1_write_barrier_pre_c2(masm, obj, pre_val, tmp1, (tmp2 != noreg) ? tmp2 : pre_val, stub);
+}
+
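+// Expands the inline fast path of the G1 post-barrier (region-crossing, null and
+// young-card filters) and creates the out-of-line stub that dirties and enqueues the card.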
+static void post_write_barrier(MacroAssembler* masm,
+ const MachNode* node,
+ Register store_addr,
+ Register new_val,
+ Register tmp1,
+ Register tmp2,
+ bool decode_new_val = false) {
+ if (!G1PostBarrierStubC2::needs_barrier(node)) {
+ return;
+ }
+ Assembler::InlineSkippedInstructionsCounter skip_counter(masm);
+ G1BarrierSetAssembler* g1_asm = static_cast<G1BarrierSetAssembler*>(BarrierSet::barrier_set()->barrier_set_assembler());
+ G1PostBarrierStubC2* const stub = G1PostBarrierStubC2::create(node);
+ g1_asm->g1_write_barrier_post_c2(masm, store_addr, new_val, tmp1, tmp2, stub, decode_new_val);
+}
+
+%}
+
+instruct g1StoreP(indirect mem, iRegPsrc src, iRegPdst tmp1, iRegPdst tmp2, flagsRegCR0 cr0)
+%{
+ predicate(UseG1GC && n->as_Store()->barrier_data() != 0);
+ match(Set mem (StoreP mem src));
+ effect(TEMP tmp1, TEMP tmp2, KILL cr0);
+ ins_cost(2 * MEMORY_REF_COST);
+ format %{ "std $mem, $src\t# ptr" %}
+ ins_encode %{
+ pre_write_barrier(masm, this,
+ $mem$$Register,
+ $tmp1$$Register,
+ $tmp2$$Register,
+ noreg,
+ RegSet::of($mem$$Register, $src$$Register) /* preserve */);
+ __ std($src$$Register, 0, $mem$$Register);
+ post_write_barrier(masm, this,
+ $mem$$Register,
+ $src$$Register /* new_val */,
+ $tmp1$$Register,
+ $tmp2$$Register);
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
+instruct g1StoreN(indirect mem, iRegNsrc src, iRegPdst tmp1, iRegPdst tmp2, flagsRegCR0 cr0)
+%{
+ predicate(UseG1GC && n->as_Store()->barrier_data() != 0);
+ match(Set mem (StoreN mem src));
+ effect(TEMP tmp1, TEMP tmp2, KILL cr0);
+ ins_cost(2 * MEMORY_REF_COST);
+ format %{ "stw $mem, $src\t# ptr" %}
+ ins_encode %{
+ pre_write_barrier(masm, this,
+ $mem$$Register,
+ $tmp1$$Register,
+ $tmp2$$Register,
+ noreg,
+ RegSet::of($mem$$Register, $src$$Register) /* preserve */);
+ __ stw($src$$Register, 0, $mem$$Register);
+ post_write_barrier(masm, this,
+ $mem$$Register,
+ $src$$Register /* new_val */,
+ $tmp1$$Register,
+ $tmp2$$Register,
+ true /* decode_new_val */);
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
+instruct g1EncodePAndStoreN(indirect mem, iRegPsrc src, iRegPdst tmp1, iRegPdst tmp2, flagsRegCR0 cr0)
+%{
+ predicate(UseG1GC && n->as_Store()->barrier_data() != 0);
+ match(Set mem (StoreN mem (EncodeP src)));
+ effect(TEMP tmp1, TEMP tmp2, KILL cr0);
+ ins_cost(2 * MEMORY_REF_COST);
+ format %{ "encode_heap_oop $src\n\t"
+ "stw $mem, $src\t# ptr" %}
+ ins_encode %{
+ pre_write_barrier(masm, this,
+ $mem$$Register,
+ $tmp1$$Register,
+ $tmp2$$Register,
+ noreg,
+ RegSet::of($mem$$Register, $src$$Register) /* preserve */);
+ Register encoded_oop = noreg;
+ if ((barrier_data() & G1C2BarrierPostNotNull) == 0) {
+ encoded_oop = __ encode_heap_oop($tmp2$$Register, $src$$Register);
+ } else {
+ encoded_oop = __ encode_heap_oop_not_null($tmp2$$Register, $src$$Register);
+ }
+ __ stw(encoded_oop, 0, $mem$$Register);
+ post_write_barrier(masm, this,
+ $mem$$Register,
+ $src$$Register /* new_val */,
+ $tmp1$$Register,
+ $tmp2$$Register);
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
+instruct g1CompareAndExchangeP(iRegPdst res, indirect mem, iRegPsrc oldval, iRegPsrc newval, iRegPdst tmp1, iRegPdst tmp2, flagsRegCR0 cr0)
+%{
+ predicate(UseG1GC && n->as_LoadStore()->barrier_data() != 0 &&
+ (((CompareAndExchangeNode*)n)->order() != MemNode::acquire && ((CompareAndExchangeNode*)n)->order() != MemNode::seqcst));
+ match(Set res (CompareAndExchangeP mem (Binary oldval newval)));
+ effect(TEMP_DEF res, TEMP tmp1, TEMP tmp2, KILL cr0);
+ format %{ "cmpxchgd $newval, $mem" %}
+ ins_encode %{
+ Label no_update;
+ __ cmpxchgd(CCR0, $res$$Register, $oldval$$Register, $newval$$Register, $mem$$Register,
+ MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
+ noreg, &no_update, true);
+ // Pass oldval to SATB which is the only value which can get overwritten.
+ // Can be done after cmpxchg because there's no safepoint here.
+ pre_write_barrier(masm, this,
+ noreg,
+ $oldval$$Register,
+ $tmp1$$Register,
+ $tmp2$$Register,
+ RegSet::of($mem$$Register, $newval$$Register) /* preserve */);
+ post_write_barrier(masm, this,
+ $mem$$Register,
+ $newval$$Register,
+ $tmp1$$Register,
+ $tmp2$$Register);
+ __ bind(no_update);
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
+instruct g1CompareAndExchangeP_acq(iRegPdst res, indirect mem, iRegPsrc oldval, iRegPsrc newval, iRegPdst tmp1, iRegPdst tmp2, flagsRegCR0 cr0)
+%{
+ predicate(UseG1GC && n->as_LoadStore()->barrier_data() != 0 &&
+ (((CompareAndExchangeNode*)n)->order() == MemNode::acquire || ((CompareAndExchangeNode*)n)->order() == MemNode::seqcst));
+ match(Set res (CompareAndExchangeP mem (Binary oldval newval)));
+ effect(TEMP_DEF res, TEMP tmp1, TEMP tmp2, KILL cr0);
+ format %{ "cmpxchgd acq $newval, $mem" %}
+ ins_encode %{
+ Label no_update;
+ __ cmpxchgd(CCR0, $res$$Register, $oldval$$Register, $newval$$Register, $mem$$Register,
+ MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
+ noreg, &no_update, true);
+ // Pass oldval to SATB which is the only value which can get overwritten.
+ // Can be done after cmpxchg because there's no safepoint here.
+ pre_write_barrier(masm, this,
+ noreg,
+ $oldval$$Register,
+ $tmp1$$Register,
+ $tmp2$$Register,
+ RegSet::of($mem$$Register, $newval$$Register) /* preserve */);
+ post_write_barrier(masm, this,
+ $mem$$Register,
+ $newval$$Register,
+ $tmp1$$Register,
+ $tmp2$$Register);
+ __ bind(no_update);
+ if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
+ __ isync();
+ } else {
+ // isync would be sufficient in case of CompareAndExchangeAcquire, but we currently don't optimize for that.
+ __ sync();
+ }
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
+instruct g1CompareAndExchangeN(iRegNdst res, indirect mem, iRegNsrc oldval, iRegNsrc newval, iRegPdst tmp1, iRegPdst tmp2, flagsRegCR0 cr0)
+%{
+ predicate(UseG1GC && n->as_LoadStore()->barrier_data() != 0 &&
+ (((CompareAndExchangeNode*)n)->order() != MemNode::acquire && ((CompareAndExchangeNode*)n)->order() != MemNode::seqcst));
+ match(Set res (CompareAndExchangeN mem (Binary oldval newval)));
+ effect(TEMP_DEF res, TEMP tmp1, TEMP tmp2, KILL cr0);
+ format %{ "cmpxchgw $newval, $mem" %}
+ ins_encode %{
+ Label no_update;
+ __ cmpxchgw(CCR0, $res$$Register, $oldval$$Register, $newval$$Register, $mem$$Register,
+ MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
+ noreg, &no_update, true);
+ // Pass oldval to SATB which is the only value which can get overwritten.
+ // Can be done after cmpxchg because there's no safepoint here.
+ pre_write_barrier(masm, this,
+ noreg,
+ $oldval$$Register,
+ $tmp1$$Register,
+ $tmp2$$Register,
+ RegSet::of($mem$$Register, $newval$$Register) /* preserve */);
+ post_write_barrier(masm, this,
+ $mem$$Register,
+ $newval$$Register,
+ $tmp1$$Register,
+ $tmp2$$Register,
+ true /* decode_new_val */);
+ __ bind(no_update);
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
+instruct g1CompareAndExchangeN_acq(iRegNdst res, indirect mem, iRegNsrc oldval, iRegNsrc newval, iRegPdst tmp1, iRegPdst tmp2, flagsRegCR0 cr0)
+%{
+ predicate(UseG1GC && n->as_LoadStore()->barrier_data() != 0 &&
+ (((CompareAndExchangeNode*)n)->order() == MemNode::acquire || ((CompareAndExchangeNode*)n)->order() == MemNode::seqcst));
+ match(Set res (CompareAndExchangeN mem (Binary oldval newval)));
+ effect(TEMP_DEF res, TEMP tmp1, TEMP tmp2, KILL cr0);
+ format %{ "cmpxchgw acq $newval, $mem" %}
+ ins_encode %{
+ Label no_update;
+ __ cmpxchgw(CCR0, $res$$Register, $oldval$$Register, $newval$$Register, $mem$$Register,
+ MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
+ noreg, &no_update, true);
+ // Pass oldval to SATB which is the only value which can get overwritten.
+ // Can be done after cmpxchg because there's no safepoint here.
+ pre_write_barrier(masm, this,
+ noreg,
+ $oldval$$Register,
+ $tmp1$$Register,
+ $tmp2$$Register,
+ RegSet::of($mem$$Register, $newval$$Register) /* preserve */);
+ post_write_barrier(masm, this,
+ $mem$$Register,
+ $newval$$Register,
+ $tmp1$$Register,
+ $tmp2$$Register,
+ true /* decode_new_val */);
+ __ bind(no_update);
+ if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
+ __ isync();
+ } else {
+ // isync would be sufficient in case of CompareAndExchangeAcquire, but we currently don't optimize for that.
+ __ sync();
+ }
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
+instruct g1CompareAndSwapP(iRegIdst res, indirect mem, iRegPsrc oldval, iRegPsrc newval, iRegPdst tmp, flagsRegCR0 cr0)
+%{
+ predicate(UseG1GC && n->as_LoadStore()->barrier_data() != 0 &&
+ (((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst));
+ match(Set res (CompareAndSwapP mem (Binary oldval newval)));
+ effect(TEMP_DEF res, TEMP tmp, KILL cr0);
+ format %{ "CMPXCHGD $res, $mem, $oldval, $newval; as bool; ptr" %}
+ ins_encode %{
+ Label no_update;
+ __ li($res$$Register, 0);
+ __ cmpxchgd(CCR0, R0, $oldval$$Register, $newval$$Register, $mem$$Register,
+ MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
+ noreg, &no_update, true);
+ // Pass oldval to SATB which is the only value which can get overwritten.
+ // Can be done after cmpxchg because there's no safepoint here.
+ pre_write_barrier(masm, this,
+ noreg,
+ $oldval$$Register /* pre_val */,
+ $tmp$$Register,
+ $res$$Register /* temp */,
+ RegSet::of($mem$$Register, $newval$$Register) /* preserve */,
+ RegSet::of($res$$Register) /* no_preserve */);
+ post_write_barrier(masm, this,
+ $mem$$Register,
+ $newval$$Register,
+ $tmp$$Register,
+ $res$$Register /* temp */);
+ __ li($res$$Register, 1);
+ __ bind(no_update);
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
+instruct g1CompareAndSwapP_acq(iRegIdst res, indirect mem, iRegPsrc oldval, iRegPsrc newval, iRegPdst tmp, flagsRegCR0 cr0)
+%{
+ predicate(UseG1GC && n->as_LoadStore()->barrier_data() != 0 &&
+ (((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst));
+ match(Set res (CompareAndSwapP mem (Binary oldval newval)));
+ effect(TEMP_DEF res, TEMP tmp, KILL cr0);
+ format %{ "CMPXCHGD acq $res, $mem, $oldval, $newval; as bool; ptr" %}
+ ins_encode %{
+ Label no_update;
+ __ li($res$$Register, 0);
+ __ cmpxchgd(CCR0, R0, $oldval$$Register, $newval$$Register, $mem$$Register,
+ MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
+ noreg, &no_update, true);
+ // Pass oldval to SATB which is the only value which can get overwritten.
+ // Can be done after cmpxchg because there's no safepoint here.
+ pre_write_barrier(masm, this,
+ noreg,
+ $oldval$$Register /* pre_val */,
+ $tmp$$Register,
+ $res$$Register /* temp */,
+ RegSet::of($mem$$Register, $newval$$Register) /* preserve */,
+ RegSet::of($res$$Register) /* no_preserve */);
+ post_write_barrier(masm, this,
+ $mem$$Register,
+ $newval$$Register,
+ $tmp$$Register,
+ $res$$Register /* temp */);
+ __ li($res$$Register, 1);
+ __ bind(no_update);
+ if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
+ __ isync();
+ } else {
+ // isync would be sufficient in case of CompareAndExchangeAcquire, but we currently don't optimize for that.
+ __ sync();
+ }
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
+instruct g1CompareAndSwapN(iRegIdst res, indirect mem, iRegNsrc oldval, iRegNsrc newval, iRegPdst tmp, flagsRegCR0 cr0)
+%{
+ predicate(UseG1GC && n->as_LoadStore()->barrier_data() != 0 &&
+ (((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst));
+ match(Set res (CompareAndSwapN mem (Binary oldval newval)));
+ effect(TEMP_DEF res, TEMP tmp, KILL cr0);
+ format %{ "CMPXCHGW $res, $mem, $oldval, $newval; as bool; ptr" %}
+ ins_encode %{
+ Label no_update;
+ __ li($res$$Register, 0);
+ __ cmpxchgw(CCR0, R0, $oldval$$Register, $newval$$Register, $mem$$Register,
+ MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
+ noreg, &no_update, true);
+ // Pass oldval to SATB which is the only value which can get overwritten.
+ // Can be done after cmpxchg because there's no safepoint here.
+ pre_write_barrier(masm, this,
+ noreg,
+ $oldval$$Register /* pre_val */,
+ $tmp$$Register,
+ $res$$Register /* temp */,
+ RegSet::of($mem$$Register, $newval$$Register) /* preserve */,
+ RegSet::of($res$$Register) /* no_preserve */);
+ post_write_barrier(masm, this,
+ $mem$$Register,
+ $newval$$Register,
+ $tmp$$Register,
+ $res$$Register /* temp */,
+ true /* decode_new_val */);
+ __ li($res$$Register, 1);
+ __ bind(no_update);
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
+instruct g1CompareAndSwapN_acq(iRegIdst res, indirect mem, iRegNsrc oldval, iRegNsrc newval, iRegPdst tmp, flagsRegCR0 cr0)
+%{
+ predicate(UseG1GC && n->as_LoadStore()->barrier_data() != 0 &&
+ (((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst));
+ match(Set res (CompareAndSwapN mem (Binary oldval newval)));
+ effect(TEMP_DEF res, TEMP tmp, KILL cr0);
+ format %{ "CMPXCHGW acq $res, $mem, $oldval, $newval; as bool; ptr" %}
+ ins_encode %{
+ Label no_update;
+ __ li($res$$Register, 0);
+ __ cmpxchgw(CCR0, R0, $oldval$$Register, $newval$$Register, $mem$$Register,
+ MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
+ noreg, &no_update, true);
+ // Pass oldval to SATB which is the only value which can get overwritten.
+ // Can be done after cmpxchg because there's no safepoint here.
+ pre_write_barrier(masm, this,
+ noreg,
+ $oldval$$Register /* pre_val */,
+ $tmp$$Register,
+ $res$$Register /* temp */,
+ RegSet::of($mem$$Register, $newval$$Register) /* preserve */,
+ RegSet::of($res$$Register) /* no_preserve */);
+ post_write_barrier(masm, this,
+ $mem$$Register,
+ $newval$$Register,
+ $tmp$$Register,
+ $res$$Register /* temp */,
+ true /* decode_new_val */);
+ __ li($res$$Register, 1);
+ __ bind(no_update);
+ if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
+ __ isync();
+ } else {
+ // isync would be sufficient in case of CompareAndExchangeAcquire, but we currently don't optimize for that.
+ __ sync();
+ }
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
+instruct weakG1CompareAndSwapP(iRegIdst res, indirect mem, iRegPsrc oldval, iRegPsrc newval, iRegPdst tmp, flagsRegCR0 cr0)
+%{
+ predicate(UseG1GC && n->as_LoadStore()->barrier_data() != 0 &&
+ (((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst));
+ match(Set res (WeakCompareAndSwapP mem (Binary oldval newval)));
+ effect(TEMP_DEF res, TEMP tmp, KILL cr0);
+ format %{ "weak CMPXCHGD $res, $mem, $oldval, $newval; as bool; ptr" %}
+ ins_encode %{
+ Label no_update;
+ __ li($res$$Register, 0);
+ __ cmpxchgd(CCR0, R0, $oldval$$Register, $newval$$Register, $mem$$Register,
+ MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
+ noreg, &no_update, true, true);
+ // Pass oldval to SATB which is the only value which can get overwritten.
+ // Can be done after cmpxchg because there's no safepoint here.
+ pre_write_barrier(masm, this,
+ noreg,
+ $oldval$$Register /* pre_val */,
+ $tmp$$Register,
+ $res$$Register /* temp */,
+ RegSet::of($mem$$Register, $newval$$Register) /* preserve */,
+ RegSet::of($res$$Register) /* no_preserve */);
+ post_write_barrier(masm, this,
+ $mem$$Register,
+ $newval$$Register,
+ $tmp$$Register,
+ $res$$Register /* temp */);
+ __ li($res$$Register, 1);
+ __ bind(no_update);
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
+instruct weakG1CompareAndSwapP_acq(iRegIdst res, indirect mem, iRegPsrc oldval, iRegPsrc newval, iRegPdst tmp, flagsRegCR0 cr0)
+%{
+ predicate(UseG1GC && n->as_LoadStore()->barrier_data() != 0 &&
+ (((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst));
+ match(Set res (WeakCompareAndSwapP mem (Binary oldval newval)));
+ effect(TEMP_DEF res, TEMP tmp, KILL cr0);
+ format %{ "weak CMPXCHGD acq $res, $mem, $oldval, $newval; as bool; ptr" %}
+ ins_encode %{
+ Label no_update;
+ __ li($res$$Register, 0);
+ __ cmpxchgd(CCR0, R0, $oldval$$Register, $newval$$Register, $mem$$Register,
+ MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
+ noreg, &no_update, true, true);
+ // Pass oldval to SATB which is the only value which can get overwritten.
+ // Can be done after cmpxchg because there's no safepoint here.
+ pre_write_barrier(masm, this,
+ noreg,
+ $oldval$$Register /* pre_val */,
+ $tmp$$Register,
+ $res$$Register /* temp */,
+ RegSet::of($mem$$Register, $newval$$Register) /* preserve */,
+ RegSet::of($res$$Register) /* no_preserve */);
+ post_write_barrier(masm, this,
+ $mem$$Register,
+ $newval$$Register,
+ $tmp$$Register,
+ $res$$Register /* temp */);
+ __ li($res$$Register, 1);
+ if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
+ __ isync();
+ } else {
+ // isync would be sufficient in case of CompareAndExchangeAcquire, but we currently don't optimize for that.
+ __ sync();
+ }
+ __ bind(no_update); // weak version requires no memory barrier on failure
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
+instruct weakG1CompareAndSwapN(iRegIdst res, indirect mem, iRegNsrc oldval, iRegNsrc newval, iRegPdst tmp, flagsRegCR0 cr0)
+%{
+ predicate(UseG1GC && n->as_LoadStore()->barrier_data() != 0 &&
+ (((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst));
+ match(Set res (WeakCompareAndSwapN mem (Binary oldval newval)));
+ effect(TEMP_DEF res, TEMP tmp, KILL cr0);
+ format %{ "weak CMPXCHGW $res, $mem, $oldval, $newval; as bool; ptr" %}
+ ins_encode %{
+ Label no_update;
+ __ li($res$$Register, 0);
+ __ cmpxchgw(CCR0, R0, $oldval$$Register, $newval$$Register, $mem$$Register,
+ MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
+ noreg, &no_update, true, true);
+ // Pass oldval to SATB which is the only value which can get overwritten.
+ // Can be done after cmpxchg because there's no safepoint here.
+ pre_write_barrier(masm, this,
+ noreg,
+ $oldval$$Register /* pre_val */,
+ $tmp$$Register,
+ $res$$Register /* temp */,
+ RegSet::of($mem$$Register, $newval$$Register) /* preserve */,
+ RegSet::of($res$$Register) /* no_preserve */);
+ post_write_barrier(masm, this,
+ $mem$$Register,
+ $newval$$Register,
+ $tmp$$Register,
+ $res$$Register /* temp */,
+ true /* decode_new_val */);
+ __ li($res$$Register, 1);
+ __ bind(no_update);
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
+instruct weakG1CompareAndSwapN_acq(iRegIdst res, indirect mem, iRegNsrc oldval, iRegNsrc newval, iRegPdst tmp, flagsRegCR0 cr0)
+%{
+ predicate(UseG1GC && n->as_LoadStore()->barrier_data() != 0 &&
+ (((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst));
+ match(Set res (WeakCompareAndSwapN mem (Binary oldval newval)));
+ effect(TEMP_DEF res, TEMP tmp, KILL cr0);
+ format %{ "weak CMPXCHGW acq $res, $mem, $oldval, $newval; as bool; ptr" %}
+ ins_encode %{
+ Label no_update;
+ __ li($res$$Register, 0);
+ __ cmpxchgw(CCR0, R0, $oldval$$Register, $newval$$Register, $mem$$Register,
+ MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
+ noreg, &no_update, true, true);
+ // Pass oldval to SATB which is the only value which can get overwritten.
+ // Can be done after cmpxchg because there's no safepoint here.
+ pre_write_barrier(masm, this,
+ noreg,
+ $oldval$$Register /* pre_val */,
+ $tmp$$Register,
+ $res$$Register /* temp */,
+ RegSet::of($mem$$Register, $newval$$Register) /* preserve */,
+ RegSet::of($res$$Register) /* no_preserve */);
+ post_write_barrier(masm, this,
+ $mem$$Register,
+ $newval$$Register,
+ $tmp$$Register,
+ $res$$Register /* temp */,
+ true /* decode_new_val */);
+ __ li($res$$Register, 1);
+ if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
+ __ isync();
+ } else {
+ // isync would be sufficient in case of CompareAndExchangeAcquire, but we currently don't optimize for that.
+ __ sync();
+ }
+ __ bind(no_update); // weak version requires no memory barrier on failure
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
+instruct g1GetAndSetP(iRegPdst res, indirect mem, iRegPsrc newval, iRegPdst tmp1, iRegPdst tmp2, flagsRegCR0 cr0)
+%{
+ predicate(UseG1GC && n->as_LoadStore()->barrier_data() != 0);
+ match(Set res (GetAndSetP mem newval));
+ effect(TEMP_DEF res, TEMP tmp1, TEMP tmp2, KILL cr0);
+ format %{ "GetAndSetP $newval, $mem" %}
+ ins_encode %{
+ assert_different_registers($mem$$Register, $newval$$Register);
+ __ getandsetd($res$$Register, $newval$$Register, $mem$$Register,
+ MacroAssembler::cmpxchgx_hint_atomic_update());
+ // Can be done after cmpxchg because there's no safepoint here.
+ pre_write_barrier(masm, this,
+ noreg /* obj */,
+ $res$$Register /* res */,
+ $tmp1$$Register,
+ $tmp2$$Register,
+ RegSet::of($mem$$Register, $newval$$Register) /* preserve */);
+ post_write_barrier(masm, this,
+ $mem$$Register,
+ $newval$$Register,
+ $tmp1$$Register,
+ $tmp2$$Register);
+ if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
+ __ isync();
+ } else {
+ __ sync();
+ }
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
+instruct g1GetAndSetN(iRegNdst res, indirect mem, iRegNsrc newval, iRegPdst tmp1, iRegPdst tmp2, flagsRegCR0 cr0)
+%{
+ predicate(UseG1GC && n->as_LoadStore()->barrier_data() != 0);
+ match(Set res (GetAndSetN mem newval));
+ effect(TEMP_DEF res, TEMP tmp1, TEMP tmp2, KILL cr0);
+ format %{ "GetAndSetN $newval, $mem" %}
+ ins_encode %{
+ assert_different_registers($mem$$Register, $newval$$Register);
+ __ getandsetw($res$$Register, $newval$$Register, $mem$$Register,
+ MacroAssembler::cmpxchgx_hint_atomic_update());
+ // Can be done after cmpxchg because there's no safepoint here.
+ pre_write_barrier(masm, this,
+ noreg /* obj */,
+ $res$$Register /* res */,
+ $tmp1$$Register,
+ $tmp2$$Register,
+ RegSet::of($mem$$Register, $newval$$Register) /* preserve */);
+ post_write_barrier(masm, this,
+ $mem$$Register,
+ $newval$$Register,
+ $tmp1$$Register,
+ $tmp2$$Register,
+ true /* decode_new_val */);
+ if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
+ __ isync();
+ } else {
+ __ sync();
+ }
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
+instruct g1LoadP(iRegPdst dst, memoryAlg4 mem, iRegPdst tmp, flagsRegCR0 cr0)
+%{
+ predicate(UseG1GC && n->as_Load()->is_unordered() && n->as_Load()->barrier_data() != 0);
+ // This instruction does not need an acquiring counterpart because it is only
+ // used for reference loading (Reference::get()).
+ match(Set dst (LoadP mem));
+ effect(TEMP_DEF dst, TEMP tmp, KILL cr0);
+ ins_cost(2 * MEMORY_REF_COST);
+ format %{ "ld $dst, $mem\t# ptr" %}
+ ins_encode %{
+ __ ld($dst$$Register, $mem$$disp, $mem$$base$$Register);
+ pre_write_barrier(masm, this,
+ noreg /* obj */,
+ $dst$$Register /* pre_val */,
+ $tmp$$Register);
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
+instruct g1LoadN(iRegNdst dst, memoryAlg4 mem, iRegPdst tmp1, iRegPdst tmp2, flagsRegCR0 cr0)
+%{
+ predicate(UseG1GC && n->as_Load()->is_unordered() && n->as_Load()->barrier_data() != 0);
+ // This instruction does not need an acquiring counterpart because it is only
+ // used for reference loading (Reference::get()).
+ match(Set dst (LoadN mem));
+ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2, KILL cr0);
+ ins_cost(2 * MEMORY_REF_COST);
+ format %{ "lwz $dst, $mem\t# ptr" %}
+ ins_encode %{
+ __ lwz($dst$$Register, $mem$$disp, $mem$$base$$Register);
+ pre_write_barrier(masm, this,
+ noreg /* obj */,
+ $dst$$Register,
+ $tmp1$$Register,
+ $tmp2$$Register);
+ %}
+ ins_pipe(pipe_class_default);
+%}
diff --git a/src/hotspot/cpu/ppc/gc/shenandoah/shenandoahBarrierSetAssembler_ppc.cpp b/src/hotspot/cpu/ppc/gc/shenandoah/shenandoahBarrierSetAssembler_ppc.cpp
index 3cb5c5a628f39..5315080721249 100644
--- a/src/hotspot/cpu/ppc/gc/shenandoah/shenandoahBarrierSetAssembler_ppc.cpp
+++ b/src/hotspot/cpu/ppc/gc/shenandoah/shenandoahBarrierSetAssembler_ppc.cpp
@@ -144,9 +144,9 @@ void ShenandoahBarrierSetAssembler::arraycopy_prologue(MacroAssembler *masm, Dec
// Invoke runtime.
address jrt_address = nullptr;
if (UseCompressedOops) {
- jrt_address = CAST_FROM_FN_PTR(address, ShenandoahRuntime::arraycopy_barrier_narrow_oop_entry);
+ jrt_address = CAST_FROM_FN_PTR(address, ShenandoahRuntime::arraycopy_barrier_narrow_oop);
} else {
- jrt_address = CAST_FROM_FN_PTR(address, ShenandoahRuntime::arraycopy_barrier_oop_entry);
+ jrt_address = CAST_FROM_FN_PTR(address, ShenandoahRuntime::arraycopy_barrier_oop);
}
assert(jrt_address != nullptr, "jrt routine cannot be found");
@@ -302,7 +302,7 @@ void ShenandoahBarrierSetAssembler::satb_write_barrier_impl(MacroAssembler *masm
}
// Invoke runtime.
- __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), pre_val, R16_thread);
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre), pre_val, R16_thread);
// Restore to-be-preserved registers.
if (!preserve_gp_registers && preloaded_mode && pre_val->is_volatile()) {
@@ -906,7 +906,7 @@ void ShenandoahBarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAss
__ push_frame_reg_args(nbytes_save, R11_tmp1);
// Invoke runtime.
- __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), R0_pre_val, R16_thread);
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre), R0_pre_val, R16_thread);
// Restore to-be-preserved registers.
__ pop_frame();
diff --git a/src/hotspot/cpu/ppc/gc/z/zAddress_ppc.cpp b/src/hotspot/cpu/ppc/gc/z/zAddress_ppc.cpp
index 136fd7a8ad1cd..ddeb9adf0a9ae 100644
--- a/src/hotspot/cpu/ppc/gc/z/zAddress_ppc.cpp
+++ b/src/hotspot/cpu/ppc/gc/z/zAddress_ppc.cpp
@@ -90,7 +90,7 @@ static size_t probe_valid_max_address_bit() {
}
size_t ZPlatformAddressOffsetBits() {
- const static size_t valid_max_address_offset_bits = probe_valid_max_address_bit() + 1;
+ static const size_t valid_max_address_offset_bits = probe_valid_max_address_bit() + 1;
const size_t max_address_offset_bits = valid_max_address_offset_bits - 3;
const size_t min_address_offset_bits = max_address_offset_bits - 2;
const size_t address_offset = round_up_power_of_2(MaxHeapSize * ZVirtualToPhysicalRatio);
diff --git a/src/hotspot/cpu/ppc/interp_masm_ppc_64.cpp b/src/hotspot/cpu/ppc/interp_masm_ppc_64.cpp
index a29e0810d52ca..aa77f0169ea1a 100644
--- a/src/hotspot/cpu/ppc/interp_masm_ppc_64.cpp
+++ b/src/hotspot/cpu/ppc/interp_masm_ppc_64.cpp
@@ -968,7 +968,7 @@ void InterpreterMacroAssembler::lock_object(Register monitor, Register object) {
}
if (LockingMode == LM_LIGHTWEIGHT) {
- lightweight_lock(object, header, tmp, slow_case);
+ lightweight_lock(monitor, object, header, tmp, slow_case);
b(count_locking);
} else if (LockingMode == LM_LEGACY) {
// Load markWord from object into header.
diff --git a/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp b/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp
index 8449d74d8a861..8d8e39b8bbc00 100644
--- a/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp
+++ b/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp
@@ -2715,13 +2715,34 @@ void MacroAssembler::compiler_fast_unlock_object(ConditionRegister flag, Registe
b(success);
bind(notRecursive);
+
+ // Set owner to null.
+ // Release to satisfy the JMM
+ release();
+ li(temp, 0);
+ std(temp, in_bytes(ObjectMonitor::owner_offset()), current_header);
+ // We need a full fence after clearing owner to avoid stranding.
+ // StoreLoad achieves this.
+ membar(StoreLoad);
+
+ // Check if the entry lists are empty.
ld(temp, in_bytes(ObjectMonitor::EntryList_offset()), current_header);
ld(displaced_header, in_bytes(ObjectMonitor::cxq_offset()), current_header);
orr(temp, temp, displaced_header); // Will be 0 if both are 0.
cmpdi(flag, temp, 0);
- bne(flag, failure);
- release();
- std(temp, in_bytes(ObjectMonitor::owner_offset()), current_header);
+ beq(flag, success); // If so we are done.
+
+ // Check if there is a successor.
+ ld(temp, in_bytes(ObjectMonitor::succ_offset()), current_header);
+ cmpdi(flag, temp, 0);
+ bne(flag, success); // If so we are done.
+
+ // Save the monitor pointer in the current thread, so we can try
+ // to reacquire the lock in SharedRuntime::monitor_exit_helper().
+ std(current_header, in_bytes(JavaThread::unlocked_inflated_monitor_offset()), R16_thread);
+
+ crxor(flag, Assembler::equal, flag, Assembler::equal); // Set flag = NE => slow path
+ b(failure);
// flag == EQ indicates success, decrement held monitor count
// flag == NE indicates failure
@@ -2730,9 +2751,9 @@ void MacroAssembler::compiler_fast_unlock_object(ConditionRegister flag, Registe
bind(failure);
}
-void MacroAssembler::compiler_fast_lock_lightweight_object(ConditionRegister flag, Register obj, Register tmp1,
- Register tmp2, Register tmp3) {
- assert_different_registers(obj, tmp1, tmp2, tmp3);
+void MacroAssembler::compiler_fast_lock_lightweight_object(ConditionRegister flag, Register obj, Register box,
+ Register tmp1, Register tmp2, Register tmp3) {
+ assert_different_registers(obj, box, tmp1, tmp2, tmp3);
assert(flag == CCR0, "bad condition register");
// Handle inflated monitor.
@@ -2742,11 +2763,17 @@ void MacroAssembler::compiler_fast_lock_lightweight_object(ConditionRegister fla
// Finish fast lock unsuccessfully. MUST branch to with flag == EQ
Label slow_path;
+ if (UseObjectMonitorTable) {
+ // Clear cache in case fast locking succeeds.
+ li(tmp1, 0);
+ std(tmp1, in_bytes(BasicObjectLock::lock_offset()) + BasicLock::object_monitor_cache_offset_in_bytes(), box);
+ }
+
if (DiagnoseSyncOnValueBasedClasses != 0) {
load_klass(tmp1, obj);
lbz(tmp1, in_bytes(Klass::misc_flags_offset()), tmp1);
- testbitdi(flag, R0, tmp1, exact_log2(KlassFlags::_misc_is_value_based_class));
- bne(flag, slow_path);
+ testbitdi(CCR0, R0, tmp1, exact_log2(KlassFlags::_misc_is_value_based_class));
+ bne(CCR0, slow_path);
}
const Register mark = tmp1;
@@ -2761,8 +2788,8 @@ void MacroAssembler::compiler_fast_lock_lightweight_object(ConditionRegister fla
// Check if lock-stack is full.
lwz(top, in_bytes(JavaThread::lock_stack_top_offset()), R16_thread);
- cmplwi(flag, top, LockStack::end_offset() - 1);
- bgt(flag, slow_path);
+ cmplwi(CCR0, top, LockStack::end_offset() - 1);
+ bgt(CCR0, slow_path);
// The underflow check is elided. The recursive check will always fail
// when the lock stack is empty because of the _bad_oop_sentinel field.
@@ -2770,19 +2797,19 @@ void MacroAssembler::compiler_fast_lock_lightweight_object(ConditionRegister fla
// Check if recursive.
subi(t, top, oopSize);
ldx(t, R16_thread, t);
- cmpd(flag, obj, t);
- beq(flag, push);
+ cmpd(CCR0, obj, t);
+ beq(CCR0, push);
// Check for monitor (0b10) or locked (0b00).
ld(mark, oopDesc::mark_offset_in_bytes(), obj);
andi_(t, mark, markWord::lock_mask_in_place);
- cmpldi(flag, t, markWord::unlocked_value);
- bgt(flag, inflated);
- bne(flag, slow_path);
+ cmpldi(CCR0, t, markWord::unlocked_value);
+ bgt(CCR0, inflated);
+ bne(CCR0, slow_path);
// Not inflated.
- // Try to lock. Transition lock bits 0b00 => 0b01
+ // Try to lock. Transition lock bits 0b01 => 0b00
assert(oopDesc::mark_offset_in_bytes() == 0, "required to avoid a lea");
atomically_flip_locked_state(/* is_unlock */ false, obj, mark, slow_path, MacroAssembler::MemBarAcq);
@@ -2797,38 +2824,84 @@ void MacroAssembler::compiler_fast_lock_lightweight_object(ConditionRegister fla
{ // Handle inflated monitor.
bind(inflated);
+ // mark contains the tagged ObjectMonitor*.
+ const uintptr_t monitor_tag = markWord::monitor_value;
+ const Register monitor = mark;
+ const Register owner_addr = tmp2;
+ Label monitor_locked;
+
if (!UseObjectMonitorTable) {
- // mark contains the tagged ObjectMonitor*.
- const Register tagged_monitor = mark;
- const uintptr_t monitor_tag = markWord::monitor_value;
- const Register owner_addr = tmp2;
+ // Compute owner address.
+ addi(owner_addr, mark, in_bytes(ObjectMonitor::owner_offset()) - monitor_tag);
+ } else {
+ Label monitor_found;
+ Register cache_addr = tmp2;
+
+ // Load cache address
+ addi(cache_addr, R16_thread, in_bytes(JavaThread::om_cache_oops_offset()));
+
+ const int num_unrolled = 2;
+ for (int i = 0; i < num_unrolled; i++) {
+ ld(tmp3, 0, cache_addr);
+ cmpd(CCR0, tmp3, obj);
+ beq(CCR0, monitor_found);
+ addi(cache_addr, cache_addr, in_bytes(OMCache::oop_to_oop_difference()));
+ }
+
+ Label loop;
+
+ // Search for obj in cache.
+ bind(loop);
+
+ // Check for match.
+ ld(tmp3, 0, cache_addr);
+ cmpd(CCR0, tmp3, obj);
+ beq(CCR0, monitor_found);
+
+ // Search until null encountered, guaranteed _null_sentinel at end.
+ addi(cache_addr, cache_addr, in_bytes(OMCache::oop_to_oop_difference()));
+ cmpdi(CCR1, tmp3, 0);
+ bne(CCR1, loop);
+ // Cache Miss, CCR0.NE set from cmp above
+ b(slow_path);
+
+ bind(monitor_found);
+ ld(monitor, in_bytes(OMCache::oop_to_monitor_difference()), cache_addr);
// Compute owner address.
- addi(owner_addr, tagged_monitor, in_bytes(ObjectMonitor::owner_offset()) - monitor_tag);
-
- // CAS owner (null => current thread).
- cmpxchgd(/*flag=*/flag,
- /*current_value=*/t,
- /*compare_value=*/(intptr_t)0,
- /*exchange_value=*/R16_thread,
- /*where=*/owner_addr,
- MacroAssembler::MemBarRel | MacroAssembler::MemBarAcq,
- MacroAssembler::cmpxchgx_hint_acquire_lock());
- beq(flag, locked);
-
- // Check if recursive.
- cmpd(flag, t, R16_thread);
- bne(flag, slow_path);
-
- // Recursive.
+ addi(owner_addr, monitor, in_bytes(ObjectMonitor::owner_offset()));
+ }
+
+ // CAS owner (null => current thread).
+ cmpxchgd(/*flag=*/CCR0,
+ /*current_value=*/t,
+ /*compare_value=*/(intptr_t)0,
+ /*exchange_value=*/R16_thread,
+ /*where=*/owner_addr,
+ MacroAssembler::MemBarRel | MacroAssembler::MemBarAcq,
+ MacroAssembler::cmpxchgx_hint_acquire_lock());
+ beq(CCR0, monitor_locked);
+
+ // Check if recursive.
+ cmpd(CCR0, t, R16_thread);
+ bne(CCR0, slow_path);
+
+ // Recursive.
+ if (!UseObjectMonitorTable) {
+ assert_different_registers(tmp1, owner_addr);
ld(tmp1, in_bytes(ObjectMonitor::recursions_offset() - ObjectMonitor::owner_offset()), owner_addr);
addi(tmp1, tmp1, 1);
std(tmp1, in_bytes(ObjectMonitor::recursions_offset() - ObjectMonitor::owner_offset()), owner_addr);
} else {
- // OMCache lookup not supported yet. Take the slowpath.
- // Set flag to NE
- crxor(flag, Assembler::equal, flag, Assembler::equal);
- b(slow_path);
+ assert_different_registers(tmp2, monitor);
+ ld(tmp2, in_bytes(ObjectMonitor::recursions_offset()), monitor);
+ addi(tmp2, tmp2, 1);
+ std(tmp2, in_bytes(ObjectMonitor::recursions_offset()), monitor);
+ }
+
+ bind(monitor_locked);
+ if (UseObjectMonitorTable) {
+ std(monitor, BasicLock::object_monitor_cache_offset_in_bytes(), box);
}
}
@@ -2838,21 +2911,21 @@ void MacroAssembler::compiler_fast_lock_lightweight_object(ConditionRegister fla
#ifdef ASSERT
// Check that locked label is reached with flag == EQ.
Label flag_correct;
- beq(flag, flag_correct);
+ beq(CCR0, flag_correct);
stop("Fast Lock Flag != EQ");
#endif
bind(slow_path);
#ifdef ASSERT
// Check that slow_path label is reached with flag == NE.
- bne(flag, flag_correct);
+ bne(CCR0, flag_correct);
stop("Fast Lock Flag != NE");
bind(flag_correct);
#endif
// C2 uses the value of flag (NE vs EQ) to determine the continuation.
}
-void MacroAssembler::compiler_fast_unlock_lightweight_object(ConditionRegister flag, Register obj, Register tmp1,
- Register tmp2, Register tmp3) {
+void MacroAssembler::compiler_fast_unlock_lightweight_object(ConditionRegister flag, Register obj, Register box,
+ Register tmp1, Register tmp2, Register tmp3) {
assert_different_registers(obj, tmp1, tmp2, tmp3);
assert(flag == CCR0, "bad condition register");
@@ -2874,9 +2947,9 @@ void MacroAssembler::compiler_fast_unlock_lightweight_object(ConditionRegister f
lwz(top, in_bytes(JavaThread::lock_stack_top_offset()), R16_thread);
subi(top, top, oopSize);
ldx(t, R16_thread, top);
- cmpd(flag, obj, t);
+ cmpd(CCR0, obj, t);
// Top of lock stack was not obj. Must be monitor.
- bne(flag, inflated_load_monitor);
+ bne(CCR0, inflated_load_monitor);
// Pop lock-stack.
DEBUG_ONLY(li(t, 0);)
@@ -2889,8 +2962,8 @@ void MacroAssembler::compiler_fast_unlock_lightweight_object(ConditionRegister f
// Check if recursive.
subi(t, top, oopSize);
ldx(t, R16_thread, t);
- cmpd(flag, obj, t);
- beq(flag, unlocked);
+ cmpd(CCR0, obj, t);
+ beq(CCR0, unlocked);
// Not recursive.
@@ -2941,62 +3014,74 @@ void MacroAssembler::compiler_fast_unlock_lightweight_object(ConditionRegister f
cmplwi(CCR0, top, in_bytes(JavaThread::lock_stack_base_offset()));
blt(CCR0, check_done);
ldx(t, R16_thread, top);
- cmpd(flag, obj, t);
- bne(flag, inflated);
+ cmpd(CCR0, obj, t);
+ bne(CCR0, inflated);
stop("Fast Unlock lock on stack");
bind(check_done);
#endif
- if (!UseObjectMonitorTable) {
- // mark contains the tagged ObjectMonitor*.
- const Register monitor = mark;
- const uintptr_t monitor_tag = markWord::monitor_value;
+ // mark contains the tagged ObjectMonitor*.
+ const Register monitor = mark;
+ const uintptr_t monitor_tag = markWord::monitor_value;
+ if (!UseObjectMonitorTable) {
// Untag the monitor.
subi(monitor, mark, monitor_tag);
+ } else {
+ ld(monitor, BasicLock::object_monitor_cache_offset_in_bytes(), box);
+ // null check with Flags == NE, no valid pointer below alignof(ObjectMonitor*)
+ cmpldi(CCR0, monitor, checked_cast<uint8_t>(alignof(ObjectMonitor*)));
+ blt(CCR0, slow_path);
+ }
- const Register recursions = tmp2;
- Label not_recursive;
-
- // Check if recursive.
- ld(recursions, in_bytes(ObjectMonitor::recursions_offset()), monitor);
- addic_(recursions, recursions, -1);
- blt(CCR0, not_recursive);
+ const Register recursions = tmp2;
+ Label not_recursive;
- // Recursive unlock.
- std(recursions, in_bytes(ObjectMonitor::recursions_offset()), monitor);
- crorc(CCR0, Assembler::equal, CCR0, Assembler::equal);
- b(unlocked);
+ // Check if recursive.
+ ld(recursions, in_bytes(ObjectMonitor::recursions_offset()), monitor);
+ addic_(recursions, recursions, -1);
+ blt(CCR0, not_recursive);
- bind(not_recursive);
+ // Recursive unlock.
+ std(recursions, in_bytes(ObjectMonitor::recursions_offset()), monitor);
+ crorc(CCR0, Assembler::equal, CCR0, Assembler::equal);
+ b(unlocked);
- Label release_;
- const Register t2 = tmp2;
+ bind(not_recursive);
- // Check if the entry lists are empty.
- ld(t, in_bytes(ObjectMonitor::EntryList_offset()), monitor);
- ld(t2, in_bytes(ObjectMonitor::cxq_offset()), monitor);
- orr(t, t, t2);
- cmpdi(flag, t, 0);
- beq(flag, release_);
+ Label set_eq_unlocked;
+ const Register t2 = tmp2;
- // The owner may be anonymous and we removed the last obj entry in
- // the lock-stack. This loses the information about the owner.
- // Write the thread to the owner field so the runtime knows the owner.
- std(R16_thread, in_bytes(ObjectMonitor::owner_offset()), monitor);
- b(slow_path);
+ // Set owner to null.
+ // Release to satisfy the JMM
+ release();
+ li(t, 0);
+ std(t, in_bytes(ObjectMonitor::owner_offset()), monitor);
+ // We need a full fence after clearing owner to avoid stranding.
+ // StoreLoad achieves this.
+ membar(StoreLoad);
+
+ // Check if the entry lists are empty.
+ ld(t, in_bytes(ObjectMonitor::EntryList_offset()), monitor);
+ ld(t2, in_bytes(ObjectMonitor::cxq_offset()), monitor);
+ orr(t, t, t2);
+ cmpdi(CCR0, t, 0);
+ beq(CCR0, unlocked); // If so we are done.
+
+ // Check if there is a successor.
+ ld(t, in_bytes(ObjectMonitor::succ_offset()), monitor);
+ cmpdi(CCR0, t, 0);
+ bne(CCR0, set_eq_unlocked); // If so we are done.
+
+ // Save the monitor pointer in the current thread, so we can try
+ // to reacquire the lock in SharedRuntime::monitor_exit_helper().
+ std(monitor, in_bytes(JavaThread::unlocked_inflated_monitor_offset()), R16_thread);
+
+ crxor(CCR0, Assembler::equal, CCR0, Assembler::equal); // Set flag = NE => slow path
+ b(slow_path);
- bind(release_);
- // Set owner to null.
- release();
- // t contains 0
- std(t, in_bytes(ObjectMonitor::owner_offset()), monitor);
- } else {
- // OMCache lookup not supported yet. Take the slowpath.
- // Set flag to NE
- crxor(flag, Assembler::equal, flag, Assembler::equal);
- b(slow_path);
- }
+ bind(set_eq_unlocked);
+ crorc(CCR0, Assembler::equal, CCR0, Assembler::equal); // Set flag = EQ => fast path
}
bind(unlocked);
@@ -3005,13 +3090,13 @@ void MacroAssembler::compiler_fast_unlock_lightweight_object(ConditionRegister f
#ifdef ASSERT
// Check that unlocked label is reached with flag == EQ.
Label flag_correct;
- beq(flag, flag_correct);
+ beq(CCR0, flag_correct);
stop("Fast Lock Flag != EQ");
#endif
bind(slow_path);
#ifdef ASSERT
// Check that slow_path label is reached with flag == NE.
- bne(flag, flag_correct);
+ bne(CCR0, flag_correct);
stop("Fast Lock Flag != NE");
bind(flag_correct);
#endif
@@ -4640,15 +4725,21 @@ void MacroAssembler::atomically_flip_locked_state(bool is_unlock, Register obj,
//
// - obj: the object to be locked
// - t1, t2: temporary register
-void MacroAssembler::lightweight_lock(Register obj, Register t1, Register t2, Label& slow) {
+void MacroAssembler::lightweight_lock(Register box, Register obj, Register t1, Register t2, Label& slow) {
assert(LockingMode == LM_LIGHTWEIGHT, "only used with new lightweight locking");
- assert_different_registers(obj, t1, t2);
+ assert_different_registers(box, obj, t1, t2);
Label push;
const Register top = t1;
const Register mark = t2;
const Register t = R0;
+ if (UseObjectMonitorTable) {
+ // Clear cache in case fast locking succeeds.
+ li(t, 0);
+ std(t, in_bytes(BasicObjectLock::lock_offset()) + BasicLock::object_monitor_cache_offset_in_bytes(), box);
+ }
+
// Check if the lock-stack is full.
lwz(top, in_bytes(JavaThread::lock_stack_top_offset()), R16_thread);
cmplwi(CCR0, top, LockStack::end_offset());
@@ -4669,7 +4760,7 @@ void MacroAssembler::lightweight_lock(Register obj, Register t1, Register t2, La
andi_(t, t, markWord::lock_mask_in_place);
bne(CCR0, slow);
- // Try to lock. Transition lock bits 0b00 => 0b01
+ // Try to lock. Transition lock bits 0b01 => 0b00
atomically_flip_locked_state(/* is_unlock */ false, obj, mark, slow, MacroAssembler::MemBarAcq);
bind(push);
diff --git a/src/hotspot/cpu/ppc/macroAssembler_ppc.hpp b/src/hotspot/cpu/ppc/macroAssembler_ppc.hpp
index 03ad37a4fb04a..224e7bff99541 100644
--- a/src/hotspot/cpu/ppc/macroAssembler_ppc.hpp
+++ b/src/hotspot/cpu/ppc/macroAssembler_ppc.hpp
@@ -654,7 +654,7 @@ class MacroAssembler: public Assembler {
void inc_held_monitor_count(Register tmp);
void dec_held_monitor_count(Register tmp);
void atomically_flip_locked_state(bool is_unlock, Register obj, Register tmp, Label& failed, int semantics);
- void lightweight_lock(Register obj, Register t1, Register t2, Label& slow);
+ void lightweight_lock(Register box, Register obj, Register t1, Register t2, Label& slow);
void lightweight_unlock(Register obj, Register t1, Label& slow);
// allocation (for C1)
@@ -675,11 +675,11 @@ class MacroAssembler: public Assembler {
void compiler_fast_unlock_object(ConditionRegister flag, Register oop, Register box,
Register tmp1, Register tmp2, Register tmp3);
- void compiler_fast_lock_lightweight_object(ConditionRegister flag, Register oop, Register tmp1,
- Register tmp2, Register tmp3);
+ void compiler_fast_lock_lightweight_object(ConditionRegister flag, Register oop, Register box,
+ Register tmp1, Register tmp2, Register tmp3);
- void compiler_fast_unlock_lightweight_object(ConditionRegister flag, Register oop, Register tmp1,
- Register tmp2, Register tmp3);
+ void compiler_fast_unlock_lightweight_object(ConditionRegister flag, Register oop, Register box,
+ Register tmp1, Register tmp2, Register tmp3);
// Check if safepoint requested and if so branch
void safepoint_poll(Label& slow_path, Register temp, bool at_return, bool in_nmethod);
diff --git a/src/hotspot/cpu/ppc/ppc.ad b/src/hotspot/cpu/ppc/ppc.ad
index e7e066ebcc6d3..d15f9929671ba 100644
--- a/src/hotspot/cpu/ppc/ppc.ad
+++ b/src/hotspot/cpu/ppc/ppc.ad
@@ -1000,6 +1000,10 @@ int MachNode::compute_padding(int current_offset) const {
// Should the matcher clone input 'm' of node 'n'?
bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) {
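+ // Keep an EncodeP feeding a store together with that store so the combined
+ // encode-and-store pattern (e.g. g1EncodePAndStoreN) can match it.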
+ if (is_encode_and_store_pattern(n, m)) {
+ mstack.push(m, Visit);
+ return true;
+ }
return false;
}
@@ -5407,7 +5411,7 @@ instruct loadRange(iRegIdst dst, memory mem) %{
// Load Compressed Pointer
instruct loadN(iRegNdst dst, memory mem) %{
match(Set dst (LoadN mem));
- predicate(n->as_Load()->is_unordered() || followed_by_acquire(n));
+ predicate((n->as_Load()->is_unordered() || followed_by_acquire(n)) && n->as_Load()->barrier_data() == 0);
ins_cost(MEMORY_REF_COST);
format %{ "LWZ $dst, $mem \t// load compressed ptr" %}
@@ -5419,6 +5423,7 @@ instruct loadN(iRegNdst dst, memory mem) %{
// Load Compressed Pointer acquire.
instruct loadN_ac(iRegNdst dst, memory mem) %{
match(Set dst (LoadN mem));
+ predicate(n->as_Load()->barrier_data() == 0);
ins_cost(3*MEMORY_REF_COST);
format %{ "LWZ $dst, $mem \t// load acquire compressed ptr\n\t"
@@ -5432,7 +5437,7 @@ instruct loadN_ac(iRegNdst dst, memory mem) %{
// Load Compressed Pointer and decode it if narrow_oop_shift == 0.
instruct loadN2P_unscaled(iRegPdst dst, memory mem) %{
match(Set dst (DecodeN (LoadN mem)));
- predicate(_kids[0]->_leaf->as_Load()->is_unordered() && CompressedOops::shift() == 0);
+ predicate(_kids[0]->_leaf->as_Load()->is_unordered() && CompressedOops::shift() == 0 && _kids[0]->_leaf->as_Load()->barrier_data() == 0);
ins_cost(MEMORY_REF_COST);
format %{ "LWZ $dst, $mem \t// DecodeN (unscaled)" %}
@@ -6423,6 +6428,7 @@ instruct reinterpretX(vecX dst) %{
// Store Compressed Oop
instruct storeN(memory dst, iRegN_P2N src) %{
match(Set dst (StoreN dst src));
+ predicate(n->as_Store()->barrier_data() == 0);
ins_cost(MEMORY_REF_COST);
format %{ "STW $src, $dst \t// compressed oop" %}
@@ -6598,7 +6604,7 @@ instruct encodeP_not_null_Ex(iRegNdst dst, iRegPsrc src) %{
instruct encodeP_not_null_base_null(iRegNdst dst, iRegPsrc src) %{
match(Set dst (EncodeP src));
predicate(CompressedOops::shift() != 0 &&
- CompressedOops::base() ==0);
+ CompressedOops::base() == nullptr);
format %{ "SRDI $dst, $src, #3 \t// encodeP, $src != nullptr" %}
size(4);
@@ -6695,7 +6701,7 @@ instruct decodeN_Ex(iRegPdst dst, iRegNsrc src, flagsReg crx) %{
predicate((n->bottom_type()->is_oopptr()->ptr() != TypePtr::NotNull &&
n->bottom_type()->is_oopptr()->ptr() != TypePtr::Constant) &&
CompressedOops::shift() != 0 &&
- CompressedOops::base() != 0);
+ CompressedOops::base() != nullptr);
ins_cost(4 * DEFAULT_COST); // Should be more expensive than decodeN_Disjoint_isel_Ex.
effect(TEMP crx);
@@ -6707,7 +6713,7 @@ instruct decodeN_Ex(iRegPdst dst, iRegNsrc src, flagsReg crx) %{
instruct decodeN_nullBase(iRegPdst dst, iRegNsrc src) %{
match(Set dst (DecodeN src));
predicate(CompressedOops::shift() != 0 &&
- CompressedOops::base() == 0);
+ CompressedOops::base() == nullptr);
format %{ "SLDI $dst, $src, #3 \t// DecodeN (zerobased)" %}
size(4);
@@ -6825,7 +6831,7 @@ instruct decodeN_notNull_addBase_Ex(iRegPdst dst, iRegNsrc src) %{
predicate((n->bottom_type()->is_oopptr()->ptr() == TypePtr::NotNull ||
n->bottom_type()->is_oopptr()->ptr() == TypePtr::Constant) &&
CompressedOops::shift() != 0 &&
- CompressedOops::base() != 0);
+ CompressedOops::base() != nullptr);
ins_cost(2 * DEFAULT_COST);
format %{ "DecodeN $dst, $src \t// $src != nullptr, postalloc expanded" %}
@@ -7477,6 +7483,7 @@ instruct compareAndSwapI_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc
instruct compareAndSwapN_regP_regN_regN(iRegIdst res, iRegPdst mem_ptr, iRegNsrc src1, iRegNsrc src2, flagsRegCR0 cr0) %{
match(Set res (CompareAndSwapN mem_ptr (Binary src1 src2)));
+ predicate(n->as_LoadStore()->barrier_data() == 0);
effect(TEMP_DEF res, TEMP cr0); // TEMP_DEF to avoid jump
format %{ "CMPXCHGW $res, $mem_ptr, $src1, $src2; as bool" %}
ins_encode %{
@@ -7676,7 +7683,7 @@ instruct weakCompareAndSwapI_acq_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr,
instruct weakCompareAndSwapN_regP_regN_regN(iRegIdst res, iRegPdst mem_ptr, iRegNsrc src1, iRegNsrc src2, flagsRegCR0 cr0) %{
match(Set res (WeakCompareAndSwapN mem_ptr (Binary src1 src2)));
- predicate(((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst);
+ predicate(((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst && n->as_LoadStore()->barrier_data() == 0);
effect(TEMP_DEF res, TEMP cr0); // TEMP_DEF to avoid jump
format %{ "weak CMPXCHGW $res, $mem_ptr, $src1, $src2; as bool" %}
ins_encode %{
@@ -7690,7 +7697,7 @@ instruct weakCompareAndSwapN_regP_regN_regN(iRegIdst res, iRegPdst mem_ptr, iReg
instruct weakCompareAndSwapN_acq_regP_regN_regN(iRegIdst res, iRegPdst mem_ptr, iRegNsrc src1, iRegNsrc src2, flagsRegCR0 cr0) %{
match(Set res (WeakCompareAndSwapN mem_ptr (Binary src1 src2)));
- predicate(((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst);
+ predicate((((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst) && n->as_LoadStore()->barrier_data() == 0);
effect(TEMP_DEF res, TEMP cr0); // TEMP_DEF to avoid jump
format %{ "weak CMPXCHGW acq $res, $mem_ptr, $src1, $src2; as bool" %}
ins_encode %{
@@ -7939,7 +7946,7 @@ instruct compareAndExchangeI_acq_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr,
instruct compareAndExchangeN_regP_regN_regN(iRegNdst res, iRegPdst mem_ptr, iRegNsrc src1, iRegNsrc src2, flagsRegCR0 cr0) %{
match(Set res (CompareAndExchangeN mem_ptr (Binary src1 src2)));
- predicate(((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst);
+ predicate(((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst && n->as_LoadStore()->barrier_data() == 0);
effect(TEMP_DEF res, TEMP cr0);
format %{ "CMPXCHGW $res, $mem_ptr, $src1, $src2; as narrow oop" %}
ins_encode %{
@@ -7953,7 +7960,7 @@ instruct compareAndExchangeN_regP_regN_regN(iRegNdst res, iRegPdst mem_ptr, iReg
instruct compareAndExchangeN_acq_regP_regN_regN(iRegNdst res, iRegPdst mem_ptr, iRegNsrc src1, iRegNsrc src2, flagsRegCR0 cr0) %{
match(Set res (CompareAndExchangeN mem_ptr (Binary src1 src2)));
- predicate(((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst);
+ predicate((((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst) && n->as_LoadStore()->barrier_data() == 0);
effect(TEMP_DEF res, TEMP cr0);
format %{ "CMPXCHGW acq $res, $mem_ptr, $src1, $src2; as narrow oop" %}
ins_encode %{
@@ -8262,6 +8269,7 @@ instruct getAndSetP(iRegPdst res, iRegPdst mem_ptr, iRegPsrc src, flagsRegCR0 cr
instruct getAndSetN(iRegNdst res, iRegPdst mem_ptr, iRegNsrc src, flagsRegCR0 cr0) %{
match(Set res (GetAndSetN mem_ptr src));
+ predicate(n->as_LoadStore()->barrier_data() == 0);
effect(TEMP_DEF res, TEMP cr0);
format %{ "GetAndSetN $res, $mem_ptr, $src" %}
ins_encode %{
@@ -12106,10 +12114,10 @@ instruct cmpFastUnlock(flagsRegCR0 crx, iRegPdst oop, iRegPdst box, iRegPdst tmp
ins_pipe(pipe_class_compare);
%}
-instruct cmpFastLockLightweight(flagsRegCR0 crx, iRegPdst oop, iRegPdst box, iRegPdst tmp1, iRegPdst tmp2) %{
+instruct cmpFastLockLightweight(flagsRegCR0 crx, iRegPdst oop, iRegPdst box, iRegPdst tmp1, iRegPdst tmp2, flagsRegCR1 cr1) %{
predicate(LockingMode == LM_LIGHTWEIGHT);
match(Set crx (FastLock oop box));
- effect(TEMP tmp1, TEMP tmp2);
+ effect(TEMP tmp1, TEMP tmp2, KILL cr1);
format %{ "FASTLOCK $oop, $box, $tmp1, $tmp2" %}
ins_encode %{
diff --git a/src/hotspot/cpu/ppc/register_ppc.hpp b/src/hotspot/cpu/ppc/register_ppc.hpp
index 302d49884fae3..b7ba4f053b5d6 100644
--- a/src/hotspot/cpu/ppc/register_ppc.hpp
+++ b/src/hotspot/cpu/ppc/register_ppc.hpp
@@ -27,6 +27,7 @@
#define CPU_PPC_REGISTER_PPC_HPP
#include "asm/register.hpp"
+#include "utilities/count_trailing_zeros.hpp"
// forward declaration
class VMRegImpl;
@@ -555,4 +556,12 @@ constexpr Register R29_TOC = R29;
constexpr Register R11_scratch1 = R11;
constexpr Register R12_scratch2 = R12;
+template <>
+inline Register AbstractRegSet<Register>::first() {
+ if (_bitset == 0) { return noreg; }
+ return as_Register(count_trailing_zeros(_bitset));
+}
+
+typedef AbstractRegSet<Register> RegSet;
+
#endif // CPU_PPC_REGISTER_PPC_HPP
diff --git a/src/hotspot/cpu/ppc/sharedRuntime_ppc.cpp b/src/hotspot/cpu/ppc/sharedRuntime_ppc.cpp
index 5cf5f7cf73e03..aa8ae6070b6a6 100644
--- a/src/hotspot/cpu/ppc/sharedRuntime_ppc.cpp
+++ b/src/hotspot/cpu/ppc/sharedRuntime_ppc.cpp
@@ -2399,7 +2399,7 @@ nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm,
// Try fastpath for locking.
if (LockingMode == LM_LIGHTWEIGHT) {
// fast_lock kills r_temp_1, r_temp_2, r_temp_3.
- __ compiler_fast_lock_lightweight_object(CCR0, r_oop, r_temp_1, r_temp_2, r_temp_3);
+ __ compiler_fast_lock_lightweight_object(CCR0, r_oop, r_box, r_temp_1, r_temp_2, r_temp_3);
} else {
// fast_lock kills r_temp_1, r_temp_2, r_temp_3.
__ compiler_fast_lock_object(CCR0, r_oop, r_box, r_temp_1, r_temp_2, r_temp_3);
@@ -2605,7 +2605,7 @@ nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm,
// Try fastpath for unlocking.
if (LockingMode == LM_LIGHTWEIGHT) {
- __ compiler_fast_unlock_lightweight_object(CCR0, r_oop, r_temp_1, r_temp_2, r_temp_3);
+ __ compiler_fast_unlock_lightweight_object(CCR0, r_oop, r_box, r_temp_1, r_temp_2, r_temp_3);
} else {
__ compiler_fast_unlock_object(CCR0, r_oop, r_box, r_temp_1, r_temp_2, r_temp_3);
}
diff --git a/src/hotspot/cpu/ppc/stubGenerator_ppc.cpp b/src/hotspot/cpu/ppc/stubGenerator_ppc.cpp
index ee3f1911e2082..206c161287fa2 100644
--- a/src/hotspot/cpu/ppc/stubGenerator_ppc.cpp
+++ b/src/hotspot/cpu/ppc/stubGenerator_ppc.cpp
@@ -4587,6 +4587,30 @@ address generate_lookup_secondary_supers_table_stub(u1 super_klass_index) {
return start;
}
+ // load Method* target of MethodHandle
+ // R3_ARG1 = jobject receiver
+ // R19_method = result Method*
+ address generate_upcall_stub_load_target() {
+
+ StubCodeMark mark(this, "StubRoutines", "upcall_stub_load_target");
+ address start = __ pc();
+
+ __ resolve_global_jobject(R3_ARG1, R22_tmp2, R23_tmp3, MacroAssembler::PRESERVATION_FRAME_LR_GP_FP_REGS);
+ // Load target method from receiver
+ __ load_heap_oop(R19_method, java_lang_invoke_MethodHandle::form_offset(), R3_ARG1,
+ R22_tmp2, R23_tmp3, MacroAssembler::PRESERVATION_FRAME_LR_GP_FP_REGS, IS_NOT_NULL);
+ __ load_heap_oop(R19_method, java_lang_invoke_LambdaForm::vmentry_offset(), R19_method,
+ R22_tmp2, R23_tmp3, MacroAssembler::PRESERVATION_FRAME_LR_GP_FP_REGS, IS_NOT_NULL);
+ __ load_heap_oop(R19_method, java_lang_invoke_MemberName::method_offset(), R19_method,
+ R22_tmp2, R23_tmp3, MacroAssembler::PRESERVATION_FRAME_LR_GP_FP_REGS, IS_NOT_NULL);
+ __ ld(R19_method, java_lang_invoke_ResolvedMethodName::vmtarget_offset(), R19_method);
+ __ std(R19_method, in_bytes(JavaThread::callee_target_offset()), R16_thread); // just in case callee is deoptimized
+
+ __ blr();
+
+ return start;
+ }
+
// Initialization
void generate_initial_stubs() {
// Generates all stubs and initializes the entry points
@@ -4651,6 +4675,7 @@ address generate_lookup_secondary_supers_table_stub(u1 super_klass_index) {
}
StubRoutines::_upcall_stub_exception_handler = generate_upcall_stub_exception_handler();
+ StubRoutines::_upcall_stub_load_target = generate_upcall_stub_load_target();
}
void generate_compiler_stubs() {
diff --git a/src/hotspot/cpu/ppc/templateInterpreterGenerator_ppc.cpp b/src/hotspot/cpu/ppc/templateInterpreterGenerator_ppc.cpp
index 03dca2aeb9b7b..cf3dd4cbd34c0 100644
--- a/src/hotspot/cpu/ppc/templateInterpreterGenerator_ppc.cpp
+++ b/src/hotspot/cpu/ppc/templateInterpreterGenerator_ppc.cpp
@@ -1078,6 +1078,7 @@ address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::M
case Interpreter::java_lang_math_sin : runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dsin); break;
case Interpreter::java_lang_math_cos : runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dcos); break;
case Interpreter::java_lang_math_tan : runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dtan); break;
+ case Interpreter::java_lang_math_tanh : /* run interpreted */ break;
case Interpreter::java_lang_math_abs : /* run interpreted */ break;
case Interpreter::java_lang_math_sqrt : runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dsqrt); break;
case Interpreter::java_lang_math_log : runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dlog); break;
diff --git a/src/hotspot/cpu/ppc/upcallLinker_ppc.cpp b/src/hotspot/cpu/ppc/upcallLinker_ppc.cpp
index b60fd4f16d163..635bab900d157 100644
--- a/src/hotspot/cpu/ppc/upcallLinker_ppc.cpp
+++ b/src/hotspot/cpu/ppc/upcallLinker_ppc.cpp
@@ -24,6 +24,7 @@
#include "precompiled.hpp"
#include "asm/macroAssembler.inline.hpp"
+#include "classfile/javaClasses.hpp"
#include "logging/logStream.hpp"
#include "memory/resourceArea.hpp"
#include "prims/upcallLinker.hpp"
@@ -118,7 +119,7 @@ static void restore_callee_saved_registers(MacroAssembler* _masm, const ABIDescr
static const int upcall_stub_code_base_size = 1024;
static const int upcall_stub_size_per_arg = 16; // arg save & restore + move
-address UpcallLinker::make_upcall_stub(jobject receiver, Method* entry,
+address UpcallLinker::make_upcall_stub(jobject receiver, Symbol* signature,
BasicType* out_sig_bt, int total_out_args,
BasicType ret_type,
jobject jabi, jobject jconv,
@@ -221,7 +222,6 @@ address UpcallLinker::make_upcall_stub(jobject receiver, Method* entry,
__ block_comment("{ on_entry");
__ load_const_optimized(call_target_address, CAST_FROM_FN_PTR(uint64_t, UpcallLinker::on_entry), R0);
__ addi(R3_ARG1, R1_SP, frame_data_offset);
- __ load_const_optimized(R4_ARG2, (intptr_t)receiver, R0);
__ call_c(call_target_address);
__ mr(R16_thread, R3_RET);
__ block_comment("} on_entry");
@@ -236,12 +236,12 @@ address UpcallLinker::make_upcall_stub(jobject receiver, Method* entry,
arg_shuffle.generate(_masm, as_VMStorage(callerSP), frame::native_abi_minframe_size, frame::jit_out_preserve_size);
__ block_comment("} argument shuffle");
- __ block_comment("{ receiver ");
- __ get_vm_result(R3_ARG1);
- __ block_comment("} receiver ");
-
- __ load_const_optimized(R19_method, (intptr_t)entry);
- __ std(R19_method, in_bytes(JavaThread::callee_target_offset()), R16_thread);
+ __ block_comment("{ load target ");
+ __ load_const_optimized(call_target_address, StubRoutines::upcall_stub_load_target(), R0);
+ __ load_const_optimized(R3_ARG1, (intptr_t)receiver, R0);
+ __ mtctr(call_target_address);
+ __ bctrl(); // loads target Method* into R19_method
+ __ block_comment("} load target ");
__ push_cont_fastpath();
@@ -326,7 +326,7 @@ address UpcallLinker::make_upcall_stub(jobject receiver, Method* entry,
#ifndef PRODUCT
stringStream ss;
- ss.print("upcall_stub_%s", entry->signature()->as_C_string());
+ ss.print("upcall_stub_%s", signature->as_C_string());
const char* name = _masm->code_string(ss.as_string());
#else // PRODUCT
const char* name = "upcall_stub";
diff --git a/src/hotspot/cpu/riscv/assembler_riscv.hpp b/src/hotspot/cpu/riscv/assembler_riscv.hpp
index cba3dd919dafb..d1021d9e283d2 100644
--- a/src/hotspot/cpu/riscv/assembler_riscv.hpp
+++ b/src/hotspot/cpu/riscv/assembler_riscv.hpp
@@ -705,6 +705,16 @@ class Assembler : public AbstractAssembler {
emit(insn);
}
+ void fencei() {
+ unsigned insn = 0;
+ patch((address)&insn, 6, 0, 0b0001111); // opcode
+ patch((address)&insn, 11, 7, 0b00000); // rd
+ patch((address)&insn, 14, 12, 0b001); // func
+ patch((address)&insn, 19, 15, 0b00000); // rs1
+ patch((address)&insn, 31, 20, 0b000000000000); // fm
+ emit(insn);
+ }
+
#define INSN(NAME, op, funct3, funct7) \
void NAME() { \
unsigned insn = 0; \
@@ -1267,6 +1277,7 @@ enum VectorMask {
INSN(viota_m, 0b1010111, 0b010, 0b10000, 0b010100);
// Vector Single-Width Floating-Point/Integer Type-Convert Instructions
+ INSN(vfcvt_x_f_v, 0b1010111, 0b001, 0b00001, 0b010010);
INSN(vfcvt_f_x_v, 0b1010111, 0b001, 0b00011, 0b010010);
INSN(vfcvt_rtz_x_f_v, 0b1010111, 0b001, 0b00111, 0b010010);
diff --git a/src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp b/src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp
index b7e1b7863efdb..fb81082072610 100644
--- a/src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp
+++ b/src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1999, 2023, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2024, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2014, Red Hat Inc. All rights reserved.
* Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
@@ -62,7 +62,7 @@ void CounterOverflowStub::emit_code(LIR_Assembler* ce) {
__ mov_metadata(t0, m);
ce->store_parameter(t0, 1);
ce->store_parameter(_bci, 0);
- __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::counter_overflow_id)));
+ __ far_call(RuntimeAddress(Runtime1::entry_for(C1StubId::counter_overflow_id)));
ce->add_call_info_here(_info);
ce->verify_oop_map(_info);
__ j(_continuation);
@@ -71,7 +71,7 @@ void CounterOverflowStub::emit_code(LIR_Assembler* ce) {
void RangeCheckStub::emit_code(LIR_Assembler* ce) {
__ bind(_entry);
if (_info->deoptimize_on_exception()) {
- address a = Runtime1::entry_for(Runtime1::predicate_failed_trap_id);
+ address a = Runtime1::entry_for(C1StubId::predicate_failed_trap_id);
__ far_call(RuntimeAddress(a));
ce->add_call_info_here(_info);
ce->verify_oop_map(_info);
@@ -84,13 +84,13 @@ void RangeCheckStub::emit_code(LIR_Assembler* ce) {
} else {
__ mv(t0, _index->as_jint());
}
- Runtime1::StubID stub_id;
+ C1StubId stub_id;
if (_throw_index_out_of_bounds_exception) {
- stub_id = Runtime1::throw_index_exception_id;
+ stub_id = C1StubId::throw_index_exception_id;
} else {
assert(_array != LIR_Opr::nullOpr(), "sanity");
__ mv(t1, _array->as_pointer_register());
- stub_id = Runtime1::throw_range_check_failed_id;
+ stub_id = C1StubId::throw_range_check_failed_id;
}
// t0 and t1 are used as args in generate_exception_throw,
// so use ra as the tmp register for rt_call.
@@ -106,7 +106,7 @@ PredicateFailedStub::PredicateFailedStub(CodeEmitInfo* info) {
void PredicateFailedStub::emit_code(LIR_Assembler* ce) {
__ bind(_entry);
- address a = Runtime1::entry_for(Runtime1::predicate_failed_trap_id);
+ address a = Runtime1::entry_for(C1StubId::predicate_failed_trap_id);
__ far_call(RuntimeAddress(a));
ce->add_call_info_here(_info);
ce->verify_oop_map(_info);
@@ -118,7 +118,7 @@ void DivByZeroStub::emit_code(LIR_Assembler* ce) {
ce->compilation()->implicit_exception_table()->append(_offset, __ offset());
}
__ bind(_entry);
- __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::throw_div0_exception_id)));
+ __ far_call(RuntimeAddress(Runtime1::entry_for(C1StubId::throw_div0_exception_id)));
ce->add_call_info_here(_info);
ce->verify_oop_map(_info);
#ifdef ASSERT
@@ -127,14 +127,14 @@ void DivByZeroStub::emit_code(LIR_Assembler* ce) {
}
// Implementation of NewInstanceStub
-NewInstanceStub::NewInstanceStub(LIR_Opr klass_reg, LIR_Opr result, ciInstanceKlass* klass, CodeEmitInfo* info, Runtime1::StubID stub_id) {
+NewInstanceStub::NewInstanceStub(LIR_Opr klass_reg, LIR_Opr result, ciInstanceKlass* klass, CodeEmitInfo* info, C1StubId stub_id) {
_result = result;
_klass = klass;
_klass_reg = klass_reg;
_info = new CodeEmitInfo(info);
- assert(stub_id == Runtime1::new_instance_id ||
- stub_id == Runtime1::fast_new_instance_id ||
- stub_id == Runtime1::fast_new_instance_init_check_id,
+ assert(stub_id == C1StubId::new_instance_id ||
+ stub_id == C1StubId::fast_new_instance_id ||
+ stub_id == C1StubId::fast_new_instance_init_check_id,
"need new_instance id");
_stub_id = stub_id;
}
@@ -163,7 +163,7 @@ void NewTypeArrayStub::emit_code(LIR_Assembler* ce) {
__ bind(_entry);
assert(_length->as_register() == x9, "length must in x9");
assert(_klass_reg->as_register() == x13, "klass_reg must in x13");
- __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::new_type_array_id)));
+ __ far_call(RuntimeAddress(Runtime1::entry_for(C1StubId::new_type_array_id)));
ce->add_call_info_here(_info);
ce->verify_oop_map(_info);
assert(_result->as_register() == x10, "result must in x10");
@@ -183,7 +183,7 @@ void NewObjectArrayStub::emit_code(LIR_Assembler* ce) {
__ bind(_entry);
assert(_length->as_register() == x9, "length must in x9");
assert(_klass_reg->as_register() == x13, "klass_reg must in x13");
- __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::new_object_array_id)));
+ __ far_call(RuntimeAddress(Runtime1::entry_for(C1StubId::new_object_array_id)));
ce->add_call_info_here(_info);
ce->verify_oop_map(_info);
assert(_result->as_register() == x10, "result must in x10");
@@ -195,11 +195,11 @@ void MonitorEnterStub::emit_code(LIR_Assembler* ce) {
__ bind(_entry);
ce->store_parameter(_obj_reg->as_register(), 1);
ce->store_parameter(_lock_reg->as_register(), 0);
- Runtime1::StubID enter_id;
+ C1StubId enter_id;
if (ce->compilation()->has_fpu_code()) {
- enter_id = Runtime1::monitorenter_id;
+ enter_id = C1StubId::monitorenter_id;
} else {
- enter_id = Runtime1::monitorenter_nofpu_id;
+ enter_id = C1StubId::monitorenter_nofpu_id;
}
__ far_call(RuntimeAddress(Runtime1::entry_for(enter_id)));
ce->add_call_info_here(_info);
@@ -215,11 +215,11 @@ void MonitorExitStub::emit_code(LIR_Assembler* ce) {
}
ce->store_parameter(_lock_reg->as_register(), 0);
// note: non-blocking leaf routine => no call info needed
- Runtime1::StubID exit_id;
+ C1StubId exit_id;
if (ce->compilation()->has_fpu_code()) {
- exit_id = Runtime1::monitorexit_id;
+ exit_id = C1StubId::monitorexit_id;
} else {
- exit_id = Runtime1::monitorexit_nofpu_id;
+ exit_id = C1StubId::monitorexit_nofpu_id;
}
__ la(ra, _continuation);
__ far_jump(RuntimeAddress(Runtime1::entry_for(exit_id)));
@@ -244,7 +244,7 @@ void PatchingStub::emit_code(LIR_Assembler* ce) {
void DeoptimizeStub::emit_code(LIR_Assembler* ce) {
__ bind(_entry);
ce->store_parameter(_trap_request, 0);
- __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::deoptimize_id)));
+ __ far_call(RuntimeAddress(Runtime1::entry_for(C1StubId::deoptimize_id)));
ce->add_call_info_here(_info);
DEBUG_ONLY(__ should_not_reach_here());
}
@@ -253,9 +253,9 @@ void ImplicitNullCheckStub::emit_code(LIR_Assembler* ce) {
address a = nullptr;
if (_info->deoptimize_on_exception()) {
// Deoptimize, do not throw the exception, because it is probably wrong to do it here.
- a = Runtime1::entry_for(Runtime1::predicate_failed_trap_id);
+ a = Runtime1::entry_for(C1StubId::predicate_failed_trap_id);
} else {
- a = Runtime1::entry_for(Runtime1::throw_null_pointer_exception_id);
+ a = Runtime1::entry_for(C1StubId::throw_null_pointer_exception_id);
}
ce->compilation()->implicit_exception_table()->append(_offset, __ offset());
diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_arraycopy_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_arraycopy_riscv.cpp
index a8f260acae8ce..012932189382c 100644
--- a/src/hotspot/cpu/riscv/c1_LIRAssembler_arraycopy_riscv.cpp
+++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_arraycopy_riscv.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2000, 2023, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2000, 2024, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@@ -223,7 +223,7 @@ void LIR_Assembler::arraycopy_type_check(Register src, Register src_pos, Registe
__ check_klass_subtype_fast_path(src, dst, tmp, &cont, &slow, nullptr);
PUSH(src, dst);
- __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::slow_subtype_check_id)));
+ __ far_call(RuntimeAddress(Runtime1::entry_for(C1StubId::slow_subtype_check_id)));
POP(src, dst);
__ bnez(dst, cont);
diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp
index 3d146b87707aa..940706b0a7376 100644
--- a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp
+++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp
@@ -305,7 +305,7 @@ int LIR_Assembler::emit_exception_handler() {
__ verify_not_null_oop(x10);
// search an exception handler (x10: exception oop, x13: throwing pc)
- __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::handle_exception_from_callee_id)));
+ __ far_call(RuntimeAddress(Runtime1::entry_for(C1StubId::handle_exception_from_callee_id)));
__ should_not_reach_here();
guarantee(code_offset() - offset <= exception_handler_size(), "overflow");
__ end_a_stub();
@@ -361,7 +361,7 @@ int LIR_Assembler::emit_unwind_handler() {
// remove the activation and dispatch to the unwind handler
__ block_comment("remove_frame and dispatch to the unwind handler");
__ remove_frame(initial_frame_size_in_bytes());
- __ far_jump(RuntimeAddress(Runtime1::entry_for(Runtime1::unwind_exception_id)));
+ __ far_jump(RuntimeAddress(Runtime1::entry_for(C1StubId::unwind_exception_id)));
// Emit the slow path assembly
if (stub != nullptr) {
@@ -1088,7 +1088,7 @@ void LIR_Assembler::typecheck_helper_slowcheck(ciKlass *k, Register obj, Registe
__ addi(sp, sp, -2 * wordSize); // 2: store k_RInfo and klass_RInfo
__ sd(k_RInfo, Address(sp, 0)); // sub klass
__ sd(klass_RInfo, Address(sp, wordSize)); // super klass
- __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::slow_subtype_check_id)));
+ __ far_call(RuntimeAddress(Runtime1::entry_for(C1StubId::slow_subtype_check_id)));
// load result to k_RInfo
__ ld(k_RInfo, Address(sp, 0));
__ addi(sp, sp, 2 * wordSize); // 2: pop out k_RInfo and klass_RInfo
@@ -1103,7 +1103,7 @@ void LIR_Assembler::typecheck_helper_slowcheck(ciKlass *k, Register obj, Registe
__ addi(sp, sp, -2 * wordSize); // 2: store k_RInfo and klass_RInfo
__ sd(klass_RInfo, Address(sp, wordSize)); // sub klass
__ sd(k_RInfo, Address(sp, 0)); // super klass
- __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::slow_subtype_check_id)));
+ __ far_call(RuntimeAddress(Runtime1::entry_for(C1StubId::slow_subtype_check_id)));
// load result to k_RInfo
__ ld(k_RInfo, Address(sp, 0));
__ addi(sp, sp, 2 * wordSize); // 2: pop out k_RInfo and klass_RInfo
@@ -1391,7 +1391,7 @@ void LIR_Assembler::throw_op(LIR_Opr exceptionPC, LIR_Opr exceptionOop, CodeEmit
// exception object is not added to oop map by LinearScan
// (LinearScan assumes that no oops are in fixed registers)
info->add_register_oop(exceptionOop);
- Runtime1::StubID unwind_id;
+ C1StubId unwind_id;
// get current pc information
// pc is only needed if the method has an exception handler, the unwind code does not need it.
@@ -1414,9 +1414,9 @@ void LIR_Assembler::throw_op(LIR_Opr exceptionPC, LIR_Opr exceptionOop, CodeEmit
__ verify_not_null_oop(x10);
// search an exception handler (x10: exception oop, x13: throwing pc)
if (compilation()->has_fpu_code()) {
- unwind_id = Runtime1::handle_exception_id;
+ unwind_id = C1StubId::handle_exception_id;
} else {
- unwind_id = Runtime1::handle_exception_nofpu_id;
+ unwind_id = C1StubId::handle_exception_nofpu_id;
}
__ far_call(RuntimeAddress(Runtime1::entry_for(unwind_id)));
__ nop();
@@ -2054,16 +2054,16 @@ void LIR_Assembler::deoptimize_trap(CodeEmitInfo *info) {
switch (patching_id(info)) {
case PatchingStub::access_field_id:
- target = Runtime1::entry_for(Runtime1::access_field_patching_id);
+ target = Runtime1::entry_for(C1StubId::access_field_patching_id);
break;
case PatchingStub::load_klass_id:
- target = Runtime1::entry_for(Runtime1::load_klass_patching_id);
+ target = Runtime1::entry_for(C1StubId::load_klass_patching_id);
break;
case PatchingStub::load_mirror_id:
- target = Runtime1::entry_for(Runtime1::load_mirror_patching_id);
+ target = Runtime1::entry_for(C1StubId::load_mirror_patching_id);
break;
case PatchingStub::load_appendix_id:
- target = Runtime1::entry_for(Runtime1::load_appendix_patching_id);
+ target = Runtime1::entry_for(C1StubId::load_appendix_patching_id);
break;
default: ShouldNotReachHere();
}
@@ -2152,7 +2152,7 @@ void LIR_Assembler::lir_store_slowcheck(Register k_RInfo, Register klass_RInfo,
__ addi(sp, sp, -2 * wordSize); // 2: store k_RInfo and klass_RInfo
__ sd(klass_RInfo, Address(sp, wordSize)); // sub klass
__ sd(k_RInfo, Address(sp, 0)); // super klass
- __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::slow_subtype_check_id)));
+ __ far_call(RuntimeAddress(Runtime1::entry_for(C1StubId::slow_subtype_check_id)));
// load result to k_RInfo
__ ld(k_RInfo, Address(sp, 0));
__ addi(sp, sp, 2 * wordSize); // 2: pop out k_RInfo and klass_RInfo
diff --git a/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp
index 409e8dc0a0d95..b328d457192ba 100644
--- a/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp
+++ b/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2005, 2023, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2005, 2024, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2014, Red Hat Inc. All rights reserved.
* Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
@@ -1030,7 +1030,7 @@ void LIRGenerator::do_NewMultiArray(NewMultiArray* x) {
args->append(rank);
args->append(varargs);
LIR_Opr reg = result_register_for(x->type());
- __ call_runtime(Runtime1::entry_for(Runtime1::new_multi_array_id),
+ __ call_runtime(Runtime1::entry_for(C1StubId::new_multi_array_id),
LIR_OprFact::illegalOpr,
reg, args, info);
@@ -1062,7 +1062,7 @@ void LIRGenerator::do_CheckCast(CheckCast* x) {
CodeStub* stub = nullptr;
if (x->is_incompatible_class_change_check()) {
assert(patching_info == nullptr, "can't patch this");
- stub = new SimpleExceptionStub(Runtime1::throw_incompatible_class_change_error_id, LIR_OprFact::illegalOpr,
+ stub = new SimpleExceptionStub(C1StubId::throw_incompatible_class_change_error_id, LIR_OprFact::illegalOpr,
info_for_exception);
} else if (x->is_invokespecial_receiver_check()) {
assert(patching_info == nullptr, "can't patch this");
@@ -1070,7 +1070,7 @@ void LIRGenerator::do_CheckCast(CheckCast* x) {
Deoptimization::Reason_class_check,
Deoptimization::Action_none);
} else {
- stub = new SimpleExceptionStub(Runtime1::throw_class_cast_exception_id, obj.result(), info_for_exception);
+ stub = new SimpleExceptionStub(C1StubId::throw_class_cast_exception_id, obj.result(), info_for_exception);
}
LIR_Opr reg = rlock_result(x);
LIR_Opr tmp3 = LIR_OprFact::illegalOpr;
diff --git a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp
index 1ae64b4f283ba..1e4b66069ee23 100644
--- a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp
+++ b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp
@@ -276,7 +276,7 @@ void C1_MacroAssembler::initialize_object(Register obj, Register klass, Register
if (CURRENT_ENV->dtrace_alloc_probes()) {
assert(obj == x10, "must be");
- far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::dtrace_object_alloc_id)));
+ far_call(RuntimeAddress(Runtime1::entry_for(C1StubId::dtrace_object_alloc_id)));
}
verify_oop(obj);
@@ -316,7 +316,7 @@ void C1_MacroAssembler::allocate_array(Register obj, Register len, Register tmp1
if (CURRENT_ENV->dtrace_alloc_probes()) {
assert(obj == x10, "must be");
- far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::dtrace_object_alloc_id)));
+ far_call(RuntimeAddress(Runtime1::entry_for(C1StubId::dtrace_object_alloc_id)));
}
verify_oop(obj);
diff --git a/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp b/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp
index 824d03640517e..5e4031727c827 100644
--- a/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp
+++ b/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp
@@ -98,10 +98,10 @@ int StubAssembler::call_RT(Register oop_result, Register metadata_result, addres
if (frame_size() == no_frame_size) {
leave();
far_jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
- } else if (_stub_id == Runtime1::forward_exception_id) {
+ } else if (_stub_id == (int)C1StubId::forward_exception_id) {
should_not_reach_here();
} else {
- far_jump(RuntimeAddress(Runtime1::entry_for(Runtime1::forward_exception_id)));
+ far_jump(RuntimeAddress(Runtime1::entry_for(C1StubId::forward_exception_id)));
}
bind(L);
}
@@ -376,7 +376,7 @@ OopMapSet* Runtime1::generate_exception_throw(StubAssembler* sasm, address targe
return oop_maps;
}
-OopMapSet* Runtime1::generate_handle_exception(StubID id, StubAssembler *sasm) {
+OopMapSet* Runtime1::generate_handle_exception(C1StubId id, StubAssembler *sasm) {
__ block_comment("generate_handle_exception");
// incoming parameters
@@ -388,7 +388,7 @@ OopMapSet* Runtime1::generate_handle_exception(StubID id, StubAssembler *sasm) {
OopMap* oop_map = nullptr;
switch (id) {
- case forward_exception_id:
+ case C1StubId::forward_exception_id:
// We're handling an exception in the context of a compiled frame.
// The registers have been saved in the standard places. Perform
// an exception lookup in the caller and dispatch to the handler
@@ -407,12 +407,12 @@ OopMapSet* Runtime1::generate_handle_exception(StubID id, StubAssembler *sasm) {
__ sd(zr, Address(xthread, JavaThread::vm_result_offset()));
__ sd(zr, Address(xthread, JavaThread::vm_result_2_offset()));
break;
- case handle_exception_nofpu_id:
- case handle_exception_id:
+ case C1StubId::handle_exception_nofpu_id:
+ case C1StubId::handle_exception_id:
// At this point all registers MAY be live.
- oop_map = save_live_registers(sasm, id != handle_exception_nofpu_id);
+ oop_map = save_live_registers(sasm, id != C1StubId::handle_exception_nofpu_id);
break;
- case handle_exception_from_callee_id: {
+ case C1StubId::handle_exception_from_callee_id: {
// At this point all registers except exception oop (x10) and
// exception pc (ra) are dead.
const int frame_size = 2 /* fp, return address */;
@@ -469,13 +469,13 @@ OopMapSet* Runtime1::generate_handle_exception(StubID id, StubAssembler *sasm) {
__ sd(x10, Address(fp, frame::return_addr_offset * BytesPerWord));
switch (id) {
- case forward_exception_id:
- case handle_exception_nofpu_id:
- case handle_exception_id:
+ case C1StubId::forward_exception_id:
+ case C1StubId::handle_exception_nofpu_id:
+ case C1StubId::handle_exception_id:
// Restore the registers that were saved at the beginning.
- restore_live_registers(sasm, id != handle_exception_nofpu_id);
+ restore_live_registers(sasm, id != C1StubId::handle_exception_nofpu_id);
break;
- case handle_exception_from_callee_id:
+ case C1StubId::handle_exception_from_callee_id:
break;
default: ShouldNotReachHere();
}
@@ -621,7 +621,7 @@ OopMapSet* Runtime1::generate_patching(StubAssembler* sasm, address target) {
return oop_maps;
}
-OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
+OopMapSet* Runtime1::generate_code_for(C1StubId id, StubAssembler* sasm) {
// for better readability
const bool dont_gc_arguments = false;
@@ -632,7 +632,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
OopMapSet* oop_maps = nullptr;
switch (id) {
{
- case forward_exception_id:
+ case C1StubId::forward_exception_id:
{
oop_maps = generate_handle_exception(id, sasm);
__ leave();
@@ -640,32 +640,32 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
}
break;
- case throw_div0_exception_id:
+ case C1StubId::throw_div0_exception_id:
{
StubFrame f(sasm, "throw_div0_exception", dont_gc_arguments, does_not_return);
oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_div0_exception), false);
}
break;
- case throw_null_pointer_exception_id:
+ case C1StubId::throw_null_pointer_exception_id:
{ StubFrame f(sasm, "throw_null_pointer_exception", dont_gc_arguments, does_not_return);
oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_null_pointer_exception), false);
}
break;
- case new_instance_id:
- case fast_new_instance_id:
- case fast_new_instance_init_check_id:
+ case C1StubId::new_instance_id:
+ case C1StubId::fast_new_instance_id:
+ case C1StubId::fast_new_instance_init_check_id:
{
Register klass = x13; // Incoming
Register obj = x10; // Result
- if (id == new_instance_id) {
+ if (id == C1StubId::new_instance_id) {
__ set_info("new_instance", dont_gc_arguments);
- } else if (id == fast_new_instance_id) {
+ } else if (id == C1StubId::fast_new_instance_id) {
__ set_info("fast new_instance", dont_gc_arguments);
} else {
- assert(id == fast_new_instance_init_check_id, "bad StubID");
+ assert(id == C1StubId::fast_new_instance_init_check_id, "bad C1StubId");
__ set_info("fast new_instance init check", dont_gc_arguments);
}
@@ -686,7 +686,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
break;
- case counter_overflow_id:
+ case C1StubId::counter_overflow_id:
{
Register bci = x10;
Register method = x11;
@@ -710,14 +710,14 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
}
break;
- case new_type_array_id:
- case new_object_array_id:
+ case C1StubId::new_type_array_id:
+ case C1StubId::new_object_array_id:
{
Register length = x9; // Incoming
Register klass = x13; // Incoming
Register obj = x10; // Result
- if (id == new_type_array_id) {
+ if (id == C1StubId::new_type_array_id) {
__ set_info("new_type_array", dont_gc_arguments);
} else {
__ set_info("new_object_array", dont_gc_arguments);
@@ -730,7 +730,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
Register tmp = obj;
__ lwu(tmp, Address(klass, Klass::layout_helper_offset()));
__ sraiw(tmp, tmp, Klass::_lh_array_tag_shift);
- int tag = ((id == new_type_array_id) ? Klass::_lh_array_tag_type_value : Klass::_lh_array_tag_obj_value);
+ int tag = ((id == C1StubId::new_type_array_id) ? Klass::_lh_array_tag_type_value : Klass::_lh_array_tag_obj_value);
__ mv(t0, tag);
__ beq(t0, tmp, ok);
__ stop("assert(is an array klass)");
@@ -743,7 +743,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
OopMap* map = save_live_registers(sasm);
assert_cond(map != nullptr);
int call_offset = 0;
- if (id == new_type_array_id) {
+ if (id == C1StubId::new_type_array_id) {
call_offset = __ call_RT(obj, noreg, CAST_FROM_FN_PTR(address, new_type_array), klass, length);
} else {
call_offset = __ call_RT(obj, noreg, CAST_FROM_FN_PTR(address, new_object_array), klass, length);
@@ -762,7 +762,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
}
break;
- case new_multi_array_id:
+ case C1StubId::new_multi_array_id:
{
StubFrame f(sasm, "new_multi_array", dont_gc_arguments);
// x10: klass
@@ -785,7 +785,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
}
break;
- case register_finalizer_id:
+ case C1StubId::register_finalizer_id:
{
__ set_info("register_finalizer", dont_gc_arguments);
@@ -819,14 +819,14 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
}
break;
- case throw_class_cast_exception_id:
+ case C1StubId::throw_class_cast_exception_id:
{
StubFrame f(sasm, "throw_class_cast_exception", dont_gc_arguments, does_not_return);
oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_class_cast_exception), true);
}
break;
- case throw_incompatible_class_change_error_id:
+ case C1StubId::throw_incompatible_class_change_error_id:
{
StubFrame f(sasm, "throw_incompatible_class_cast_exception", dont_gc_arguments, does_not_return);
oop_maps = generate_exception_throw(sasm,
@@ -834,7 +834,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
}
break;
- case slow_subtype_check_id:
+ case C1StubId::slow_subtype_check_id:
{
// Typical calling sequence:
// push klass_RInfo (object klass or other subclass)
@@ -874,10 +874,10 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
}
break;
- case monitorenter_nofpu_id:
+ case C1StubId::monitorenter_nofpu_id:
save_fpu_registers = false;
// fall through
- case monitorenter_id:
+ case C1StubId::monitorenter_id:
{
StubFrame f(sasm, "monitorenter", dont_gc_arguments);
OopMap* map = save_live_registers(sasm, save_fpu_registers);
@@ -896,10 +896,10 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
}
break;
- case monitorexit_nofpu_id:
+ case C1StubId::monitorexit_nofpu_id:
save_fpu_registers = false;
// fall through
- case monitorexit_id:
+ case C1StubId::monitorexit_id:
{
StubFrame f(sasm, "monitorexit", dont_gc_arguments);
OopMap* map = save_live_registers(sasm, save_fpu_registers);
@@ -920,7 +920,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
}
break;
- case deoptimize_id:
+ case C1StubId::deoptimize_id:
{
StubFrame f(sasm, "deoptimize", dont_gc_arguments, does_not_return);
OopMap* oop_map = save_live_registers(sasm);
@@ -939,14 +939,14 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
}
break;
- case throw_range_check_failed_id:
+ case C1StubId::throw_range_check_failed_id:
{
StubFrame f(sasm, "range_check_failed", dont_gc_arguments, does_not_return);
oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_range_check_exception), true);
}
break;
- case unwind_exception_id:
+ case C1StubId::unwind_exception_id:
{
__ set_info("unwind_exception", dont_gc_arguments);
// note: no stubframe since we are about to leave the current
@@ -955,7 +955,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
}
break;
- case access_field_patching_id:
+ case C1StubId::access_field_patching_id:
{
StubFrame f(sasm, "access_field_patching", dont_gc_arguments, does_not_return);
// we should set up register map
@@ -963,7 +963,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
}
break;
- case load_klass_patching_id:
+ case C1StubId::load_klass_patching_id:
{
StubFrame f(sasm, "load_klass_patching", dont_gc_arguments, does_not_return);
// we should set up register map
@@ -971,7 +971,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
}
break;
- case load_mirror_patching_id:
+ case C1StubId::load_mirror_patching_id:
{
StubFrame f(sasm, "load_mirror_patching", dont_gc_arguments, does_not_return);
// we should set up register map
@@ -979,7 +979,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
}
break;
- case load_appendix_patching_id:
+ case C1StubId::load_appendix_patching_id:
{
StubFrame f(sasm, "load_appendix_patching", dont_gc_arguments, does_not_return);
// we should set up register map
@@ -987,29 +987,29 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
}
break;
- case handle_exception_nofpu_id:
- case handle_exception_id:
+ case C1StubId::handle_exception_nofpu_id:
+ case C1StubId::handle_exception_id:
{
StubFrame f(sasm, "handle_exception", dont_gc_arguments);
oop_maps = generate_handle_exception(id, sasm);
}
break;
- case handle_exception_from_callee_id:
+ case C1StubId::handle_exception_from_callee_id:
{
StubFrame f(sasm, "handle_exception_from_callee", dont_gc_arguments);
oop_maps = generate_handle_exception(id, sasm);
}
break;
- case throw_index_exception_id:
+ case C1StubId::throw_index_exception_id:
{
StubFrame f(sasm, "index_range_check_failed", dont_gc_arguments, does_not_return);
oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_index_exception), true);
}
break;
- case throw_array_store_exception_id:
+ case C1StubId::throw_array_store_exception_id:
{
StubFrame f(sasm, "throw_array_store_exception", dont_gc_arguments, does_not_return);
// tos + 0: link
@@ -1018,7 +1018,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
}
break;
- case predicate_failed_trap_id:
+ case C1StubId::predicate_failed_trap_id:
{
StubFrame f(sasm, "predicate_failed_trap", dont_gc_arguments, does_not_return);
@@ -1038,7 +1038,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
}
break;
- case dtrace_object_alloc_id:
+ case C1StubId::dtrace_object_alloc_id:
{ // c_rarg0: object
StubFrame f(sasm, "dtrace_object_alloc", dont_gc_arguments);
save_live_registers(sasm);
diff --git a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp
index 1e3a8bde064b3..75f87e35adf41 100644
--- a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp
+++ b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp
@@ -165,6 +165,7 @@ void C2_MacroAssembler::fast_unlock(Register objectReg, Register boxReg,
Register oop = objectReg;
Register box = boxReg;
Register disp_hdr = tmp1Reg;
+ Register owner_addr = tmp1Reg;
Register tmp = tmp2Reg;
Label object_has_monitor;
// Finish fast lock successfully. MUST branch to with flag == 0
@@ -222,15 +223,33 @@ void C2_MacroAssembler::fast_unlock(Register objectReg, Register boxReg,
j(unlocked);
bind(notRecursive);
- ld(t0, Address(tmp, ObjectMonitor::EntryList_offset()));
- ld(disp_hdr, Address(tmp, ObjectMonitor::cxq_offset()));
- orr(t0, t0, disp_hdr); // Will be 0 if both are 0.
- bnez(t0, slow_path);
+ // Compute owner address.
+ la(owner_addr, Address(tmp, ObjectMonitor::owner_offset()));
- // need a release store here
- la(tmp, Address(tmp, ObjectMonitor::owner_offset()));
+ // Set owner to null.
+ // Release to satisfy the JMM
membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore);
- sd(zr, Address(tmp)); // set unowned
+ sd(zr, Address(owner_addr));
+ // We need a full fence after clearing owner to avoid stranding.
+ // StoreLoad achieves this.
+ membar(StoreLoad);
+
+ // Check if the entry lists are empty.
+ ld(t0, Address(tmp, ObjectMonitor::EntryList_offset()));
+ ld(tmp1Reg, Address(tmp, ObjectMonitor::cxq_offset()));
+ orr(t0, t0, tmp1Reg);
+ beqz(t0, unlocked); // If so we are done.
+
+ // Check if there is a successor.
+ ld(t0, Address(tmp, ObjectMonitor::succ_offset()));
+ bnez(t0, unlocked); // If so we are done.
+
+ // Save the monitor pointer in the current thread, so we can try to
+ // reacquire the lock in SharedRuntime::monitor_exit_helper().
+ sd(tmp, Address(xthread, JavaThread::unlocked_inflated_monitor_offset()));
+
+ mv(flag, 1);
+ j(slow_path);
bind(unlocked);
mv(flag, zr);
@@ -534,28 +553,35 @@ void C2_MacroAssembler::fast_unlock_lightweight(Register obj, Register box,
bind(not_recursive);
- Label release;
const Register tmp2_owner_addr = tmp2;
// Compute owner address.
la(tmp2_owner_addr, Address(tmp1_monitor, ObjectMonitor::owner_offset()));
+ // Set owner to null.
+ // Release to satisfy the JMM
+ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore);
+ sd(zr, Address(tmp2_owner_addr));
+ // We need a full fence after clearing owner to avoid stranding.
+ // StoreLoad achieves this.
+ membar(StoreLoad);
+
// Check if the entry lists are empty.
ld(t0, Address(tmp1_monitor, ObjectMonitor::EntryList_offset()));
ld(tmp3_t, Address(tmp1_monitor, ObjectMonitor::cxq_offset()));
orr(t0, t0, tmp3_t);
- beqz(t0, release);
+ beqz(t0, unlocked); // If so we are done.
- // The owner may be anonymous and we removed the last obj entry in
- // the lock-stack. This loses the information about the owner.
- // Write the thread to the owner field so the runtime knows the owner.
- sd(xthread, Address(tmp2_owner_addr));
- j(slow_path);
+ // Check if there is a successor.
+ ld(tmp3_t, Address(tmp1_monitor, ObjectMonitor::succ_offset()));
+ bnez(tmp3_t, unlocked); // If so we are done.
- bind(release);
- // Set owner to null.
- membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore);
- sd(zr, Address(tmp2_owner_addr));
+ // Save the monitor pointer in the current thread, so we can try
+ // to reacquire the lock in SharedRuntime::monitor_exit_helper().
+ sd(tmp1_monitor, Address(xthread, JavaThread::unlocked_inflated_monitor_offset()));
+
+ mv(flag, 1);
+ j(slow_path);
}
bind(unlocked);
@@ -2385,6 +2411,74 @@ void C2_MacroAssembler::expand_bits_l_v(Register dst, Register src, Register mas
expand_bits_v(dst, src, mask, /* is_long */ true);
}
+// j.l.Math.round(float)
+// Returns the closest int to the argument, with ties rounding to positive infinity.
+// We need to handle 3 special cases defined by java api spec:
+// NaN,
+// float >= Integer.MAX_VALUE,
+// float <= Integer.MIN_VALUE.
+void C2_MacroAssembler::java_round_float_v(VectorRegister dst, VectorRegister src, FloatRegister ftmp,
+ BasicType bt, uint vector_length) {
+ // On riscv there is no rounding mode that directly matches the behaviour defined by the
+ // Java API spec, i.e. every rounding mode mishandles some corner case, e.g.
+ // RNE is the closest one, but it ties to "even", so 1.5 and 2.5 are both converted
+ // to 2, instead of 2 and 3 respectively.
+ // RUP does not work either: although the Java API requires "rounding to positive infinity",
+ // 1.3 and 1.8 would both be converted to 2, instead of 1 and 2 respectively.
+ //
+ // The optimal solution for non-NaN cases is:
+ // src+0.5 => dst, with rdn rounding mode,
+ // convert dst from float to int, with rdn rounding mode.
+ // This solution also works as expected for float >= Integer.MAX_VALUE and float <= Integer.MIN_VALUE.
+ //
+ // But we still need to handle NaN explicitly with vector mask instructions.
+ //
+ // Check MacroAssembler::java_round_float and C2_MacroAssembler::vector_round_sve in aarch64 for more details.
+
+ csrwi(CSR_FRM, C2_MacroAssembler::rdn);
+ vsetvli_helper(bt, vector_length);
+
+ // don't rearrange the instruction sequence without performance testing.
+ // check MacroAssembler::java_round_float in riscv64 for more details.
+ mv(t0, jint_cast(0.5f));
+ fmv_w_x(ftmp, t0);
+
+ // replacing vfclass with feq as performance optimization
+ vmfeq_vv(v0, src, src);
+ // set dst = 0 in cases of NaN
+ vmv_v_x(dst, zr);
+
+ // dst = (src + 0.5) rounded down towards negative infinity
+ vfadd_vf(dst, src, ftmp, Assembler::v0_t);
+ vfcvt_x_f_v(dst, dst, Assembler::v0_t); // in RoundingMode::rdn
+
+ csrwi(CSR_FRM, C2_MacroAssembler::rne);
+}
+
+// java.lang.Math.round(double a)
+// Returns the closest long to the argument, with ties rounding to positive infinity.
+void C2_MacroAssembler::java_round_double_v(VectorRegister dst, VectorRegister src, FloatRegister ftmp,
+ BasicType bt, uint vector_length) {
+ // check C2_MacroAssembler::java_round_float_v above for more details.
+
+ csrwi(CSR_FRM, C2_MacroAssembler::rdn);
+ vsetvli_helper(bt, vector_length);
+
+ mv(t0, julong_cast(0.5));
+ fmv_d_x(ftmp, t0);
+
+ // replacing vfclass with feq as performance optimization
+ vmfeq_vv(v0, src, src);
+ // set dst = 0 in cases of NaN
+ vmv_v_x(dst, zr);
+
+ // dst = (src + 0.5) rounded down towards negative infinity
+ vfadd_vf(dst, src, ftmp, Assembler::v0_t);
+ vfcvt_x_f_v(dst, dst, Assembler::v0_t); // in RoundingMode::rdn
+
+ csrwi(CSR_FRM, C2_MacroAssembler::rne);
+}
+
void C2_MacroAssembler::element_compare(Register a1, Register a2, Register result, Register cnt, Register tmp1, Register tmp2,
VectorRegister vr1, VectorRegister vr2, VectorRegister vrs, bool islatin, Label &DONE,
Assembler::LMUL lmul) {
diff --git a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp
index 4d7f756923c24..38351565cc626 100644
--- a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp
+++ b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp
@@ -187,6 +187,9 @@
void expand_bits_i_v(Register dst, Register src, Register mask);
void expand_bits_l_v(Register dst, Register src, Register mask);
+ void java_round_float_v(VectorRegister dst, VectorRegister src, FloatRegister ftmp, BasicType bt, uint vector_length);
+ void java_round_double_v(VectorRegister dst, VectorRegister src, FloatRegister ftmp, BasicType bt, uint vector_length);
+
void float16_to_float_v(VectorRegister dst, VectorRegister src, uint vector_length);
void float_to_float16_v(VectorRegister dst, VectorRegister src, VectorRegister vtmp, Register tmp, uint vector_length);
diff --git a/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp
index 062f80290626f..7036c44d99dc9 100644
--- a/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp
+++ b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp
@@ -1,6 +1,6 @@
/*
* Copyright (c) 2018, 2023, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved.
+ * Copyright (c) 2020, 2024, Huawei Technologies Co., Ltd. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -39,7 +39,10 @@
#include "c1/c1_LIRAssembler.hpp"
#include "c1/c1_MacroAssembler.hpp"
#include "gc/g1/c1/g1BarrierSetC1.hpp"
-#endif
+#endif // COMPILER1
+#ifdef COMPILER2
+#include "gc/g1/c2/g1BarrierSetC2.hpp"
+#endif // COMPILER2
#define __ masm->
@@ -96,6 +99,55 @@ void G1BarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* mas
__ pop_reg(saved_regs, sp);
}
+static void generate_queue_test_and_insertion(MacroAssembler* masm, ByteSize index_offset, ByteSize buffer_offset, Label& runtime,
+ const Register thread, const Register value, const Register tmp1, const Register tmp2) {
+ // Can we store a value in the given thread's buffer?
+ // (The index field is typed as size_t.)
+ __ ld(tmp1, Address(thread, in_bytes(index_offset))); // tmp1 := *(index address)
+ __ beqz(tmp1, runtime); // jump to runtime if index == 0 (full buffer)
+ // The buffer is not full, store value into it.
+ __ sub(tmp1, tmp1, wordSize); // tmp1 := next index
+ __ sd(tmp1, Address(thread, in_bytes(index_offset))); // *(index address) := next index
+ __ ld(tmp2, Address(thread, in_bytes(buffer_offset))); // tmp2 := buffer address
+ __ add(tmp2, tmp2, tmp1);
+ __ sd(value, Address(tmp2)); // *(buffer address + next index) := value
+}
+
+static void generate_pre_barrier_fast_path(MacroAssembler* masm,
+ const Register thread,
+ const Register tmp1) {
+ Address in_progress(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset()));
+ // Is marking active?
+ if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) {
+ __ lwu(tmp1, in_progress);
+ } else {
+ assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption");
+ __ lbu(tmp1, in_progress);
+ }
+}
+
+static void generate_pre_barrier_slow_path(MacroAssembler* masm,
+ const Register obj,
+ const Register pre_val,
+ const Register thread,
+ const Register tmp1,
+ const Register tmp2,
+ Label& done,
+ Label& runtime) {
+ // Do we need to load the previous value?
+ if (obj != noreg) {
+ __ load_heap_oop(pre_val, Address(obj, 0), noreg, noreg, AS_RAW);
+ }
+ // Is the previous value null?
+ __ beqz(pre_val, done, true);
+ generate_queue_test_and_insertion(masm,
+ G1ThreadLocalData::satb_mark_queue_index_offset(),
+ G1ThreadLocalData::satb_mark_queue_buffer_offset(),
+ runtime,
+ thread, pre_val, tmp1, tmp2);
+ __ j(done);
+}
+
void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm,
Register obj,
Register pre_val,
@@ -116,43 +168,10 @@ void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm,
assert_different_registers(obj, pre_val, tmp1, tmp2);
assert(pre_val != noreg && tmp1 != noreg && tmp2 != noreg, "expecting a register");
- Address in_progress(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset()));
- Address index(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset()));
- Address buffer(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset()));
-
- // Is marking active?
- if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { // 4-byte width
- __ lwu(tmp1, in_progress);
- } else {
- assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption");
- __ lbu(tmp1, in_progress);
- }
+ generate_pre_barrier_fast_path(masm, thread, tmp1);
+ // If marking is not active (*(mark queue active address) == 0), jump to done
__ beqz(tmp1, done);
-
- // Do we need to load the previous value?
- if (obj != noreg) {
- __ load_heap_oop(pre_val, Address(obj, 0), noreg, noreg, AS_RAW);
- }
-
- // Is the previous value null?
- __ beqz(pre_val, done);
-
- // Can we store original value in the thread's buffer?
- // Is index == 0?
- // (The index field is typed as size_t.)
-
- __ ld(tmp1, index); // tmp := *index_adr
- __ beqz(tmp1, runtime); // tmp == 0?
- // If yes, goto runtime
-
- __ sub(tmp1, tmp1, wordSize); // tmp := tmp - wordSize
- __ sd(tmp1, index); // *index_adr := tmp
- __ ld(tmp2, buffer);
- __ add(tmp1, tmp1, tmp2); // tmp := tmp + *buffer_adr
-
- // Record the previous value
- __ sd(pre_val, Address(tmp1, 0));
- __ j(done);
+ generate_pre_barrier_slow_path(masm, obj, pre_val, thread, tmp1, tmp2, done, runtime);
__ bind(runtime);
@@ -171,6 +190,49 @@ void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm,
}
+static void generate_post_barrier_fast_path(MacroAssembler* masm,
+ const Register store_addr,
+ const Register new_val,
+ const Register tmp1,
+ const Register tmp2,
+ Label& done,
+ bool new_val_may_be_null) {
+ // Does store cross heap regions?
+ __ xorr(tmp1, store_addr, new_val); // tmp1 := store address ^ new value
+ __ srli(tmp1, tmp1, G1HeapRegion::LogOfHRGrainBytes); // tmp1 := ((store address ^ new value) >> LogOfHRGrainBytes)
+ __ beqz(tmp1, done);
+ // Crosses regions, storing null?
+ if (new_val_may_be_null) {
+ __ beqz(new_val, done);
+ }
+ // Storing region crossing non-null, is card young?
+ __ srli(tmp1, store_addr, CardTable::card_shift()); // tmp1 := card address relative to card table base
+ __ load_byte_map_base(tmp2); // tmp2 := card table base address
+ __ add(tmp1, tmp1, tmp2); // tmp1 := card address
+ __ lbu(tmp2, Address(tmp1)); // tmp2 := card
+}
+
+static void generate_post_barrier_slow_path(MacroAssembler* masm,
+ const Register thread,
+ const Register tmp1,
+ const Register tmp2,
+ Label& done,
+ Label& runtime) {
+ __ membar(MacroAssembler::StoreLoad); // StoreLoad membar
+ __ lbu(tmp2, Address(tmp1)); // tmp2 := card
+ __ beqz(tmp2, done, true);
+ // Storing a region crossing, non-null oop, card is clean.
+ // Dirty card and log.
+ STATIC_ASSERT(CardTable::dirty_card_val() == 0);
+ __ sb(zr, Address(tmp1)); // *(card address) := dirty_card_val
+ generate_queue_test_and_insertion(masm,
+ G1ThreadLocalData::dirty_card_queue_index_offset(),
+ G1ThreadLocalData::dirty_card_queue_buffer_offset(),
+ runtime,
+ thread, tmp1, tmp2, t0);
+ __ j(done);
+}
+
void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm,
Register store_addr,
Register new_val,
@@ -179,73 +241,119 @@ void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm,
Register tmp2) {
assert(thread == xthread, "must be");
assert_different_registers(store_addr, new_val, thread, tmp1, tmp2, t0);
- assert(store_addr != noreg && new_val != noreg && tmp1 != noreg &&
- tmp2 != noreg, "expecting a register");
-
- Address queue_index(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset()));
- Address buffer(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset()));
-
- BarrierSet* bs = BarrierSet::barrier_set();
- CardTableBarrierSet* ctbs = barrier_set_cast<CardTableBarrierSet>(bs);
+ assert(store_addr != noreg && new_val != noreg && tmp1 != noreg && tmp2 != noreg,
+ "expecting a register");
Label done;
Label runtime;
- // Does store cross heap regions?
+ generate_post_barrier_fast_path(masm, store_addr, new_val, tmp1, tmp2, done, true /* new_val_may_be_null */);
+ // If card is young, jump to done (tmp2 holds the card value)
+ __ mv(t0, (int)G1CardTable::g1_young_card_val());
+ __ beq(tmp2, t0, done); // card == young_card_val?
+ generate_post_barrier_slow_path(masm, thread, tmp1, tmp2, done, runtime);
- __ xorr(tmp1, store_addr, new_val);
- __ srli(tmp1, tmp1, G1HeapRegion::LogOfHRGrainBytes);
- __ beqz(tmp1, done);
+ __ bind(runtime);
+ // save the live input values
+ RegSet saved = RegSet::of(store_addr);
+ __ push_reg(saved, sp);
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), tmp1, thread);
+ __ pop_reg(saved, sp);
- // crosses regions, storing null?
+ __ bind(done);
+}
- __ beqz(new_val, done);
+#if defined(COMPILER2)
- // storing region crossing non-null, is card already dirty?
+static void generate_c2_barrier_runtime_call(MacroAssembler* masm, G1BarrierStubC2* stub, const Register arg, const address runtime_path) {
+ SaveLiveRegisters save_registers(masm, stub);
+ if (c_rarg0 != arg) {
+ __ mv(c_rarg0, arg);
+ }
+ __ mv(c_rarg1, xthread);
+ __ mv(t0, runtime_path);
+ __ jalr(t0);
+}
- const Register card_addr = tmp1;
+void G1BarrierSetAssembler::g1_write_barrier_pre_c2(MacroAssembler* masm,
+ Register obj,
+ Register pre_val,
+ Register thread,
+ Register tmp1,
+ Register tmp2,
+ G1PreBarrierStubC2* stub) {
+ assert(thread == xthread, "must be");
+ assert_different_registers(obj, pre_val, tmp1, tmp2);
+ assert(pre_val != noreg && tmp1 != noreg && tmp2 != noreg, "expecting a register");
- __ srli(card_addr, store_addr, CardTable::card_shift());
+ stub->initialize_registers(obj, pre_val, thread, tmp1, tmp2);
- // get the address of the card
- __ load_byte_map_base(tmp2);
- __ add(card_addr, card_addr, tmp2);
- __ lbu(tmp2, Address(card_addr));
- __ mv(t0, (int)G1CardTable::g1_young_card_val());
- __ beq(tmp2, t0, done);
+ generate_pre_barrier_fast_path(masm, thread, tmp1);
+ // If marking is active (*(mark queue active address) != 0), jump to stub (slow path)
+ __ bnez(tmp1, *stub->entry(), true);
- assert((int)CardTable::dirty_card_val() == 0, "must be 0");
+ __ bind(*stub->continuation());
+}
- __ membar(MacroAssembler::StoreLoad);
+void G1BarrierSetAssembler::generate_c2_pre_barrier_stub(MacroAssembler* masm,
+ G1PreBarrierStubC2* stub) const {
+ Assembler::InlineSkippedInstructionsCounter skip_counter(masm);
+ Label runtime;
+ Register obj = stub->obj();
+ Register pre_val = stub->pre_val();
+ Register thread = stub->thread();
+ Register tmp1 = stub->tmp1();
+ Register tmp2 = stub->tmp2();
- __ lbu(tmp2, Address(card_addr));
- __ beqz(tmp2, done);
+ __ bind(*stub->entry());
+ generate_pre_barrier_slow_path(masm, obj, pre_val, thread, tmp1, tmp2, *stub->continuation(), runtime);
- // storing a region crossing, non-null oop, card is clean.
- // dirty card and log.
+ __ bind(runtime);
+ generate_c2_barrier_runtime_call(masm, stub, pre_val, CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry));
+ __ j(*stub->continuation());
+}
- __ sb(zr, Address(card_addr));
+void G1BarrierSetAssembler::g1_write_barrier_post_c2(MacroAssembler* masm,
+ Register store_addr,
+ Register new_val,
+ Register thread,
+ Register tmp1,
+ Register tmp2,
+ G1PostBarrierStubC2* stub) {
+ assert(thread == xthread, "must be");
+ assert_different_registers(store_addr, new_val, thread, tmp1, tmp2, t0);
+ assert(store_addr != noreg && new_val != noreg && tmp1 != noreg && tmp2 != noreg,
+ "expecting a register");
- __ ld(t0, queue_index);
- __ beqz(t0, runtime);
- __ sub(t0, t0, wordSize);
- __ sd(t0, queue_index);
+ stub->initialize_registers(thread, tmp1, tmp2);
- __ ld(tmp2, buffer);
- __ add(t0, tmp2, t0);
- __ sd(card_addr, Address(t0, 0));
- __ j(done);
+ bool new_val_may_be_null = (stub->barrier_data() & G1C2BarrierPostNotNull) == 0;
+ generate_post_barrier_fast_path(masm, store_addr, new_val, tmp1, tmp2, *stub->continuation(), new_val_may_be_null);
+ // If card is not young, jump to stub (slow path) (tmp2 holds the card value)
+ __ mv(t0, (int)G1CardTable::g1_young_card_val());
+ __ bne(tmp2, t0, *stub->entry(), true);
- __ bind(runtime);
- // save the live input values
- RegSet saved = RegSet::of(store_addr);
- __ push_reg(saved, sp);
- __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), card_addr, thread);
- __ pop_reg(saved, sp);
+ __ bind(*stub->continuation());
+}
- __ bind(done);
+void G1BarrierSetAssembler::generate_c2_post_barrier_stub(MacroAssembler* masm,
+ G1PostBarrierStubC2* stub) const {
+ Assembler::InlineSkippedInstructionsCounter skip_counter(masm);
+ Label runtime;
+ Register thread = stub->thread();
+ Register tmp1 = stub->tmp1(); // tmp1 holds the card address.
+ Register tmp2 = stub->tmp2();
+
+ __ bind(*stub->entry());
+ generate_post_barrier_slow_path(masm, thread, tmp1, tmp2, *stub->continuation(), runtime);
+
+ __ bind(runtime);
+ generate_c2_barrier_runtime_call(masm, stub, tmp1, CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry));
+ __ j(*stub->continuation());
}
+#endif // COMPILER2
+
void G1BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
Register dst, Address src, Register tmp1, Register tmp2) {
bool on_oop = is_reference_type(type);
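The refactoring above splits the G1 post-write barrier into an inline fast path and an out-of-line slow path. For orientation, here is a hedged scalar C++ sketch of the logic those helpers emit (region-crossing check, null check, young-card check, then dirty-and-enqueue); all names and constants below are placeholders, not HotSpot APIs:

```
#include <cstdint>

// Placeholder constants (assumptions), standing in for G1HeapRegion/CardTable values.
static const int     kLogRegionSize = 21;   // e.g. 2 MiB heap regions
static const int     kCardShift     = 9;    // e.g. 512-byte cards
static const uint8_t kYoungCard = 2, kDirtyCard = 0;
static uint8_t*      g_card_table_base = nullptr;

static void enqueue_dirty_card(uint8_t* /*card*/) {
  // push the card address on the thread-local dirty card queue,
  // or call into the runtime (write_ref_field_post_entry) when the queue is full
}

static void g1_post_barrier(uintptr_t store_addr, uintptr_t new_val) {
  if (((store_addr ^ new_val) >> kLogRegionSize) == 0) return;  // same region
  if (new_val == 0) return;                                     // storing null
  uint8_t* card = g_card_table_base + (store_addr >> kCardShift);
  if (*card == kYoungCard) return;                              // young card: nothing to do
  // the generated code issues a StoreLoad membar here and re-reads the card
  if (*card == kDirtyCard) return;                              // already dirty
  *card = kDirtyCard;                                           // dirty the card and log it
  enqueue_dirty_card(card);
}
```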
diff --git a/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.hpp b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.hpp
index 96568994079dd..c7bee2ef6f3a8 100644
--- a/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.hpp
+++ b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.hpp
@@ -1,6 +1,6 @@
/*
* Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved.
+ * Copyright (c) 2020, 2024, Huawei Technologies Co., Ltd. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -36,6 +36,8 @@ class LIR_Assembler;
class StubAssembler;
class G1PreBarrierStub;
class G1PostBarrierStub;
+class G1PreBarrierStubC2;
+class G1PostBarrierStubC2;
class G1BarrierSetAssembler: public ModRefBarrierSetAssembler {
protected:
@@ -72,6 +74,27 @@ class G1BarrierSetAssembler: public ModRefBarrierSetAssembler {
void generate_c1_post_barrier_runtime_stub(StubAssembler* sasm);
#endif
+#ifdef COMPILER2
+ void g1_write_barrier_pre_c2(MacroAssembler* masm,
+ Register obj,
+ Register pre_val,
+ Register thread,
+ Register tmp1,
+ Register tmp2,
+ G1PreBarrierStubC2* c2_stub);
+ void generate_c2_pre_barrier_stub(MacroAssembler* masm,
+ G1PreBarrierStubC2* stub) const;
+ void g1_write_barrier_post_c2(MacroAssembler* masm,
+ Register store_addr,
+ Register new_val,
+ Register thread,
+ Register tmp1,
+ Register tmp2,
+ G1PostBarrierStubC2* c2_stub);
+ void generate_c2_post_barrier_stub(MacroAssembler* masm,
+ G1PostBarrierStubC2* stub) const;
+#endif
+
void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
Register dst, Address src, Register tmp1, Register tmp2);
};
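The header now also declares C2-specific pre-barrier entry points. The SATB pre-write barrier they emit follows the usual shape: when concurrent marking is active, the value about to be overwritten is recorded in a thread-local queue before the store. A minimal sketch of that logic, with placeholder types rather than the real G1ThreadLocalData layout:

```
#include <cstdint>
#include <cstddef>

// Placeholder thread-local state (assumption), standing in for G1ThreadLocalData.
struct SatbThreadLocal {
  uint8_t    marking_active;  // *(mark queue active address)
  size_t     index;           // byte index into the buffer; 0 means the buffer is full
  uintptr_t* buffer;          // SATB mark queue buffer
};

static void runtime_write_ref_field_pre(uintptr_t /*pre_val*/) {
  // slow path: call into the runtime (write_ref_field_pre_entry)
}

static void g1_pre_barrier(SatbThreadLocal* tls, const uintptr_t* field, uintptr_t known_pre_val) {
  if (tls->marking_active == 0) return;                   // fast path: marking not active
  uintptr_t pre_val = (field != nullptr) ? *field         // load the previous value,
                                         : known_pre_val; // or use the caller-supplied one
  if (pre_val == 0) return;                               // nothing to record
  if (tls->index != 0) {                                  // room left in the queue?
    tls->index -= sizeof(uintptr_t);
    tls->buffer[tls->index / sizeof(uintptr_t)] = pre_val;
  } else {
    runtime_write_ref_field_pre(pre_val);
  }
}
```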
diff --git a/src/hotspot/cpu/riscv/gc/g1/g1_riscv.ad b/src/hotspot/cpu/riscv/gc/g1/g1_riscv.ad
new file mode 100644
index 0000000000000..1dc5834dbdc89
--- /dev/null
+++ b/src/hotspot/cpu/riscv/gc/g1/g1_riscv.ad
@@ -0,0 +1,564 @@
+//
+// Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
+// Copyright (c) 2024, Huawei Technologies Co., Ltd. All rights reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+//
+// This code is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License version 2 only, as
+// published by the Free Software Foundation.
+//
+// This code is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+// version 2 for more details (a copy is included in the LICENSE file that
+// accompanied this code).
+//
+// You should have received a copy of the GNU General Public License version
+// 2 along with this work; if not, write to the Free Software Foundation,
+// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+//
+// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+// or visit www.oracle.com if you need additional information or have any
+// questions.
+//
+
+source_hpp %{
+
+#include "gc/g1/c2/g1BarrierSetC2.hpp"
+#include "gc/shared/gc_globals.hpp"
+
+%}
+
+source %{
+
+#include "gc/g1/g1BarrierSetAssembler_riscv.hpp"
+#include "gc/g1/g1BarrierSetRuntime.hpp"
+
+static void write_barrier_pre(MacroAssembler* masm,
+ const MachNode* node,
+ Register obj,
+ Register pre_val,
+ Register tmp1,
+ Register tmp2,
+ RegSet preserve = RegSet(),
+ RegSet no_preserve = RegSet()) {
+ if (!G1PreBarrierStubC2::needs_barrier(node)) {
+ return;
+ }
+ Assembler::InlineSkippedInstructionsCounter skip_counter(masm);
+ G1BarrierSetAssembler* g1_asm = static_cast<G1BarrierSetAssembler*>(BarrierSet::barrier_set()->barrier_set_assembler());
+ G1PreBarrierStubC2* const stub = G1PreBarrierStubC2::create(node);
+ for (RegSetIterator<Register> reg = preserve.begin(); *reg != noreg; ++reg) {
+ stub->preserve(*reg);
+ }
+ for (RegSetIterator<Register> reg = no_preserve.begin(); *reg != noreg; ++reg) {
+ stub->dont_preserve(*reg);
+ }
+ g1_asm->g1_write_barrier_pre_c2(masm, obj, pre_val, xthread, tmp1, tmp2, stub);
+}
+
+static void write_barrier_post(MacroAssembler* masm,
+ const MachNode* node,
+ Register store_addr,
+ Register new_val,
+ Register tmp1,
+ Register tmp2) {
+ if (!G1PostBarrierStubC2::needs_barrier(node)) {
+ return;
+ }
+ Assembler::InlineSkippedInstructionsCounter skip_counter(masm);
+ G1BarrierSetAssembler* g1_asm = static_cast<G1BarrierSetAssembler*>(BarrierSet::barrier_set()->barrier_set_assembler());
+ G1PostBarrierStubC2* const stub = G1PostBarrierStubC2::create(node);
+ g1_asm->g1_write_barrier_post_c2(masm, store_addr, new_val, xthread, tmp1, tmp2, stub);
+}
+
+%}
+
+instruct g1StoreP(indirect mem, iRegP src, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp tmp3)
+%{
+ predicate(UseG1GC && n->as_Store()->barrier_data() != 0);
+ match(Set mem (StoreP mem src));
+ effect(TEMP tmp1, TEMP tmp2, TEMP tmp3);
+ ins_cost(STORE_COST);
+ format %{ "sd $src, $mem\t# ptr" %}
+ ins_encode %{
+ guarantee($mem$$disp == 0, "impossible encoding");
+ write_barrier_pre(masm, this,
+ $mem$$Register /* obj */,
+ $tmp1$$Register /* pre_val */,
+ $tmp2$$Register /* tmp1 */,
+ $tmp3$$Register /* tmp2 */,
+ RegSet::of($mem$$Register, $src$$Register) /* preserve */);
+ __ sd($src$$Register, Address($mem$$Register));
+ write_barrier_post(masm, this,
+ $mem$$Register /* store_addr */,
+ $src$$Register /* new_val */,
+ $tmp2$$Register /* tmp1 */,
+ $tmp3$$Register /* tmp2 */);
+ %}
+ ins_pipe(istore_reg_mem);
+%}
+
+instruct g1StoreN(indirect mem, iRegN src, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp tmp3)
+%{
+ predicate(UseG1GC && n->as_Store()->barrier_data() != 0);
+ match(Set mem (StoreN mem src));
+ effect(TEMP tmp1, TEMP tmp2, TEMP tmp3);
+ ins_cost(STORE_COST);
+ format %{ "sw $src, $mem\t# compressed ptr" %}
+ ins_encode %{
+ guarantee($mem$$disp == 0, "impossible encoding");
+ write_barrier_pre(masm, this,
+ $mem$$Register /* obj */,
+ $tmp1$$Register /* pre_val */,
+ $tmp2$$Register /* tmp1 */,
+ $tmp3$$Register /* tmp2 */,
+ RegSet::of($mem$$Register, $src$$Register) /* preserve */);
+ __ sw($src$$Register, Address($mem$$Register));
+ if ((barrier_data() & G1C2BarrierPost) != 0) {
+ if ((barrier_data() & G1C2BarrierPostNotNull) == 0) {
+ __ decode_heap_oop($tmp1$$Register, $src$$Register);
+ } else {
+ __ decode_heap_oop_not_null($tmp1$$Register, $src$$Register);
+ }
+ }
+ write_barrier_post(masm, this,
+ $mem$$Register /* store_addr */,
+ $tmp1$$Register /* new_val */,
+ $tmp2$$Register /* tmp1 */,
+ $tmp3$$Register /* tmp2 */);
+ %}
+ ins_pipe(istore_reg_mem);
+%}
+
+instruct g1EncodePAndStoreN(indirect mem, iRegP src, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp tmp3)
+%{
+ predicate(UseG1GC && n->as_Store()->barrier_data() != 0);
+ match(Set mem (StoreN mem (EncodeP src)));
+ effect(TEMP tmp1, TEMP tmp2, TEMP tmp3);
+ ins_cost(STORE_COST);
+ format %{ "encode_heap_oop $tmp1, $src\n\t"
+ "sw $tmp1, $mem\t# compressed ptr" %}
+ ins_encode %{
+ guarantee($mem$$disp == 0, "impossible encoding");
+ write_barrier_pre(masm, this,
+ $mem$$Register /* obj */,
+ $tmp1$$Register /* pre_val */,
+ $tmp2$$Register /* tmp1 */,
+ $tmp3$$Register /* tmp2 */,
+ RegSet::of($mem$$Register, $src$$Register) /* preserve */);
+ if ((barrier_data() & G1C2BarrierPostNotNull) == 0) {
+ __ encode_heap_oop($tmp1$$Register, $src$$Register);
+ } else {
+ __ encode_heap_oop_not_null($tmp1$$Register, $src$$Register);
+ }
+ __ sw($tmp1$$Register, Address($mem$$Register));
+ write_barrier_post(masm, this,
+ $mem$$Register /* store_addr */,
+ $src$$Register /* new_val */,
+ $tmp2$$Register /* tmp1 */,
+ $tmp3$$Register /* tmp2 */);
+ %}
+ ins_pipe(istore_reg_mem);
+%}
+
+instruct g1CompareAndExchangeP(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp1, iRegPNoSp tmp2)
+%{
+ predicate(UseG1GC && n->as_LoadStore()->barrier_data() != 0);
+ match(Set res (CompareAndExchangeP mem (Binary oldval newval)));
+ effect(TEMP res, TEMP tmp1, TEMP tmp2);
+ ins_cost(2 * VOLATILE_REF_COST);
+ format %{ "cmpxchg $res = $mem, $oldval, $newval\t# ptr" %}
+ ins_encode %{
+ guarantee($mem$$disp == 0, "impossible encoding");
+ assert_different_registers($oldval$$Register, $mem$$Register);
+ assert_different_registers($newval$$Register, $mem$$Register);
+ // Pass $oldval to the pre-barrier (instead of loading from $mem), because
+ // $oldval is the only value that can be overwritten.
+ // The same holds for g1CompareAndSwapP and its Acq variant.
+ write_barrier_pre(masm, this,
+ noreg /* obj */,
+ $oldval$$Register /* pre_val */,
+ $tmp1$$Register /* tmp1 */,
+ $tmp2$$Register /* tmp2 */,
+ RegSet::of($mem$$Register, $oldval$$Register, $newval$$Register) /* preserve */,
+ RegSet::of($res$$Register) /* no_preserve */);
+ __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::int64,
+ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register);
+ write_barrier_post(masm, this,
+ $mem$$Register /* store_addr */,
+ $newval$$Register /* new_val */,
+ $tmp1$$Register /* tmp1 */,
+ $tmp2$$Register /* tmp2 */);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct g1CompareAndExchangePAcq(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp1, iRegPNoSp tmp2)
+%{
+ predicate(UseG1GC && needs_acquiring_load_reserved(n) && n->as_LoadStore()->barrier_data() != 0);
+ match(Set res (CompareAndExchangeP mem (Binary oldval newval)));
+ effect(TEMP res, TEMP tmp1, TEMP tmp2);
+ ins_cost(VOLATILE_REF_COST);
+ format %{ "cmpxchg_acq $res = $mem, $oldval, $newval\t# ptr" %}
+ ins_encode %{
+ guarantee($mem$$disp == 0, "impossible encoding");
+ assert_different_registers($oldval$$Register, $mem$$Register);
+ assert_different_registers($newval$$Register, $mem$$Register);
+ // Pass $oldval to the pre-barrier (instead of loading from $mem), because
+ // $oldval is the only value that can be overwritten.
+ // The same holds for g1CompareAndSwapP and its Acq variant.
+ write_barrier_pre(masm, this,
+ noreg /* obj */,
+ $oldval$$Register /* pre_val */,
+ $tmp1$$Register /* tmp1 */,
+ $tmp2$$Register /* tmp2 */,
+ RegSet::of($mem$$Register, $oldval$$Register, $newval$$Register) /* preserve */,
+ RegSet::of($res$$Register) /* no_preserve */);
+ __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::int64,
+ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register);
+ write_barrier_post(masm, this,
+ $mem$$Register /* store_addr */,
+ $newval$$Register /* new_val */,
+ $tmp1$$Register /* tmp1 */,
+ $tmp2$$Register /* tmp2 */);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct g1CompareAndExchangeN(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp tmp3)
+%{
+ predicate(UseG1GC && n->as_LoadStore()->barrier_data() != 0);
+ match(Set res (CompareAndExchangeN mem (Binary oldval newval)));
+ effect(TEMP res, TEMP tmp1, TEMP tmp2, TEMP tmp3);
+ ins_cost(2 * VOLATILE_REF_COST);
+ format %{ "cmpxchg $res = $mem, $oldval, $newval\t# narrow oop" %}
+ ins_encode %{
+ guarantee($mem$$disp == 0, "impossible encoding");
+ assert_different_registers($oldval$$Register, $mem$$Register);
+ assert_different_registers($newval$$Register, $mem$$Register);
+ write_barrier_pre(masm, this,
+ $mem$$Register /* obj */,
+ $tmp1$$Register /* pre_val */,
+ $tmp2$$Register /* tmp1 */,
+ $tmp3$$Register /* tmp2 */,
+ RegSet::of($mem$$Register, $oldval$$Register, $newval$$Register) /* preserve */,
+ RegSet::of($res$$Register) /* no_preserve */);
+ __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::uint32,
+ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register);
+ __ decode_heap_oop($tmp1$$Register, $newval$$Register);
+ write_barrier_post(masm, this,
+ $mem$$Register /* store_addr */,
+ $tmp1$$Register /* new_val */,
+ $tmp2$$Register /* tmp1 */,
+ $tmp3$$Register /* tmp2 */);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct g1CompareAndExchangeNAcq(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp tmp3)
+%{
+ predicate(UseG1GC && needs_acquiring_load_reserved(n) && n->as_LoadStore()->barrier_data() != 0);
+ match(Set res (CompareAndExchangeN mem (Binary oldval newval)));
+ effect(TEMP res, TEMP tmp1, TEMP tmp2, TEMP tmp3);
+ ins_cost(VOLATILE_REF_COST);
+ format %{ "cmpxchg_acq $res = $mem, $oldval, $newval\t# narrow oop" %}
+ ins_encode %{
+ guarantee($mem$$disp == 0, "impossible encoding");
+ assert_different_registers($oldval$$Register, $mem$$Register);
+ assert_different_registers($newval$$Register, $mem$$Register);
+ write_barrier_pre(masm, this,
+ $mem$$Register /* obj */,
+ $tmp1$$Register /* pre_val */,
+ $tmp2$$Register /* tmp1 */,
+ $tmp3$$Register /* tmp2 */,
+ RegSet::of($mem$$Register, $oldval$$Register, $newval$$Register) /* preserve */,
+ RegSet::of($res$$Register) /* no_preserve */);
+ __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::uint32,
+ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register);
+ __ decode_heap_oop($tmp1$$Register, $newval$$Register);
+ write_barrier_post(masm, this,
+ $mem$$Register /* store_addr */,
+ $tmp1$$Register /* new_val */,
+ $tmp2$$Register /* tmp1 */,
+ $tmp3$$Register /* tmp2 */);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct g1CompareAndSwapP(iRegINoSp res, indirect mem, iRegP newval, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegP oldval)
+%{
+ predicate(UseG1GC && n->as_LoadStore()->barrier_data() != 0);
+ match(Set res (CompareAndSwapP mem (Binary oldval newval)));
+ match(Set res (WeakCompareAndSwapP mem (Binary oldval newval)));
+ effect(TEMP res, TEMP tmp1, TEMP tmp2);
+ ins_cost(2 * VOLATILE_REF_COST);
+ format %{ "cmpxchg $mem, $oldval, $newval\t# (ptr)\n\t"
+ "mv $res, $res == $oldval" %}
+ ins_encode %{
+ guarantee($mem$$disp == 0, "impossible encoding");
+ assert_different_registers($oldval$$Register, $mem$$Register);
+ assert_different_registers($newval$$Register, $mem$$Register);
+ write_barrier_pre(masm, this,
+ noreg /* obj */,
+ $oldval$$Register /* pre_val */,
+ $tmp1$$Register /* tmp1 */,
+ $tmp2$$Register /* tmp2 */,
+ RegSet::of($mem$$Register, $oldval$$Register, $newval$$Register) /* preserve */,
+ RegSet::of($res$$Register) /* no_preserve */);
+ __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::int64,
+ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register,
+ /*result as bool*/ true);
+ write_barrier_post(masm, this,
+ $mem$$Register /* store_addr */,
+ $newval$$Register /* new_val */,
+ $tmp1$$Register /* tmp1 */,
+ $tmp2$$Register /* tmp2 */);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct g1CompareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP newval, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegP oldval)
+%{
+ predicate(UseG1GC && needs_acquiring_load_reserved(n) && n->as_LoadStore()->barrier_data() != 0);
+ match(Set res (CompareAndSwapP mem (Binary oldval newval)));
+ match(Set res (WeakCompareAndSwapP mem (Binary oldval newval)));
+ effect(TEMP res, TEMP tmp1, TEMP tmp2);
+ ins_cost(VOLATILE_REF_COST);
+ format %{ "cmpxchg_acq $mem, $oldval, $newval\t# (ptr)\n\t"
+ "mv $res, $res == $oldval" %}
+ ins_encode %{
+ guarantee($mem$$disp == 0, "impossible encoding");
+ assert_different_registers($oldval$$Register, $mem$$Register);
+ assert_different_registers($newval$$Register, $mem$$Register);
+ write_barrier_pre(masm, this,
+ noreg /* obj */,
+ $oldval$$Register /* pre_val */,
+ $tmp1$$Register /* tmp1 */,
+ $tmp2$$Register /* tmp2 */,
+ RegSet::of($mem$$Register, $oldval$$Register, $newval$$Register) /* preserve */,
+ RegSet::of($res$$Register) /* no_preserve */);
+ __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::int64,
+ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register,
+ /*result as bool*/ true);
+ write_barrier_post(masm, this,
+ $mem$$Register /* store_addr */,
+ $newval$$Register /* new_val */,
+ $tmp1$$Register /* tmp1 */,
+ $tmp2$$Register /* tmp2 */);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct g1CompareAndSwapN(iRegINoSp res, indirect mem, iRegN newval, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp tmp3, iRegN oldval)
+%{
+ predicate(UseG1GC && n->as_LoadStore()->barrier_data() != 0);
+ match(Set res (CompareAndSwapN mem (Binary oldval newval)));
+ match(Set res (WeakCompareAndSwapN mem (Binary oldval newval)));
+ effect(TEMP res, TEMP tmp1, TEMP tmp2, TEMP tmp3);
+ ins_cost(2 * VOLATILE_REF_COST);
+ format %{ "cmpxchg $mem, $oldval, $newval\t# (narrow oop)\n\t"
+ "mv $res, $res == $oldval" %}
+ ins_encode %{
+ guarantee($mem$$disp == 0, "impossible encoding");
+ assert_different_registers($oldval$$Register, $mem$$Register);
+ assert_different_registers($newval$$Register, $mem$$Register);
+ write_barrier_pre(masm, this,
+ $mem$$Register /* obj */,
+ $tmp1$$Register /* pre_val */,
+ $tmp2$$Register /* tmp1 */,
+ $tmp3$$Register /* tmp2 */,
+ RegSet::of($mem$$Register, $oldval$$Register, $newval$$Register) /* preserve */,
+ RegSet::of($res$$Register) /* no_preserve */);
+ __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::uint32,
+ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register,
+ /*result as bool*/ true);
+ __ decode_heap_oop($tmp1$$Register, $newval$$Register);
+ write_barrier_post(masm, this,
+ $mem$$Register /* store_addr */,
+ $tmp1$$Register /* new_val */,
+ $tmp2$$Register /* tmp1 */,
+ $tmp3$$Register /* tmp2 */);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct g1CompareAndSwapNAcq(iRegINoSp res, indirect mem, iRegN newval, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp tmp3, iRegN oldval)
+%{
+ predicate(UseG1GC && needs_acquiring_load_reserved(n) && n->as_LoadStore()->barrier_data() != 0);
+ match(Set res (CompareAndSwapN mem (Binary oldval newval)));
+ match(Set res (WeakCompareAndSwapN mem (Binary oldval newval)));
+ effect(TEMP res, TEMP tmp1, TEMP tmp2, TEMP tmp3);
+ ins_cost(VOLATILE_REF_COST);
+ format %{ "cmpxchg_acq $mem, $oldval, $newval\t# (narrow oop)\n\t"
+ "mv $res, $res == $oldval" %}
+ ins_encode %{
+ guarantee($mem$$disp == 0, "impossible encoding");
+ assert_different_registers($oldval$$Register, $mem$$Register);
+ assert_different_registers($newval$$Register, $mem$$Register);
+ write_barrier_pre(masm, this,
+ $mem$$Register /* obj */,
+ $tmp1$$Register /* pre_val */,
+ $tmp2$$Register /* tmp1 */,
+ $tmp3$$Register /* tmp2 */,
+ RegSet::of($mem$$Register, $oldval$$Register, $newval$$Register) /* preserve */,
+ RegSet::of($res$$Register) /* no_preserve */);
+ __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::uint32,
+ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register,
+ /*result as bool*/ true);
+ __ decode_heap_oop($tmp1$$Register, $newval$$Register);
+ write_barrier_post(masm, this,
+ $mem$$Register /* store_addr */,
+ $tmp1$$Register /* new_val */,
+ $tmp2$$Register /* tmp1 */,
+ $tmp3$$Register /* tmp2 */);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct g1GetAndSetP(indirect mem, iRegP newval, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp preval)
+%{
+ predicate(UseG1GC && n->as_LoadStore()->barrier_data() != 0);
+ match(Set preval (GetAndSetP mem newval));
+ effect(TEMP preval, TEMP tmp1, TEMP tmp2);
+ ins_cost(2 * VOLATILE_REF_COST);
+ format %{ "atomic_xchg $preval, $newval, [$mem]" %}
+ ins_encode %{
+ guarantee($mem$$disp == 0, "impossible encoding");
+ assert_different_registers($mem$$Register, $newval$$Register);
+ write_barrier_pre(masm, this,
+ $mem$$Register /* obj */,
+ $preval$$Register /* pre_val (as a temporary register) */,
+ $tmp1$$Register /* tmp1 */,
+ $tmp2$$Register /* tmp2 */,
+ RegSet::of($mem$$Register, $preval$$Register, $newval$$Register) /* preserve */);
+ __ atomic_xchg($preval$$Register, $newval$$Register, $mem$$Register);
+ write_barrier_post(masm, this,
+ $mem$$Register /* store_addr */,
+ $newval$$Register /* new_val */,
+ $tmp1$$Register /* tmp1 */,
+ $tmp2$$Register /* tmp2 */);
+ %}
+ ins_pipe(pipe_serial);
+%}
+
+instruct g1GetAndSetPAcq(indirect mem, iRegP newval, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp preval)
+%{
+ predicate(UseG1GC && needs_acquiring_load_reserved(n) && n->as_LoadStore()->barrier_data() != 0);
+ match(Set preval (GetAndSetP mem newval));
+ effect(TEMP preval, TEMP tmp1, TEMP tmp2);
+ ins_cost(VOLATILE_REF_COST);
+ format %{ "atomic_xchg_acq $preval, $newval, [$mem]" %}
+ ins_encode %{
+ guarantee($mem$$disp == 0, "impossible encoding");
+ assert_different_registers($mem$$Register, $newval$$Register);
+ write_barrier_pre(masm, this,
+ $mem$$Register /* obj */,
+ $preval$$Register /* pre_val (as a temporary register) */,
+ $tmp1$$Register /* tmp1 */,
+ $tmp2$$Register /* tmp2 */,
+ RegSet::of($mem$$Register, $preval$$Register, $newval$$Register) /* preserve */);
+ __ atomic_xchgal($preval$$Register, $newval$$Register, $mem$$Register);
+ write_barrier_post(masm, this,
+ $mem$$Register /* store_addr */,
+ $newval$$Register /* new_val */,
+ $tmp1$$Register /* tmp1 */,
+ $tmp2$$Register /* tmp2 */);
+ %}
+ ins_pipe(pipe_serial);
+%}
+
+instruct g1GetAndSetN(indirect mem, iRegN newval, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp tmp3, iRegNNoSp preval)
+%{
+ predicate(UseG1GC && n->as_LoadStore()->barrier_data() != 0);
+ match(Set preval (GetAndSetN mem newval));
+ effect(TEMP preval, TEMP tmp1, TEMP tmp2, TEMP tmp3);
+ ins_cost(2 * VOLATILE_REF_COST);
+ format %{ "atomic_xchgwu $preval, $newval, [$mem]" %}
+ ins_encode %{
+ guarantee($mem$$disp == 0, "impossible encoding");
+ assert_different_registers($mem$$Register, $newval$$Register);
+ write_barrier_pre(masm, this,
+ $mem$$Register /* obj */,
+ $tmp1$$Register /* pre_val */,
+ $tmp2$$Register /* tmp1 */,
+ $tmp3$$Register /* tmp2 */,
+ RegSet::of($mem$$Register, $preval$$Register, $newval$$Register) /* preserve */);
+ __ atomic_xchgwu($preval$$Register, $newval$$Register, $mem$$Register);
+ __ decode_heap_oop($tmp1$$Register, $newval$$Register);
+ write_barrier_post(masm, this,
+ $mem$$Register /* store_addr */,
+ $tmp1$$Register /* new_val */,
+ $tmp2$$Register /* tmp1 */,
+ $tmp3$$Register /* tmp2 */);
+ %}
+ ins_pipe(pipe_serial);
+%}
+
+instruct g1GetAndSetNAcq(indirect mem, iRegN newval, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp tmp3, iRegNNoSp preval)
+%{
+ predicate(UseG1GC && needs_acquiring_load_reserved(n) && n->as_LoadStore()->barrier_data() != 0);
+ match(Set preval (GetAndSetN mem newval));
+ effect(TEMP preval, TEMP tmp1, TEMP tmp2, TEMP tmp3);
+ ins_cost(VOLATILE_REF_COST);
+ format %{ "atomic_xchgwu_acq $preval, $newval, [$mem]" %}
+ ins_encode %{
+ guarantee($mem$$disp == 0, "impossible encoding");
+ assert_different_registers($mem$$Register, $newval$$Register);
+ write_barrier_pre(masm, this,
+ $mem$$Register /* obj */,
+ $tmp1$$Register /* pre_val */,
+ $tmp2$$Register /* tmp1 */,
+ $tmp3$$Register /* tmp2 */,
+ RegSet::of($mem$$Register, $preval$$Register, $newval$$Register) /* preserve */);
+ __ atomic_xchgalwu($preval$$Register, $newval$$Register, $mem$$Register);
+ __ decode_heap_oop($tmp1$$Register, $newval$$Register);
+ write_barrier_post(masm, this,
+ $mem$$Register /* store_addr */,
+ $tmp1$$Register /* new_val */,
+ $tmp2$$Register /* tmp1 */,
+ $tmp3$$Register /* tmp2 */);
+ %}
+ ins_pipe(pipe_serial);
+%}
+
+instruct g1LoadP(iRegPNoSp dst, indirect mem, iRegPNoSp tmp1, iRegPNoSp tmp2)
+%{
+ predicate(UseG1GC && n->as_Load()->barrier_data() != 0);
+ match(Set dst (LoadP mem));
+ effect(TEMP dst, TEMP tmp1, TEMP tmp2);
+ ins_cost(LOAD_COST + BRANCH_COST);
+ format %{ "ld $dst, $mem\t# ptr" %}
+ ins_encode %{
+ guarantee($mem$$disp == 0, "impossible encoding");
+ __ ld($dst$$Register, Address($mem$$Register));
+ write_barrier_pre(masm, this,
+ noreg /* obj */,
+ $dst$$Register /* pre_val */,
+ $tmp1$$Register /* tmp1 */,
+ $tmp2$$Register /* tmp2 */);
+ %}
+ ins_pipe(iload_reg_mem);
+%}
+
+instruct g1LoadN(iRegNNoSp dst, indirect mem, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp tmp3)
+%{
+ predicate(UseG1GC && n->as_Load()->barrier_data() != 0);
+ match(Set dst (LoadN mem));
+ effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3);
+ ins_cost(LOAD_COST + BRANCH_COST);
+ format %{ "lwu $dst, $mem\t# compressed ptr" %}
+ ins_encode %{
+ guarantee($mem$$disp == 0, "impossible encoding");
+ __ lwu($dst$$Register, Address($mem$$Register));
+ if ((barrier_data() & G1C2BarrierPre) != 0) {
+ __ decode_heap_oop($tmp1$$Register, $dst$$Register);
+ write_barrier_pre(masm, this,
+ noreg /* obj */,
+ $tmp1$$Register /* pre_val */,
+ $tmp2$$Register /* tmp1 */,
+ $tmp3$$Register /* tmp2 */);
+ }
+ %}
+ ins_pipe(iload_reg_mem);
+%}
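Several narrow-oop patterns in this new file decode the compressed value into a temporary before invoking write_barrier_post: the region-crossing test compares full heap addresses, so a compressed oop has to be expanded first. A small sketch of that decode step, assuming the usual base-plus-shift compressed-oops encoding (illustrative only, not the MacroAssembler API):

```
#include <cstdint>

// Assumed compressed-oops parameters; the real values come from the VM.
static uintptr_t g_heap_base    = 0;
static int       g_narrow_shift = 3;

// decode: a narrow value of 0 stays null, otherwise base + (narrow << shift).
static uintptr_t decode_narrow_oop(uint32_t narrow) {
  return narrow == 0 ? 0 : g_heap_base + (static_cast<uintptr_t>(narrow) << g_narrow_shift);
}

// g1StoreN-style sequence: store the compressed value, then run the post barrier
// on the decoded address so the region-crossing check is meaningful.
static void store_narrow_with_post_barrier(uint32_t* field, uint32_t narrow_val) {
  *field = narrow_val;
  uintptr_t new_val = decode_narrow_oop(narrow_val);
  (void)new_val;  // g1_post_barrier(reinterpret_cast<uintptr_t>(field), new_val)
}
```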
diff --git a/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp
index 9a79a92327723..cc73d14a756f2 100644
--- a/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp
+++ b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp
@@ -70,10 +70,10 @@ void ShenandoahBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, Dec
__ push_reg(saved_regs, sp);
if (UseCompressedOops) {
- __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::arraycopy_barrier_narrow_oop_entry),
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::arraycopy_barrier_narrow_oop),
src, dst, count);
} else {
- __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::arraycopy_barrier_oop_entry), src, dst, count);
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::arraycopy_barrier_oop), src, dst, count);
}
__ pop_reg(saved_regs, sp);
__ bind(done);
@@ -165,9 +165,9 @@ void ShenandoahBarrierSetAssembler::satb_write_barrier_pre(MacroAssembler* masm,
// expand_call should be passed true.
if (expand_call) {
assert(pre_val != c_rarg1, "smashed arg");
- __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), pre_val, thread);
+ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre), pre_val, thread);
} else {
- __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), pre_val, thread);
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre), pre_val, thread);
}
__ pop_reg(saved, sp);
@@ -645,7 +645,7 @@ void ShenandoahBarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAss
__ bind(runtime);
__ push_call_clobbered_registers();
__ load_parameter(0, pre_val);
- __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), pre_val, thread);
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre), pre_val, thread);
__ pop_call_clobbered_registers();
__ bind(done);
diff --git a/src/hotspot/cpu/riscv/gc/z/zAddress_riscv.cpp b/src/hotspot/cpu/riscv/gc/z/zAddress_riscv.cpp
index ef13676b02ed8..df111723d56b6 100644
--- a/src/hotspot/cpu/riscv/gc/z/zAddress_riscv.cpp
+++ b/src/hotspot/cpu/riscv/gc/z/zAddress_riscv.cpp
@@ -92,7 +92,7 @@ static size_t probe_valid_max_address_bit() {
}
size_t ZPlatformAddressOffsetBits() {
- const static size_t valid_max_address_offset_bits = probe_valid_max_address_bit() + 1;
+ static const size_t valid_max_address_offset_bits = probe_valid_max_address_bit() + 1;
const size_t max_address_offset_bits = valid_max_address_offset_bits - 3;
const size_t min_address_offset_bits = max_address_offset_bits - 2;
const size_t address_offset = round_up_power_of_2(MaxHeapSize * ZVirtualToPhysicalRatio);
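The `const static` to `static const` change above is purely an ordering cleanup; either spelling declares a function-local static, so the probe runs once on the first call and the result is reused afterwards. A tiny standalone illustration with hypothetical names:

```
#include <cstddef>

static size_t expensive_probe() { return 46; }  // stand-in for probe_valid_max_address_bit()

static size_t valid_max_offset_bits() {
  // Initialized exactly once, on the first call; later calls reuse the cached value.
  static const size_t bits = expensive_probe() + 1;
  return bits;
}
```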
diff --git a/src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.cpp
index 8fbeaa45371d1..cbb918ade00fe 100644
--- a/src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.cpp
+++ b/src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.cpp
@@ -636,8 +636,20 @@ void ZBarrierSetAssembler::patch_barrier_relocation(address addr, int format) {
ShouldNotReachHere();
}
- // A full fence is generated before icache_flush by default in invalidate_word
- ICache::invalidate_range(addr, bytes);
+ // If we are using UseCtxFencei no ICache invalidation is needed here.
+ // Instead every hart will perform a fence.i, either by a Java thread
+ // (the patching epoch will take it to the slow path),
+ // or by the kernel when a Java thread is moved to a hart.
+ // The instruction stream changes must only happen before the disarm of
+ // the nmethod barrier, where the disarm has a leading full two-way fence.
+ // If this is performed during a safepoint, all Java threads will emit a fence.i
+ // before transitioning to 'Java', e.g. leaving native or the safepoint wait barrier.
+ if (!UseCtxFencei) {
+ // ICache invalidation is a serialization point.
+ // The above patching of instructions happens before the invalidation.
+ // Hence it has a leading full two-way fence (wr, wr).
+ ICache::invalidate_range(addr, bytes);
+ }
}
#ifdef COMPILER2
diff --git a/src/hotspot/cpu/riscv/globals_riscv.hpp b/src/hotspot/cpu/riscv/globals_riscv.hpp
index c2585f2d1618d..dd31de14704ab 100644
--- a/src/hotspot/cpu/riscv/globals_riscv.hpp
+++ b/src/hotspot/cpu/riscv/globals_riscv.hpp
@@ -122,6 +122,8 @@ define_pd_global(intx, InlineSmallCode, 1000);
product(bool, UseRVVForBigIntegerShiftIntrinsics, true, \
"Use RVV instructions for left/right shift of BigInteger") \
product(bool, UseTrampolines, false, EXPERIMENTAL, \
- "Far calls uses jal to trampoline.")
+ "Far calls uses jal to trampoline.") \
+ product(bool, UseCtxFencei, false, EXPERIMENTAL, \
+ "Use PR_RISCV_CTX_SW_FENCEI_ON to avoid explicit icache flush")
#endif // CPU_RISCV_GLOBALS_RISCV_HPP
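The new UseCtxFencei flag assumes the kernel will execute fence.i for the process on context switch. On Linux that is requested via prctl; the sketch below assumes the PR_RISCV_SET_ICACHE_FLUSH_CTX interface with the PR_RISCV_CTX_SW_FENCEI_ON argument named in the flag description. Treat the constant names and their availability (recent kernels only) as assumptions rather than something this patch guarantees:

```
#include <sys/prctl.h>
#include <cstdio>

// Hypothetical sketch: ask the kernel to fence.i on context switch for this process.
static int enable_ctx_fencei() {
#if defined(PR_RISCV_SET_ICACHE_FLUSH_CTX) && defined(PR_RISCV_CTX_SW_FENCEI_ON) && defined(PR_RISCV_SCOPE_PER_PROCESS)
  if (prctl(PR_RISCV_SET_ICACHE_FLUSH_CTX, PR_RISCV_CTX_SW_FENCEI_ON, PR_RISCV_SCOPE_PER_PROCESS) != 0) {
    perror("prctl(PR_RISCV_SET_ICACHE_FLUSH_CTX)");
    return -1;  // not supported: keep using explicit icache flushes
  }
  return 0;
#else
  return -1;    // kernel headers too old for this interface
#endif
}
```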
diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp
index cbca980288984..b99ba542423a1 100644
--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp
+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp
@@ -1454,6 +1454,105 @@ void MacroAssembler::update_word_crc32(Register crc, Register v, Register tmp1,
xorr(crc, crc, tmp2);
}
+
+#ifdef COMPILER2
+// This improvement (vectorization) is based on java.base/share/native/libzip/zlib/zcrc32.c.
+// To derive it, the following steps were taken:
+// 1. in zcrc32.c, modify N to 16 and the related code,
+// 2. re-generate the tables needed; we use tables of (N == 16, W == 4),
+// 3. finally vectorize the code (the original implementation in zcrc32.c is scalar only).
+// The new tables for the vector version are placed after table3.
+void MacroAssembler::vector_update_crc32(Register crc, Register buf, Register len,
+ Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5,
+ Register table0, Register table3) {
+ assert_different_registers(t1, crc, buf, len, tmp1, tmp2, tmp3, tmp4, tmp5, table0, table3);
+ const int N = 16, W = 4;
+ const int64_t single_table_size = 256;
+ const Register blks = tmp2;
+ const Register tmpTable = tmp3, tableN16 = tmp4;
+ const VectorRegister vcrc = v4, vword = v8, vtmp = v12;
+ Label VectorLoop;
+ Label LastBlock;
+
+ add(tableN16, table3, 1*single_table_size*sizeof(juint), tmp1);
+ mv(tmp5, 0xff);
+
+ if (MaxVectorSize == 16) {
+ vsetivli(zr, N, Assembler::e32, Assembler::m4, Assembler::ma, Assembler::ta);
+ } else if (MaxVectorSize == 32) {
+ vsetivli(zr, N, Assembler::e32, Assembler::m2, Assembler::ma, Assembler::ta);
+ } else {
+ assert(MaxVectorSize > 32, "sanity");
+ vsetivli(zr, N, Assembler::e32, Assembler::m1, Assembler::ma, Assembler::ta);
+ }
+
+ vmv_v_x(vcrc, zr);
+ vmv_s_x(vcrc, crc);
+
+ // multiple of 64
+ srli(blks, len, 6);
+ slli(t1, blks, 6);
+ sub(len, len, t1);
+ sub(blks, blks, 1);
+ blez(blks, LastBlock);
+
+ bind(VectorLoop);
+ {
+ mv(tmpTable, tableN16);
+
+ vle32_v(vword, buf);
+ vxor_vv(vword, vword, vcrc);
+
+ addi(buf, buf, N*4);
+
+ vand_vx(vtmp, vword, tmp5);
+ vsll_vi(vtmp, vtmp, 2);
+ vluxei32_v(vcrc, tmpTable, vtmp);
+
+ mv(tmp1, 1);
+ for (int k = 1; k < W; k++) {
+ addi(tmpTable, tmpTable, single_table_size*4);
+
+ slli(t1, tmp1, 3);
+ vsrl_vx(vtmp, vword, t1);
+
+ vand_vx(vtmp, vtmp, tmp5);
+ vsll_vi(vtmp, vtmp, 2);
+ vluxei32_v(vtmp, tmpTable, vtmp);
+
+ vxor_vv(vcrc, vcrc, vtmp);
+
+ addi(tmp1, tmp1, 1);
+ }
+
+ sub(blks, blks, 1);
+ bgtz(blks, VectorLoop);
+ }
+
+ bind(LastBlock);
+ {
+ vle32_v(vtmp, buf);
+ vxor_vv(vcrc, vcrc, vtmp);
+ mv(crc, zr);
+ for (int i = 0; i < N; i++) {
+ vmv_x_s(tmp2, vcrc);
+ // in vmv_x_s, the value is sign-extended to SEW bits, but we need it zero-extended here.
+ zext_w(tmp2, tmp2);
+ vslidedown_vi(vcrc, vcrc, 1);
+ xorr(crc, crc, tmp2);
+ for (int j = 0; j < W; j++) {
+ andr(t1, crc, tmp5);
+ shadd(t1, t1, table0, tmp1, 2);
+ lwu(t1, Address(t1, 0));
+ srli(tmp2, crc, 8);
+ xorr(crc, tmp2, t1);
+ }
+ }
+ addi(buf, buf, N*4);
+ }
+}
+#endif // COMPILER2
+
/**
* @param crc register containing existing CRC (32-bit)
* @param buf register pointing to input byte buffer (byte*)
@@ -1465,33 +1564,41 @@ void MacroAssembler::kernel_crc32(Register crc, Register buf, Register len,
Register table0, Register table1, Register table2, Register table3,
Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5, Register tmp6) {
assert_different_registers(crc, buf, len, table0, table1, table2, table3, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6);
- Label L_by16_loop, L_unroll_loop, L_unroll_loop_entry, L_by4, L_by4_loop, L_by1, L_by1_loop, L_exit;
+ Label L_vector_entry,
+ L_unroll_loop,
+ L_by4_loop_entry, L_by4_loop,
+ L_by1_loop, L_exit;
+ const int64_t single_table_size = 256;
const int64_t unroll = 16;
const int64_t unroll_words = unroll*wordSize;
mv(tmp5, right_32_bits);
- subw(len, len, unroll_words);
andn(crc, tmp5, crc);
const ExternalAddress table_addr = StubRoutines::crc_table_addr();
la(table0, table_addr);
- add(table1, table0, 1*256*sizeof(juint), tmp1);
- add(table2, table0, 2*256*sizeof(juint), tmp1);
- add(table3, table2, 1*256*sizeof(juint), tmp1);
+ add(table1, table0, 1*single_table_size*sizeof(juint), tmp1);
+ add(table2, table0, 2*single_table_size*sizeof(juint), tmp1);
+ add(table3, table2, 1*single_table_size*sizeof(juint), tmp1);
- bge(len, zr, L_unroll_loop_entry);
- addiw(len, len, unroll_words-4);
- bge(len, zr, L_by4_loop);
- addiw(len, len, 4);
- bgt(len, zr, L_by1_loop);
- j(L_exit);
+#ifdef COMPILER2
+ if (UseRVV) {
+ const int64_t tmp_limit = MaxVectorSize >= 32 ? unroll_words*3 : unroll_words*5;
+ mv(tmp1, tmp_limit);
+ bge(len, tmp1, L_vector_entry);
+ }
+#endif // COMPILER2
+
+ mv(tmp1, unroll_words);
+ blt(len, tmp1, L_by4_loop_entry);
+
+ const Register loop_buf_end = tmp3;
align(CodeEntryAlignment);
- bind(L_unroll_loop_entry);
- const Register buf_end = tmp3;
- add(buf_end, buf, len); // buf_end will be used as endpoint for loop below
+ // Entry for L_unroll_loop
+ add(loop_buf_end, buf, len); // loop_buf_end will be used as endpoint for loop below
andi(len, len, unroll_words-1); // len = (len % unroll_words)
- sub(len, len, unroll_words); // Length after all iterations
+ sub(loop_buf_end, loop_buf_end, len);
bind(L_unroll_loop);
for (int i = 0; i < unroll; i++) {
ld(tmp1, Address(buf, i*wordSize));
@@ -1500,44 +1607,52 @@ void MacroAssembler::kernel_crc32(Register crc, Register buf, Register len,
}
addi(buf, buf, unroll_words);
- ble(buf, buf_end, L_unroll_loop);
- addiw(len, len, unroll_words-4);
- bge(len, zr, L_by4_loop);
- addiw(len, len, 4);
- bgt(len, zr, L_by1_loop);
- j(L_exit);
-
+ blt(buf, loop_buf_end, L_unroll_loop);
+
+ bind(L_by4_loop_entry);
+ mv(tmp1, 4);
+ blt(len, tmp1, L_by1_loop);
+ add(loop_buf_end, buf, len); // loop_buf_end will be used as endpoint for loop below
+ andi(len, len, 3);
+ sub(loop_buf_end, loop_buf_end, len);
bind(L_by4_loop);
lwu(tmp1, Address(buf));
update_word_crc32(crc, tmp1, tmp2, tmp4, tmp6, table0, table1, table2, table3, false);
- subw(len, len, 4);
addi(buf, buf, 4);
- bge(len, zr, L_by4_loop);
- addiw(len, len, 4);
- ble(len, zr, L_exit);
+ blt(buf, loop_buf_end, L_by4_loop);
bind(L_by1_loop);
+ beqz(len, L_exit);
+
subw(len, len, 1);
lwu(tmp1, Address(buf));
andi(tmp2, tmp1, right_8_bits);
update_byte_crc32(crc, tmp2, table0);
- ble(len, zr, L_exit);
+ beqz(len, L_exit);
subw(len, len, 1);
srli(tmp2, tmp1, 8);
andi(tmp2, tmp2, right_8_bits);
update_byte_crc32(crc, tmp2, table0);
- ble(len, zr, L_exit);
+ beqz(len, L_exit);
subw(len, len, 1);
srli(tmp2, tmp1, 16);
andi(tmp2, tmp2, right_8_bits);
update_byte_crc32(crc, tmp2, table0);
- ble(len, zr, L_exit);
- srli(tmp2, tmp1, 24);
- andi(tmp2, tmp2, right_8_bits);
- update_byte_crc32(crc, tmp2, table0);
+#ifdef COMPILER2
+ // put the vector code here, otherwise an "offset is too large" error occurs.
+ if (UseRVV) {
+ // only need to jump to the exit when UseRVV == true; it's a jump from the end of block `L_by1_loop`.
+ j(L_exit);
+
+ bind(L_vector_entry);
+ vector_update_crc32(crc, buf, len, tmp1, tmp2, tmp3, tmp4, tmp6, table0, table3);
+
+ bgtz(len, L_by4_loop_entry);
+ }
+#endif // COMPILER2
bind(L_exit);
andn(crc, tmp5, crc);
@@ -1853,9 +1968,9 @@ int MacroAssembler::patch_oop(address insn_addr, address o) {
void MacroAssembler::reinit_heapbase() {
if (UseCompressedOops) {
if (Universe::is_fully_initialized()) {
- mv(xheapbase, CompressedOops::ptrs_base());
+ mv(xheapbase, CompressedOops::base());
} else {
- ExternalAddress target(CompressedOops::ptrs_base_addr());
+ ExternalAddress target(CompressedOops::base_addr());
relocate(target.rspec(), [&] {
int32_t offset;
la(xheapbase, target.target(), offset);
@@ -1968,23 +2083,11 @@ void MacroAssembler::addw(Register Rd, Register Rn, int32_t increment, Register
}
void MacroAssembler::sub(Register Rd, Register Rn, int64_t decrement, Register temp) {
- if (is_simm12(-decrement)) {
- addi(Rd, Rn, -decrement);
- } else {
- assert_different_registers(Rn, temp);
- li(temp, decrement);
- sub(Rd, Rn, temp);
- }
+ add(Rd, Rn, -decrement, temp);
}
void MacroAssembler::subw(Register Rd, Register Rn, int32_t decrement, Register temp) {
- if (is_simm12(-decrement)) {
- addiw(Rd, Rn, -decrement);
- } else {
- assert_different_registers(Rn, temp);
- li(temp, decrement);
- subw(Rd, Rn, temp);
- }
+ addw(Rd, Rn, -decrement, temp);
}
void MacroAssembler::andrw(Register Rd, Register Rs1, Register Rs2) {
@@ -3054,6 +3157,13 @@ void MacroAssembler::membar(uint32_t order_constraint) {
}
}
+void MacroAssembler::cmodx_fence() {
+ BLOCK_COMMENT("cmodx fence");
+ if (VM_Version::supports_fencei_barrier()) {
+ Assembler::fencei();
+ }
+}
+
// Form an address from base + offset in Rd. Rd may or may not
// actually be used: you must use the Address that is returned. It
// is up to you to ensure that the shift provided matches the size
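The vectorized CRC32 above, and the restructured scalar loops it falls back to, all compute the same table-driven reflected CRC-32 used by zlib. As a reference point, here is a minimal scalar C++ version of the byte-at-a-time update (the formula the L_by1_loop tail applies, and that update_byte_crc32 implements); this is an illustration, not the HotSpot code:

```
#include <cstdint>
#include <cstddef>

static uint32_t crc_table[256];

static void init_crc_table() {
  for (uint32_t i = 0; i < 256; i++) {
    uint32_t c = i;
    for (int k = 0; k < 8; k++) {
      c = (c & 1) ? 0xEDB88320u ^ (c >> 1) : (c >> 1);  // reflected zlib polynomial
    }
    crc_table[i] = c;
  }
}

// One byte per step: crc = table[(crc ^ byte) & 0xff] ^ (crc >> 8).
// kernel_crc32 inverts crc on entry and exit the same way (the andn with right_32_bits).
static uint32_t crc32_update(uint32_t crc, const uint8_t* buf, size_t len) {
  crc = ~crc;
  for (size_t i = 0; i < len; i++) {
    crc = crc_table[(crc ^ buf[i]) & 0xffu] ^ (crc >> 8);
  }
  return ~crc;
}
```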
diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp
index c3161beea117d..fd174f241eb0b 100644
--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp
+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp
@@ -431,6 +431,8 @@ class MacroAssembler: public Assembler {
}
}
+ void cmodx_fence();
+
void pause() {
Assembler::fence(w, 0);
}
@@ -1321,6 +1323,10 @@ class MacroAssembler: public Assembler {
void update_byte_crc32(Register crc, Register val, Register table);
#ifdef COMPILER2
+ void vector_update_crc32(Register crc, Register buf, Register len,
+ Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5,
+ Register table0, Register table3);
+
void mul_add(Register out, Register in, Register offset,
Register len, Register k, Register tmp);
void wide_mul(Register prod_lo, Register prod_hi, Register n, Register m);
@@ -1350,7 +1356,7 @@ class MacroAssembler: public Assembler {
Register tmp1, Register tmp2, Register tmp3, Register tmp4,
Register tmp5, Register tmp6, Register product_hi);
-#endif
+#endif // COMPILER2
void inflate_lo32(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1);
void inflate_hi32(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1);
diff --git a/src/hotspot/cpu/riscv/relocInfo_riscv.cpp b/src/hotspot/cpu/riscv/relocInfo_riscv.cpp
index d0903c96e2271..18b4302c7e68e 100644
--- a/src/hotspot/cpu/riscv/relocInfo_riscv.cpp
+++ b/src/hotspot/cpu/riscv/relocInfo_riscv.cpp
@@ -55,7 +55,21 @@ void Relocation::pd_set_data_value(address x, bool verify_only) {
bytes = MacroAssembler::pd_patch_instruction_size(addr(), x);
break;
}
- ICache::invalidate_range(addr(), bytes);
+
+ // If we are using UseCtxFencei no ICache invalidation is needed here.
+ // Instead every hart will perform a fence.i, either by a Java thread
+ // (the patching epoch will take it to the slow path),
+ // or by the kernel when a Java thread is moved to a hart.
+ // The instruction stream changes must only happen before the disarm of
+ // the nmethod barrier, where the disarm has a leading full two-way fence.
+ // If this is performed during a safepoint, all Java threads will emit a fence.i
+ // before transitioning to 'Java', e.g. leaving native or the safepoint wait barrier.
+ if (!UseCtxFencei) {
+ // ICache invalidation is a serialization point.
+ // The above patching of instructions happens before the invalidation.
+ // Hence it has a leading full two-way fence (wr, wr).
+ ICache::invalidate_range(addr(), bytes);
+ }
}
address Relocation::pd_call_destination(address orig_addr) {
diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad
index db010c9c6c82f..563dfd4cde972 100644
--- a/src/hotspot/cpu/riscv/riscv.ad
+++ b/src/hotspot/cpu/riscv/riscv.ad
@@ -1920,6 +1920,18 @@ bool Matcher::match_rule_supported(int opcode) {
case Op_EncodeISOArray:
return UseRVV;
+ // Current tests show that it brings a performance gain when MaxVectorSize >= 32, but a
+ // regression when MaxVectorSize == 16. So only enable the intrinsic when MaxVectorSize >= 32.
+ case Op_RoundVF:
+ return UseRVV && MaxVectorSize >= 32;
+
+ // For double, current tests show that even with MaxVectorSize == 32 there is still some regression.
+ // Although there is no hardware to verify it for now, the trend of performance data on hardware
+ // (with vlenb == 16 and 32 respectively) suggests better performance rather than
+ // regression for double when MaxVectorSize >= 64. So only enable the intrinsic when MaxVectorSize >= 64.
+ case Op_RoundVD:
+ return UseRVV && MaxVectorSize >= 64;
+
case Op_PopCountI:
case Op_PopCountL:
return UsePopCountInstruction;
@@ -2212,7 +2224,8 @@ bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) {
assert_cond(m != nullptr);
if (is_vshift_con_pattern(n, m) || // ShiftV src (ShiftCntV con)
is_vector_bitwise_not_pattern(n, m) ||
- is_vector_scalar_bitwise_pattern(n, m)) {
+ is_vector_scalar_bitwise_pattern(n, m) ||
+ is_encode_and_store_pattern(n, m)) {
mstack.push(m, Visit);
return true;
}
@@ -4773,6 +4786,7 @@ instruct loadP(iRegPNoSp dst, memory mem)
// Load Compressed Pointer
instruct loadN(iRegNNoSp dst, memory mem)
%{
+ predicate(n->as_Load()->barrier_data() == 0);
match(Set dst (LoadN mem));
ins_cost(LOAD_COST);
@@ -5208,6 +5222,7 @@ instruct storeimmP0(immP0 zero, memory mem)
// Store Compressed Pointer
instruct storeN(iRegN src, memory mem)
%{
+ predicate(n->as_Store()->barrier_data() == 0);
match(Set mem (StoreN mem src));
ins_cost(STORE_COST);
@@ -5222,6 +5237,7 @@ instruct storeN(iRegN src, memory mem)
instruct storeImmN0(immN0 zero, memory mem)
%{
+ predicate(n->as_Store()->barrier_data() == 0);
match(Set mem (StoreN mem zero));
ins_cost(STORE_COST);
@@ -5412,6 +5428,7 @@ instruct compareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval
instruct compareAndSwapN(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval)
%{
+ predicate(n->as_LoadStore()->barrier_data() == 0);
match(Set res (CompareAndSwapN mem (Binary oldval newval)));
ins_cost(LOAD_COST + STORE_COST + ALU_COST * 8 + BRANCH_COST * 4);
@@ -5533,7 +5550,7 @@ instruct compareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP new
instruct compareAndSwapNAcq(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval)
%{
- predicate(needs_acquiring_load_reserved(n));
+ predicate(needs_acquiring_load_reserved(n) && n->as_LoadStore()->barrier_data() == 0);
match(Set res (CompareAndSwapN mem (Binary oldval newval)));
@@ -5641,6 +5658,7 @@ instruct compareAndExchangeL(iRegLNoSp res, indirect mem, iRegL oldval, iRegL ne
instruct compareAndExchangeN(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval)
%{
+ predicate(n->as_LoadStore()->barrier_data() == 0);
match(Set res (CompareAndExchangeN mem (Binary oldval newval)));
ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST * 3);
@@ -5774,7 +5792,7 @@ instruct compareAndExchangeLAcq(iRegLNoSp res, indirect mem, iRegL oldval, iRegL
instruct compareAndExchangeNAcq(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval)
%{
- predicate(needs_acquiring_load_reserved(n));
+ predicate(needs_acquiring_load_reserved(n) && n->as_LoadStore()->barrier_data() == 0);
match(Set res (CompareAndExchangeN mem (Binary oldval newval)));
@@ -5902,6 +5920,7 @@ instruct weakCompareAndSwapL(iRegINoSp res, indirect mem, iRegL oldval, iRegL ne
instruct weakCompareAndSwapN(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval)
%{
+ predicate(n->as_LoadStore()->barrier_data() == 0);
match(Set res (WeakCompareAndSwapN mem (Binary oldval newval)));
ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 4);
@@ -6033,7 +6052,7 @@ instruct weakCompareAndSwapLAcq(iRegINoSp res, indirect mem, iRegL oldval, iRegL
instruct weakCompareAndSwapNAcq(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval)
%{
- predicate(needs_acquiring_load_reserved(n));
+ predicate(needs_acquiring_load_reserved(n) && n->as_LoadStore()->barrier_data() == 0);
match(Set res (WeakCompareAndSwapN mem (Binary oldval newval)));
@@ -6105,6 +6124,8 @@ instruct get_and_setL(indirect mem, iRegL newv, iRegLNoSp prev)
instruct get_and_setN(indirect mem, iRegN newv, iRegINoSp prev)
%{
+ predicate(n->as_LoadStore()->barrier_data() == 0);
+
match(Set prev (GetAndSetN mem newv));
ins_cost(ALU_COST);
@@ -6170,7 +6191,7 @@ instruct get_and_setLAcq(indirect mem, iRegL newv, iRegLNoSp prev)
instruct get_and_setNAcq(indirect mem, iRegN newv, iRegINoSp prev)
%{
- predicate(needs_acquiring_load_reserved(n));
+ predicate(needs_acquiring_load_reserved(n) && n->as_LoadStore()->barrier_data() == 0);
match(Set prev (GetAndSetN mem newv));
diff --git a/src/hotspot/cpu/riscv/riscv_v.ad b/src/hotspot/cpu/riscv/riscv_v.ad
index 1a51d7583c9dd..510c0ff5d4646 100644
--- a/src/hotspot/cpu/riscv/riscv_v.ad
+++ b/src/hotspot/cpu/riscv/riscv_v.ad
@@ -4715,6 +4715,34 @@ instruct vsignum_reg(vReg dst, vReg zero, vReg one, vRegMask_V0 v0) %{
ins_pipe(pipe_slow);
%}
+// ---------------- Round float/double Vector Operations ----------------
+
+instruct vround_f(vReg dst, vReg src, fRegF tmp, vRegMask_V0 v0) %{
+ match(Set dst (RoundVF src));
+ effect(TEMP_DEF dst, TEMP tmp, TEMP v0);
+ format %{ "java_round_float_v $dst, $src\t" %}
+ ins_encode %{
+ BasicType bt = Matcher::vector_element_basic_type(this);
+ uint vector_length = Matcher::vector_length(this);
+ __ java_round_float_v(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg),
+ as_FloatRegister($tmp$$reg), bt, vector_length);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vround_d(vReg dst, vReg src, fRegD tmp, vRegMask_V0 v0) %{
+ match(Set dst (RoundVD src));
+ effect(TEMP_DEF dst, TEMP tmp, TEMP v0);
+ format %{ "java_round_double_v $dst, $src\t" %}
+ ins_encode %{
+ BasicType bt = Matcher::vector_element_basic_type(this);
+ uint vector_length = Matcher::vector_length(this);
+ __ java_round_double_v(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg),
+ as_FloatRegister($tmp$$reg), bt, vector_length);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
// -------------------------------- Reverse Bytes Vector Operations ------------------------
instruct vreverse_bytes_masked(vReg dst_src, vRegMask_V0 v0) %{
@@ -4867,11 +4895,10 @@ instruct gather_loadS(vReg dst, indirect mem, vReg idx) %{
effect(TEMP_DEF dst);
format %{ "gather_loadS $dst, $mem, $idx" %}
ins_encode %{
- __ vmv1r_v(as_VectorRegister($dst$$reg), as_VectorRegister($idx$$reg));
BasicType bt = Matcher::vector_element_basic_type(this);
Assembler::SEW sew = Assembler::elemtype_to_sew(bt);
__ vsetvli_helper(bt, Matcher::vector_length(this));
- __ vsll_vi(as_VectorRegister($dst$$reg), as_VectorRegister($dst$$reg), (int)sew);
+ __ vsll_vi(as_VectorRegister($dst$$reg), as_VectorRegister($idx$$reg), (int)sew);
__ vluxei32_v(as_VectorRegister($dst$$reg), as_Register($mem$$base),
as_VectorRegister($dst$$reg));
%}
@@ -4901,11 +4928,10 @@ instruct gather_loadS_masked(vReg dst, indirect mem, vReg idx, vRegMask_V0 v0, v
effect(TEMP_DEF dst, TEMP tmp);
format %{ "gather_loadS_masked $dst, $mem, $idx, $v0\t# KILL $tmp" %}
ins_encode %{
- __ vmv1r_v(as_VectorRegister($tmp$$reg), as_VectorRegister($idx$$reg));
BasicType bt = Matcher::vector_element_basic_type(this);
Assembler::SEW sew = Assembler::elemtype_to_sew(bt);
__ vsetvli_helper(bt, Matcher::vector_length(this));
- __ vsll_vi(as_VectorRegister($tmp$$reg), as_VectorRegister($tmp$$reg), (int)sew);
+ __ vsll_vi(as_VectorRegister($tmp$$reg), as_VectorRegister($idx$$reg), (int)sew);
__ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($dst$$reg),
as_VectorRegister($dst$$reg));
__ vluxei32_v(as_VectorRegister($dst$$reg), as_Register($mem$$base),
@@ -4941,11 +4967,10 @@ instruct scatter_storeS(indirect mem, vReg src, vReg idx, vReg tmp) %{
effect(TEMP tmp);
format %{ "scatter_storeS $mem, $idx, $src\t# KILL $tmp" %}
ins_encode %{
- __ vmv1r_v(as_VectorRegister($tmp$$reg), as_VectorRegister($idx$$reg));
BasicType bt = Matcher::vector_element_basic_type(this, $src);
Assembler::SEW sew = Assembler::elemtype_to_sew(bt);
__ vsetvli_helper(bt, Matcher::vector_length(this, $src));
- __ vsll_vi(as_VectorRegister($tmp$$reg), as_VectorRegister($tmp$$reg), (int)sew);
+ __ vsll_vi(as_VectorRegister($tmp$$reg), as_VectorRegister($idx$$reg), (int)sew);
__ vsuxei32_v(as_VectorRegister($src$$reg), as_Register($mem$$base),
as_VectorRegister($tmp$$reg));
%}
@@ -4975,11 +5000,10 @@ instruct scatter_storeS_masked(indirect mem, vReg src, vReg idx, vRegMask_V0 v0,
effect(TEMP tmp);
format %{ "scatter_storeS_masked $mem, $idx, $src, $v0\t# KILL $tmp" %}
ins_encode %{
- __ vmv1r_v(as_VectorRegister($tmp$$reg), as_VectorRegister($idx$$reg));
BasicType bt = Matcher::vector_element_basic_type(this, $src);
Assembler::SEW sew = Assembler::elemtype_to_sew(bt);
__ vsetvli_helper(bt, Matcher::vector_length(this, $src));
- __ vsll_vi(as_VectorRegister($tmp$$reg), as_VectorRegister($tmp$$reg), (int)sew);
+ __ vsll_vi(as_VectorRegister($tmp$$reg), as_VectorRegister($idx$$reg), (int)sew);
__ vsuxei32_v(as_VectorRegister($src$$reg), as_Register($mem$$base),
as_VectorRegister($tmp$$reg), Assembler::v0_t);
%}
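The RoundVF/RoundVD patterns added earlier in this file vectorize Java-style rounding. As a scalar reference for the assumed semantics (nearest integer, ties toward positive infinity, NaN maps to 0, result clamped to the integer range), one float lane could be described as:

```
#include <cmath>
#include <cstdint>
#include <limits>

// Scalar sketch of the assumed Math.round-style semantics behind java_round_float_v.
static int32_t java_round_float(float x) {
  if (std::isnan(x)) return 0;
  double r = std::floor(static_cast<double>(x) + 0.5);  // exact for float inputs
  if (r <= static_cast<double>(std::numeric_limits<int32_t>::min())) return std::numeric_limits<int32_t>::min();
  if (r >= static_cast<double>(std::numeric_limits<int32_t>::max())) return std::numeric_limits<int32_t>::max();
  return static_cast<int32_t>(r);
}
```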
diff --git a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp
index 879fd92272279..27da26d404cc0 100644
--- a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp
+++ b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp
@@ -2110,7 +2110,7 @@ void SharedRuntime::generate_deopt_blob() {
int reexecute_offset = __ pc() - start;
#if INCLUDE_JVMCI && !defined(COMPILER1)
- if (EnableJVMCI && UseJVMCICompiler) {
+ if (UseJVMCICompiler) {
// JVMCI does not use this kind of deoptimization
__ should_not_reach_here();
}
diff --git a/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp
index 8792dea7de5eb..8bf3ac5c90163 100644
--- a/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp
+++ b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp
@@ -2428,6 +2428,14 @@ class StubGenerator: public StubCodeGenerator {
__ la(t1, ExternalAddress(bs_asm->patching_epoch_addr()));
__ lwu(t1, t1);
__ sw(t1, thread_epoch_addr);
+ // There are two ways this can work:
+ // - The writer did system icache shootdown after the instruction stream update.
+ // Hence do nothing.
+ // - The writer trusts us to make sure our icache is in sync before entering.
+ // Hence use cmodx fence (fence.i, may change).
+ if (UseCtxFencei) {
+ __ cmodx_fence();
+ }
__ membar(__ LoadLoad);
}
@@ -5331,7 +5339,7 @@ class StubGenerator: public StubCodeGenerator {
* NOTE: each field will occupy a single vector register group
*/
void base64_vector_decode_round(Register src, Register dst, Register codec,
- Register size, Register stepSrc, Register stepDst, Register failedIdx, Register minusOne,
+ Register size, Register stepSrc, Register stepDst, Register failedIdx,
VectorRegister inputV1, VectorRegister inputV2, VectorRegister inputV3, VectorRegister inputV4,
VectorRegister idxV1, VectorRegister idxV2, VectorRegister idxV3, VectorRegister idxV4,
VectorRegister outputV1, VectorRegister outputV2, VectorRegister outputV3,
@@ -5358,8 +5366,11 @@ class StubGenerator: public StubCodeGenerator {
__ vor_vv(outputV1, outputV1, outputV2);
__ vmseq_vi(v0, outputV1, -1);
__ vfirst_m(failedIdx, v0);
- Label NoFailure;
- __ beq(failedIdx, minusOne, NoFailure);
+ Label NoFailure, FailureAtIdx0;
+ // the only valid negative value of failedIdx is -1, meaning no failure
+ __ bltz(failedIdx, NoFailure);
+ // if the first element (at index 0) failed, there is no valid data left to process
+ __ beqz(failedIdx, FailureAtIdx0);
__ vsetvli(x0, failedIdx, Assembler::e8, lmul, Assembler::mu, Assembler::tu);
__ slli(stepDst, failedIdx, 1);
__ add(stepDst, failedIdx, stepDst);
@@ -5382,6 +5393,7 @@ class StubGenerator: public StubCodeGenerator {
// dst = dst + register_group_len_bytes * 3
__ add(dst, dst, stepDst);
+ __ BIND(FailureAtIdx0);
}
/**
@@ -5487,9 +5499,7 @@ class StubGenerator: public StubCodeGenerator {
Register stepSrcM2 = doff;
Register stepDst = isURL;
Register size = x29; // t4
- Register minusOne = x30; // t5
- __ mv(minusOne, -1);
__ mv(size, MaxVectorSize * 2);
__ mv(stepSrcM1, MaxVectorSize * 4);
__ slli(stepSrcM2, stepSrcM1, 1);
@@ -5501,7 +5511,7 @@ class StubGenerator: public StubCodeGenerator {
// Assembler::m2
__ BIND(ProcessM2);
base64_vector_decode_round(src, dst, codec,
- size, stepSrcM2, stepDst, failedIdx, minusOne,
+ size, stepSrcM2, stepDst, failedIdx,
v2, v4, v6, v8, // inputs
v10, v12, v14, v16, // indexes
v18, v20, v22, // outputs
@@ -5509,7 +5519,8 @@ class StubGenerator: public StubCodeGenerator {
__ sub(length, length, stepSrcM2);
// error check
- __ bne(failedIdx, minusOne, Exit);
+ // the only valid negative value of failedIdx is -1, meaning no failure
+ __ bgez(failedIdx, Exit);
__ bge(length, stepSrcM2, ProcessM2);
@@ -5521,7 +5532,7 @@ class StubGenerator: public StubCodeGenerator {
__ srli(size, size, 1);
__ srli(stepDst, stepDst, 1);
base64_vector_decode_round(src, dst, codec,
- size, stepSrcM1, stepDst, failedIdx, minusOne,
+ size, stepSrcM1, stepDst, failedIdx,
v1, v2, v3, v4, // inputs
v5, v6, v7, v8, // indexes
v9, v10, v11, // outputs
@@ -5529,7 +5540,8 @@ class StubGenerator: public StubCodeGenerator {
__ sub(length, length, stepSrcM1);
// error check
- __ bne(failedIdx, minusOne, Exit);
+ // the only valid negative value of failedIdx is -1, meaning no failure
+ __ bgez(failedIdx, Exit);
__ BIND(ProcessScalar);
__ beqz(length, Exit);
@@ -6080,26 +6092,17 @@ static const int64_t right_3_bits = right_n_bits(3);
address start = __ pc();
+ // input parameters
const Register crc = c_rarg0; // crc
const Register buf = c_rarg1; // source java byte array address
const Register len = c_rarg2; // length
- const Register table0 = c_rarg3; // crc_table address
- const Register table1 = c_rarg4;
- const Register table2 = c_rarg5;
- const Register table3 = c_rarg6;
-
- const Register tmp1 = c_rarg7;
- const Register tmp2 = t2;
- const Register tmp3 = x28; // t3
- const Register tmp4 = x29; // t4
- const Register tmp5 = x30; // t5
- const Register tmp6 = x31; // t6
BLOCK_COMMENT("Entry:");
__ enter(); // required for proper stackwalking of RuntimeStub frame
- __ kernel_crc32(crc, buf, len, table0, table1, table2,
- table3, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6);
+ __ kernel_crc32(crc, buf, len,
+ c_rarg3, c_rarg4, c_rarg5, c_rarg6, // tmp's for tables
+ c_rarg7, t2, x28, x29, x30, x31); // misc tmps
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret();
@@ -6121,6 +6124,29 @@ static const int64_t right_3_bits = right_n_bits(3);
return start;
}
+ // load Method* target of MethodHandle
+ // j_rarg0 = jobject receiver
+ // xmethod = Method* result
+ address generate_upcall_stub_load_target() {
+
+ StubCodeMark mark(this, "StubRoutines", "upcall_stub_load_target");
+ address start = __ pc();
+
+ __ resolve_global_jobject(j_rarg0, t0, t1);
+ // Load target method from receiver
+ __ load_heap_oop(xmethod, Address(j_rarg0, java_lang_invoke_MethodHandle::form_offset()), t0, t1);
+ __ load_heap_oop(xmethod, Address(xmethod, java_lang_invoke_LambdaForm::vmentry_offset()), t0, t1);
+ __ load_heap_oop(xmethod, Address(xmethod, java_lang_invoke_MemberName::method_offset()), t0, t1);
+ __ access_load_at(T_ADDRESS, IN_HEAP, xmethod,
+ Address(xmethod, java_lang_invoke_ResolvedMethodName::vmtarget_offset()),
+ noreg, noreg);
+ __ sd(xmethod, Address(xthread, JavaThread::callee_target_offset())); // just in case callee is deoptimized
+
+ __ ret();
+
+ return start;
+ }
+
#undef __
// Initialization
@@ -6186,6 +6212,7 @@ static const int64_t right_3_bits = right_n_bits(3);
#endif // COMPILER2
StubRoutines::_upcall_stub_exception_handler = generate_upcall_stub_exception_handler();
+ StubRoutines::_upcall_stub_load_target = generate_upcall_stub_load_target();
StubRoutines::riscv::set_completed();
}
diff --git a/src/hotspot/cpu/riscv/stubRoutines_riscv.cpp b/src/hotspot/cpu/riscv/stubRoutines_riscv.cpp
index 05bdeaf757078..6d5492b86b3c3 100644
--- a/src/hotspot/cpu/riscv/stubRoutines_riscv.cpp
+++ b/src/hotspot/cpu/riscv/stubRoutines_riscv.cpp
@@ -276,4 +276,219 @@ ATTRIBUTE_ALIGNED(4096) juint StubRoutines::riscv::_crc_table[] =
0xb1b8e695UL, 0xa30d497bUL, 0x1bb12e1eUL, 0x43d23e48UL, 0xfb6e592dUL,
0xe9dbf6c3UL, 0x516791a6UL, 0xccb0a91fUL, 0x740cce7aUL, 0x66b96194UL,
0xde0506f1UL,
+
+ // Tables for vector version
+ // This improvement (vectorization) is based on java.base/share/native/libzip/zlib/zcrc32.c.
+ // To make it, the following steps were taken:
+ // 1. in zcrc32.c, change N to 16 and adjust the related code,
+ // 2. re-generate the needed tables; we use the tables for (N == 16, W == 4),
+ // 3. finally, vectorize the code (the original implementation in zcrc32.c is scalar only).
+ 0x00000000, 0x8f352d95, 0xc51b5d6b, 0x4a2e70fe, 0x5147bc97,
+ 0xde729102, 0x945ce1fc, 0x1b69cc69, 0xa28f792e, 0x2dba54bb,
+ 0x67942445, 0xe8a109d0, 0xf3c8c5b9, 0x7cfde82c, 0x36d398d2,
+ 0xb9e6b547, 0x9e6ff41d, 0x115ad988, 0x5b74a976, 0xd44184e3,
+ 0xcf28488a, 0x401d651f, 0x0a3315e1, 0x85063874, 0x3ce08d33,
+ 0xb3d5a0a6, 0xf9fbd058, 0x76cefdcd, 0x6da731a4, 0xe2921c31,
+ 0xa8bc6ccf, 0x2789415a, 0xe7aeee7b, 0x689bc3ee, 0x22b5b310,
+ 0xad809e85, 0xb6e952ec, 0x39dc7f79, 0x73f20f87, 0xfcc72212,
+ 0x45219755, 0xca14bac0, 0x803aca3e, 0x0f0fe7ab, 0x14662bc2,
+ 0x9b530657, 0xd17d76a9, 0x5e485b3c, 0x79c11a66, 0xf6f437f3,
+ 0xbcda470d, 0x33ef6a98, 0x2886a6f1, 0xa7b38b64, 0xed9dfb9a,
+ 0x62a8d60f, 0xdb4e6348, 0x547b4edd, 0x1e553e23, 0x916013b6,
+ 0x8a09dfdf, 0x053cf24a, 0x4f1282b4, 0xc027af21, 0x142cdab7,
+ 0x9b19f722, 0xd13787dc, 0x5e02aa49, 0x456b6620, 0xca5e4bb5,
+ 0x80703b4b, 0x0f4516de, 0xb6a3a399, 0x39968e0c, 0x73b8fef2,
+ 0xfc8dd367, 0xe7e41f0e, 0x68d1329b, 0x22ff4265, 0xadca6ff0,
+ 0x8a432eaa, 0x0576033f, 0x4f5873c1, 0xc06d5e54, 0xdb04923d,
+ 0x5431bfa8, 0x1e1fcf56, 0x912ae2c3, 0x28cc5784, 0xa7f97a11,
+ 0xedd70aef, 0x62e2277a, 0x798beb13, 0xf6bec686, 0xbc90b678,
+ 0x33a59bed, 0xf38234cc, 0x7cb71959, 0x369969a7, 0xb9ac4432,
+ 0xa2c5885b, 0x2df0a5ce, 0x67ded530, 0xe8ebf8a5, 0x510d4de2,
+ 0xde386077, 0x94161089, 0x1b233d1c, 0x004af175, 0x8f7fdce0,
+ 0xc551ac1e, 0x4a64818b, 0x6dedc0d1, 0xe2d8ed44, 0xa8f69dba,
+ 0x27c3b02f, 0x3caa7c46, 0xb39f51d3, 0xf9b1212d, 0x76840cb8,
+ 0xcf62b9ff, 0x4057946a, 0x0a79e494, 0x854cc901, 0x9e250568,
+ 0x111028fd, 0x5b3e5803, 0xd40b7596, 0x2859b56e, 0xa76c98fb,
+ 0xed42e805, 0x6277c590, 0x791e09f9, 0xf62b246c, 0xbc055492,
+ 0x33307907, 0x8ad6cc40, 0x05e3e1d5, 0x4fcd912b, 0xc0f8bcbe,
+ 0xdb9170d7, 0x54a45d42, 0x1e8a2dbc, 0x91bf0029, 0xb6364173,
+ 0x39036ce6, 0x732d1c18, 0xfc18318d, 0xe771fde4, 0x6844d071,
+ 0x226aa08f, 0xad5f8d1a, 0x14b9385d, 0x9b8c15c8, 0xd1a26536,
+ 0x5e9748a3, 0x45fe84ca, 0xcacba95f, 0x80e5d9a1, 0x0fd0f434,
+ 0xcff75b15, 0x40c27680, 0x0aec067e, 0x85d92beb, 0x9eb0e782,
+ 0x1185ca17, 0x5babbae9, 0xd49e977c, 0x6d78223b, 0xe24d0fae,
+ 0xa8637f50, 0x275652c5, 0x3c3f9eac, 0xb30ab339, 0xf924c3c7,
+ 0x7611ee52, 0x5198af08, 0xdead829d, 0x9483f263, 0x1bb6dff6,
+ 0x00df139f, 0x8fea3e0a, 0xc5c44ef4, 0x4af16361, 0xf317d626,
+ 0x7c22fbb3, 0x360c8b4d, 0xb939a6d8, 0xa2506ab1, 0x2d654724,
+ 0x674b37da, 0xe87e1a4f, 0x3c756fd9, 0xb340424c, 0xf96e32b2,
+ 0x765b1f27, 0x6d32d34e, 0xe207fedb, 0xa8298e25, 0x271ca3b0,
+ 0x9efa16f7, 0x11cf3b62, 0x5be14b9c, 0xd4d46609, 0xcfbdaa60,
+ 0x408887f5, 0x0aa6f70b, 0x8593da9e, 0xa21a9bc4, 0x2d2fb651,
+ 0x6701c6af, 0xe834eb3a, 0xf35d2753, 0x7c680ac6, 0x36467a38,
+ 0xb97357ad, 0x0095e2ea, 0x8fa0cf7f, 0xc58ebf81, 0x4abb9214,
+ 0x51d25e7d, 0xdee773e8, 0x94c90316, 0x1bfc2e83, 0xdbdb81a2,
+ 0x54eeac37, 0x1ec0dcc9, 0x91f5f15c, 0x8a9c3d35, 0x05a910a0,
+ 0x4f87605e, 0xc0b24dcb, 0x7954f88c, 0xf661d519, 0xbc4fa5e7,
+ 0x337a8872, 0x2813441b, 0xa726698e, 0xed081970, 0x623d34e5,
+ 0x45b475bf, 0xca81582a, 0x80af28d4, 0x0f9a0541, 0x14f3c928,
+ 0x9bc6e4bd, 0xd1e89443, 0x5eddb9d6, 0xe73b0c91, 0x680e2104,
+ 0x222051fa, 0xad157c6f, 0xb67cb006, 0x39499d93, 0x7367ed6d,
+ 0xfc52c0f8,
+ 0x00000000, 0x50b36adc, 0xa166d5b8, 0xf1d5bf64, 0x99bcad31,
+ 0xc90fc7ed, 0x38da7889, 0x68691255, 0xe8085c23, 0xb8bb36ff,
+ 0x496e899b, 0x19dde347, 0x71b4f112, 0x21079bce, 0xd0d224aa,
+ 0x80614e76, 0x0b61be07, 0x5bd2d4db, 0xaa076bbf, 0xfab40163,
+ 0x92dd1336, 0xc26e79ea, 0x33bbc68e, 0x6308ac52, 0xe369e224,
+ 0xb3da88f8, 0x420f379c, 0x12bc5d40, 0x7ad54f15, 0x2a6625c9,
+ 0xdbb39aad, 0x8b00f071, 0x16c37c0e, 0x467016d2, 0xb7a5a9b6,
+ 0xe716c36a, 0x8f7fd13f, 0xdfccbbe3, 0x2e190487, 0x7eaa6e5b,
+ 0xfecb202d, 0xae784af1, 0x5fadf595, 0x0f1e9f49, 0x67778d1c,
+ 0x37c4e7c0, 0xc61158a4, 0x96a23278, 0x1da2c209, 0x4d11a8d5,
+ 0xbcc417b1, 0xec777d6d, 0x841e6f38, 0xd4ad05e4, 0x2578ba80,
+ 0x75cbd05c, 0xf5aa9e2a, 0xa519f4f6, 0x54cc4b92, 0x047f214e,
+ 0x6c16331b, 0x3ca559c7, 0xcd70e6a3, 0x9dc38c7f, 0x2d86f81c,
+ 0x7d3592c0, 0x8ce02da4, 0xdc534778, 0xb43a552d, 0xe4893ff1,
+ 0x155c8095, 0x45efea49, 0xc58ea43f, 0x953dcee3, 0x64e87187,
+ 0x345b1b5b, 0x5c32090e, 0x0c8163d2, 0xfd54dcb6, 0xade7b66a,
+ 0x26e7461b, 0x76542cc7, 0x878193a3, 0xd732f97f, 0xbf5beb2a,
+ 0xefe881f6, 0x1e3d3e92, 0x4e8e544e, 0xceef1a38, 0x9e5c70e4,
+ 0x6f89cf80, 0x3f3aa55c, 0x5753b709, 0x07e0ddd5, 0xf63562b1,
+ 0xa686086d, 0x3b458412, 0x6bf6eece, 0x9a2351aa, 0xca903b76,
+ 0xa2f92923, 0xf24a43ff, 0x039ffc9b, 0x532c9647, 0xd34dd831,
+ 0x83feb2ed, 0x722b0d89, 0x22986755, 0x4af17500, 0x1a421fdc,
+ 0xeb97a0b8, 0xbb24ca64, 0x30243a15, 0x609750c9, 0x9142efad,
+ 0xc1f18571, 0xa9989724, 0xf92bfdf8, 0x08fe429c, 0x584d2840,
+ 0xd82c6636, 0x889f0cea, 0x794ab38e, 0x29f9d952, 0x4190cb07,
+ 0x1123a1db, 0xe0f61ebf, 0xb0457463, 0x5b0df038, 0x0bbe9ae4,
+ 0xfa6b2580, 0xaad84f5c, 0xc2b15d09, 0x920237d5, 0x63d788b1,
+ 0x3364e26d, 0xb305ac1b, 0xe3b6c6c7, 0x126379a3, 0x42d0137f,
+ 0x2ab9012a, 0x7a0a6bf6, 0x8bdfd492, 0xdb6cbe4e, 0x506c4e3f,
+ 0x00df24e3, 0xf10a9b87, 0xa1b9f15b, 0xc9d0e30e, 0x996389d2,
+ 0x68b636b6, 0x38055c6a, 0xb864121c, 0xe8d778c0, 0x1902c7a4,
+ 0x49b1ad78, 0x21d8bf2d, 0x716bd5f1, 0x80be6a95, 0xd00d0049,
+ 0x4dce8c36, 0x1d7de6ea, 0xeca8598e, 0xbc1b3352, 0xd4722107,
+ 0x84c14bdb, 0x7514f4bf, 0x25a79e63, 0xa5c6d015, 0xf575bac9,
+ 0x04a005ad, 0x54136f71, 0x3c7a7d24, 0x6cc917f8, 0x9d1ca89c,
+ 0xcdafc240, 0x46af3231, 0x161c58ed, 0xe7c9e789, 0xb77a8d55,
+ 0xdf139f00, 0x8fa0f5dc, 0x7e754ab8, 0x2ec62064, 0xaea76e12,
+ 0xfe1404ce, 0x0fc1bbaa, 0x5f72d176, 0x371bc323, 0x67a8a9ff,
+ 0x967d169b, 0xc6ce7c47, 0x768b0824, 0x263862f8, 0xd7eddd9c,
+ 0x875eb740, 0xef37a515, 0xbf84cfc9, 0x4e5170ad, 0x1ee21a71,
+ 0x9e835407, 0xce303edb, 0x3fe581bf, 0x6f56eb63, 0x073ff936,
+ 0x578c93ea, 0xa6592c8e, 0xf6ea4652, 0x7deab623, 0x2d59dcff,
+ 0xdc8c639b, 0x8c3f0947, 0xe4561b12, 0xb4e571ce, 0x4530ceaa,
+ 0x1583a476, 0x95e2ea00, 0xc55180dc, 0x34843fb8, 0x64375564,
+ 0x0c5e4731, 0x5ced2ded, 0xad389289, 0xfd8bf855, 0x6048742a,
+ 0x30fb1ef6, 0xc12ea192, 0x919dcb4e, 0xf9f4d91b, 0xa947b3c7,
+ 0x58920ca3, 0x0821667f, 0x88402809, 0xd8f342d5, 0x2926fdb1,
+ 0x7995976d, 0x11fc8538, 0x414fefe4, 0xb09a5080, 0xe0293a5c,
+ 0x6b29ca2d, 0x3b9aa0f1, 0xca4f1f95, 0x9afc7549, 0xf295671c,
+ 0xa2260dc0, 0x53f3b2a4, 0x0340d878, 0x8321960e, 0xd392fcd2,
+ 0x224743b6, 0x72f4296a, 0x1a9d3b3f, 0x4a2e51e3, 0xbbfbee87,
+ 0xeb48845b,
+ 0x00000000, 0xb61be070, 0xb746c6a1, 0x015d26d1, 0xb5fc8b03,
+ 0x03e76b73, 0x02ba4da2, 0xb4a1add2, 0xb0881047, 0x0693f037,
+ 0x07ced6e6, 0xb1d53696, 0x05749b44, 0xb36f7b34, 0xb2325de5,
+ 0x0429bd95, 0xba6126cf, 0x0c7ac6bf, 0x0d27e06e, 0xbb3c001e,
+ 0x0f9dadcc, 0xb9864dbc, 0xb8db6b6d, 0x0ec08b1d, 0x0ae93688,
+ 0xbcf2d6f8, 0xbdaff029, 0x0bb41059, 0xbf15bd8b, 0x090e5dfb,
+ 0x08537b2a, 0xbe489b5a, 0xafb34bdf, 0x19a8abaf, 0x18f58d7e,
+ 0xaeee6d0e, 0x1a4fc0dc, 0xac5420ac, 0xad09067d, 0x1b12e60d,
+ 0x1f3b5b98, 0xa920bbe8, 0xa87d9d39, 0x1e667d49, 0xaac7d09b,
+ 0x1cdc30eb, 0x1d81163a, 0xab9af64a, 0x15d26d10, 0xa3c98d60,
+ 0xa294abb1, 0x148f4bc1, 0xa02ee613, 0x16350663, 0x176820b2,
+ 0xa173c0c2, 0xa55a7d57, 0x13419d27, 0x121cbbf6, 0xa4075b86,
+ 0x10a6f654, 0xa6bd1624, 0xa7e030f5, 0x11fbd085, 0x841791ff,
+ 0x320c718f, 0x3351575e, 0x854ab72e, 0x31eb1afc, 0x87f0fa8c,
+ 0x86addc5d, 0x30b63c2d, 0x349f81b8, 0x828461c8, 0x83d94719,
+ 0x35c2a769, 0x81630abb, 0x3778eacb, 0x3625cc1a, 0x803e2c6a,
+ 0x3e76b730, 0x886d5740, 0x89307191, 0x3f2b91e1, 0x8b8a3c33,
+ 0x3d91dc43, 0x3cccfa92, 0x8ad71ae2, 0x8efea777, 0x38e54707,
+ 0x39b861d6, 0x8fa381a6, 0x3b022c74, 0x8d19cc04, 0x8c44ead5,
+ 0x3a5f0aa5, 0x2ba4da20, 0x9dbf3a50, 0x9ce21c81, 0x2af9fcf1,
+ 0x9e585123, 0x2843b153, 0x291e9782, 0x9f0577f2, 0x9b2cca67,
+ 0x2d372a17, 0x2c6a0cc6, 0x9a71ecb6, 0x2ed04164, 0x98cba114,
+ 0x999687c5, 0x2f8d67b5, 0x91c5fcef, 0x27de1c9f, 0x26833a4e,
+ 0x9098da3e, 0x243977ec, 0x9222979c, 0x937fb14d, 0x2564513d,
+ 0x214deca8, 0x97560cd8, 0x960b2a09, 0x2010ca79, 0x94b167ab,
+ 0x22aa87db, 0x23f7a10a, 0x95ec417a, 0xd35e25bf, 0x6545c5cf,
+ 0x6418e31e, 0xd203036e, 0x66a2aebc, 0xd0b94ecc, 0xd1e4681d,
+ 0x67ff886d, 0x63d635f8, 0xd5cdd588, 0xd490f359, 0x628b1329,
+ 0xd62abefb, 0x60315e8b, 0x616c785a, 0xd777982a, 0x693f0370,
+ 0xdf24e300, 0xde79c5d1, 0x686225a1, 0xdcc38873, 0x6ad86803,
+ 0x6b854ed2, 0xdd9eaea2, 0xd9b71337, 0x6facf347, 0x6ef1d596,
+ 0xd8ea35e6, 0x6c4b9834, 0xda507844, 0xdb0d5e95, 0x6d16bee5,
+ 0x7ced6e60, 0xcaf68e10, 0xcbaba8c1, 0x7db048b1, 0xc911e563,
+ 0x7f0a0513, 0x7e5723c2, 0xc84cc3b2, 0xcc657e27, 0x7a7e9e57,
+ 0x7b23b886, 0xcd3858f6, 0x7999f524, 0xcf821554, 0xcedf3385,
+ 0x78c4d3f5, 0xc68c48af, 0x7097a8df, 0x71ca8e0e, 0xc7d16e7e,
+ 0x7370c3ac, 0xc56b23dc, 0xc436050d, 0x722de57d, 0x760458e8,
+ 0xc01fb898, 0xc1429e49, 0x77597e39, 0xc3f8d3eb, 0x75e3339b,
+ 0x74be154a, 0xc2a5f53a, 0x5749b440, 0xe1525430, 0xe00f72e1,
+ 0x56149291, 0xe2b53f43, 0x54aedf33, 0x55f3f9e2, 0xe3e81992,
+ 0xe7c1a407, 0x51da4477, 0x508762a6, 0xe69c82d6, 0x523d2f04,
+ 0xe426cf74, 0xe57be9a5, 0x536009d5, 0xed28928f, 0x5b3372ff,
+ 0x5a6e542e, 0xec75b45e, 0x58d4198c, 0xeecff9fc, 0xef92df2d,
+ 0x59893f5d, 0x5da082c8, 0xebbb62b8, 0xeae64469, 0x5cfda419,
+ 0xe85c09cb, 0x5e47e9bb, 0x5f1acf6a, 0xe9012f1a, 0xf8faff9f,
+ 0x4ee11fef, 0x4fbc393e, 0xf9a7d94e, 0x4d06749c, 0xfb1d94ec,
+ 0xfa40b23d, 0x4c5b524d, 0x4872efd8, 0xfe690fa8, 0xff342979,
+ 0x492fc909, 0xfd8e64db, 0x4b9584ab, 0x4ac8a27a, 0xfcd3420a,
+ 0x429bd950, 0xf4803920, 0xf5dd1ff1, 0x43c6ff81, 0xf7675253,
+ 0x417cb223, 0x402194f2, 0xf63a7482, 0xf213c917, 0x44082967,
+ 0x45550fb6, 0xf34eefc6, 0x47ef4214, 0xf1f4a264, 0xf0a984b5,
+ 0x46b264c5,
+ 0x00000000, 0x7dcd4d3f, 0xfb9a9a7e, 0x8657d741, 0x2c4432bd,
+ 0x51897f82, 0xd7dea8c3, 0xaa13e5fc, 0x5888657a, 0x25452845,
+ 0xa312ff04, 0xdedfb23b, 0x74cc57c7, 0x09011af8, 0x8f56cdb9,
+ 0xf29b8086, 0xb110caf4, 0xccdd87cb, 0x4a8a508a, 0x37471db5,
+ 0x9d54f849, 0xe099b576, 0x66ce6237, 0x1b032f08, 0xe998af8e,
+ 0x9455e2b1, 0x120235f0, 0x6fcf78cf, 0xc5dc9d33, 0xb811d00c,
+ 0x3e46074d, 0x438b4a72, 0xb95093a9, 0xc49dde96, 0x42ca09d7,
+ 0x3f0744e8, 0x9514a114, 0xe8d9ec2b, 0x6e8e3b6a, 0x13437655,
+ 0xe1d8f6d3, 0x9c15bbec, 0x1a426cad, 0x678f2192, 0xcd9cc46e,
+ 0xb0518951, 0x36065e10, 0x4bcb132f, 0x0840595d, 0x758d1462,
+ 0xf3dac323, 0x8e178e1c, 0x24046be0, 0x59c926df, 0xdf9ef19e,
+ 0xa253bca1, 0x50c83c27, 0x2d057118, 0xab52a659, 0xd69feb66,
+ 0x7c8c0e9a, 0x014143a5, 0x871694e4, 0xfadbd9db, 0xa9d02113,
+ 0xd41d6c2c, 0x524abb6d, 0x2f87f652, 0x859413ae, 0xf8595e91,
+ 0x7e0e89d0, 0x03c3c4ef, 0xf1584469, 0x8c950956, 0x0ac2de17,
+ 0x770f9328, 0xdd1c76d4, 0xa0d13beb, 0x2686ecaa, 0x5b4ba195,
+ 0x18c0ebe7, 0x650da6d8, 0xe35a7199, 0x9e973ca6, 0x3484d95a,
+ 0x49499465, 0xcf1e4324, 0xb2d30e1b, 0x40488e9d, 0x3d85c3a2,
+ 0xbbd214e3, 0xc61f59dc, 0x6c0cbc20, 0x11c1f11f, 0x9796265e,
+ 0xea5b6b61, 0x1080b2ba, 0x6d4dff85, 0xeb1a28c4, 0x96d765fb,
+ 0x3cc48007, 0x4109cd38, 0xc75e1a79, 0xba935746, 0x4808d7c0,
+ 0x35c59aff, 0xb3924dbe, 0xce5f0081, 0x644ce57d, 0x1981a842,
+ 0x9fd67f03, 0xe21b323c, 0xa190784e, 0xdc5d3571, 0x5a0ae230,
+ 0x27c7af0f, 0x8dd44af3, 0xf01907cc, 0x764ed08d, 0x0b839db2,
+ 0xf9181d34, 0x84d5500b, 0x0282874a, 0x7f4fca75, 0xd55c2f89,
+ 0xa89162b6, 0x2ec6b5f7, 0x530bf8c8, 0x88d14467, 0xf51c0958,
+ 0x734bde19, 0x0e869326, 0xa49576da, 0xd9583be5, 0x5f0feca4,
+ 0x22c2a19b, 0xd059211d, 0xad946c22, 0x2bc3bb63, 0x560ef65c,
+ 0xfc1d13a0, 0x81d05e9f, 0x078789de, 0x7a4ac4e1, 0x39c18e93,
+ 0x440cc3ac, 0xc25b14ed, 0xbf9659d2, 0x1585bc2e, 0x6848f111,
+ 0xee1f2650, 0x93d26b6f, 0x6149ebe9, 0x1c84a6d6, 0x9ad37197,
+ 0xe71e3ca8, 0x4d0dd954, 0x30c0946b, 0xb697432a, 0xcb5a0e15,
+ 0x3181d7ce, 0x4c4c9af1, 0xca1b4db0, 0xb7d6008f, 0x1dc5e573,
+ 0x6008a84c, 0xe65f7f0d, 0x9b923232, 0x6909b2b4, 0x14c4ff8b,
+ 0x929328ca, 0xef5e65f5, 0x454d8009, 0x3880cd36, 0xbed71a77,
+ 0xc31a5748, 0x80911d3a, 0xfd5c5005, 0x7b0b8744, 0x06c6ca7b,
+ 0xacd52f87, 0xd11862b8, 0x574fb5f9, 0x2a82f8c6, 0xd8197840,
+ 0xa5d4357f, 0x2383e23e, 0x5e4eaf01, 0xf45d4afd, 0x899007c2,
+ 0x0fc7d083, 0x720a9dbc, 0x21016574, 0x5ccc284b, 0xda9bff0a,
+ 0xa756b235, 0x0d4557c9, 0x70881af6, 0xf6dfcdb7, 0x8b128088,
+ 0x7989000e, 0x04444d31, 0x82139a70, 0xffded74f, 0x55cd32b3,
+ 0x28007f8c, 0xae57a8cd, 0xd39ae5f2, 0x9011af80, 0xeddce2bf,
+ 0x6b8b35fe, 0x164678c1, 0xbc559d3d, 0xc198d002, 0x47cf0743,
+ 0x3a024a7c, 0xc899cafa, 0xb55487c5, 0x33035084, 0x4ece1dbb,
+ 0xe4ddf847, 0x9910b578, 0x1f476239, 0x628a2f06, 0x9851f6dd,
+ 0xe59cbbe2, 0x63cb6ca3, 0x1e06219c, 0xb415c460, 0xc9d8895f,
+ 0x4f8f5e1e, 0x32421321, 0xc0d993a7, 0xbd14de98, 0x3b4309d9,
+ 0x468e44e6, 0xec9da11a, 0x9150ec25, 0x17073b64, 0x6aca765b,
+ 0x29413c29, 0x548c7116, 0xd2dba657, 0xaf16eb68, 0x05050e94,
+ 0x78c843ab, 0xfe9f94ea, 0x8352d9d5, 0x71c95953, 0x0c04146c,
+ 0x8a53c32d, 0xf79e8e12, 0x5d8d6bee, 0x204026d1, 0xa617f190,
+ 0xdbdabcaf
};
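A note on the table-generation steps described in the comment at the top of this table: below is a minimal, illustrative C++ sketch of the classic slice-by-N table construction. It is not the exact zlib braid construction (N == 16, W == 4) these tables were regenerated with; the reflected CRC-32 polynomial 0xEDB88320 is the one zlib uses, but everything else here is only meant to show the general idea of one 256-entry table per byte offset.

#include <array>
#include <cstdint>
#include <cstdio>

// T[0][b] is the CRC of the single byte b; T[k][b] is the CRC of byte b
// followed by k zero bytes, i.e. the contribution of a byte that sits
// k positions before the end of a block processed N bytes at a time.
constexpr uint32_t kPoly = 0xEDB88320u; // reflected CRC-32 polynomial
constexpr int kSlices = 16;

std::array<std::array<uint32_t, 256>, kSlices> make_slice_tables() {
  std::array<std::array<uint32_t, 256>, kSlices> t{};
  for (uint32_t b = 0; b < 256; b++) {
    uint32_t crc = b;
    for (int j = 0; j < 8; j++) {
      crc = (crc >> 1) ^ (kPoly & (0u - (crc & 1u)));
    }
    t[0][b] = crc;
  }
  for (int k = 1; k < kSlices; k++) {
    for (uint32_t b = 0; b < 256; b++) {
      // Appending one more zero byte shifts the low byte out through table 0.
      t[k][b] = (t[k - 1][b] >> 8) ^ t[0][t[k - 1][b] & 0xFFu];
    }
  }
  return t;
}

int main() {
  auto t = make_slice_tables();
  std::printf("T[0][1] = 0x%08x\n", t[0][1]); // 0x77073096 for the standard single-byte table
  return 0;
}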
diff --git a/src/hotspot/cpu/riscv/templateTable_riscv.cpp b/src/hotspot/cpu/riscv/templateTable_riscv.cpp
index 078f54adc3682..2e6902180a892 100644
--- a/src/hotspot/cpu/riscv/templateTable_riscv.cpp
+++ b/src/hotspot/cpu/riscv/templateTable_riscv.cpp
@@ -178,7 +178,6 @@ void TemplateTable::patch_bytecode(Bytecodes::Code bc, Register bc_reg,
__ la(temp_reg, Address(temp_reg, in_bytes(ResolvedFieldEntry::put_code_offset())));
}
// Load-acquire the bytecode to match store-release in ResolvedFieldEntry::fill_in()
- __ membar(MacroAssembler::AnyAny);
__ lbu(temp_reg, Address(temp_reg, 0));
__ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore);
__ mv(bc_reg, bc);
@@ -320,7 +319,6 @@ void TemplateTable::ldc(LdcType type) {
// get type
__ addi(x13, x11, tags_offset);
__ add(x13, x10, x13);
- __ membar(MacroAssembler::AnyAny);
__ lbu(x13, Address(x13, 0));
__ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore);
@@ -2189,7 +2187,6 @@ void TemplateTable::resolve_cache_and_index_for_method(int byte_no,
break;
}
// Load-acquire the bytecode to match store-release in InterpreterRuntime
- __ membar(MacroAssembler::AnyAny);
__ lbu(temp, Address(temp, 0));
__ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore);
@@ -2241,7 +2238,6 @@ void TemplateTable::resolve_cache_and_index_for_field(int byte_no,
__ la(temp, Address(Rcache, in_bytes(ResolvedFieldEntry::put_code_offset())));
}
// Load-acquire the bytecode to match store-release in ResolvedFieldEntry::fill_in()
- __ membar(MacroAssembler::AnyAny);
__ lbu(temp, Address(temp, 0));
__ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore);
__ mv(t0, (int) code); // have we resolved this bytecode?
@@ -2403,7 +2399,6 @@ void TemplateTable::load_invokedynamic_entry(Register method) {
Label resolved;
__ load_resolved_indy_entry(cache, index);
- __ membar(MacroAssembler::AnyAny);
__ ld(method, Address(cache, in_bytes(ResolvedIndyEntry::method_offset())));
__ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore);
@@ -2418,7 +2413,6 @@ void TemplateTable::load_invokedynamic_entry(Register method) {
__ call_VM(noreg, entry, method);
// Update registers with resolved info
__ load_resolved_indy_entry(cache, index);
- __ membar(MacroAssembler::AnyAny);
__ ld(method, Address(cache, in_bytes(ResolvedIndyEntry::method_offset())));
__ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore);
@@ -3533,7 +3527,6 @@ void TemplateTable::_new() {
const int tags_offset = Array<u1>::base_offset_in_bytes();
__ add(t0, x10, x13);
__ la(t0, Address(t0, tags_offset));
- __ membar(MacroAssembler::AnyAny);
__ lbu(t0, t0);
__ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore);
__ sub(t1, t0, (u1)JVM_CONSTANT_Class);
@@ -3651,7 +3644,6 @@ void TemplateTable::checkcast() {
// See if bytecode has already been quicked
__ add(t0, x13, Array<u1>::base_offset_in_bytes());
__ add(x11, t0, x9);
- __ membar(MacroAssembler::AnyAny);
__ lbu(x11, x11);
__ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore);
__ sub(t0, x11, (u1)JVM_CONSTANT_Class);
@@ -3707,7 +3699,6 @@ void TemplateTable::instanceof() {
// See if bytecode has already been quicked
__ add(t0, x13, Array<u1>::base_offset_in_bytes());
__ add(x11, t0, x9);
- __ membar(MacroAssembler::AnyAny);
__ lbu(x11, x11);
__ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore);
__ sub(t0, x11, (u1)JVM_CONSTANT_Class);
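A note on the membar removals in this file: each site keeps the fence-based acquire sequence that the surrounding comments describe, i.e. a plain load followed by a LoadLoad | LoadStore barrier, and acquire ordering does not require the leading full barrier. A rough C++-atomics sketch of that remaining pattern (illustrative only, not part of the patch):

#include <atomic>
#include <cstdint>

// Fence-based acquire load: a relaxed load followed by an acquire fence.
// This mirrors the lbu/ld followed by membar(LoadLoad | LoadStore): loads and
// stores issued after the fence cannot be reordered ahead of the preceding load.
uint8_t load_acquire_u8(const std::atomic<uint8_t>& flag) {
  uint8_t v = flag.load(std::memory_order_relaxed);    // plain load
  std::atomic_thread_fence(std::memory_order_acquire); // LoadLoad | LoadStore barrier
  return v;
}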
diff --git a/src/hotspot/cpu/riscv/upcallLinker_riscv.cpp b/src/hotspot/cpu/riscv/upcallLinker_riscv.cpp
index 383f332f8fd94..55160be99d0d8 100644
--- a/src/hotspot/cpu/riscv/upcallLinker_riscv.cpp
+++ b/src/hotspot/cpu/riscv/upcallLinker_riscv.cpp
@@ -25,6 +25,7 @@
#include "precompiled.hpp"
#include "asm/macroAssembler.hpp"
+#include "classfile/javaClasses.hpp"
#include "logging/logStream.hpp"
#include "memory/resourceArea.hpp"
#include "prims/upcallLinker.hpp"
@@ -117,7 +118,7 @@ static void restore_callee_saved_registers(MacroAssembler* _masm, const ABIDescr
static const int upcall_stub_code_base_size = 1024;
static const int upcall_stub_size_per_arg = 16;
-address UpcallLinker::make_upcall_stub(jobject receiver, Method* entry,
+address UpcallLinker::make_upcall_stub(jobject receiver, Symbol* signature,
BasicType* out_sig_bt, int total_out_args,
BasicType ret_type,
jobject jabi, jobject jconv,
@@ -223,7 +224,6 @@ address UpcallLinker::make_upcall_stub(jobject receiver, Method* entry,
__ block_comment("{ on_entry");
__ la(c_rarg0, Address(sp, frame_data_offset));
- __ movptr(c_rarg1, (address) receiver);
__ rt_call(CAST_FROM_FN_PTR(address, UpcallLinker::on_entry));
__ mv(xthread, x10);
__ reinit_heapbase();
@@ -260,12 +260,10 @@ address UpcallLinker::make_upcall_stub(jobject receiver, Method* entry,
arg_shuffle.generate(_masm, as_VMStorage(shuffle_reg), abi._shadow_space_bytes, 0);
__ block_comment("} argument shuffle");
- __ block_comment("{ receiver ");
- __ get_vm_result(j_rarg0, xthread);
- __ block_comment("} receiver ");
-
- __ mov_metadata(xmethod, entry);
- __ sd(xmethod, Address(xthread, JavaThread::callee_target_offset())); // just in case callee is deoptimized
+ __ block_comment("{ load target ");
+ __ movptr(j_rarg0, (address) receiver);
+ __ far_call(RuntimeAddress(StubRoutines::upcall_stub_load_target())); // loads Method* into xmethod
+ __ block_comment("} load target ");
__ push_cont_fastpath(xthread);
@@ -338,7 +336,7 @@ address UpcallLinker::make_upcall_stub(jobject receiver, Method* entry,
#ifndef PRODUCT
stringStream ss;
- ss.print("upcall_stub_%s", entry->signature()->as_C_string());
+ ss.print("upcall_stub_%s", signature->as_C_string());
const char *name = _masm->code_string(ss.as_string());
#else // PRODUCT
const char* name = "upcall_stub";
diff --git a/src/hotspot/cpu/riscv/vm_version_riscv.hpp b/src/hotspot/cpu/riscv/vm_version_riscv.hpp
index bd4bfe86d9bf7..8fdde0094f40d 100644
--- a/src/hotspot/cpu/riscv/vm_version_riscv.hpp
+++ b/src/hotspot/cpu/riscv/vm_version_riscv.hpp
@@ -285,6 +285,7 @@ class VM_Version : public Abstract_VM_Version {
// RISCV64 supports fast class initialization checks
static bool supports_fast_class_init_checks() { return true; }
+ static bool supports_fencei_barrier() { return ext_Zifencei.enabled(); }
};
#endif // CPU_RISCV_VM_VERSION_RISCV_HPP
diff --git a/src/hotspot/cpu/s390/c1_CodeStubs_s390.cpp b/src/hotspot/cpu/s390/c1_CodeStubs_s390.cpp
index b7f1d3605681a..e01e4458e38d3 100644
--- a/src/hotspot/cpu/s390/c1_CodeStubs_s390.cpp
+++ b/src/hotspot/cpu/s390/c1_CodeStubs_s390.cpp
@@ -48,7 +48,7 @@ void C1SafepointPollStub::emit_code(LIR_Assembler* ce) {
void RangeCheckStub::emit_code(LIR_Assembler* ce) {
__ bind(_entry);
if (_info->deoptimize_on_exception()) {
- address a = Runtime1::entry_for (Runtime1::predicate_failed_trap_id);
+ address a = Runtime1::entry_for (C1StubId::predicate_failed_trap_id);
ce->emit_call_c(a);
CHECK_BAILOUT();
ce->add_call_info_here(_info);
@@ -64,11 +64,11 @@ void RangeCheckStub::emit_code(LIR_Assembler* ce) {
__ load_const_optimized(Z_R1_scratch, _index->as_jint());
}
- Runtime1::StubID stub_id;
+ C1StubId stub_id;
if (_throw_index_out_of_bounds_exception) {
- stub_id = Runtime1::throw_index_exception_id;
+ stub_id = C1StubId::throw_index_exception_id;
} else {
- stub_id = Runtime1::throw_range_check_failed_id;
+ stub_id = C1StubId::throw_range_check_failed_id;
__ lgr_if_needed(Z_R0_scratch, _array->as_pointer_register());
}
ce->emit_call_c(Runtime1::entry_for (stub_id));
@@ -84,7 +84,7 @@ PredicateFailedStub::PredicateFailedStub(CodeEmitInfo* info) {
void PredicateFailedStub::emit_code(LIR_Assembler* ce) {
__ bind(_entry);
- address a = Runtime1::entry_for (Runtime1::predicate_failed_trap_id);
+ address a = Runtime1::entry_for (C1StubId::predicate_failed_trap_id);
ce->emit_call_c(a);
CHECK_BAILOUT();
ce->add_call_info_here(_info);
@@ -102,7 +102,7 @@ void CounterOverflowStub::emit_code(LIR_Assembler* ce) {
}
ce->store_parameter(/*_method->as_register()*/ Z_R1_scratch, 1);
ce->store_parameter(_bci, 0);
- ce->emit_call_c(Runtime1::entry_for (Runtime1::counter_overflow_id));
+ ce->emit_call_c(Runtime1::entry_for (C1StubId::counter_overflow_id));
CHECK_BAILOUT();
ce->add_call_info_here(_info);
ce->verify_oop_map(_info);
@@ -114,7 +114,7 @@ void DivByZeroStub::emit_code(LIR_Assembler* ce) {
ce->compilation()->implicit_exception_table()->append(_offset, __ offset());
}
__ bind(_entry);
- ce->emit_call_c(Runtime1::entry_for (Runtime1::throw_div0_exception_id));
+ ce->emit_call_c(Runtime1::entry_for (C1StubId::throw_div0_exception_id));
CHECK_BAILOUT();
ce->add_call_info_here(_info);
debug_only(__ should_not_reach_here());
@@ -124,9 +124,9 @@ void ImplicitNullCheckStub::emit_code(LIR_Assembler* ce) {
address a;
if (_info->deoptimize_on_exception()) {
// Deoptimize, do not throw the exception, because it is probably wrong to do it here.
- a = Runtime1::entry_for (Runtime1::predicate_failed_trap_id);
+ a = Runtime1::entry_for (C1StubId::predicate_failed_trap_id);
} else {
- a = Runtime1::entry_for (Runtime1::throw_null_pointer_exception_id);
+ a = Runtime1::entry_for (C1StubId::throw_null_pointer_exception_id);
}
ce->compilation()->implicit_exception_table()->append(_offset, __ offset());
@@ -151,14 +151,14 @@ void SimpleExceptionStub::emit_code(LIR_Assembler* ce) {
debug_only(__ should_not_reach_here());
}
-NewInstanceStub::NewInstanceStub(LIR_Opr klass_reg, LIR_Opr result, ciInstanceKlass* klass, CodeEmitInfo* info, Runtime1::StubID stub_id) {
+NewInstanceStub::NewInstanceStub(LIR_Opr klass_reg, LIR_Opr result, ciInstanceKlass* klass, CodeEmitInfo* info, C1StubId stub_id) {
_result = result;
_klass = klass;
_klass_reg = klass_reg;
_info = new CodeEmitInfo(info);
- assert(stub_id == Runtime1::new_instance_id ||
- stub_id == Runtime1::fast_new_instance_id ||
- stub_id == Runtime1::fast_new_instance_init_check_id,
+ assert(stub_id == C1StubId::new_instance_id ||
+ stub_id == C1StubId::fast_new_instance_id ||
+ stub_id == C1StubId::fast_new_instance_init_check_id,
"need new_instance id");
_stub_id = stub_id;
}
@@ -186,7 +186,7 @@ void NewTypeArrayStub::emit_code(LIR_Assembler* ce) {
__ bind(_entry);
assert(_klass_reg->as_register() == Z_R11, "call target expects klass in Z_R11");
__ lgr_if_needed(Z_R13, _length->as_register());
- address a = Runtime1::entry_for (Runtime1::new_type_array_id);
+ address a = Runtime1::entry_for (C1StubId::new_type_array_id);
ce->emit_call_c(a);
CHECK_BAILOUT();
ce->add_call_info_here(_info);
@@ -206,7 +206,7 @@ void NewObjectArrayStub::emit_code(LIR_Assembler* ce) {
__ bind(_entry);
assert(_klass_reg->as_register() == Z_R11, "call target expects klass in Z_R11");
__ lgr_if_needed(Z_R13, _length->as_register());
- address a = Runtime1::entry_for (Runtime1::new_object_array_id);
+ address a = Runtime1::entry_for (C1StubId::new_object_array_id);
ce->emit_call_c(a);
CHECK_BAILOUT();
ce->add_call_info_here(_info);
@@ -217,11 +217,11 @@ void NewObjectArrayStub::emit_code(LIR_Assembler* ce) {
void MonitorEnterStub::emit_code(LIR_Assembler* ce) {
__ bind(_entry);
- Runtime1::StubID enter_id;
+ C1StubId enter_id;
if (ce->compilation()->has_fpu_code()) {
- enter_id = Runtime1::monitorenter_id;
+ enter_id = C1StubId::monitorenter_id;
} else {
- enter_id = Runtime1::monitorenter_nofpu_id;
+ enter_id = C1StubId::monitorenter_nofpu_id;
}
__ lgr_if_needed(Z_R1_scratch, _obj_reg->as_register());
__ lgr_if_needed(Z_R13, _lock_reg->as_register()); // See LIRGenerator::syncTempOpr().
@@ -242,11 +242,11 @@ void MonitorExitStub::emit_code(LIR_Assembler* ce) {
__ lgr_if_needed(Z_R1_scratch, _lock_reg->as_register());
}
// Note: non-blocking leaf routine => no call info needed.
- Runtime1::StubID exit_id;
+ C1StubId exit_id;
if (ce->compilation()->has_fpu_code()) {
- exit_id = Runtime1::monitorexit_id;
+ exit_id = C1StubId::monitorexit_id;
} else {
- exit_id = Runtime1::monitorexit_nofpu_id;
+ exit_id = C1StubId::monitorexit_nofpu_id;
}
ce->emit_call_c(Runtime1::entry_for (exit_id));
CHECK_BAILOUT();
@@ -378,10 +378,10 @@ void PatchingStub::emit_code(LIR_Assembler* ce) {
address target = nullptr;
relocInfo::relocType reloc_type = relocInfo::none;
switch (_id) {
- case access_field_id: target = Runtime1::entry_for (Runtime1::access_field_patching_id); break;
- case load_klass_id: target = Runtime1::entry_for (Runtime1::load_klass_patching_id); reloc_type = relocInfo::metadata_type; break;
- case load_mirror_id: target = Runtime1::entry_for (Runtime1::load_mirror_patching_id); reloc_type = relocInfo::oop_type; break;
- case load_appendix_id: target = Runtime1::entry_for (Runtime1::load_appendix_patching_id); reloc_type = relocInfo::oop_type; break;
+ case access_field_id: target = Runtime1::entry_for (C1StubId::access_field_patching_id); break;
+ case load_klass_id: target = Runtime1::entry_for (C1StubId::load_klass_patching_id); reloc_type = relocInfo::metadata_type; break;
+ case load_mirror_id: target = Runtime1::entry_for (C1StubId::load_mirror_patching_id); reloc_type = relocInfo::oop_type; break;
+ case load_appendix_id: target = Runtime1::entry_for (C1StubId::load_appendix_patching_id); reloc_type = relocInfo::oop_type; break;
default: ShouldNotReachHere();
}
__ bind(call_patch);
@@ -406,7 +406,7 @@ void PatchingStub::emit_code(LIR_Assembler* ce) {
void DeoptimizeStub::emit_code(LIR_Assembler* ce) {
__ bind(_entry);
__ load_const_optimized(Z_R1_scratch, _trap_request); // Pass trap request in Z_R1_scratch.
- ce->emit_call_c(Runtime1::entry_for (Runtime1::deoptimize_id));
+ ce->emit_call_c(Runtime1::entry_for (C1StubId::deoptimize_id));
CHECK_BAILOUT();
ce->add_call_info_here(_info);
DEBUG_ONLY(__ should_not_reach_here());
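The Runtime1::StubID to C1StubId renames in this and the following s390 files follow the pattern of replacing a plain nested enum with a scoped enum; as the explicit (int) casts elsewhere in these changes suggest, the values no longer convert implicitly to int. A generic sketch of that pattern (the enumerator names below are abbreviated placeholders, not the actual C1StubId definition):

#include <cstdio>

// A scoped enum does not convert implicitly to int, so call sites that still
// need a plain integer must cast explicitly, matching the (int)C1StubId::...
// casts seen in the s390 changes.
enum class C1StubId : int {
  forward_exception_id,
  new_instance_id,
  deoptimize_id,
  // ... (abbreviated)
};

void log_stub(int id) { std::printf("stub id = %d\n", id); } // legacy int interface

int main() {
  C1StubId id = C1StubId::deoptimize_id;
  log_stub(static_cast<int>(id)); // explicit cast required for a scoped enum
  return 0;
}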
diff --git a/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp b/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp
index a5e62169a9350..d288f4a893d0a 100644
--- a/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp
+++ b/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp
@@ -172,7 +172,7 @@ int LIR_Assembler::emit_exception_handler() {
int offset = code_offset();
- address a = Runtime1::entry_for (Runtime1::handle_exception_from_callee_id);
+ address a = Runtime1::entry_for (C1StubId::handle_exception_from_callee_id);
address call_addr = emit_call_c(a);
CHECK_BAILOUT_(-1);
__ should_not_reach_here();
@@ -212,7 +212,7 @@ int LIR_Assembler::emit_unwind_handler() {
// Perform needed unlocking.
MonitorExitStub* stub = nullptr;
if (method()->is_synchronized()) {
- // Runtime1::monitorexit_id expects lock address in Z_R1_scratch.
+ // C1StubId::monitorexit_id expects lock address in Z_R1_scratch.
LIR_Opr lock = FrameMap::as_opr(Z_R1_scratch);
monitor_address(0, lock);
stub = new MonitorExitStub(lock, true, 0);
@@ -241,7 +241,7 @@ int LIR_Assembler::emit_unwind_handler() {
// Z_EXC_PC: exception pc
// Dispatch to the unwind logic.
- __ load_const_optimized(Z_R5, Runtime1::entry_for (Runtime1::unwind_exception_id));
+ __ load_const_optimized(Z_R5, Runtime1::entry_for (C1StubId::unwind_exception_id));
__ z_br(Z_R5);
// Emit the slow path assembly.
@@ -1910,8 +1910,8 @@ void LIR_Assembler::throw_op(LIR_Opr exceptionPC, LIR_Opr exceptionOop, CodeEmit
// Reuse the debug info from the safepoint poll for the throw op itself.
__ get_PC(Z_EXC_PC);
add_call_info(__ offset(), info); // for exception handler
- address stub = Runtime1::entry_for (compilation()->has_fpu_code() ? Runtime1::handle_exception_id
- : Runtime1::handle_exception_nofpu_id);
+ address stub = Runtime1::entry_for (compilation()->has_fpu_code() ? C1StubId::handle_exception_id
+ : C1StubId::handle_exception_nofpu_id);
emit_call_c(stub);
}
@@ -2116,7 +2116,7 @@ void LIR_Assembler::emit_arraycopy(LIR_OpArrayCopy* op) {
store_parameter(src_klass, 0); // sub
store_parameter(dst_klass, 1); // super
- emit_call_c(Runtime1::entry_for (Runtime1::slow_subtype_check_id));
+ emit_call_c(Runtime1::entry_for (C1StubId::slow_subtype_check_id));
CHECK_BAILOUT2(cont, slow);
// Sets condition code 0 for match (2 otherwise).
__ branch_optimized(Assembler::bcondEqual, cont);
@@ -2539,7 +2539,7 @@ void LIR_Assembler::emit_typecheck_helper(LIR_OpTypeCheck *op, Label* success, L
RegisterOrConstant(super_check_offset));
if (need_slow_path) {
// Call out-of-line instance of __ check_klass_subtype_slow_path(...):
- address a = Runtime1::entry_for (Runtime1::slow_subtype_check_id);
+ address a = Runtime1::entry_for (C1StubId::slow_subtype_check_id);
store_parameter(klass_RInfo, 0); // sub
store_parameter(k_RInfo, 1); // super
emit_call_c(a); // Sets condition code 0 for match (2 otherwise).
@@ -2614,7 +2614,7 @@ void LIR_Assembler::emit_opTypeCheck(LIR_OpTypeCheck* op) {
// Perform the fast part of the checking logic.
__ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, success_target, failure_target, nullptr);
// Call out-of-line instance of __ check_klass_subtype_slow_path(...):
- address a = Runtime1::entry_for (Runtime1::slow_subtype_check_id);
+ address a = Runtime1::entry_for (C1StubId::slow_subtype_check_id);
store_parameter(klass_RInfo, 0); // sub
store_parameter(k_RInfo, 1); // super
emit_call_c(a); // Sets condition code 0 for match (2 otherwise).
diff --git a/src/hotspot/cpu/s390/c1_LIRGenerator_s390.cpp b/src/hotspot/cpu/s390/c1_LIRGenerator_s390.cpp
index 619f0f7174f01..f998e86256f56 100644
--- a/src/hotspot/cpu/s390/c1_LIRGenerator_s390.cpp
+++ b/src/hotspot/cpu/s390/c1_LIRGenerator_s390.cpp
@@ -885,7 +885,7 @@ void LIRGenerator::do_NewMultiArray(NewMultiArray* x) {
args->append(rank);
args->append(varargs);
LIR_Opr reg = result_register_for (x->type());
- __ call_runtime(Runtime1::entry_for (Runtime1::new_multi_array_id),
+ __ call_runtime(Runtime1::entry_for (C1StubId::new_multi_array_id),
LIR_OprFact::illegalOpr,
reg, args, info);
@@ -916,14 +916,14 @@ void LIRGenerator::do_CheckCast(CheckCast* x) {
CodeStub* stub;
if (x->is_incompatible_class_change_check()) {
assert(patching_info == nullptr, "can't patch this");
- stub = new SimpleExceptionStub(Runtime1::throw_incompatible_class_change_error_id, LIR_OprFact::illegalOpr, info_for_exception);
+ stub = new SimpleExceptionStub(C1StubId::throw_incompatible_class_change_error_id, LIR_OprFact::illegalOpr, info_for_exception);
} else if (x->is_invokespecial_receiver_check()) {
assert(patching_info == nullptr, "can't patch this");
stub = new DeoptimizeStub(info_for_exception,
Deoptimization::Reason_class_check,
Deoptimization::Action_none);
} else {
- stub = new SimpleExceptionStub(Runtime1::throw_class_cast_exception_id, obj.result(), info_for_exception);
+ stub = new SimpleExceptionStub(C1StubId::throw_class_cast_exception_id, obj.result(), info_for_exception);
}
LIR_Opr reg = rlock_result(x);
LIR_Opr tmp1 = new_register(objectType);
diff --git a/src/hotspot/cpu/s390/c1_MacroAssembler_s390.cpp b/src/hotspot/cpu/s390/c1_MacroAssembler_s390.cpp
index a9140a7925ebd..f6dd20db3d67f 100644
--- a/src/hotspot/cpu/s390/c1_MacroAssembler_s390.cpp
+++ b/src/hotspot/cpu/s390/c1_MacroAssembler_s390.cpp
@@ -79,7 +79,7 @@ void C1_MacroAssembler::lock_object(Register Rmark, Register Roop, Register Rbox
assert(LockingMode != LM_MONITOR, "LM_MONITOR is already handled, by emit_lock()");
if (LockingMode == LM_LIGHTWEIGHT) {
- lightweight_lock(Roop, Rmark, tmp, slow_case);
+ lightweight_lock(Rbox, Roop, Rmark, tmp, slow_case);
} else if (LockingMode == LM_LEGACY) {
NearLabel done;
@@ -254,7 +254,7 @@ void C1_MacroAssembler::initialize_object(
// Dtrace support is unimplemented.
// if (CURRENT_ENV->dtrace_alloc_probes()) {
// assert(obj == rax, "must be");
- // call(RuntimeAddress(Runtime1::entry_for (Runtime1::dtrace_object_alloc_id)));
+ // call(RuntimeAddress(Runtime1::entry_for (C1StubId::dtrace_object_alloc_id)));
// }
verify_oop(obj, FILE_AND_LINE);
@@ -315,7 +315,7 @@ void C1_MacroAssembler::allocate_array(
// Dtrace support is unimplemented.
// if (CURRENT_ENV->dtrace_alloc_probes()) {
// assert(obj == rax, "must be");
- // call(RuntimeAddress(Runtime1::entry_for (Runtime1::dtrace_object_alloc_id)));
+ // call(RuntimeAddress(Runtime1::entry_for (C1StubId::dtrace_object_alloc_id)));
// }
verify_oop(obj, FILE_AND_LINE);
diff --git a/src/hotspot/cpu/s390/c1_Runtime1_s390.cpp b/src/hotspot/cpu/s390/c1_Runtime1_s390.cpp
index 41c57043d8234..2f629c108c956 100644
--- a/src/hotspot/cpu/s390/c1_Runtime1_s390.cpp
+++ b/src/hotspot/cpu/s390/c1_Runtime1_s390.cpp
@@ -98,10 +98,10 @@ int StubAssembler::call_RT(Register oop_result1, Register metadata_result, addre
restore_return_pc();
load_const_optimized(Z_R1, StubRoutines::forward_exception_entry());
z_br(Z_R1);
- } else if (_stub_id == Runtime1::forward_exception_id) {
+ } else if (_stub_id == (int)C1StubId::forward_exception_id) {
should_not_reach_here();
} else {
- load_const_optimized(Z_R1, Runtime1::entry_for (Runtime1::forward_exception_id));
+ load_const_optimized(Z_R1, Runtime1::entry_for (C1StubId::forward_exception_id));
z_br(Z_R1);
}
@@ -305,7 +305,7 @@ OopMapSet* Runtime1::generate_patching(StubAssembler* sasm, address target) {
return oop_maps;
}
-OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
+OopMapSet* Runtime1::generate_code_for(C1StubId id, StubAssembler* sasm) {
// for better readability
const bool must_gc_arguments = true;
@@ -318,26 +318,26 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
// Stub code and info for the different stubs.
OopMapSet* oop_maps = nullptr;
switch (id) {
- case forward_exception_id:
+ case C1StubId::forward_exception_id:
{
oop_maps = generate_handle_exception(id, sasm);
// will not return
}
break;
- case new_instance_id:
- case fast_new_instance_id:
- case fast_new_instance_init_check_id:
+ case C1StubId::new_instance_id:
+ case C1StubId::fast_new_instance_id:
+ case C1StubId::fast_new_instance_init_check_id:
{
Register klass = Z_R11; // Incoming
Register obj = Z_R2; // Result
- if (id == new_instance_id) {
+ if (id == C1StubId::new_instance_id) {
__ set_info("new_instance", dont_gc_arguments);
- } else if (id == fast_new_instance_id) {
+ } else if (id == C1StubId::fast_new_instance_id) {
__ set_info("fast new_instance", dont_gc_arguments);
} else {
- assert(id == fast_new_instance_init_check_id, "bad StubID");
+ assert(id == C1StubId::fast_new_instance_init_check_id, "bad C1StubId");
__ set_info("fast new_instance init check", dont_gc_arguments);
}
@@ -352,7 +352,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
}
break;
- case counter_overflow_id:
+ case C1StubId::counter_overflow_id:
{
// Arguments :
// bci : stack param 0
@@ -371,14 +371,14 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
__ z_br(Z_R14);
}
break;
- case new_type_array_id:
- case new_object_array_id:
+ case C1StubId::new_type_array_id:
+ case C1StubId::new_object_array_id:
{
Register length = Z_R13; // Incoming
Register klass = Z_R11; // Incoming
Register obj = Z_R2; // Result
- if (id == new_type_array_id) {
+ if (id == C1StubId::new_type_array_id) {
__ set_info("new_type_array", dont_gc_arguments);
} else {
__ set_info("new_object_array", dont_gc_arguments);
@@ -391,7 +391,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
Register t0 = obj;
__ mem2reg_opt(t0, Address(klass, Klass::layout_helper_offset()), false);
__ z_sra(t0, Klass::_lh_array_tag_shift);
- int tag = ((id == new_type_array_id)
+ int tag = ((id == C1StubId::new_type_array_id)
? Klass::_lh_array_tag_type_value
: Klass::_lh_array_tag_obj_value);
__ compare32_and_branch(t0, tag, Assembler::bcondEqual, ok);
@@ -403,7 +403,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
OopMap* map = save_live_registers_except_r2(sasm);
int call_offset;
- if (id == new_type_array_id) {
+ if (id == C1StubId::new_type_array_id) {
call_offset = __ call_RT(obj, noreg, CAST_FROM_FN_PTR(address, new_type_array), klass, length);
} else {
call_offset = __ call_RT(obj, noreg, CAST_FROM_FN_PTR(address, new_object_array), klass, length);
@@ -418,7 +418,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
}
break;
- case new_multi_array_id:
+ case C1StubId::new_multi_array_id:
{ __ set_info("new_multi_array", dont_gc_arguments);
// Z_R3,: klass
// Z_R4,: rank
@@ -436,7 +436,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
}
break;
- case register_finalizer_id:
+ case C1StubId::register_finalizer_id:
{
__ set_info("register_finalizer", dont_gc_arguments);
@@ -459,62 +459,62 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
}
break;
- case throw_range_check_failed_id:
+ case C1StubId::throw_range_check_failed_id:
{ __ set_info("range_check_failed", dont_gc_arguments);
oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_range_check_exception), true);
}
break;
- case throw_index_exception_id:
+ case C1StubId::throw_index_exception_id:
{ __ set_info("index_range_check_failed", dont_gc_arguments);
oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_index_exception), true);
}
break;
- case throw_div0_exception_id:
+ case C1StubId::throw_div0_exception_id:
{ __ set_info("throw_div0_exception", dont_gc_arguments);
oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_div0_exception), false);
}
break;
- case throw_null_pointer_exception_id:
+ case C1StubId::throw_null_pointer_exception_id:
{ __ set_info("throw_null_pointer_exception", dont_gc_arguments);
oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_null_pointer_exception), false);
}
break;
- case handle_exception_nofpu_id:
- case handle_exception_id:
+ case C1StubId::handle_exception_nofpu_id:
+ case C1StubId::handle_exception_id:
{ __ set_info("handle_exception", dont_gc_arguments);
oop_maps = generate_handle_exception(id, sasm);
}
break;
- case handle_exception_from_callee_id:
+ case C1StubId::handle_exception_from_callee_id:
{ __ set_info("handle_exception_from_callee", dont_gc_arguments);
oop_maps = generate_handle_exception(id, sasm);
}
break;
- case unwind_exception_id:
+ case C1StubId::unwind_exception_id:
{ __ set_info("unwind_exception", dont_gc_arguments);
// Note: no stubframe since we are about to leave the current
// activation and we are calling a leaf VM function only.
generate_unwind_exception(sasm);
}
break;
- case throw_array_store_exception_id:
+ case C1StubId::throw_array_store_exception_id:
{ __ set_info("throw_array_store_exception", dont_gc_arguments);
oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_array_store_exception), true);
}
break;
- case throw_class_cast_exception_id:
+ case C1StubId::throw_class_cast_exception_id:
{ // Z_R1_scratch: object
__ set_info("throw_class_cast_exception", dont_gc_arguments);
oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_class_cast_exception), true);
}
break;
- case throw_incompatible_class_change_error_id:
+ case C1StubId::throw_incompatible_class_change_error_id:
{ __ set_info("throw_incompatible_class_cast_exception", dont_gc_arguments);
oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_incompatible_class_change_error), false);
}
break;
- case slow_subtype_check_id:
+ case C1StubId::slow_subtype_check_id:
{
// Arguments :
// sub : stack param 0
@@ -580,13 +580,13 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
__ z_br(Z_R14);
}
break;
- case monitorenter_nofpu_id:
- case monitorenter_id:
+ case C1StubId::monitorenter_nofpu_id:
+ case C1StubId::monitorenter_id:
{ // Z_R1_scratch : object
// Z_R13 : lock address (see LIRGenerator::syncTempOpr())
__ set_info("monitorenter", dont_gc_arguments);
- int save_fpu_registers = (id == monitorenter_id);
+ int save_fpu_registers = (id == C1StubId::monitorenter_id);
// Make a frame and preserve the caller's caller-save registers.
OopMap* oop_map = save_live_registers(sasm, save_fpu_registers);
@@ -600,15 +600,15 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
}
break;
- case monitorexit_nofpu_id:
- case monitorexit_id:
+ case C1StubId::monitorexit_nofpu_id:
+ case C1StubId::monitorexit_id:
{ // Z_R1_scratch : lock address
// Note: really a leaf routine but must setup last java sp
// => Use call_RT for now (speed can be improved by
// doing last java sp setup manually).
__ set_info("monitorexit", dont_gc_arguments);
- int save_fpu_registers = (id == monitorexit_id);
+ int save_fpu_registers = (id == C1StubId::monitorexit_id);
// Make a frame and preserve the caller's caller-save registers.
OopMap* oop_map = save_live_registers(sasm, save_fpu_registers);
@@ -622,7 +622,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
}
break;
- case deoptimize_id:
+ case C1StubId::deoptimize_id:
{ // Args: Z_R1_scratch: trap request
__ set_info("deoptimize", dont_gc_arguments);
Register trap_request = Z_R1_scratch;
@@ -639,32 +639,32 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
}
break;
- case access_field_patching_id:
+ case C1StubId::access_field_patching_id:
{ __ set_info("access_field_patching", dont_gc_arguments);
oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, access_field_patching));
}
break;
- case load_klass_patching_id:
+ case C1StubId::load_klass_patching_id:
{ __ set_info("load_klass_patching", dont_gc_arguments);
// We should set up register map.
oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_klass_patching));
}
break;
- case load_mirror_patching_id:
+ case C1StubId::load_mirror_patching_id:
{ __ set_info("load_mirror_patching", dont_gc_arguments);
oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_mirror_patching));
}
break;
- case load_appendix_patching_id:
+ case C1StubId::load_appendix_patching_id:
{ __ set_info("load_appendix_patching", dont_gc_arguments);
oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_appendix_patching));
}
break;
#if 0
- case dtrace_object_alloc_id:
+ case C1StubId::dtrace_object_alloc_id:
{ // rax,: object
StubFrame f(sasm, "dtrace_object_alloc", dont_gc_arguments);
// We can't gc here so skip the oopmap but make sure that all
@@ -679,7 +679,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
}
break;
- case fpu2long_stub_id:
+ case C1StubId::fpu2long_stub_id:
{
// rax, and rdx are destroyed, but should be free since the result is returned there
// preserve rsi,ecx
@@ -754,7 +754,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
break;
#endif // TODO
- case predicate_failed_trap_id:
+ case C1StubId::predicate_failed_trap_id:
{
__ set_info("predicate_failed_trap", dont_gc_arguments);
@@ -775,14 +775,14 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
default:
{
- __ should_not_reach_here(FILE_AND_LINE, id);
+ __ should_not_reach_here(FILE_AND_LINE, (int)id);
}
break;
}
return oop_maps;
}
-OopMapSet* Runtime1::generate_handle_exception(StubID id, StubAssembler *sasm) {
+OopMapSet* Runtime1::generate_handle_exception(C1StubId id, StubAssembler *sasm) {
__ block_comment("generate_handle_exception");
// incoming parameters: Z_EXC_OOP, Z_EXC_PC
@@ -793,7 +793,7 @@ OopMapSet* Runtime1::generate_handle_exception(StubID id, StubAssembler *sasm) {
Register reg_fp = Z_R1_scratch;
switch (id) {
- case forward_exception_id: {
+ case C1StubId::forward_exception_id: {
// We're handling an exception in the context of a compiled frame.
// The registers have been saved in the standard places. Perform
// an exception lookup in the caller and dispatch to the handler
@@ -820,13 +820,13 @@ OopMapSet* Runtime1::generate_handle_exception(StubID id, StubAssembler *sasm) {
__ clear_mem(Address(Z_thread, JavaThread::vm_result_2_offset()), sizeof(Metadata*));
break;
}
- case handle_exception_nofpu_id:
- case handle_exception_id:
+ case C1StubId::handle_exception_nofpu_id:
+ case C1StubId::handle_exception_id:
// At this point all registers MAY be live.
DEBUG_ONLY(__ z_lgr(reg_fp, Z_SP);)
- oop_map = save_live_registers(sasm, id != handle_exception_nofpu_id, Z_EXC_PC);
+ oop_map = save_live_registers(sasm, id != C1StubId::handle_exception_nofpu_id, Z_EXC_PC);
break;
- case handle_exception_from_callee_id: {
+ case C1StubId::handle_exception_from_callee_id: {
// At this point all registers except Z_EXC_OOP and Z_EXC_PC are dead.
DEBUG_ONLY(__ z_lgr(reg_fp, Z_SP);)
__ save_return_pc(Z_EXC_PC);
@@ -875,15 +875,15 @@ OopMapSet* Runtime1::generate_handle_exception(StubID id, StubAssembler *sasm) {
__ invalidate_registers(Z_R2);
switch(id) {
- case forward_exception_id:
- case handle_exception_nofpu_id:
- case handle_exception_id:
+ case C1StubId::forward_exception_id:
+ case C1StubId::handle_exception_nofpu_id:
+ case C1StubId::handle_exception_id:
// Restore the registers that were saved at the beginning.
__ z_lgr(Z_R1_scratch, Z_R2); // Restoring live registers kills Z_R2.
- restore_live_registers(sasm, id != handle_exception_nofpu_id); // Pops as well the frame.
+ restore_live_registers(sasm, id != C1StubId::handle_exception_nofpu_id); // Also pops the frame.
__ z_br(Z_R1_scratch);
break;
- case handle_exception_from_callee_id: {
+ case C1StubId::handle_exception_from_callee_id: {
__ pop_frame();
__ z_br(Z_R2); // Jump to exception handler.
}
diff --git a/src/hotspot/cpu/s390/c2_MacroAssembler_s390.cpp b/src/hotspot/cpu/s390/c2_MacroAssembler_s390.cpp
index 3641d82dabea9..025ef4c8915cd 100644
--- a/src/hotspot/cpu/s390/c2_MacroAssembler_s390.cpp
+++ b/src/hotspot/cpu/s390/c2_MacroAssembler_s390.cpp
@@ -34,12 +34,12 @@
#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
void C2_MacroAssembler::fast_lock_lightweight(Register obj, Register box, Register temp1, Register temp2) {
- compiler_fast_lock_lightweight_object(obj, temp1, temp2);
+ compiler_fast_lock_lightweight_object(obj, box, temp1, temp2);
}
void C2_MacroAssembler::fast_unlock_lightweight(Register obj, Register box, Register temp1, Register temp2) {
- compiler_fast_unlock_lightweight_object(obj, temp1, temp2);
+ compiler_fast_unlock_lightweight_object(obj, box, temp1, temp2);
}
//------------------------------------------------------
diff --git a/src/hotspot/cpu/s390/downcallLinker_s390.cpp b/src/hotspot/cpu/s390/downcallLinker_s390.cpp
index 383a32448745c..85ddc5bf18548 100644
--- a/src/hotspot/cpu/s390/downcallLinker_s390.cpp
+++ b/src/hotspot/cpu/s390/downcallLinker_s390.cpp
@@ -36,8 +36,8 @@
#define __ _masm->
-static const int native_invoker_code_base_size = 512;
-static const int native_invoker_size_per_args = 8;
+static const int native_invoker_code_base_size = 384;
+static const int native_invoker_size_per_args = 12;
RuntimeStub* DowncallLinker::make_downcall_stub(BasicType* signature,
int num_args,
diff --git a/src/hotspot/cpu/s390/gc/g1/g1BarrierSetAssembler_s390.cpp b/src/hotspot/cpu/s390/gc/g1/g1BarrierSetAssembler_s390.cpp
index 37631298920ca..544c82d34a769 100644
--- a/src/hotspot/cpu/s390/gc/g1/g1BarrierSetAssembler_s390.cpp
+++ b/src/hotspot/cpu/s390/gc/g1/g1BarrierSetAssembler_s390.cpp
@@ -1,6 +1,6 @@
/*
- * Copyright (c) 2019, 2023, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2018, 2023 SAP SE. All rights reserved.
+ * Copyright (c) 2019, 2024, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2018, 2024 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -42,11 +42,47 @@
#include "c1/c1_LIRAssembler.hpp"
#include "c1/c1_MacroAssembler.hpp"
#include "gc/g1/c1/g1BarrierSetC1.hpp"
-#endif
+#endif // COMPILER1
+#ifdef COMPILER2
+#include "gc/g1/c2/g1BarrierSetC2.hpp"
+#endif // COMPILER2
#define __ masm->
-#define BLOCK_COMMENT(str) if (PrintAssembly) __ block_comment(str)
+#define BLOCK_COMMENT(str) __ block_comment(str)
+
+static void generate_pre_barrier_fast_path(MacroAssembler* masm,
+ const Register thread,
+ const Register tmp1) {
+ Address in_progress(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset()));
+ // Is marking active?
+ if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) {
+ __ load_and_test_int(tmp1, in_progress);
+ } else {
+ assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption");
+ __ load_and_test_byte(tmp1, in_progress);
+ }
+}
+
+static void generate_queue_test_and_insertion(MacroAssembler* masm, ByteSize index_offset, ByteSize buffer_offset, Label& runtime,
+ const Register Z_thread, const Register value, const Register temp) {
+ BLOCK_COMMENT("generate_queue_test_and_insertion {");
+
+ assert_different_registers(temp, value);
+ // Can we store a value in the given thread's buffer?
+ // (The index field is typed as size_t.)
+
+ __ load_and_test_long(temp, Address(Z_thread, in_bytes(index_offset))); // temp := *(index address)
+ __ branch_optimized(Assembler::bcondEqual, runtime); // jump to runtime if index == 0 (full buffer)
+
+ // The buffer is not full; store the value into it.
+ __ add2reg(temp, -wordSize); // temp := next index
+ __ z_stg(temp, in_bytes(index_offset), Z_thread); // *(index address) := next index
+
+ __ z_ag(temp, Address(Z_thread, in_bytes(buffer_offset))); // temp := buffer address + next index
+ __ z_stg(value, 0, temp); // *(buffer address + next index) := value
+ BLOCK_COMMENT("} generate_queue_test_and_insertion");
+}
void G1BarrierSetAssembler::gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators,
Register addr, Register count) {
@@ -59,13 +95,8 @@ void G1BarrierSetAssembler::gen_write_ref_array_pre_barrier(MacroAssembler* masm
assert_different_registers(addr, Z_R0_scratch); // would be destroyed by push_frame()
assert_different_registers(count, Z_R0_scratch); // would be destroyed by push_frame()
Register Rtmp1 = Z_R0_scratch;
- const int active_offset = in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset());
- if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) {
- __ load_and_test_int(Rtmp1, Address(Z_thread, active_offset));
- } else {
- guarantee(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption");
- __ load_and_test_byte(Rtmp1, Address(Z_thread, active_offset));
- }
+
+ generate_pre_barrier_fast_path(masm, Z_thread, Rtmp1);
__ z_bre(filtered); // Activity indicator is zero, so there is no marking going on currently.
RegisterSaver::save_live_registers(masm, RegisterSaver::arg_registers); // Creates frame.
@@ -100,6 +131,181 @@ void G1BarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* mas
}
}
+#if defined(COMPILER2)
+
+#undef __
+#define __ masm->
+
+static void generate_c2_barrier_runtime_call(MacroAssembler* masm, G1BarrierStubC2* stub, const Register pre_val, const address runtime_path) {
+ BLOCK_COMMENT("generate_c2_barrier_runtime_call {");
+ SaveLiveRegisters save_registers(masm, stub);
+ __ call_VM_leaf(runtime_path, pre_val, Z_thread);
+ BLOCK_COMMENT("} generate_c2_barrier_runtime_call");
+}
+
+void G1BarrierSetAssembler::g1_write_barrier_pre_c2(MacroAssembler* masm,
+ Register obj,
+ Register pre_val,
+ Register thread,
+ Register tmp1,
+ G1PreBarrierStubC2* stub) {
+
+ BLOCK_COMMENT("g1_write_barrier_pre_c2 {");
+
+ assert(thread == Z_thread, "must be");
+ assert_different_registers(obj, pre_val, tmp1);
+ assert(pre_val != noreg && tmp1 != noreg, "expecting a register");
+
+ stub->initialize_registers(obj, pre_val, thread, tmp1, noreg);
+
+ generate_pre_barrier_fast_path(masm, thread, tmp1);
+  __ branch_optimized(Assembler::bcondNotEqual, *stub->entry()); // Take the stub's slow path if marking is active (activity indicator != zero).
+
+ __ bind(*stub->continuation());
+
+ BLOCK_COMMENT("} g1_write_barrier_pre_c2");
+}
+
+void G1BarrierSetAssembler::generate_c2_pre_barrier_stub(MacroAssembler* masm,
+ G1PreBarrierStubC2* stub) const {
+
+ BLOCK_COMMENT("generate_c2_pre_barrier_stub {");
+
+ Assembler::InlineSkippedInstructionsCounter skip_counter(masm);
+
+ Label runtime;
+ Register obj = stub->obj();
+ Register pre_val = stub->pre_val();
+ Register thread = stub->thread();
+ Register tmp1 = stub->tmp1();
+
+ __ bind(*stub->entry());
+
+ BLOCK_COMMENT("generate_pre_val_not_null_test {");
+ if (obj != noreg) {
+ __ load_heap_oop(pre_val, Address(obj), noreg, noreg, AS_RAW);
+ }
+ __ z_ltgr(pre_val, pre_val);
+ __ branch_optimized(Assembler::bcondEqual, *stub->continuation());
+ BLOCK_COMMENT("} generate_pre_val_not_null_test");
+
+ generate_queue_test_and_insertion(masm,
+ G1ThreadLocalData::satb_mark_queue_index_offset(),
+ G1ThreadLocalData::satb_mark_queue_buffer_offset(),
+ runtime,
+ Z_thread, pre_val, tmp1);
+
+ __ branch_optimized(Assembler::bcondAlways, *stub->continuation());
+
+ __ bind(runtime);
+
+ generate_c2_barrier_runtime_call(masm, stub, pre_val, CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry));
+
+ __ branch_optimized(Assembler::bcondAlways, *stub->continuation());
+
+ BLOCK_COMMENT("} generate_c2_pre_barrier_stub");
+}
+
+void G1BarrierSetAssembler::g1_write_barrier_post_c2(MacroAssembler* masm,
+ Register store_addr,
+ Register new_val,
+ Register thread,
+ Register tmp1,
+ Register tmp2,
+ G1PostBarrierStubC2* stub) {
+ BLOCK_COMMENT("g1_write_barrier_post_c2 {");
+
+ assert(thread == Z_thread, "must be");
+ assert_different_registers(store_addr, new_val, thread, tmp1, tmp2, Z_R1_scratch);
+
+ assert(store_addr != noreg && new_val != noreg && tmp1 != noreg && tmp2 != noreg, "expecting a register");
+
+ stub->initialize_registers(thread, tmp1, tmp2);
+
+ BLOCK_COMMENT("generate_region_crossing_test {");
+ if (VM_Version::has_DistinctOpnds()) {
+ __ z_xgrk(tmp1, store_addr, new_val);
+ } else {
+ __ z_lgr(tmp1, store_addr);
+ __ z_xgr(tmp1, new_val);
+ }
+ __ z_srag(tmp1, tmp1, G1HeapRegion::LogOfHRGrainBytes);
+ __ branch_optimized(Assembler::bcondEqual, *stub->continuation());
+ BLOCK_COMMENT("} generate_region_crossing_test");
+
+ // crosses regions, storing null?
+ if ((stub->barrier_data() & G1C2BarrierPostNotNull) == 0) {
+ __ z_ltgr(new_val, new_val);
+ __ branch_optimized(Assembler::bcondEqual, *stub->continuation());
+ }
+
+ BLOCK_COMMENT("generate_card_young_test {");
+  CardTableBarrierSet* ct = barrier_set_cast<CardTableBarrierSet*>(BarrierSet::barrier_set());
+ // calculate address of card
+ __ load_const_optimized(tmp2, (address)ct->card_table()->byte_map_base()); // Card table base.
+ __ z_srlg(tmp1, store_addr, CardTable::card_shift()); // Index into card table.
+ __ z_algr(tmp1, tmp2); // Explicit calculation needed for cli.
+
+ // Filter young.
+ __ z_cli(0, tmp1, G1CardTable::g1_young_card_val());
+
+ BLOCK_COMMENT("} generate_card_young_test");
+
+ // From here on, tmp1 holds the card address.
+ __ branch_optimized(Assembler::bcondNotEqual, *stub->entry());
+
+ __ bind(*stub->continuation());
+
+ BLOCK_COMMENT("} g1_write_barrier_post_c2");
+}
+
+void G1BarrierSetAssembler::generate_c2_post_barrier_stub(MacroAssembler* masm,
+ G1PostBarrierStubC2* stub) const {
+
+ BLOCK_COMMENT("generate_c2_post_barrier_stub {");
+
+ Assembler::InlineSkippedInstructionsCounter skip_counter(masm);
+ Label runtime;
+
+ Register thread = stub->thread();
+ Register tmp1 = stub->tmp1(); // tmp1 holds the card address.
+ Register tmp2 = stub->tmp2();
+ Register Rcard_addr = tmp1;
+
+ __ bind(*stub->entry());
+
+ BLOCK_COMMENT("generate_card_clean_test {");
+ __ z_sync(); // Required to support concurrent cleaning.
+ __ z_cli(0, Rcard_addr, 0); // Reload after membar.
+ __ branch_optimized(Assembler::bcondEqual, *stub->continuation());
+ BLOCK_COMMENT("} generate_card_clean_test");
+
+ BLOCK_COMMENT("generate_dirty_card {");
+ // Storing a region crossing, non-null oop, card is clean.
+ // Dirty card and log.
+ STATIC_ASSERT(CardTable::dirty_card_val() == 0);
+ __ z_mvi(0, Rcard_addr, CardTable::dirty_card_val());
+ BLOCK_COMMENT("} generate_dirty_card");
+
+ generate_queue_test_and_insertion(masm,
+ G1ThreadLocalData::dirty_card_queue_index_offset(),
+ G1ThreadLocalData::dirty_card_queue_buffer_offset(),
+ runtime,
+ Z_thread, tmp1, tmp2);
+
+ __ branch_optimized(Assembler::bcondAlways, *stub->continuation());
+
+ __ bind(runtime);
+
+ generate_c2_barrier_runtime_call(masm, stub, tmp1, CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry));
+
+ __ branch_optimized(Assembler::bcondAlways, *stub->continuation());
+
+ BLOCK_COMMENT("} generate_c2_post_barrier_stub");
+}
+
+#endif // COMPILER2
+
void G1BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
const Address& src, Register dst, Register tmp1, Register tmp2, Label *L_handle_null) {
bool on_oop = is_reference_type(type);
@@ -136,9 +342,6 @@ void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm, Decorator
const Register Robj = obj ? obj->base() : noreg,
Roff = obj ? obj->index() : noreg;
- const int active_offset = in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset());
- const int buffer_offset = in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset());
- const int index_offset = in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset());
assert_different_registers(Rtmp1, Rtmp2, Z_R0_scratch); // None of the Rtmp must be Z_R0!!
assert_different_registers(Robj, Z_R0_scratch); // Used for addressing. Furthermore, push_frame destroys Z_R0!!
assert_different_registers(Rval, Z_R0_scratch); // push_frame destroys Z_R0!!
@@ -147,14 +350,7 @@ void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm, Decorator
BLOCK_COMMENT("g1_write_barrier_pre {");
- // Is marking active?
- // Note: value is loaded for test purposes only. No further use here.
- if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) {
- __ load_and_test_int(Rtmp1, Address(Z_thread, active_offset));
- } else {
- guarantee(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption");
- __ load_and_test_byte(Rtmp1, Address(Z_thread, active_offset));
- }
+ generate_pre_barrier_fast_path(masm, Z_thread, Rtmp1);
__ z_bre(filtered); // Activity indicator is zero, so there is no marking going on currently.
assert(Rpre_val != noreg, "must have a real register");
@@ -194,24 +390,14 @@ void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm, Decorator
// We can store the original value in the thread's buffer
// only if index > 0. Otherwise, we need runtime to handle.
// (The index field is typed as size_t.)
- Register Rbuffer = Rtmp1, Rindex = Rtmp2;
- assert_different_registers(Rbuffer, Rindex, Rpre_val);
-
- __ z_lg(Rbuffer, buffer_offset, Z_thread);
- __ load_and_test_long(Rindex, Address(Z_thread, index_offset));
- __ z_bre(callRuntime); // If index == 0, goto runtime.
-
- __ add2reg(Rindex, -wordSize); // Decrement index.
- __ z_stg(Rindex, index_offset, Z_thread);
-
- // Record the previous value.
- __ z_stg(Rpre_val, 0, Rbuffer, Rindex);
+ generate_queue_test_and_insertion(masm,
+ G1ThreadLocalData::satb_mark_queue_index_offset(),
+ G1ThreadLocalData::satb_mark_queue_buffer_offset(),
+ callRuntime,
+ Z_thread, Rpre_val, Rtmp2);
__ z_bru(filtered); // We are done.
- Rbuffer = noreg; // end of life
- Rindex = noreg; // end of life
-
__ bind(callRuntime);
// Save some registers (inputs and result) over runtime call
@@ -326,23 +512,16 @@ void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm, Decorato
Register Rcard_addr_x = Rcard_addr;
Register Rqueue_index = (Rtmp2 != Z_R0_scratch) ? Rtmp2 : Rtmp1;
- Register Rqueue_buf = (Rtmp3 != Z_R0_scratch) ? Rtmp3 : Rtmp1;
- const int qidx_off = in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset());
- const int qbuf_off = in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset());
- if ((Rcard_addr == Rqueue_buf) || (Rcard_addr == Rqueue_index)) {
+ if (Rcard_addr == Rqueue_index) {
Rcard_addr_x = Z_R0_scratch; // Register shortage. We have to use Z_R0.
}
__ lgr_if_needed(Rcard_addr_x, Rcard_addr);
- __ load_and_test_long(Rqueue_index, Address(Z_thread, qidx_off));
- __ z_bre(callRuntime); // Index == 0 then jump to runtime.
-
- __ z_lg(Rqueue_buf, qbuf_off, Z_thread);
-
- __ add2reg(Rqueue_index, -wordSize); // Decrement index.
- __ z_stg(Rqueue_index, qidx_off, Z_thread);
-
- __ z_stg(Rcard_addr_x, 0, Rqueue_index, Rqueue_buf); // Store card.
+ generate_queue_test_and_insertion(masm,
+ G1ThreadLocalData::dirty_card_queue_index_offset(),
+ G1ThreadLocalData::dirty_card_queue_buffer_offset(),
+ callRuntime,
+ Z_thread, Rcard_addr_x, Rqueue_index);
__ z_bru(filtered);
__ bind(callRuntime);
diff --git a/src/hotspot/cpu/s390/gc/g1/g1BarrierSetAssembler_s390.hpp b/src/hotspot/cpu/s390/gc/g1/g1BarrierSetAssembler_s390.hpp
index cc1d51d2fa13e..0f0bdd8b83cfd 100644
--- a/src/hotspot/cpu/s390/gc/g1/g1BarrierSetAssembler_s390.hpp
+++ b/src/hotspot/cpu/s390/gc/g1/g1BarrierSetAssembler_s390.hpp
@@ -1,6 +1,6 @@
/*
- * Copyright (c) 2018, 2023, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2018 SAP SE. All rights reserved.
+ * Copyright (c) 2018, 2024, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2018, 2024 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -34,6 +34,8 @@ class LIR_Assembler;
class StubAssembler;
class G1PreBarrierStub;
class G1PostBarrierStub;
+class G1PreBarrierStubC2;
+class G1PostBarrierStubC2;
class G1BarrierSetAssembler: public ModRefBarrierSetAssembler {
protected:
@@ -62,7 +64,27 @@ class G1BarrierSetAssembler: public ModRefBarrierSetAssembler {
void generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm);
void generate_c1_post_barrier_runtime_stub(StubAssembler* sasm);
-#endif
+#endif // COMPILER1
+
+#ifdef COMPILER2
+ void g1_write_barrier_pre_c2(MacroAssembler* masm,
+ Register obj,
+ Register pre_val,
+ Register thread,
+ Register tmp1,
+ G1PreBarrierStubC2* c2_stub);
+ void generate_c2_pre_barrier_stub(MacroAssembler* masm,
+ G1PreBarrierStubC2* stub) const;
+ void g1_write_barrier_post_c2(MacroAssembler* masm,
+ Register store_addr,
+ Register new_val,
+ Register thread,
+ Register tmp1,
+ Register tmp2,
+ G1PostBarrierStubC2* c2_stub);
+ void generate_c2_post_barrier_stub(MacroAssembler* masm,
+ G1PostBarrierStubC2* stub) const;
+#endif // COMPILER2
virtual void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
const Address& src, Register dst, Register tmp1, Register tmp2, Label *L_handle_null = nullptr);
diff --git a/src/hotspot/cpu/s390/gc/g1/g1_s390.ad b/src/hotspot/cpu/s390/gc/g1/g1_s390.ad
new file mode 100644
index 0000000000000..31f60c4aeff0b
--- /dev/null
+++ b/src/hotspot/cpu/s390/gc/g1/g1_s390.ad
@@ -0,0 +1,457 @@
+//
+// Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
+// Copyright 2024 IBM Corporation. All rights reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+//
+// This code is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License version 2 only, as
+// published by the Free Software Foundation.
+//
+// This code is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+// version 2 for more details (a copy is included in the LICENSE file that
+// accompanied this code).
+//
+// You should have received a copy of the GNU General Public License version
+// 2 along with this work; if not, write to the Free Software Foundation,
+// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+//
+// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+// or visit www.oracle.com if you need additional information or have any
+// questions.
+//
+
+source_hpp %{
+
+#include "gc/g1/c2/g1BarrierSetC2.hpp"
+#include "gc/shared/gc_globals.hpp"
+
+%}
+
+source %{
+
+#include "gc/g1/g1BarrierSetAssembler_s390.hpp"
+#include "gc/g1/g1BarrierSetRuntime.hpp"
+
+static void write_barrier_pre(MacroAssembler* masm,
+ const MachNode* node,
+ Register obj,
+ Register pre_val,
+ Register tmp1,
+ RegSet preserve = RegSet(),
+ RegSet no_preserve = RegSet()) {
+ if (!G1PreBarrierStubC2::needs_barrier(node)) {
+ return;
+ }
+ Assembler::InlineSkippedInstructionsCounter skip_counter(masm);
+  G1BarrierSetAssembler* g1_asm = static_cast<G1BarrierSetAssembler*>(BarrierSet::barrier_set()->barrier_set_assembler());
+ G1PreBarrierStubC2* const stub = G1PreBarrierStubC2::create(node);
+  for (RegSetIterator<Register> reg = preserve.begin(); *reg != noreg; ++reg) {
+ stub->preserve(*reg);
+ }
+  for (RegSetIterator<Register> reg = no_preserve.begin(); *reg != noreg; ++reg) {
+ stub->dont_preserve(*reg);
+ }
+ g1_asm->g1_write_barrier_pre_c2(masm, obj, pre_val, Z_thread, tmp1, stub);
+}
+
+static void write_barrier_post(MacroAssembler* masm,
+ const MachNode* node,
+ Register store_addr,
+ Register new_val,
+ Register tmp1,
+ Register tmp2) {
+ if (!G1PostBarrierStubC2::needs_barrier(node)) {
+ return;
+ }
+ Assembler::InlineSkippedInstructionsCounter skip_counter(masm);
+  G1BarrierSetAssembler* g1_asm = static_cast<G1BarrierSetAssembler*>(BarrierSet::barrier_set()->barrier_set_assembler());
+ G1PostBarrierStubC2* const stub = G1PostBarrierStubC2::create(node);
+ g1_asm->g1_write_barrier_post_c2(masm, store_addr, new_val, Z_thread, tmp1, tmp2, stub);
+}
+
+%} // source
+
+// store pointer
+instruct g1StoreP(indirect dst, memoryRegP src, iRegL tmp1, iRegL tmp2, flagsReg cr) %{
+ predicate(UseG1GC && n->as_Store()->barrier_data() != 0);
+ match(Set dst (StoreP dst src));
+ effect(TEMP tmp1, TEMP tmp2, KILL cr);
+ ins_cost(MEMORY_REF_COST);
+ format %{ "STG $src,$dst\t # ptr" %}
+ ins_encode %{
+ __ block_comment("g1StoreP {");
+ write_barrier_pre(masm, this,
+ $dst$$Register /* obj */,
+ $tmp1$$Register /* pre_val */,
+ $tmp2$$Register /* tmp1 */,
+ RegSet::of($dst$$Register, $src$$Register) /* preserve */);
+
+ __ z_stg($src$$Register, Address($dst$$Register));
+
+ write_barrier_post(masm, this,
+                       $dst$$Register /* store_addr */,
+ $src$$Register /* new_val */,
+ $tmp1$$Register /* tmp1 */,
+ $tmp2$$Register /* tmp2 */);
+ __ block_comment("} g1StoreP");
+ %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Store Compressed Pointer
+instruct g1StoreN(indirect mem, iRegN_P2N src, iRegL tmp1, iRegL tmp2, iRegL tmp3, flagsReg cr) %{
+ predicate(UseG1GC && n->as_Store()->barrier_data() != 0);
+ match(Set mem (StoreN mem src));
+ effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);
+ ins_cost(MEMORY_REF_COST);
+ format %{ "STY $src,$mem\t # (cOop)" %}
+ ins_encode %{
+ __ block_comment("g1StoreN {");
+ write_barrier_pre(masm, this,
+ $mem$$Register /* obj */,
+ $tmp1$$Register /* pre_val */,
+ $tmp2$$Register /* tmp1 */,
+ RegSet::of($mem$$Register, $src$$Register) /* preserve */);
+
+ __ z_sty($src$$Register, Address($mem$$Register));
+
+ if ((barrier_data() & G1C2BarrierPost) != 0) {
+ if ((barrier_data() & G1C2BarrierPostNotNull) == 0) {
+ __ oop_decoder($tmp1$$Register, $src$$Register, true /* maybe_null */);
+ } else {
+ __ oop_decoder($tmp1$$Register, $src$$Register, false /* maybe_null */);
+ }
+ }
+
+ write_barrier_post(masm, this,
+ $mem$$Register /* store_addr */,
+ $tmp1$$Register /* new_val */,
+ $tmp2$$Register /* tmp1 */,
+ $tmp3$$Register /* tmp2 */);
+ __ block_comment("} g1StoreN");
+ %}
+
+ ins_pipe(pipe_class_dummy);
+%}
+
+instruct g1CompareAndSwapN(indirect mem_ptr, rarg5RegN oldval, iRegN_P2N newval, iRegI res, iRegL tmp1, iRegL tmp2, iRegL tmp3, flagsReg cr) %{
+ predicate(UseG1GC && n->as_LoadStore()->barrier_data() != 0);
+ match(Set res (CompareAndSwapN mem_ptr (Binary oldval newval)));
+ match(Set res (WeakCompareAndSwapN mem_ptr (Binary oldval newval)));
+ effect(USE mem_ptr, TEMP res, TEMP tmp1, TEMP tmp2, TEMP tmp3, USE_KILL oldval, KILL cr);
+ format %{ "$res = CompareAndSwapN $oldval,$newval,$mem_ptr" %}
+ ins_encode %{
+ assert_different_registers($oldval$$Register, $mem_ptr$$Register);
+ assert_different_registers($newval$$Register, $mem_ptr$$Register);
+ __ block_comment("g1compareAndSwapN {");
+
+ Register Rcomp = reg_to_register_object($oldval$$reg);
+ Register Rnew = reg_to_register_object($newval$$reg);
+ Register Raddr = reg_to_register_object($mem_ptr$$reg);
+ Register Rres = reg_to_register_object($res$$reg);
+
+ write_barrier_pre(masm, this,
+ Raddr /* obj */,
+ $tmp1$$Register /* pre_val */,
+ $tmp2$$Register /* tmp1 */,
+ RegSet::of(Raddr, Rcomp, Rnew) /* preserve */,
+ RegSet::of(Rres) /* no_preserve */);
+
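+    // Compare-and-swap the 32-bit narrow oop; CS sets CC = EQ on success.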
+ __ z_cs(Rcomp, Rnew, 0, Raddr);
+
+ assert_different_registers(Rres, Raddr);
+ if (VM_Version::has_LoadStoreConditional()) {
+ __ load_const_optimized(Z_R0_scratch, 0L); // false (failed)
+ __ load_const_optimized(Rres, 1L); // true (succeed)
+ __ z_locgr(Rres, Z_R0_scratch, Assembler::bcondNotEqual);
+ } else {
+ Label done;
+ __ load_const_optimized(Rres, 0L); // false (failed)
+ __ z_brne(done); // Assume true to be the common case.
+ __ load_const_optimized(Rres, 1L); // true (succeed)
+ __ bind(done);
+ }
+
+ __ oop_decoder($tmp3$$Register, Rnew, true /* maybe_null */);
+
+ write_barrier_post(masm, this,
+ Raddr /* store_addr */,
+ $tmp3$$Register /* new_val */,
+ $tmp1$$Register /* tmp1 */,
+ $tmp2$$Register /* tmp2 */);
+ __ block_comment("} g1compareAndSwapN");
+ %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+instruct g1CompareAndExchangeN(iRegP mem_ptr, rarg5RegN oldval, iRegN_P2N newval, iRegN res, iRegL tmp1, iRegL tmp2, iRegL tmp3, flagsReg cr) %{
+ predicate(UseG1GC && n->as_LoadStore()->barrier_data() != 0);
+ match(Set res (CompareAndExchangeN mem_ptr (Binary oldval newval)));
+ effect(USE mem_ptr, TEMP res, TEMP tmp1, TEMP tmp2, TEMP tmp3, USE_KILL oldval, KILL cr);
+ format %{ "$res = CompareAndExchangeN $oldval,$newval,$mem_ptr" %}
+ ins_encode %{
+ assert_different_registers($oldval$$Register, $mem_ptr$$Register);
+ assert_different_registers($newval$$Register, $mem_ptr$$Register);
+ __ block_comment("g1CompareAndExchangeN {");
+ write_barrier_pre(masm, this,
+ $mem_ptr$$Register /* obj */,
+ $tmp1$$Register /* pre_val */,
+ $tmp2$$Register /* tmp1 */,
+ RegSet::of($mem_ptr$$Register, $oldval$$Register, $newval$$Register) /* preserve */,
+ RegSet::of($res$$Register) /* no_preserve */);
+
+ Register Rcomp = reg_to_register_object($oldval$$reg);
+ Register Rnew = reg_to_register_object($newval$$reg);
+ Register Raddr = reg_to_register_object($mem_ptr$$reg);
+
+ Register Rres = reg_to_register_object($res$$reg);
+ assert_different_registers(Rres, Raddr);
+
+ __ z_lgr(Rres, Rcomp); // previous contents
+ __ z_csy(Rres, Rnew, 0, Raddr); // Try to store new value.
+
+ __ oop_decoder($tmp1$$Register, Rnew, true /* maybe_null */);
+
+ write_barrier_post(masm, this,
+ Raddr /* store_addr */,
+ $tmp1$$Register /* new_val */,
+ $tmp2$$Register /* tmp1 */,
+ $tmp3$$Register /* tmp2 */);
+ __ block_comment("} g1CompareAndExchangeN");
+ %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Load narrow oop
+instruct g1LoadN(iRegN dst, indirect mem, iRegP tmp1, iRegP tmp2, flagsReg cr) %{
+ predicate(UseG1GC && n->as_Load()->barrier_data() != 0);
+ match(Set dst (LoadN mem));
+ effect(TEMP dst, TEMP tmp1, TEMP tmp2, KILL cr);
+ ins_cost(MEMORY_REF_COST);
+ format %{ "LoadN $dst,$mem\t # (cOop)" %}
+ ins_encode %{
+ __ block_comment("g1LoadN {");
+ __ z_llgf($dst$$Register, Address($mem$$Register));
+ if ((barrier_data() & G1C2BarrierPre) != 0) {
+ __ oop_decoder($tmp1$$Register, $dst$$Register, true);
+ write_barrier_pre(masm, this,
+ noreg /* obj */,
+ $tmp1$$Register /* pre_val */,
+                        $tmp2$$Register);
+ }
+ __ block_comment("} g1LoadN");
+ %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+instruct g1GetAndSetN(indirect mem, iRegN dst, iRegI tmp, iRegL tmp1, iRegL tmp2, iRegL tmp3, flagsReg cr) %{
+ predicate(UseG1GC && n->as_LoadStore()->barrier_data() != 0);
+ match(Set dst (GetAndSetN mem dst));
+ effect(KILL cr, TEMP tmp, TEMP tmp1, TEMP tmp2, TEMP tmp3); // USE_DEF dst by match rule.
+ format %{ "XCHGN $dst,[$mem]\t # EXCHANGE (coop, atomic), temp $tmp" %}
+ ins_encode %{
+ __ block_comment("g1GetAndSetN {");
+ assert_different_registers($mem$$Register, $dst$$Register);
+ write_barrier_pre(masm, this,
+ $mem$$Register /* obj */,
+ $tmp1$$Register /* pre_val */,
+ $tmp2$$Register /* tmp1 */,
+ RegSet::of($mem$$Register, $dst$$Register) /* preserve */);
+
+ Register Rdst = reg_to_register_object($dst$$reg);
+ Register Rtmp = reg_to_register_object($tmp$$reg);
+ guarantee(Rdst != Rtmp, "Fix match rule to use TEMP_DEF");
+ Label retry;
+
+ // Iterate until swap succeeds.
+ __ z_llgf(Rtmp, Address($mem$$Register)); // current contents
+ __ bind(retry);
+    // Try to swap in the new value; on mismatch, Rtmp is reloaded with the current contents.
+ __ z_csy(Rtmp, Rdst, Address($mem$$Register)); // Try to store new value.
+ __ z_brne(retry); // Yikes, concurrent update, need to retry.
+
+ __ oop_decoder($tmp1$$Register, $dst$$Register, true /* maybe_null */);
+
+ __ z_lgr(Rdst, Rtmp); // Exchanged value from memory is return value.
+
+ write_barrier_post(masm, this,
+ $mem$$Register /* store_addr */,
+ $tmp1$$Register /* new_val */,
+ $tmp2$$Register /* tmp1 */,
+ $tmp3$$Register /* tmp2 */);
+
+ __ block_comment("} g1GetAndSetN");
+ %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+instruct g1CompareAndSwapP(iRegP mem_ptr, rarg5RegP oldval, iRegP_N2P newval, iRegI res, iRegL tmp1, iRegL tmp2, flagsReg cr) %{
+ predicate(UseG1GC && n->as_LoadStore()->barrier_data() != 0);
+ match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
+ match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
+ effect(TEMP res, TEMP tmp1, TEMP tmp2, USE mem_ptr, USE_KILL oldval, KILL cr);
+ format %{ "$res = CompareAndSwapP $oldval,$newval,$mem_ptr" %}
+ ins_encode %{
+ __ block_comment("g1CompareAndSwapP {");
+ assert_different_registers($oldval$$Register, $mem_ptr$$Register);
+ assert_different_registers($newval$$Register, $mem_ptr$$Register);
+
+ Register Rcomp = reg_to_register_object($oldval$$reg);
+ Register Rnew = reg_to_register_object($newval$$reg);
+ Register Raddr = reg_to_register_object($mem_ptr$$reg);
+ Register Rres = reg_to_register_object($res$$reg);
+
+ write_barrier_pre(masm, this,
+ noreg /* obj */,
+ Rcomp /* pre_val */,
+ $tmp1$$Register /* tmp1 */,
+ RegSet::of(Raddr, Rcomp, Rnew) /* preserve */,
+ RegSet::of(Rres) /* no_preserve */);
+
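+    // Compare-and-swap the full 64-bit oop; CSG sets CC = EQ on success.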
+ __ z_csg(Rcomp, Rnew, 0, Raddr);
+
+ if (VM_Version::has_LoadStoreConditional()) {
+ __ load_const_optimized(Z_R0_scratch, 0L); // false (failed)
+ __ load_const_optimized(Rres, 1L); // true (succeed)
+ __ z_locgr(Rres, Z_R0_scratch, Assembler::bcondNotEqual);
+ } else {
+ Label done;
+ __ load_const_optimized(Rres, 0L); // false (failed)
+ __ z_brne(done); // Assume true to be the common case.
+ __ load_const_optimized(Rres, 1L); // true (succeed)
+ __ bind(done);
+ }
+
+ write_barrier_post(masm, this,
+ Raddr /* store_addr */,
+ Rnew /* new_val */,
+ $tmp1$$Register /* tmp1 */,
+ $tmp2$$Register /* tmp2 */);
+ __ block_comment("} g1CompareAndSwapP");
+ %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+instruct g1CompareAndExchangeP(iRegP mem_ptr, rarg5RegP oldval, iRegP_N2P newval, iRegP res, iRegL tmp1, iRegL tmp2, flagsReg cr) %{
+ predicate(UseG1GC && n->as_LoadStore()->barrier_data() != 0);
+ match(Set res (CompareAndExchangeP mem_ptr (Binary oldval newval)));
+ effect(TEMP res, TEMP tmp1, TEMP tmp2, USE mem_ptr, USE_KILL oldval, KILL cr);
+ format %{ "$res = CompareAndExchangeP $oldval,$newval,$mem_ptr" %}
+ ins_encode %{
+ __ block_comment("g1CompareAndExchangeP {");
+ assert_different_registers($oldval$$Register, $mem_ptr$$Register);
+ assert_different_registers($newval$$Register, $mem_ptr$$Register);
+
+ // Pass $oldval to the pre-barrier (instead of loading from $mem), because
+ // $oldval is the only value that can be overwritten.
+ // The same holds for g1CompareAndSwapP.
+ write_barrier_pre(masm, this,
+ noreg /* obj */,
+ $oldval$$Register /* pre_val */,
+ $tmp2$$Register /* tmp1 */,
+ RegSet::of($mem_ptr$$Register, $oldval$$Register, $newval$$Register) /* preserve */,
+ RegSet::of($res$$Register) /* no_preserve */);
+
+ __ z_lgr($res$$Register, $oldval$$Register); // previous content
+
+ __ z_csg($oldval$$Register, $newval$$Register, 0, $mem_ptr$$reg);
+
+ write_barrier_post(masm, this,
+ $mem_ptr$$Register /* store_addr */,
+ $newval$$Register /* new_val */,
+ $tmp1$$Register /* tmp1 */,
+ $tmp2$$Register /* tmp2 */);
+ __ block_comment("} g1CompareAndExchangeP");
+ %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Load Pointer
+instruct g1LoadP(iRegP dst, memory mem, iRegL tmp1, flagsReg cr) %{
+ predicate(UseG1GC && n->as_Load()->barrier_data() != 0);
+ match(Set dst (LoadP mem));
+ effect(TEMP dst, TEMP tmp1, KILL cr);
+ ins_cost(MEMORY_REF_COST);
+ format %{ "LG $dst,$mem\t # ptr" %}
+ ins_encode %{
+ __ block_comment("g1LoadP {");
+ __ z_lg($dst$$Register, $mem$$Address);
+ write_barrier_pre(masm, this,
+ noreg /* obj */,
+ $dst$$Register /* pre_val */,
+                      $tmp1$$Register);
+ __ block_comment("} g1LoadP");
+ %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+instruct g1GetAndSetP(indirect mem, iRegP dst, iRegL tmp, iRegL tmp1, iRegL tmp2, flagsReg cr) %{
+ predicate(UseG1GC && n->as_LoadStore()->barrier_data() != 0);
+ match(Set dst (GetAndSetP mem dst));
+ effect(KILL cr, TEMP tmp, TEMP tmp1, TEMP tmp2); // USE_DEF dst by match rule.
+ format %{ "XCHGP $dst,[$mem]\t # EXCHANGE (oop, atomic), temp $tmp" %}
+ ins_encode %{
+ __ block_comment("g1GetAndSetP {");
+
+ write_barrier_pre(masm, this,
+ $mem$$Register /* obj */,
+ $tmp$$Register /* pre_val (as a temporary register) */,
+ $tmp1$$Register /* tmp1 */,
+ RegSet::of($mem$$Register, $dst$$Register) /* preserve */);
+
+ __ z_lgr($tmp1$$Register, $dst$$Register);
+ Register Rdst = reg_to_register_object($dst$$reg);
+ Register Rtmp = reg_to_register_object($tmp$$reg);
+ guarantee(Rdst != Rtmp, "Fix match rule to use TEMP_DEF");
+ Label retry;
+
+ // Iterate until swap succeeds.
+ __ z_lg(Rtmp, Address($mem$$Register)); // current contents
+ __ bind(retry);
+    // Try to swap in the new value; on mismatch, Rtmp is reloaded with the current contents.
+ __ z_csg(Rtmp, Rdst, Address($mem$$Register)); // Try to store new value.
+ __ z_brne(retry); // Yikes, concurrent update, need to retry.
+ __ z_lgr(Rdst, Rtmp); // Exchanged value from memory is return value.
+
+ write_barrier_post(masm, this,
+ $mem$$Register /* store_addr */,
+ $tmp1$$Register /* new_val */,
+ $tmp2$$Register /* tmp1 */,
+ $tmp$$Register /* tmp2 */);
+ __ block_comment("} g1GetAndSetP");
+ %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+instruct g1EncodePAndStoreN(indirect mem, iRegP src, iRegL tmp1, iRegL tmp2, flagsReg cr)
+%{
+ predicate(UseG1GC && n->as_Store()->barrier_data() != 0);
+ match(Set mem (StoreN mem (EncodeP src)));
+ effect(TEMP tmp1, TEMP tmp2, KILL cr);
+ // ins_cost(INSN_COST);
+ format %{ "encode_heap_oop $tmp1, $src\n\t"
+ "st $tmp1, $mem\t# compressed ptr" %}
+ ins_encode %{
+ __ block_comment("g1EncodePAndStoreN {");
+ write_barrier_pre(masm, this,
+ $mem$$Register /* obj */,
+ $tmp1$$Register /* pre_val */,
+ $tmp2$$Register /* tmp1 */,
+ RegSet::of($mem$$Register, $src$$Register) /* preserve */);
+ if ((barrier_data() & G1C2BarrierPostNotNull) == 0) {
+ __ oop_encoder($tmp1$$Register, $src$$Register, true /* maybe_null */);
+ } else {
+ __ oop_encoder($tmp1$$Register, $src$$Register, false /* maybe_null */);
+ }
+ __ z_st($tmp1$$Register, Address($mem$$Register));
+ write_barrier_post(masm, this,
+ $mem$$Register /* store_addr */,
+ $src$$Register /* new_val */,
+ $tmp1$$Register /* tmp1 */,
+ $tmp2$$Register /* tmp2 */);
+ __ block_comment("} g1EncodePAndStoreN");
+ %}
+ ins_pipe(pipe_class_dummy);
+%}
diff --git a/src/hotspot/cpu/s390/gc/shared/barrierSetAssembler_s390.cpp b/src/hotspot/cpu/s390/gc/shared/barrierSetAssembler_s390.cpp
index d3457916bc9d5..d826b4a06f336 100644
--- a/src/hotspot/cpu/s390/gc/shared/barrierSetAssembler_s390.cpp
+++ b/src/hotspot/cpu/s390/gc/shared/barrierSetAssembler_s390.cpp
@@ -33,6 +33,9 @@
#include "runtime/jniHandles.hpp"
#include "runtime/stubRoutines.hpp"
#include "utilities/macros.hpp"
+#ifdef COMPILER2
+#include "gc/shared/c2/barrierSetC2.hpp"
+#endif // COMPILER2
#define __ masm->
@@ -105,16 +108,60 @@ void BarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators
}
}
+// Generic implementation. GCs can provide an optimized one.
void BarrierSetAssembler::resolve_jobject(MacroAssembler* masm, Register value, Register tmp1, Register tmp2) {
- NearLabel Ldone;
- __ z_ltgr(tmp1, value);
- __ z_bre(Ldone); // Use null result as-is.
- __ z_nill(value, ~JNIHandles::tag_mask);
- __ z_lg(value, 0, value); // Resolve (untagged) jobject.
+ assert_different_registers(value, tmp1, tmp2);
+ NearLabel done, weak_tag, verify, tagged;
+ __ z_ltgr(value, value);
+ __ z_bre(done); // Use null result as-is.
+
+ __ z_tmll(value, JNIHandles::tag_mask);
+ __ z_btrue(tagged); // not zero
+
+ // Resolve Local handle
+ __ access_load_at(T_OBJECT, IN_NATIVE | AS_RAW, Address(value, 0), value, tmp1, tmp2);
+ __ z_bru(verify);
+
+ __ bind(tagged);
+ __ testbit(value, exact_log2(JNIHandles::TypeTag::weak_global)); // test for weak tag
+ __ z_btrue(weak_tag);
+
+ // resolve global handle
+ __ access_load_at(T_OBJECT, IN_NATIVE, Address(value, -JNIHandles::TypeTag::global), value, tmp1, tmp2);
+ __ z_bru(verify);
+
+ __ bind(weak_tag);
+ // resolve jweak.
+ __ access_load_at(T_OBJECT, IN_NATIVE | ON_PHANTOM_OOP_REF,
+ Address(value, -JNIHandles::TypeTag::weak_global), value, tmp1, tmp2);
+ __ bind(verify);
+ __ verify_oop(value, FILE_AND_LINE);
+ __ bind(done);
+}
+
+// Generic implementation. GCs can provide an optimized one.
+void BarrierSetAssembler::resolve_global_jobject(MacroAssembler* masm, Register value, Register tmp1, Register tmp2) {
+ assert_different_registers(value, tmp1, tmp2);
+ NearLabel done;
+
+ __ z_ltgr(value, value);
+ __ z_bre(done); // use null as-is.
+#ifdef ASSERT
+ {
+ NearLabel valid_global_tag;
+ __ testbit(value, exact_log2(JNIHandles::TypeTag::global)); // test for global tag
+ __ z_btrue(valid_global_tag);
+ __ stop("non global jobject using resolve_global_jobject");
+ __ bind(valid_global_tag);
+ }
+#endif // ASSERT
+
+ // Resolve global handle
+ __ access_load_at(T_OBJECT, IN_NATIVE, Address(value, -JNIHandles::TypeTag::global), value, tmp1, tmp2);
__ verify_oop(value, FILE_AND_LINE);
- __ bind(Ldone);
+ __ bind(done);
}
void BarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env,
@@ -150,8 +197,93 @@ void BarrierSetAssembler::nmethod_entry_barrier(MacroAssembler* masm) {
#ifdef COMPILER2
-OptoReg::Name BarrierSetAssembler::refine_register(const Node* node, OptoReg::Name opto_reg) {
- Unimplemented(); // This must be implemented to support late barrier expansion.
+OptoReg::Name BarrierSetAssembler::refine_register(const Node* node, OptoReg::Name opto_reg) const {
+ if (!OptoReg::is_reg(opto_reg)) {
+ return OptoReg::Bad;
+ }
+
+ VMReg vm_reg = OptoReg::as_VMReg(opto_reg);
+  if ((vm_reg->is_Register() || vm_reg->is_FloatRegister()) && (opto_reg & 1) != 0) {
+ return OptoReg::Bad;
+ }
+
+ return opto_reg;
+}
+
+#undef __
+#define __ _masm->
+
+SaveLiveRegisters::SaveLiveRegisters(MacroAssembler *masm, BarrierStubC2 *stub)
+ : _masm(masm), _reg_mask(stub->preserve_set()) {
+
+ const int register_save_size = iterate_over_register_mask(ACTION_COUNT_ONLY) * BytesPerWord;
+
+  _frame_size = align_up(register_save_size, frame::alignment_in_bytes) + frame::z_abi_160_size; // FIXME: this could be restricted to argument registers only
+
+ __ save_return_pc();
+  __ push_frame(_frame_size, Z_R14); // FIXME: check if Z_R1_scratch can do the job here
+
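+  // Reload the return pc into Z_R14 (it was used as a scratch register by push_frame above).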
+ __ z_lg(Z_R14, _z_common_abi(return_pc) + _frame_size, Z_SP);
+
+ iterate_over_register_mask(ACTION_SAVE, _frame_size);
+}
+
+SaveLiveRegisters::~SaveLiveRegisters() {
+ iterate_over_register_mask(ACTION_RESTORE, _frame_size);
+
+ __ pop_frame();
+
+ __ restore_return_pc();
+}
+
+int SaveLiveRegisters::iterate_over_register_mask(IterationAction action, int offset) {
+ int reg_save_index = 0;
+ RegMaskIterator live_regs_iterator(_reg_mask);
+
+  while (live_regs_iterator.has_next()) {
+ const OptoReg::Name opto_reg = live_regs_iterator.next();
+
+ // Filter out stack slots (spilled registers, i.e., stack-allocated registers).
+ if (!OptoReg::is_reg(opto_reg)) {
+ continue;
+ }
+
+ const VMReg vm_reg = OptoReg::as_VMReg(opto_reg);
+ if (vm_reg->is_Register()) {
+ Register std_reg = vm_reg->as_Register();
+
+ if (std_reg->encoding() >= Z_R2->encoding() && std_reg->encoding() <= Z_R15->encoding()) {
+ reg_save_index++;
+
+ if (action == ACTION_SAVE) {
+ __ z_stg(std_reg, offset - reg_save_index * BytesPerWord, Z_SP);
+ } else if (action == ACTION_RESTORE) {
+ __ z_lg(std_reg, offset - reg_save_index * BytesPerWord, Z_SP);
+ } else {
+ assert(action == ACTION_COUNT_ONLY, "Sanity");
+ }
+ }
+ } else if (vm_reg->is_FloatRegister()) {
+ FloatRegister fp_reg = vm_reg->as_FloatRegister();
+ if (fp_reg->encoding() >= Z_F0->encoding() && fp_reg->encoding() <= Z_F15->encoding()
+ && fp_reg->encoding() != Z_F1->encoding()) {
+ reg_save_index++;
+
+ if (action == ACTION_SAVE) {
+ __ z_std(fp_reg, offset - reg_save_index * BytesPerWord, Z_SP);
+ } else if (action == ACTION_RESTORE) {
+ __ z_ld(fp_reg, offset - reg_save_index * BytesPerWord, Z_SP);
+ } else {
+ assert(action == ACTION_COUNT_ONLY, "Sanity");
+ }
+ }
+    } else if (false /* vm_reg->is_VectorRegister() */) {
+ fatal("Vector register support is not there yet!");
+ } else {
+ fatal("Register type is not known");
+ }
+ }
+ return reg_save_index;
}
#endif // COMPILER2
diff --git a/src/hotspot/cpu/s390/gc/shared/barrierSetAssembler_s390.hpp b/src/hotspot/cpu/s390/gc/shared/barrierSetAssembler_s390.hpp
index f83bbb864ea47..fb61adc55b500 100644
--- a/src/hotspot/cpu/s390/gc/shared/barrierSetAssembler_s390.hpp
+++ b/src/hotspot/cpu/s390/gc/shared/barrierSetAssembler_s390.hpp
@@ -32,7 +32,9 @@
#ifdef COMPILER2
#include "code/vmreg.hpp"
#include "opto/optoreg.hpp"
+#include "opto/regmask.hpp"
+class BarrierStubC2;
class Node;
#endif // COMPILER2
@@ -51,6 +53,7 @@ class BarrierSetAssembler: public CHeapObj<mtGC> {
const Address& addr, Register val, Register tmp1, Register tmp2, Register tmp3);
virtual void resolve_jobject(MacroAssembler* masm, Register value, Register tmp1, Register tmp2);
+ virtual void resolve_global_jobject(MacroAssembler* masm, Register value, Register tmp1, Register tmp2);
virtual void try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env,
Register obj, Register tmp, Label& slowpath);
@@ -61,8 +64,42 @@ class BarrierSetAssembler: public CHeapObj<mtGC> {
#ifdef COMPILER2
OptoReg::Name refine_register(const Node* node,
- OptoReg::Name opto_reg);
+ OptoReg::Name opto_reg) const;
#endif // COMPILER2
};
+#ifdef COMPILER2
+
+// This class saves and restores the registers that need to be preserved across
+// the runtime call represented by a given C2 barrier stub. Use as follows:
+// {
+// SaveLiveRegisters save(masm, stub);
+// ..
+// __ call_VM_leaf(...);
+// ..
+// }
+
+class SaveLiveRegisters {
+ MacroAssembler* _masm;
+ RegMask _reg_mask;
+ Register _result_reg;
+ int _frame_size;
+
+ public:
+ SaveLiveRegisters(MacroAssembler *masm, BarrierStubC2 *stub);
+
+ ~SaveLiveRegisters();
+
+ private:
+ enum IterationAction : int {
+ ACTION_SAVE,
+ ACTION_RESTORE,
+ ACTION_COUNT_ONLY
+ };
+
+ int iterate_over_register_mask(IterationAction action, int offset = 0);
+};
+
+#endif // COMPILER2
+
#endif // CPU_S390_GC_SHARED_BARRIERSETASSEMBLER_S390_HPP
diff --git a/src/hotspot/cpu/s390/gc/shared/modRefBarrierSetAssembler_s390.cpp b/src/hotspot/cpu/s390/gc/shared/modRefBarrierSetAssembler_s390.cpp
index fd21dd85e1195..f44a72c27abc1 100644
--- a/src/hotspot/cpu/s390/gc/shared/modRefBarrierSetAssembler_s390.cpp
+++ b/src/hotspot/cpu/s390/gc/shared/modRefBarrierSetAssembler_s390.cpp
@@ -1,6 +1,6 @@
/*
- * Copyright (c) 2018, 2019, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2018, 2019 SAP SE. All rights reserved.
+ * Copyright (c) 2018, 2024, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2018, 2024 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -26,6 +26,7 @@
#include "precompiled.hpp"
#include "asm/macroAssembler.inline.hpp"
#include "gc/shared/modRefBarrierSetAssembler.hpp"
+#include "runtime/jniHandles.hpp"
#define __ masm->
@@ -58,3 +59,16 @@ void ModRefBarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet deco
BarrierSetAssembler::store_at(masm, decorators, type, dst, val, tmp1, tmp2, tmp3);
}
}
+
+void ModRefBarrierSetAssembler::resolve_jobject(MacroAssembler* masm, Register value, Register tmp1, Register tmp2) {
+ NearLabel done;
+
+ __ z_ltgr(value, value);
+ __ z_bre(done); // use null as-is.
+
+ __ z_nill(value, ~JNIHandles::tag_mask);
+ __ z_lg(value, 0, value); // Resolve (untagged) jobject.
+
+ __ verify_oop(value, FILE_AND_LINE);
+ __ bind(done);
+}
diff --git a/src/hotspot/cpu/s390/gc/shared/modRefBarrierSetAssembler_s390.hpp b/src/hotspot/cpu/s390/gc/shared/modRefBarrierSetAssembler_s390.hpp
index 865638477cd7a..7f53d033780c1 100644
--- a/src/hotspot/cpu/s390/gc/shared/modRefBarrierSetAssembler_s390.hpp
+++ b/src/hotspot/cpu/s390/gc/shared/modRefBarrierSetAssembler_s390.hpp
@@ -1,6 +1,6 @@
/*
- * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2018 SAP SE. All rights reserved.
+ * Copyright (c) 2018, 2024, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2018, 2024 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -48,6 +48,8 @@ class ModRefBarrierSetAssembler: public BarrierSetAssembler {
virtual void store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
const Address& dst, Register val, Register tmp1, Register tmp2, Register tmp3);
+
+ virtual void resolve_jobject(MacroAssembler* masm, Register value, Register tmp1, Register tmp2);
};
#endif // CPU_S390_GC_SHARED_MODREFBARRIERSETASSEMBLER_S390_HPP
diff --git a/src/hotspot/cpu/s390/interp_masm_s390.cpp b/src/hotspot/cpu/s390/interp_masm_s390.cpp
index e56beaa9f569c..d00b6c3e2cc2e 100644
--- a/src/hotspot/cpu/s390/interp_masm_s390.cpp
+++ b/src/hotspot/cpu/s390/interp_masm_s390.cpp
@@ -1012,7 +1012,7 @@ void InterpreterMacroAssembler::lock_object(Register monitor, Register object) {
}
if (LockingMode == LM_LIGHTWEIGHT) {
- lightweight_lock(object, header, tmp, slow_case);
+ lightweight_lock(monitor, object, header, tmp, slow_case);
} else if (LockingMode == LM_LEGACY) {
// Load markWord from object into header.
diff --git a/src/hotspot/cpu/s390/macroAssembler_s390.cpp b/src/hotspot/cpu/s390/macroAssembler_s390.cpp
index 50de705cd9f0c..e192bbab0deb8 100644
--- a/src/hotspot/cpu/s390/macroAssembler_s390.cpp
+++ b/src/hotspot/cpu/s390/macroAssembler_s390.cpp
@@ -2127,8 +2127,9 @@ unsigned int MacroAssembler::push_frame_abi160(unsigned int bytes) {
// Pop current C frame.
void MacroAssembler::pop_frame() {
- BLOCK_COMMENT("pop_frame:");
+ BLOCK_COMMENT("pop_frame {");
Assembler::z_lg(Z_SP, _z_abi(callers_sp), Z_SP);
+ BLOCK_COMMENT("} pop_frame");
}
// Pop current C frame and restore return PC register (Z_R14).
@@ -3655,12 +3656,38 @@ void MacroAssembler::compiler_fast_unlock_object(Register oop, Register box, Reg
bind(not_recursive);
+ NearLabel check_succ, set_eq_unlocked;
+
+ // Set owner to null.
+ // Release to satisfy the JMM
+ z_release();
+ z_lghi(temp, 0);
+ z_stg(temp, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), currentHeader);
+ // We need a full fence after clearing owner to avoid stranding.
+ z_fence();
+
+ // Check if the entry lists are empty.
load_and_test_long(temp, Address(currentHeader, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)));
- z_brne(done);
+ z_brne(check_succ);
load_and_test_long(temp, Address(currentHeader, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)));
- z_brne(done);
- z_release();
- z_stg(temp/*=0*/, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), currentHeader);
+ z_bre(done); // If so we are done.
+
+ bind(check_succ);
+
+ // Check if there is a successor.
+ load_and_test_long(temp, Address(currentHeader, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)));
+ z_brne(set_eq_unlocked); // If so we are done.
+
+ // Save the monitor pointer in the current thread, so we can try to
+ // reacquire the lock in SharedRuntime::monitor_exit_helper().
+ z_xilf(currentHeader, markWord::monitor_value);
+ z_stg(currentHeader, Address(Z_thread, JavaThread::unlocked_inflated_monitor_offset()));
+
+ z_ltgr(oop, oop); // Set flag = NE
+ z_bru(done);
+
+ bind(set_eq_unlocked);
+ z_cr(temp, temp); // Set flag = EQ
bind(done);
@@ -3674,6 +3701,11 @@ void MacroAssembler::resolve_jobject(Register value, Register tmp1, Register tmp
bs->resolve_jobject(this, value, tmp1, tmp2);
}
+void MacroAssembler::resolve_global_jobject(Register value, Register tmp1, Register tmp2) {
+ BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
+ bs->resolve_global_jobject(this, value, tmp1, tmp2);
+}
+
// Last_Java_sp must comply to the rules in frame_s390.hpp.
void MacroAssembler::set_last_Java_frame(Register last_Java_sp, Register last_Java_pc, bool allow_relocation) {
BLOCK_COMMENT("set_last_Java_frame {");
@@ -6002,10 +6034,10 @@ SkipIfEqual::~SkipIfEqual() {
// - obj: the object to be locked, contents preserved.
// - temp1, temp2: temporary registers, contents destroyed.
// Note: make sure Z_R1 is not manipulated here when C2 compiler is in play
-void MacroAssembler::lightweight_lock(Register obj, Register temp1, Register temp2, Label& slow) {
+void MacroAssembler::lightweight_lock(Register basic_lock, Register obj, Register temp1, Register temp2, Label& slow) {
assert(LockingMode == LM_LIGHTWEIGHT, "only used with new lightweight locking");
- assert_different_registers(obj, temp1, temp2);
+ assert_different_registers(basic_lock, obj, temp1, temp2);
Label push;
const Register top = temp1;
@@ -6017,6 +6049,11 @@ void MacroAssembler::lightweight_lock(Register obj, Register temp1, Register tem
// instruction emitted as it is part of C1's null check semantics.
z_lg(mark, Address(obj, mark_offset));
+ if (UseObjectMonitorTable) {
+ // Clear cache in case fast locking succeeds.
+ const Address om_cache_addr = Address(basic_lock, BasicObjectLock::lock_offset() + in_ByteSize((BasicLock::object_monitor_cache_offset_in_bytes())));
+ z_mvghi(om_cache_addr, 0);
+ }
// First we need to check if the lock-stack has room for pushing the object reference.
z_lgf(top, Address(Z_thread, ls_top_offset));
@@ -6140,8 +6177,8 @@ void MacroAssembler::lightweight_unlock(Register obj, Register temp1, Register t
bind(unlocked);
}
-void MacroAssembler::compiler_fast_lock_lightweight_object(Register obj, Register tmp1, Register tmp2) {
- assert_different_registers(obj, tmp1, tmp2);
+void MacroAssembler::compiler_fast_lock_lightweight_object(Register obj, Register box, Register tmp1, Register tmp2) {
+ assert_different_registers(obj, box, tmp1, tmp2);
// Handle inflated monitor.
NearLabel inflated;
@@ -6150,6 +6187,11 @@ void MacroAssembler::compiler_fast_lock_lightweight_object(Register obj, Registe
// Finish fast lock unsuccessfully. MUST branch to with flag == EQ
NearLabel slow_path;
+ if (UseObjectMonitorTable) {
+ // Clear cache in case fast locking succeeds.
+ z_mvghi(Address(box, BasicLock::object_monitor_cache_offset_in_bytes()), 0);
+ }
+
if (DiagnoseSyncOnValueBasedClasses != 0) {
load_klass(tmp1, obj);
z_tm(Address(tmp1, Klass::misc_flags_offset()), KlassFlags::_misc_is_value_based_class);
@@ -6214,33 +6256,77 @@ void MacroAssembler::compiler_fast_lock_lightweight_object(Register obj, Registe
{ // Handle inflated monitor.
bind(inflated);
+ const Register tmp1_monitor = tmp1;
if (!UseObjectMonitorTable) {
- // mark contains the tagged ObjectMonitor*.
- const Register tagged_monitor = mark;
- const Register zero = tmp2;
-
- // Try to CAS m->owner from null to current thread.
- // If m->owner is null, then csg succeeds and sets m->owner=THREAD and CR=EQ.
- // Otherwise, register zero is filled with the current owner.
- z_lghi(zero, 0);
- z_csg(zero, Z_thread, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), tagged_monitor);
- z_bre(locked);
-
- // Check if recursive.
- z_cgr(Z_thread, zero); // zero contains the owner from z_csg instruction
- z_brne(slow_path);
-
- // Recursive
- z_agsi(Address(tagged_monitor, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)), 1ll);
- z_cgr(zero, zero);
- // z_bru(locked);
- // Uncomment above line in the future, for now jump address is right next to us.
+ assert(tmp1_monitor == mark, "should be the same here");
} else {
- // OMCache lookup not supported yet. Take the slowpath.
- // Set flag to NE
- z_ltgr(obj, obj);
+ NearLabel monitor_found;
+
+ // load cache address
+ z_la(tmp1, Address(Z_thread, JavaThread::om_cache_oops_offset()));
+
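+      // Probe the first num_unrolled cache slots inline before falling into the search loop below.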
+ const int num_unrolled = 2;
+ for (int i = 0; i < num_unrolled; i++) {
+ z_cg(obj, Address(tmp1));
+ z_bre(monitor_found);
+ add2reg(tmp1, in_bytes(OMCache::oop_to_oop_difference()));
+ }
+
+ NearLabel loop;
+ // Search for obj in cache
+
+ bind(loop);
+
+ // check for match.
+ z_cg(obj, Address(tmp1));
+ z_bre(monitor_found);
+
+      // Search until null is encountered; the _null_sentinel at the end guarantees termination.
+ add2reg(tmp1, in_bytes(OMCache::oop_to_oop_difference()));
+ z_cghsi(0, tmp1, 0);
+ z_brne(loop); // if not EQ to 0, go for another loop
+
+      // We reached the end of the cache: cache miss.
+ z_ltgr(obj, obj); // set CC to NE
z_bru(slow_path);
+
+ // cache hit
+ bind(monitor_found);
+ z_lg(tmp1_monitor, Address(tmp1, OMCache::oop_to_monitor_difference()));
}
+ NearLabel monitor_locked;
+ // lock the monitor
+
+ // mark contains the tagged ObjectMonitor*.
+ const Register tagged_monitor = mark;
+ const Register zero = tmp2;
+
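+  // With UseObjectMonitorTable, tmp1_monitor holds an untagged monitor pointer from the cache;
+  // otherwise it is the tagged mark word, so the offsets below subtract the tag.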
+  const ByteSize monitor_tag = in_ByteSize(UseObjectMonitorTable ? 0 : checked_cast<int>(markWord::monitor_value));
+ const Address owner_address(tmp1_monitor, ObjectMonitor::owner_offset() - monitor_tag);
+ const Address recursions_address(tmp1_monitor, ObjectMonitor::recursions_offset() - monitor_tag);
+
+
+ // Try to CAS m->owner from null to current thread.
+ // If m->owner is null, then csg succeeds and sets m->owner=THREAD and CR=EQ.
+ // Otherwise, register zero is filled with the current owner.
+ z_lghi(zero, 0);
+ z_csg(zero, Z_thread, owner_address);
+ z_bre(monitor_locked);
+
+ // Check if recursive.
+ z_cgr(Z_thread, zero); // zero contains the owner from z_csg instruction
+ z_brne(slow_path);
+
+ // Recursive
+ z_agsi(recursions_address, 1ll);
+
+ bind(monitor_locked);
+ if (UseObjectMonitorTable) {
+ // Cache the monitor for unlock
+ z_stg(tmp1_monitor, Address(box, BasicLock::object_monitor_cache_offset_in_bytes()));
+ }
+ // set the CC now
+ z_cgr(obj, obj);
}
BLOCK_COMMENT("} handle_inflated_monitor_lightweight_locking");
@@ -6265,11 +6351,11 @@ void MacroAssembler::compiler_fast_lock_lightweight_object(Register obj, Registe
// C2 uses the value of flag (NE vs EQ) to determine the continuation.
}
-void MacroAssembler::compiler_fast_unlock_lightweight_object(Register obj, Register tmp1, Register tmp2) {
- assert_different_registers(obj, tmp1, tmp2);
+void MacroAssembler::compiler_fast_unlock_lightweight_object(Register obj, Register box, Register tmp1, Register tmp2) {
+ assert_different_registers(obj, box, tmp1, tmp2);
// Handle inflated monitor.
- NearLabel inflated, inflated_load_monitor;
+ NearLabel inflated, inflated_load_mark;
// Finish fast unlock successfully. MUST reach to with flag == EQ.
NearLabel unlocked;
// Finish fast unlock unsuccessfully. MUST branch to with flag == NE.
@@ -6289,7 +6375,7 @@ void MacroAssembler::compiler_fast_unlock_lightweight_object(Register obj, Regis
z_aghi(top, -oopSize);
z_cg(obj, Address(Z_thread, top));
- branch_optimized(bcondNotEqual, inflated_load_monitor);
+ branch_optimized(bcondNotEqual, inflated_load_mark);
// Pop lock-stack.
#ifdef ASSERT
@@ -6310,6 +6396,9 @@ void MacroAssembler::compiler_fast_unlock_lightweight_object(Register obj, Regis
// Not recursive
// Check for monitor (0b10).
+  // Because we got here by popping (meaning we pushed the obj when locking),
+  // there is no monitor in the box. Push the obj back so that the runtime
+  // can fix any potential anonymous owner.
z_lg(mark, Address(obj, mark_offset));
z_tmll(mark, markWord::monitor_value);
if (!UseObjectMonitorTable) {
@@ -6348,7 +6437,7 @@ void MacroAssembler::compiler_fast_unlock_lightweight_object(Register obj, Regis
{ // Handle inflated monitor.
- bind(inflated_load_monitor);
+ bind(inflated_load_mark);
z_lg(mark, Address(obj, mark_offset));
@@ -6373,49 +6462,77 @@ void MacroAssembler::compiler_fast_unlock_lightweight_object(Register obj, Regis
bind(check_done);
#endif // ASSERT
+ const Register tmp1_monitor = tmp1;
+
if (!UseObjectMonitorTable) {
- // mark contains the tagged ObjectMonitor*.
- const Register monitor = mark;
+ assert(tmp1_monitor == mark, "should be the same here");
+ } else {
+ // Uses ObjectMonitorTable. Look for the monitor in our BasicLock on the stack.
+ z_lg(tmp1_monitor, Address(box, BasicLock::object_monitor_cache_offset_in_bytes()));
+    // Null check: no valid monitor pointer lies below alignof(ObjectMonitor*); branch low to the slow path.
+ z_cghi(tmp1_monitor, alignof(ObjectMonitor*));
- NearLabel not_recursive;
- const Register recursions = tmp2;
+ z_brl(slow_path);
+ }
- // Check if recursive.
- load_and_test_long(recursions, Address(monitor, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)));
- z_bre(not_recursive); // if 0 then jump, it's not recursive locking
+ // mark contains the tagged ObjectMonitor*.
+ const Register monitor = mark;
- // Recursive unlock
- z_agsi(Address(monitor, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)), -1ll);
- z_cgr(monitor, monitor); // set the CC to EQUAL
- z_bru(unlocked);
+  const ByteSize monitor_tag = in_ByteSize(UseObjectMonitorTable ? 0 : checked_cast<int>(markWord::monitor_value));
+ const Address recursions_address{monitor, ObjectMonitor::recursions_offset() - monitor_tag};
+ const Address cxq_address{monitor, ObjectMonitor::cxq_offset() - monitor_tag};
+ const Address succ_address{monitor, ObjectMonitor::succ_offset() - monitor_tag};
+ const Address EntryList_address{monitor, ObjectMonitor::EntryList_offset() - monitor_tag};
+ const Address owner_address{monitor, ObjectMonitor::owner_offset() - monitor_tag};
- bind(not_recursive);
+ NearLabel not_recursive;
+ const Register recursions = tmp2;
- NearLabel not_ok;
- // Check if the entry lists are empty.
- load_and_test_long(tmp2, Address(monitor, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)));
- z_brne(not_ok);
- load_and_test_long(tmp2, Address(monitor, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)));
- z_brne(not_ok);
+ // Check if recursive.
+ load_and_test_long(recursions, recursions_address);
+ z_bre(not_recursive); // if 0 then jump, it's not recursive locking
- z_release();
- z_stg(tmp2 /*=0*/, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), monitor);
+ // Recursive unlock
+ z_agsi(recursions_address, -1ll);
+ z_cgr(monitor, monitor); // set the CC to EQUAL
+ z_bru(unlocked);
- z_bru(unlocked); // CC = EQ here
+ bind(not_recursive);
- bind(not_ok);
+ NearLabel check_succ, set_eq_unlocked;
- // The owner may be anonymous, and we removed the last obj entry in
- // the lock-stack. This loses the information about the owner.
- // Write the thread to the owner field so the runtime knows the owner.
- z_stg(Z_thread, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), monitor);
- z_bru(slow_path); // CC = NE here
- } else {
- // OMCache lookup not supported yet. Take the slowpath.
- // Set flag to NE
- z_ltgr(obj, obj);
- z_bru(slow_path);
+ // Set owner to null.
+ // Release to satisfy the JMM
+ z_release();
+ z_lghi(tmp2, 0);
+ z_stg(tmp2 /*=0*/, owner_address);
+ // We need a full fence after clearing owner to avoid stranding.
+ z_fence();
+
+ // Check if the entry lists are empty.
+ load_and_test_long(tmp2, EntryList_address);
+ z_brne(check_succ);
+ load_and_test_long(tmp2, cxq_address);
+ z_bre(unlocked); // If so we are done.
+
+ bind(check_succ);
+
+ // Check if there is a successor.
+ load_and_test_long(tmp2, succ_address);
+ z_brne(set_eq_unlocked); // If so we are done.
+
+ // Save the monitor pointer in the current thread, so we can try to
+ // reacquire the lock in SharedRuntime::monitor_exit_helper().
+ if (!UseObjectMonitorTable) {
+ z_xilf(monitor, markWord::monitor_value);
}
+ z_stg(monitor, Address(Z_thread, JavaThread::unlocked_inflated_monitor_offset()));
+
+ z_ltgr(obj, obj); // Set flag = NE
+ z_bru(slow_path);
+
+ bind(set_eq_unlocked);
+ z_cr(tmp2, tmp2); // Set flag = EQ
}
bind(unlocked);
diff --git a/src/hotspot/cpu/s390/macroAssembler_s390.hpp b/src/hotspot/cpu/s390/macroAssembler_s390.hpp
index 90210eb28c3ad..5d3a4c2994091 100644
--- a/src/hotspot/cpu/s390/macroAssembler_s390.hpp
+++ b/src/hotspot/cpu/s390/macroAssembler_s390.hpp
@@ -752,12 +752,13 @@ class MacroAssembler: public Assembler {
void compiler_fast_lock_object(Register oop, Register box, Register temp1, Register temp2);
void compiler_fast_unlock_object(Register oop, Register box, Register temp1, Register temp2);
- void lightweight_lock(Register obj, Register tmp1, Register tmp2, Label& slow);
+ void lightweight_lock(Register basic_lock, Register obj, Register tmp1, Register tmp2, Label& slow);
void lightweight_unlock(Register obj, Register tmp1, Register tmp2, Label& slow);
- void compiler_fast_lock_lightweight_object(Register obj, Register tmp1, Register tmp2);
- void compiler_fast_unlock_lightweight_object(Register obj, Register tmp1, Register tmp2);
+ void compiler_fast_lock_lightweight_object(Register obj, Register box, Register tmp1, Register tmp2);
+ void compiler_fast_unlock_lightweight_object(Register obj, Register box, Register tmp1, Register tmp2);
void resolve_jobject(Register value, Register tmp1, Register tmp2);
+ void resolve_global_jobject(Register value, Register tmp1, Register tmp2);
// Support for last Java frame (but use call_VM instead where possible).
private:
@@ -819,7 +820,6 @@ class MacroAssembler: public Assembler {
void compare_klass_ptr(Register Rop1, int64_t disp, Register Rbase, bool maybenull);
// Access heap oop, handle encoding and GC barriers.
- private:
void access_store_at(BasicType type, DecoratorSet decorators,
const Address& addr, Register val,
Register tmp1, Register tmp2, Register tmp3);
diff --git a/src/hotspot/cpu/s390/register_s390.hpp b/src/hotspot/cpu/s390/register_s390.hpp
index 931e899257e92..18af232e56970 100644
--- a/src/hotspot/cpu/s390/register_s390.hpp
+++ b/src/hotspot/cpu/s390/register_s390.hpp
@@ -448,4 +448,12 @@ constexpr Register Z_R0_scratch = Z_R0;
constexpr Register Z_R1_scratch = Z_R1;
constexpr FloatRegister Z_fscratch_1 = Z_F1;
+typedef AbstractRegSet<Register> RegSet;
+
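+// Returns the lowest-numbered register in the set, or noreg if the set is empty (used by RegSetIterator).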
+template <>
+inline Register AbstractRegSet<Register>::first() {
+ if (_bitset == 0) { return noreg; }
+ return as_Register(count_trailing_zeros(_bitset));
+}
+
#endif // CPU_S390_REGISTER_S390_HPP
diff --git a/src/hotspot/cpu/s390/s390.ad b/src/hotspot/cpu/s390/s390.ad
index 4de1a4e7b7f35..8181e96ecfc55 100644
--- a/src/hotspot/cpu/s390/s390.ad
+++ b/src/hotspot/cpu/s390/s390.ad
@@ -1644,6 +1644,10 @@ const RegMask Matcher::method_handle_invoke_SP_save_mask() {
// Should the matcher clone input 'm' of node 'n'?
bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) {
+ if (is_encode_and_store_pattern(n, m)) {
+ mstack.push(m, Visit);
+ return true;
+ }
return false;
}
@@ -3913,6 +3917,7 @@ instruct loadL_unaligned(iRegL dst, memory mem) %{
// Load Pointer
instruct loadP(iRegP dst, memory mem) %{
match(Set dst (LoadP mem));
+ predicate(n->as_Load()->barrier_data() == 0);
ins_cost(MEMORY_REF_COST);
size(Z_DISP3_SIZE);
format %{ "LG $dst,$mem\t # ptr" %}
@@ -3924,6 +3929,7 @@ instruct loadP(iRegP dst, memory mem) %{
// LoadP + CastP2L
instruct castP2X_loadP(iRegL dst, memory mem) %{
match(Set dst (CastP2X (LoadP mem)));
+ predicate(n->as_Load()->barrier_data() == 0);
ins_cost(MEMORY_REF_COST);
size(Z_DISP3_SIZE);
format %{ "LG $dst,$mem\t # ptr + p2x" %}
@@ -4286,6 +4292,7 @@ instruct storeL(memory mem, iRegL src) %{
// Store Pointer
instruct storeP(memory dst, memoryRegP src) %{
match(Set dst (StoreP dst src));
+ predicate(n->as_Store()->barrier_data() == 0);
ins_cost(MEMORY_REF_COST);
size(Z_DISP3_SIZE);
format %{ "STG $src,$dst\t # ptr" %}
@@ -4388,6 +4395,7 @@ instruct memInitL(memoryRS mem, immL16 src) %{
// Move Immediate to 8-byte memory.
instruct memInitP(memoryRS mem, immP16 src) %{
match(Set mem (StoreP mem src));
+ predicate(n->as_Store()->barrier_data() == 0);
ins_cost(MEMORY_REF_COST);
size(6);
format %{ "MVGHI $mem,$src\t # direct mem init 8" %}
@@ -4417,6 +4425,7 @@ instruct negL_reg_reg(iRegL dst, immL_0 zero, iRegL src, flagsReg cr) %{
// Load narrow oop
instruct loadN(iRegN dst, memory mem) %{
match(Set dst (LoadN mem));
+ predicate(n->as_Load()->barrier_data() == 0);
ins_cost(MEMORY_REF_COST);
size(Z_DISP3_SIZE);
format %{ "LoadN $dst,$mem\t # (cOop)" %}
@@ -4480,7 +4489,7 @@ instruct loadConNKlass(iRegN dst, immNKlass src) %{
instruct decodeLoadN(iRegP dst, memory mem) %{
match(Set dst (DecodeN (LoadN mem)));
- predicate(false && (CompressedOops::base()==nullptr)&&(CompressedOops::shift()==0));
+ predicate(false && (CompressedOops::base()==nullptr) && (CompressedOops::shift()==0));
ins_cost(MEMORY_REF_COST);
size(Z_DISP3_SIZE);
format %{ "DecodeLoadN $dst,$mem\t # (cOop Load+Decode)" %}
@@ -4628,7 +4637,7 @@ instruct encodeP(iRegN dst, iRegP src, flagsReg cr) %{
match(Set dst (EncodeP src));
effect(KILL cr);
predicate((n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull) &&
- (CompressedOops::base() == 0 ||
+ (CompressedOops::base() == nullptr ||
CompressedOops::base_disjoint() ||
!ExpandLoadingBaseEncode));
ins_cost(MEMORY_REF_COST+3 * DEFAULT_COST);
@@ -4651,7 +4660,7 @@ instruct encodeP_NN(iRegN dst, iRegP src, flagsReg cr) %{
match(Set dst (EncodeP src));
effect(KILL cr);
predicate((n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull) &&
- (CompressedOops::base() == 0 ||
+ (CompressedOops::base() == nullptr ||
CompressedOops::base_disjoint() ||
!ExpandLoadingBaseEncode_NN));
ins_cost(MEMORY_REF_COST+3 * DEFAULT_COST);
@@ -4735,6 +4744,7 @@ instruct encodeP_NN_Ex(iRegN dst, iRegP src, flagsReg cr) %{
// Store Compressed Pointer
instruct storeN(memory mem, iRegN_P2N src) %{
match(Set mem (StoreN mem src));
+ predicate(n->as_Store()->barrier_data() == 0);
ins_cost(MEMORY_REF_COST);
size(Z_DISP_SIZE);
format %{ "ST $src,$mem\t # (cOop)" %}
@@ -5146,6 +5156,7 @@ instruct compareAndSwapL_bool(iRegP mem_ptr, rarg5RegL oldval, iRegL newval, iRe
instruct compareAndSwapP_bool(iRegP mem_ptr, rarg5RegP oldval, iRegP_N2P newval, iRegI res, flagsReg cr) %{
match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
+ predicate(n->as_LoadStore()->barrier_data() == 0);
effect(USE mem_ptr, USE_KILL oldval, KILL cr);
size(18);
format %{ "$res = CompareAndSwapP $oldval,$newval,$mem_ptr" %}
@@ -5156,6 +5167,7 @@ instruct compareAndSwapP_bool(iRegP mem_ptr, rarg5RegP oldval, iRegP_N2P newval,
instruct compareAndSwapN_bool(iRegP mem_ptr, rarg5RegN oldval, iRegN_P2N newval, iRegI res, flagsReg cr) %{
match(Set res (CompareAndSwapN mem_ptr (Binary oldval newval)));
+ predicate(n->as_LoadStore()->barrier_data() == 0);
effect(USE mem_ptr, USE_KILL oldval, KILL cr);
size(16);
format %{ "$res = CompareAndSwapN $oldval,$newval,$mem_ptr" %}
@@ -5443,6 +5455,7 @@ instruct xchgL_reg_mem(memoryRSY mem, iRegL dst, iRegL tmp, flagsReg cr) %{
%}
instruct xchgN_reg_mem(memoryRSY mem, iRegN dst, iRegI tmp, flagsReg cr) %{
+ predicate(n->as_LoadStore()->barrier_data() == 0);
match(Set dst (GetAndSetN mem dst));
effect(KILL cr, TEMP tmp); // USE_DEF dst by match rule.
format %{ "XCHGN $dst,[$mem]\t # EXCHANGE (coop, atomic), temp $tmp" %}
@@ -5452,6 +5465,7 @@ instruct xchgN_reg_mem(memoryRSY mem, iRegN dst, iRegI tmp, flagsReg cr) %{
instruct xchgP_reg_mem(memoryRSY mem, iRegP dst, iRegL tmp, flagsReg cr) %{
match(Set dst (GetAndSetP mem dst));
+ predicate(n->as_LoadStore()->barrier_data() == 0);
effect(KILL cr, TEMP tmp); // USE_DEF dst by match rule.
format %{ "XCHGP $dst,[$mem]\t # EXCHANGE (oop, atomic), temp $tmp" %}
ins_encode(z_enc_SwapL(mem, dst, tmp));
@@ -5926,7 +5940,7 @@ instruct addP_regN_reg_imm20(iRegP dst, iRegP_N2P src1, iRegL src2, immL20 con)
instruct addP_mem_imm(memoryRSY mem, immL8 src, flagsReg cr) %{
match(Set mem (StoreP mem (AddP (LoadP mem) src)));
effect(KILL cr);
- predicate(VM_Version::has_MemWithImmALUOps());
+ predicate(VM_Version::has_MemWithImmALUOps() && n->as_LoadStore()->barrier_data() == 0);
ins_cost(MEMORY_REF_COST);
size(6);
format %{ "AGSI $mem,$src\t # direct mem add 8 (ptr)" %}
diff --git a/src/hotspot/cpu/s390/sharedRuntime_s390.cpp b/src/hotspot/cpu/s390/sharedRuntime_s390.cpp
index 9954c78ce1efa..468610b588e91 100644
--- a/src/hotspot/cpu/s390/sharedRuntime_s390.cpp
+++ b/src/hotspot/cpu/s390/sharedRuntime_s390.cpp
@@ -43,6 +43,7 @@
#include "runtime/sharedRuntime.hpp"
#include "runtime/signature.hpp"
#include "runtime/stubRoutines.hpp"
+#include "runtime/timerTrace.hpp"
#include "runtime/vframeArray.hpp"
#include "utilities/align.hpp"
#include "utilities/macros.hpp"
@@ -1713,7 +1714,7 @@ nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm,
// Try fastpath for locking.
if (LockingMode == LM_LIGHTWEIGHT) {
// Fast_lock kills r_temp_1, r_temp_2.
- __ compiler_fast_lock_lightweight_object(r_oop, r_tmp1, r_tmp2);
+ __ compiler_fast_lock_lightweight_object(r_oop, r_box, r_tmp1, r_tmp2);
} else {
// Fast_lock kills r_temp_1, r_temp_2.
__ compiler_fast_lock_object(r_oop, r_box, r_tmp1, r_tmp2);
@@ -1917,7 +1918,7 @@ nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm,
// Try fastpath for unlocking.
if (LockingMode == LM_LIGHTWEIGHT) {
// Fast_unlock kills r_tmp1, r_tmp2.
- __ compiler_fast_unlock_lightweight_object(r_oop, r_tmp1, r_tmp2);
+ __ compiler_fast_unlock_lightweight_object(r_oop, r_box, r_tmp1, r_tmp2);
} else {
// Fast_unlock kills r_tmp1, r_tmp2.
__ compiler_fast_unlock_object(r_oop, r_box, r_tmp1, r_tmp2);
diff --git a/src/hotspot/cpu/s390/stubGenerator_s390.cpp b/src/hotspot/cpu/s390/stubGenerator_s390.cpp
index d878731cca51f..dd9ed4c95462b 100644
--- a/src/hotspot/cpu/s390/stubGenerator_s390.cpp
+++ b/src/hotspot/cpu/s390/stubGenerator_s390.cpp
@@ -3053,6 +3053,29 @@ class StubGenerator: public StubCodeGenerator {
return start;
}
+ // load Method* target of MethodHandle
+ // Z_ARG1 = jobject receiver
+ // Z_method = Method* result
+ address generate_upcall_stub_load_target() {
+ StubCodeMark mark(this, "StubRoutines", "upcall_stub_load_target");
+ address start = __ pc();
+
+ __ resolve_global_jobject(Z_ARG1, Z_tmp_1, Z_tmp_2);
+ // Load target method from receiver
+ __ load_heap_oop(Z_method, Address(Z_ARG1, java_lang_invoke_MethodHandle::form_offset()),
+ noreg, noreg, IS_NOT_NULL);
+ __ load_heap_oop(Z_method, Address(Z_method, java_lang_invoke_LambdaForm::vmentry_offset()),
+ noreg, noreg, IS_NOT_NULL);
+ __ load_heap_oop(Z_method, Address(Z_method, java_lang_invoke_MemberName::method_offset()),
+ noreg, noreg, IS_NOT_NULL);
+ __ z_lg(Z_method, Address(Z_method, java_lang_invoke_ResolvedMethodName::vmtarget_offset()));
+ __ z_stg(Z_method, Address(Z_thread, JavaThread::callee_target_offset())); // just in case callee is deoptimized
+
+ __ z_br(Z_R14);
+
+ return start;
+ }
+
void generate_initial_stubs() {
// Generates all stubs and initializes the entry points.
@@ -3110,6 +3133,7 @@ class StubGenerator: public StubCodeGenerator {
}
StubRoutines::_upcall_stub_exception_handler = generate_upcall_stub_exception_handler();
+ StubRoutines::_upcall_stub_load_target = generate_upcall_stub_load_target();
}
void generate_compiler_stubs() {
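For readers unfamiliar with the MethodHandle plumbing, the chain the new stub walks is summarized below (the field names are the Java-level ones behind the offsets used above):

    // MethodHandle.form           -> LambdaForm          (form_offset)
    // LambdaForm.vmentry          -> MemberName          (vmentry_offset)
    // MemberName.method           -> ResolvedMethodName  (method_offset)
    // ResolvedMethodName.vmtarget -> Method*             (vmtarget_offset)
    // The resulting Method* is left in Z_method and also stored to
    // JavaThread::callee_target_offset() in case the callee is deoptimized.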
diff --git a/src/hotspot/cpu/s390/templateInterpreterGenerator_s390.cpp b/src/hotspot/cpu/s390/templateInterpreterGenerator_s390.cpp
index c16e444904563..2c2e8ed9e3b3a 100644
--- a/src/hotspot/cpu/s390/templateInterpreterGenerator_s390.cpp
+++ b/src/hotspot/cpu/s390/templateInterpreterGenerator_s390.cpp
@@ -1224,6 +1224,7 @@ address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::M
case Interpreter::java_lang_math_sin : runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dsin); break;
case Interpreter::java_lang_math_cos : runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dcos); break;
case Interpreter::java_lang_math_tan : runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dtan); break;
+ case Interpreter::java_lang_math_tanh : /* run interpreted */ break;
case Interpreter::java_lang_math_abs : /* run interpreted */ break;
case Interpreter::java_lang_math_sqrt : /* runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dsqrt); not available */ break;
case Interpreter::java_lang_math_log : runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dlog); break;
diff --git a/src/hotspot/cpu/s390/upcallLinker_s390.cpp b/src/hotspot/cpu/s390/upcallLinker_s390.cpp
index 734b4e89c7cb2..8baad40a519a4 100644
--- a/src/hotspot/cpu/s390/upcallLinker_s390.cpp
+++ b/src/hotspot/cpu/s390/upcallLinker_s390.cpp
@@ -23,6 +23,7 @@
#include "precompiled.hpp"
#include "asm/macroAssembler.inline.hpp"
+#include "classfile/javaClasses.hpp"
#include "logging/logStream.hpp"
#include "memory/resourceArea.hpp"
#include "prims/upcallLinker.hpp"
@@ -116,7 +117,7 @@ static void restore_callee_saved_registers(MacroAssembler* _masm, const ABIDescr
static const int upcall_stub_code_base_size = 1024;
static const int upcall_stub_size_per_arg = 16; // arg save & restore + move
-address UpcallLinker::make_upcall_stub(jobject receiver, Method* entry,
+address UpcallLinker::make_upcall_stub(jobject receiver, Symbol* signature,
BasicType* out_sig_bt, int total_out_args,
BasicType ret_type,
jobject jabi, jobject jconv,
@@ -206,7 +207,6 @@ address UpcallLinker::make_upcall_stub(jobject receiver, Method* entry,
__ block_comment("on_entry {");
__ load_const_optimized(call_target_address, CAST_FROM_FN_PTR(uint64_t, UpcallLinker::on_entry));
__ z_aghik(Z_ARG1, Z_SP, frame_data_offset);
- __ load_const_optimized(Z_ARG2, (intptr_t)receiver);
__ call(call_target_address);
__ z_lgr(Z_thread, Z_RET);
__ block_comment("} on_entry");
@@ -216,12 +216,11 @@ address UpcallLinker::make_upcall_stub(jobject receiver, Method* entry,
arg_shuffle.generate(_masm, shuffle_reg, abi._shadow_space_bytes, frame::z_jit_out_preserve_size);
__ block_comment("} argument_shuffle");
- __ block_comment("receiver {");
- __ get_vm_result(Z_ARG1);
- __ block_comment("} receiver");
-
- __ load_const_optimized(Z_method, (intptr_t)entry);
- __ z_stg(Z_method, Address(Z_thread, in_bytes(JavaThread::callee_target_offset())));
+ __ block_comment("load_target {");
+ __ load_const_optimized(Z_ARG1, (intptr_t)receiver);
+ __ load_const_optimized(call_target_address, StubRoutines::upcall_stub_load_target());
+ __ call(call_target_address); // load target Method* into Z_method
+ __ block_comment("} load_target");
__ z_lg(call_target_address, Address(Z_method, in_bytes(Method::from_compiled_offset())));
__ call(call_target_address);
@@ -274,7 +273,7 @@ address UpcallLinker::make_upcall_stub(jobject receiver, Method* entry,
#ifndef PRODUCT
stringStream ss;
- ss.print("upcall_stub_%s", entry->signature()->as_C_string());
+ ss.print("upcall_stub_%s", signature->as_C_string());
const char* name = _masm->code_string(ss.as_string());
#else // PRODUCT
const char* name = "upcall_stub";
diff --git a/src/hotspot/cpu/x86/assembler_x86.cpp b/src/hotspot/cpu/x86/assembler_x86.cpp
index 345b779e8094e..c1679cd111f5a 100644
--- a/src/hotspot/cpu/x86/assembler_x86.cpp
+++ b/src/hotspot/cpu/x86/assembler_x86.cpp
@@ -1385,6 +1385,14 @@ void Assembler::addl(Address dst, int32_t imm32) {
emit_arith_operand(0x81, rax, dst, imm32);
}
+void Assembler::eaddl(Register dst, Address src, int32_t imm32, bool no_flags) {
+ InstructionMark im(this);
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_32bit);
+ evex_prefix_ndd(src, dst->encoding(), 0, VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
+ emit_arith_operand(0x81, rax, src, imm32);
+}
+
void Assembler::addb(Address dst, int imm8) {
InstructionMark im(this);
prefix(dst);
@@ -1429,11 +1437,26 @@ void Assembler::addl(Address dst, Register src) {
emit_operand(src, dst, 0);
}
+void Assembler::eaddl(Register dst, Address src1, Register src2, bool no_flags) {
+ InstructionMark im(this);
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_32bit);
+ evex_prefix_ndd(src1, dst->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
+ emit_int8(0x01);
+ emit_operand(src2, src1, 0);
+}
+
void Assembler::addl(Register dst, int32_t imm32) {
prefix(dst);
emit_arith(0x81, 0xC0, dst, imm32);
}
+void Assembler::eaddl(Register dst, Register src, int32_t imm32, bool no_flags) {
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ (void) evex_prefix_and_encode_ndd(0, dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
+ emit_arith(0x81, 0xC0, src, imm32);
+}
+
void Assembler::addl(Register dst, Address src) {
InstructionMark im(this);
prefix(src, dst);
@@ -1441,11 +1464,27 @@ void Assembler::addl(Register dst, Address src) {
emit_operand(dst, src, 0);
}
+void Assembler::eaddl(Register dst, Register src1, Address src2, bool no_flags) {
+ InstructionMark im(this);
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_32bit);
+ evex_prefix_ndd(src2, dst->encoding(), src1->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
+ emit_int8(0x03);
+ emit_operand(src1, src2, 0);
+}
+
void Assembler::addl(Register dst, Register src) {
(void) prefix_and_encode(dst->encoding(), src->encoding());
emit_arith(0x03, 0xC0, dst, src);
}
+void Assembler::eaddl(Register dst, Register src1, Register src2, bool no_flags) {
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ (void) evex_prefix_and_encode_ndd(src1->encoding(), dst->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
+ // opcode matches gcc
+ emit_arith(0x01, 0xC0, src1, src2);
+}
+
void Assembler::addr_nop_4() {
assert(UseAddressNop, "no CPU support");
// 4 bytes: NOP DWORD PTR [EAX+0]
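A note on the e-prefixed forms introduced above (a sketch, based on the Intel APX extended-EVEX encodings these helpers emit): the legacy two-operand ALU instructions overwrite their first operand, whereas the new NDD (new data destination) forms write a separate destination register, and the no_flags argument selects the flag-suppressing (NF) variant. Assuming the usual `__` masm shorthand:

    __ addl(rax, rbx);               // legacy: rax = rax + rbx, EFLAGS updated
    __ eaddl(rcx, rax, rbx, false);  // NDD:    rcx = rax + rbx, EFLAGS updated
    __ eaddl(rcx, rax, rbx, true);   // NDD+NF: rcx = rax + rbx, EFLAGS untouched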
@@ -1632,11 +1671,25 @@ void Assembler::andl(Address dst, int32_t imm32) {
emit_arith_operand(0x81, as_Register(4), dst, imm32);
}
+void Assembler::eandl(Register dst, Address src, int32_t imm32, bool no_flags) {
+ InstructionMark im(this);
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_32bit);
+ evex_prefix_ndd(src, dst->encoding(), 0, VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
+ emit_arith_operand(0x81, rsp, src, imm32);
+}
+
void Assembler::andl(Register dst, int32_t imm32) {
prefix(dst);
emit_arith(0x81, 0xE0, dst, imm32);
}
+void Assembler::eandl(Register dst, Register src, int32_t imm32, bool no_flags) {
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ (void) evex_prefix_and_encode_ndd(0, dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
+ emit_arith(0x81, 0xE0, src, imm32);
+}
+
void Assembler::andl(Address dst, Register src) {
InstructionMark im(this);
prefix(dst, src);
@@ -1651,11 +1704,27 @@ void Assembler::andl(Register dst, Address src) {
emit_operand(dst, src, 0);
}
+void Assembler::eandl(Register dst, Register src1, Address src2, bool no_flags) {
+ InstructionMark im(this);
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_32bit);
+ evex_prefix_ndd(src2, dst->encoding(), src1->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
+ emit_int8(0x23);
+ emit_operand(src1, src2, 0);
+}
+
void Assembler::andl(Register dst, Register src) {
(void) prefix_and_encode(dst->encoding(), src->encoding());
emit_arith(0x23, 0xC0, dst, src);
}
+void Assembler::eandl(Register dst, Register src1, Register src2, bool no_flags) {
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ (void) evex_prefix_and_encode_ndd(src1->encoding(), dst->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
+ // opcode matches gcc
+ emit_arith(0x21, 0xC0, src1, src2);
+}
+
void Assembler::andnl(Register dst, Register src1, Register src2) {
assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
@@ -1803,6 +1872,12 @@ void Assembler::cmovl(Condition cc, Register dst, Register src) {
emit_opcode_prefix_and_encoding(0x40 | cc, 0xC0, encode);
}
+void Assembler::ecmovl(Condition cc, Register dst, Register src1, Register src2) {
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ int encode = evex_prefix_and_encode_ndd(src1->encoding(), dst->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes);
+ emit_int16((0x40 | cc), (0xC0 | encode));
+}
+
void Assembler::cmovl(Condition cc, Register dst, Address src) {
InstructionMark im(this);
NOT_LP64(guarantee(VM_Version::supports_cmov(), "illegal instruction"));
@@ -1811,6 +1886,15 @@ void Assembler::cmovl(Condition cc, Register dst, Address src) {
emit_operand(dst, src, 0);
}
+void Assembler::ecmovl(Condition cc, Register dst, Register src1, Address src2) {
+ InstructionMark im(this);
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_32bit);
+ evex_prefix_ndd(src2, dst->encoding(), src1->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes);
+ emit_int8((0x40 | cc));
+ emit_operand(src1, src2, 0);
+}
+
void Assembler::cmpb(Address dst, Register reg) {
assert(reg->has_byte_register(), "must have byte register");
InstructionMark im(this);
@@ -1835,6 +1919,11 @@ void Assembler::cmpb(Address dst, int imm8) {
emit_int8(imm8);
}
+void Assembler::cmpb(Register dst, int imm8) {
+ prefix(dst);
+ emit_arith_b(0x80, 0xF8, dst, imm8);
+}
+
void Assembler::cmpl(Address dst, int32_t imm32) {
InstructionMark im(this);
prefix(dst);
@@ -2429,6 +2518,15 @@ void Assembler::decl(Address dst) {
emit_operand(rcx, dst, 0);
}
+void Assembler::edecl(Register dst, Address src, bool no_flags) {
+ InstructionMark im(this);
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_32bit);
+ evex_prefix_ndd(src, dst->encoding(), 0, VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
+ emit_int8((unsigned char)0xFF);
+ emit_operand(rcx, src, 0);
+}
+
void Assembler::divsd(XMMRegister dst, Address src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
InstructionMark im(this);
@@ -2474,21 +2572,45 @@ void Assembler::idivl(Register src) {
emit_int16((unsigned char)0xF7, (0xF8 | encode));
}
+void Assembler::eidivl(Register src, bool no_flags) { // Signed
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ int encode = evex_prefix_and_encode_nf(0, 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
+ emit_int16((unsigned char)0xF7, (0xF8 | encode));
+}
+
void Assembler::divl(Register src) { // Unsigned
int encode = prefix_and_encode(src->encoding());
emit_int16((unsigned char)0xF7, (0xF0 | encode));
}
+void Assembler::edivl(Register src, bool no_flags) { // Unsigned
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ int encode = evex_prefix_and_encode_nf(0, 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
+ emit_int16((unsigned char)0xF7, (0xF0 | encode));
+}
+
void Assembler::imull(Register src) {
int encode = prefix_and_encode(src->encoding());
emit_int16((unsigned char)0xF7, (0xE8 | encode));
}
+void Assembler::eimull(Register src, bool no_flags) {
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ int encode = evex_prefix_and_encode_nf(0, 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
+ emit_int16((unsigned char)0xF7, (0xE8 | encode));
+}
+
void Assembler::imull(Register dst, Register src) {
int encode = prefix_and_encode(dst->encoding(), src->encoding(), true /* is_map1 */);
emit_opcode_prefix_and_encoding((unsigned char)0xAF, 0xC0, encode);
}
+void Assembler::eimull(Register dst, Register src1, Register src2, bool no_flags) {
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ int encode = evex_prefix_and_encode_ndd(src1->encoding(), dst->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
+ emit_int16((unsigned char)0xAF, (0xC0 | encode));
+}
+
void Assembler::imull(Register dst, Address src, int32_t value) {
InstructionMark im(this);
prefix(src, dst);
@@ -2503,6 +2625,22 @@ void Assembler::imull(Register dst, Address src, int32_t value) {
}
}
+void Assembler::eimull(Register dst, Address src, int32_t value, bool no_flags) {
+ InstructionMark im(this);
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_32bit);
+ evex_prefix_ndd(src, 0, dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
+ if (is8bit(value)) {
+ emit_int8((unsigned char)0x6B);
+ emit_operand(dst, src, 1);
+ emit_int8(value);
+ } else {
+ emit_int8((unsigned char)0x69);
+ emit_operand(dst, src, 4);
+ emit_int32(value);
+ }
+}
+
void Assembler::imull(Register dst, Register src, int value) {
int encode = prefix_and_encode(dst->encoding(), src->encoding());
if (is8bit(value)) {
@@ -2513,6 +2651,17 @@ void Assembler::imull(Register dst, Register src, int value) {
}
}
+void Assembler::eimull(Register dst, Register src, int value, bool no_flags) {
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ int encode = evex_prefix_and_encode_nf(dst->encoding(), 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
+ if (is8bit(value)) {
+ emit_int24(0x6B, (0xC0 | encode), value & 0xFF);
+ } else {
+ emit_int16(0x69, (0xC0 | encode));
+ emit_int32(value);
+ }
+}
+
void Assembler::imull(Register dst, Address src) {
InstructionMark im(this);
prefix(src, dst, false, true /* is_map1 */);
@@ -2520,6 +2669,14 @@ void Assembler::imull(Register dst, Address src) {
emit_operand(dst, src, 0);
}
+void Assembler::eimull(Register dst, Register src1, Address src2, bool no_flags) {
+ InstructionMark im(this);
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_32bit);
+ evex_prefix_ndd(src2, dst->encoding(), src1->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
+ emit_int8((unsigned char)0xAF);
+ emit_operand(src1, src2, 0);
+}
void Assembler::incl(Address dst) {
// Don't use it directly. Use MacroAssembler::increment() instead.
@@ -2529,6 +2686,16 @@ void Assembler::incl(Address dst) {
emit_operand(rax, dst, 0);
}
+void Assembler::eincl(Register dst, Address src, bool no_flags) {
+ // Don't use it directly. Use MacroAssembler::increment() instead.
+ InstructionMark im(this);
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_32bit);
+ evex_prefix_ndd(src, dst->encoding(), 0, VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
+ emit_int8((unsigned char)0xFF);
+ emit_operand(rax, src, 0);
+}
+
void Assembler::jcc(Condition cc, Label& L, bool maybe_short) {
InstructionMark im(this);
assert((0 <= cc) && (cc < 16), "illegal cc");
@@ -2696,6 +2863,13 @@ void Assembler::lzcntl(Register dst, Register src) {
emit_opcode_prefix_and_encoding((unsigned char)0xBD, 0xC0, encode);
}
+void Assembler::elzcntl(Register dst, Register src, bool no_flags) {
+ assert(VM_Version::supports_lzcnt(), "encoding is treated as BSR");
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ int encode = evex_prefix_and_encode_nf(dst->encoding(), 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
+ emit_int16((unsigned char)0xF5, (0xC0 | encode));
+}
+
void Assembler::lzcntl(Register dst, Address src) {
assert(VM_Version::supports_lzcnt(), "encoding is treated as BSR");
InstructionMark im(this);
@@ -2705,6 +2879,16 @@ void Assembler::lzcntl(Register dst, Address src) {
emit_operand(dst, src, 0);
}
+void Assembler::elzcntl(Register dst, Address src, bool no_flags) {
+ assert(VM_Version::supports_lzcnt(), "encoding is treated as BSR");
+ InstructionMark im(this);
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_32bit);
+ evex_prefix_nf(src, 0, dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
+ emit_int8((unsigned char)0xF5);
+ emit_operand(dst, src, 0);
+}
+
// Emit mfence instruction
void Assembler::mfence() {
NOT_LP64(assert(VM_Version::supports_sse2(), "unsupported");)
@@ -3855,11 +4039,26 @@ void Assembler::mull(Address src) {
emit_operand(rsp, src, 0);
}
+void Assembler::emull(Address src, bool no_flags) {
+ InstructionMark im(this);
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_32bit);
+ evex_prefix_nf(src, 0, 0, VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
+ emit_int8((unsigned char)0xF7);
+ emit_operand(rsp, src, 0);
+}
+
void Assembler::mull(Register src) {
int encode = prefix_and_encode(src->encoding());
emit_int16((unsigned char)0xF7, (0xE0 | encode));
}
+void Assembler::emull(Register src, bool no_flags) {
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ int encode = evex_prefix_and_encode_nf(0, 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
+ emit_int16((unsigned char)0xF7, (0xE0 | encode));
+}
+
void Assembler::mulsd(XMMRegister dst, Address src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
InstructionMark im(this);
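The one-operand multiply/divide helpers added in the hunk above keep the implicit EDX:EAX operands of the legacy encodings; the EVEX prefix is only there to reach the APX no-flags (NF) variant. A small usage sketch (again assuming the `__` shorthand):

    __ mull(rbx);         // edx:eax = eax * ebx, flags updated as usual
    __ emull(rbx, true);  // same multiply, but EFLAGS are left untouched (NF)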
@@ -3901,6 +4100,12 @@ void Assembler::negl(Register dst) {
emit_int16((unsigned char)0xF7, (0xD8 | encode));
}
+void Assembler::enegl(Register dst, Register src, bool no_flags) {
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ int encode = evex_prefix_and_encode_ndd(0, dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
+ emit_int16((unsigned char)0xF7, (0xD8 | encode));
+}
+
void Assembler::negl(Address dst) {
InstructionMark im(this);
prefix(dst);
@@ -3908,6 +4113,15 @@ void Assembler::negl(Address dst) {
emit_operand(as_Register(3), dst, 0);
}
+void Assembler::enegl(Register dst, Address src, bool no_flags) {
+ InstructionMark im(this);
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_32bit);
+ evex_prefix_ndd(src, dst->encoding(), 0, VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
+ emit_int8((unsigned char)0xF7);
+ emit_operand(as_Register(3), src, 0);
+}
+
void Assembler::nop(uint i) {
#ifdef ASSERT
assert(i > 0, " ");
@@ -4219,17 +4433,48 @@ void Assembler::notl(Register dst) {
emit_int16((unsigned char)0xF7, (0xD0 | encode));
}
+void Assembler::enotl(Register dst, Register src) {
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ int encode = evex_prefix_and_encode_ndd(0, dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes);
+ emit_int16((unsigned char)0xF7, (0xD0 | encode));
+}
+
+void Assembler::orw(Register dst, Register src) {
+ (void)prefix_and_encode(dst->encoding(), src->encoding());
+ emit_arith(0x0B, 0xC0, dst, src);
+}
+
+void Assembler::eorw(Register dst, Register src1, Register src2, bool no_flags) {
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ (void) evex_prefix_and_encode_ndd(src1->encoding(), dst->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
+ emit_arith(0x0B, 0xC0, src1, src2);
+}
+
void Assembler::orl(Address dst, int32_t imm32) {
InstructionMark im(this);
prefix(dst);
emit_arith_operand(0x81, rcx, dst, imm32);
}
+void Assembler::eorl(Register dst, Address src, int32_t imm32, bool no_flags) {
+ InstructionMark im(this);
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_32bit);
+ evex_prefix_ndd(src, dst->encoding(), 0, VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
+ emit_arith_operand(0x81, rcx, src, imm32);
+}
+
void Assembler::orl(Register dst, int32_t imm32) {
prefix(dst);
emit_arith(0x81, 0xC8, dst, imm32);
}
+void Assembler::eorl(Register dst, Register src, int32_t imm32, bool no_flags) {
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ evex_prefix_and_encode_ndd(0, dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
+ emit_arith(0x81, 0xC8, src, imm32);
+}
+
void Assembler::orl(Register dst, Address src) {
InstructionMark im(this);
prefix(src, dst);
@@ -4237,11 +4482,27 @@ void Assembler::orl(Register dst, Address src) {
emit_operand(dst, src, 0);
}
+void Assembler::eorl(Register dst, Register src1, Address src2, bool no_flags) {
+ InstructionMark im(this);
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_32bit);
+ evex_prefix_ndd(src2, dst->encoding(), src1->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
+ emit_int8(0x0B);
+ emit_operand(src1, src2, 0);
+}
+
void Assembler::orl(Register dst, Register src) {
(void) prefix_and_encode(dst->encoding(), src->encoding());
emit_arith(0x0B, 0xC0, dst, src);
}
+void Assembler::eorl(Register dst, Register src1, Register src2, bool no_flags) {
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ (void) evex_prefix_and_encode_ndd(src1->encoding(), dst->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
+ // opcode matches gcc
+ emit_arith(0x09, 0xC0, src1, src2);
+}
+
void Assembler::orl(Address dst, Register src) {
InstructionMark im(this);
prefix(dst, src);
@@ -4249,6 +4510,15 @@ void Assembler::orl(Address dst, Register src) {
emit_operand(src, dst, 0);
}
+void Assembler::eorl(Register dst, Address src1, Register src2, bool no_flags) {
+ InstructionMark im(this);
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_32bit);
+ evex_prefix_ndd(src1, dst->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
+ emit_int8(0x09);
+ emit_operand(src2, src1, 0);
+}
+
void Assembler::orb(Address dst, int imm8) {
InstructionMark im(this);
prefix(dst);
@@ -4257,6 +4527,16 @@ void Assembler::orb(Address dst, int imm8) {
emit_int8(imm8);
}
+void Assembler::eorb(Register dst, Address src, int imm8, bool no_flags) {
+ InstructionMark im(this);
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_8bit);
+ evex_prefix_ndd(src, dst->encoding(), 0, VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
+ emit_int8((unsigned char)0x80);
+ emit_operand(rcx, src, 1);
+ emit_int8(imm8);
+}
+
void Assembler::orb(Address dst, Register src) {
InstructionMark im(this);
prefix(dst, src, true);
@@ -4264,6 +4544,15 @@ void Assembler::orb(Address dst, Register src) {
emit_operand(src, dst, 0);
}
+void Assembler::eorb(Register dst, Address src1, Register src2, bool no_flags) {
+ InstructionMark im(this);
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_8bit);
+ evex_prefix_ndd(src1, dst->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
+ emit_int8(0x08);
+ emit_operand(src2, src1, 0);
+}
+
void Assembler::packsswb(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
@@ -5384,6 +5673,16 @@ void Assembler::popcntl(Register dst, Address src) {
emit_operand(dst, src, 0);
}
+void Assembler::epopcntl(Register dst, Address src, bool no_flags) {
+ assert(VM_Version::supports_popcnt(), "must support");
+ InstructionMark im(this);
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_32bit);
+ evex_prefix_nf(src, 0, dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
+ emit_int8((unsigned char)0x88);
+ emit_operand(dst, src, 0);
+}
+
void Assembler::popcntl(Register dst, Register src) {
assert(VM_Version::supports_popcnt(), "must support");
emit_int8((unsigned char)0xF3);
@@ -5391,6 +5690,13 @@ void Assembler::popcntl(Register dst, Register src) {
emit_opcode_prefix_and_encoding((unsigned char)0xB8, 0xC0, encode);
}
+void Assembler::epopcntl(Register dst, Register src, bool no_flags) {
+ assert(VM_Version::supports_popcnt(), "must support");
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ int encode = evex_prefix_and_encode_nf(dst->encoding(), 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
+ emit_int16((unsigned char)0x88, (0xC0 | encode));
+}
+
void Assembler::evpopcntb(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len) {
assert(VM_Version::supports_avx512_bitalg(), "must support avx512bitalg feature");
assert(vector_len == AVX_512bit || VM_Version::supports_avx512vl(), "");
@@ -5979,6 +6285,17 @@ void Assembler::rcll(Register dst, int imm8) {
}
}
+void Assembler::ercll(Register dst, Register src, int imm8) {
+ assert(isShiftCount(imm8), "illegal shift count");
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ int encode = evex_prefix_and_encode_ndd(0, dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes);
+ if (imm8 == 1) {
+ emit_int16((unsigned char)0xD1, (0xD0 | encode));
+ } else {
+ emit_int24((unsigned char)0xC1, (0xD0 | encode), imm8);
+ }
+}
+
void Assembler::rcpps(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse(), ""));
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
@@ -6059,11 +6376,28 @@ void Assembler::roll(Register dst, int imm8) {
}
}
+void Assembler::eroll(Register dst, Register src, int imm8, bool no_flags) {
+ assert(isShiftCount(imm8), "illegal shift count");
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ int encode = evex_prefix_and_encode_ndd(0, dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
+ if (imm8 == 1) {
+ emit_int16((unsigned char)0xD1, (0xC0 | encode));
+ } else {
+ emit_int24((unsigned char)0xC1, (0xc0 | encode), imm8);
+ }
+}
+
void Assembler::roll(Register dst) {
int encode = prefix_and_encode(dst->encoding());
emit_int16((unsigned char)0xD3, (0xC0 | encode));
}
+void Assembler::eroll(Register dst, Register src, bool no_flags) {
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ int encode = evex_prefix_and_encode_ndd(0, dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
+ emit_int16((unsigned char)0xD3, (0xC0 | encode));
+}
+
void Assembler::rorl(Register dst, int imm8) {
assert(isShiftCount(imm8), "illegal shift count");
int encode = prefix_and_encode(dst->encoding());
@@ -6074,17 +6408,40 @@ void Assembler::rorl(Register dst, int imm8) {
}
}
+void Assembler::erorl(Register dst, Register src, int imm8, bool no_flags) {
+ assert(isShiftCount(imm8), "illegal shift count");
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ int encode = evex_prefix_and_encode_ndd(0, dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
+ if (imm8 == 1) {
+ emit_int16((unsigned char)0xD1, (0xC8 | encode));
+ } else {
+ emit_int24((unsigned char)0xC1, (0xc8 | encode), imm8);
+ }
+}
+
void Assembler::rorl(Register dst) {
int encode = prefix_and_encode(dst->encoding());
emit_int16((unsigned char)0xD3, (0xC8 | encode));
}
+void Assembler::erorl(Register dst, Register src, bool no_flags) {
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ int encode = evex_prefix_and_encode_ndd(0, dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
+ emit_int16((unsigned char)0xD3, (0xC8 | encode));
+}
+
#ifdef _LP64
void Assembler::rorq(Register dst) {
int encode = prefixq_and_encode(dst->encoding());
emit_int16((unsigned char)0xD3, (0xC8 | encode));
}
+void Assembler::erorq(Register dst, Register src, bool no_flags) {
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ int encode = evex_prefix_and_encode_ndd(0, dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
+ emit_int16((unsigned char)0xD3, (0xC8 | encode));
+}
+
void Assembler::rorq(Register dst, int imm8) {
assert(isShiftCount(imm8 >> 1), "illegal shift count");
int encode = prefixq_and_encode(dst->encoding());
@@ -6095,11 +6452,28 @@ void Assembler::rorq(Register dst, int imm8) {
}
}
+void Assembler::erorq(Register dst, Register src, int imm8, bool no_flags) {
+ assert(isShiftCount(imm8), "illegal shift count");
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ int encode = evex_prefix_and_encode_ndd(0, dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
+ if (imm8 == 1) {
+ emit_int16((unsigned char)0xD1, (0xC8 | encode));
+ } else {
+ emit_int24((unsigned char)0xC1, (0xC8 | encode), imm8);
+ }
+}
+
void Assembler::rolq(Register dst) {
int encode = prefixq_and_encode(dst->encoding());
emit_int16((unsigned char)0xD3, (0xC0 | encode));
}
+void Assembler::erolq(Register dst, Register src, bool no_flags) {
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ int encode = evex_prefix_and_encode_ndd(0, dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
+ emit_int16((unsigned char)0xD3, (0xC0 | encode));
+}
+
void Assembler::rolq(Register dst, int imm8) {
assert(isShiftCount(imm8 >> 1), "illegal shift count");
int encode = prefixq_and_encode(dst->encoding());
@@ -6109,6 +6483,17 @@ void Assembler::rolq(Register dst, int imm8) {
emit_int24((unsigned char)0xC1, (0xc0 | encode), imm8);
}
}
+
+void Assembler::erolq(Register dst, Register src, int imm8, bool no_flags) {
+ assert(isShiftCount(imm8), "illegal shift count");
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ int encode = evex_prefix_and_encode_ndd(0, dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
+ if (imm8 == 1) {
+ emit_int16((unsigned char)0xD1, (0xC0 | encode));
+ } else {
+ emit_int24((unsigned char)0xC1, (0xc0 | encode), imm8);
+ }
+}
#endif
void Assembler::sahf() {
@@ -6134,19 +6519,56 @@ void Assembler::sall(Address dst, int imm8) {
}
}
-void Assembler::sall(Address dst) {
+void Assembler::esall(Register dst, Address src, int imm8, bool no_flags) {
InstructionMark im(this);
- prefix(dst);
- emit_int8((unsigned char)0xD3);
- emit_operand(as_Register(4), dst, 0);
-}
-
-void Assembler::sall(Register dst, int imm8) {
assert(isShiftCount(imm8), "illegal shift count");
- int encode = prefix_and_encode(dst->encoding());
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_32bit);
+ evex_prefix_ndd(src, dst->encoding(), 0, VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
if (imm8 == 1) {
- emit_int16((unsigned char)0xD1, (0xE0 | encode));
- } else {
+ emit_int8((unsigned char)0xD1);
+ emit_operand(as_Register(4), src, 0);
+ }
+ else {
+ emit_int8((unsigned char)0xC1);
+ emit_operand(as_Register(4), src, 1);
+ emit_int8(imm8);
+ }
+}
+
+void Assembler::sall(Address dst) {
+ InstructionMark im(this);
+ prefix(dst);
+ emit_int8((unsigned char)0xD3);
+ emit_operand(as_Register(4), dst, 0);
+}
+
+void Assembler::esall(Register dst, Address src, bool no_flags) {
+ InstructionMark im(this);
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_32bit);
+ evex_prefix_ndd(src, dst->encoding(), 0, VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
+ emit_int8((unsigned char)0xD3);
+ emit_operand(as_Register(4), src, 0);
+}
+
+void Assembler::sall(Register dst, int imm8) {
+ assert(isShiftCount(imm8), "illegal shift count");
+ int encode = prefix_and_encode(dst->encoding());
+ if (imm8 == 1) {
+ emit_int16((unsigned char)0xD1, (0xE0 | encode));
+ } else {
+ emit_int24((unsigned char)0xC1, (0xE0 | encode), imm8);
+ }
+}
+
+void Assembler::esall(Register dst, Register src, int imm8, bool no_flags) {
+ assert(isShiftCount(imm8), "illegal shift count");
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ int encode = evex_prefix_and_encode_ndd(0, dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
+ if (imm8 == 1) {
+ emit_int16((unsigned char)0xD1, (0xE0 | encode));
+ } else {
emit_int24((unsigned char)0xC1, (0xE0 | encode), imm8);
}
}
@@ -6156,6 +6578,12 @@ void Assembler::sall(Register dst) {
emit_int16((unsigned char)0xD3, (0xE0 | encode));
}
+void Assembler::esall(Register dst, Register src, bool no_flags) {
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ int encode = evex_prefix_and_encode_ndd(0, dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
+ emit_int16((unsigned char)0xD3, (0xE0 | encode));
+}
+
void Assembler::sarl(Address dst, int imm8) {
assert(isShiftCount(imm8), "illegal shift count");
InstructionMark im(this);
@@ -6171,6 +6599,23 @@ void Assembler::sarl(Address dst, int imm8) {
}
}
+void Assembler::esarl(Register dst, Address src, int imm8, bool no_flags) {
+ assert(isShiftCount(imm8), "illegal shift count");
+ InstructionMark im(this);
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_32bit);
+ evex_prefix_ndd(src, dst->encoding(), 0, VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
+ if (imm8 == 1) {
+ emit_int8((unsigned char)0xD1);
+ emit_operand(as_Register(7), src, 0);
+ }
+ else {
+ emit_int8((unsigned char)0xC1);
+ emit_operand(as_Register(7), src, 1);
+ emit_int8(imm8);
+ }
+}
+
void Assembler::sarl(Address dst) {
InstructionMark im(this);
prefix(dst);
@@ -6178,6 +6623,15 @@ void Assembler::sarl(Address dst) {
emit_operand(as_Register(7), dst, 0);
}
+void Assembler::esarl(Register dst, Address src, bool no_flags) {
+ InstructionMark im(this);
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_32bit);
+ evex_prefix_ndd(src, dst->encoding(), 0, VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
+ emit_int8((unsigned char)0xD3);
+ emit_operand(as_Register(7), src, 0);
+}
+
void Assembler::sarl(Register dst, int imm8) {
int encode = prefix_and_encode(dst->encoding());
assert(isShiftCount(imm8), "illegal shift count");
@@ -6188,11 +6642,28 @@ void Assembler::sarl(Register dst, int imm8) {
}
}
+void Assembler::esarl(Register dst, Register src, int imm8, bool no_flags) {
+ assert(isShiftCount(imm8), "illegal shift count");
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ int encode = evex_prefix_and_encode_ndd(0, dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
+ if (imm8 == 1) {
+ emit_int16((unsigned char)0xD1, (0xF8 | encode));
+ } else {
+ emit_int24((unsigned char)0xC1, (0xF8 | encode), imm8);
+ }
+}
+
void Assembler::sarl(Register dst) {
int encode = prefix_and_encode(dst->encoding());
emit_int16((unsigned char)0xD3, (0xF8 | encode));
}
+void Assembler::esarl(Register dst, Register src, bool no_flags) {
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ int encode = evex_prefix_and_encode_ndd(0, dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
+ emit_int16((unsigned char)0xD3, (0xF8 | encode));
+}
+
void Assembler::sbbl(Address dst, int32_t imm32) {
InstructionMark im(this);
prefix(dst);
@@ -6204,7 +6675,6 @@ void Assembler::sbbl(Register dst, int32_t imm32) {
emit_arith(0x81, 0xD8, dst, imm32);
}
-
void Assembler::sbbl(Register dst, Address src) {
InstructionMark im(this);
prefix(src, dst);
@@ -6297,7 +6767,6 @@ void Assembler::sha256msg2(XMMRegister dst, XMMRegister src) {
emit_int16((unsigned char)0xCD, (0xC0 | encode));
}
-
void Assembler::shll(Register dst, int imm8) {
assert(isShiftCount(imm8), "illegal shift count");
int encode = prefix_and_encode(dst->encoding());
@@ -6308,11 +6777,28 @@ void Assembler::shll(Register dst, int imm8) {
}
}
+void Assembler::eshll(Register dst, Register src, int imm8, bool no_flags) {
+ assert(isShiftCount(imm8), "illegal shift count");
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ int encode = evex_prefix_and_encode_ndd(0, dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
+ if (imm8 == 1) {
+ emit_int16((unsigned char)0xD1, (0xE0 | encode));
+ } else {
+ emit_int24((unsigned char)0xC1, (0xE0 | encode), imm8);
+ }
+}
+
void Assembler::shll(Register dst) {
int encode = prefix_and_encode(dst->encoding());
emit_int16((unsigned char)0xD3, (0xE0 | encode));
}
+void Assembler::eshll(Register dst, Register src, bool no_flags) {
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ int encode = evex_prefix_and_encode_ndd(0, dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
+ emit_int16((unsigned char)0xD3, (0xE0 | encode));
+}
+
void Assembler::shrl(Register dst, int imm8) {
assert(isShiftCount(imm8), "illegal shift count");
int encode = prefix_and_encode(dst->encoding());
@@ -6324,11 +6810,29 @@ void Assembler::shrl(Register dst, int imm8) {
}
}
+void Assembler::eshrl(Register dst, Register src, int imm8, bool no_flags) {
+ assert(isShiftCount(imm8), "illegal shift count");
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ int encode = evex_prefix_and_encode_ndd(0, dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
+ if (imm8 == 1) {
+ emit_int16((unsigned char)0xD1, (0xE8 | encode));
+ }
+ else {
+ emit_int24((unsigned char)0xC1, (0xE8 | encode), imm8);
+ }
+}
+
void Assembler::shrl(Register dst) {
int encode = prefix_and_encode(dst->encoding());
emit_int16((unsigned char)0xD3, (0xE8 | encode));
}
+void Assembler::eshrl(Register dst, Register src, bool no_flags) {
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ int encode = evex_prefix_and_encode_ndd(0, dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
+ emit_int16((unsigned char)0xD3, (0xE8 | encode));
+}
+
void Assembler::shrl(Address dst) {
InstructionMark im(this);
prefix(dst);
@@ -6336,6 +6840,15 @@ void Assembler::shrl(Address dst) {
emit_operand(as_Register(5), dst, 0);
}
+void Assembler::eshrl(Register dst, Address src, bool no_flags) {
+ InstructionMark im(this);
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_32bit);
+ evex_prefix_ndd(src, dst->encoding(), 0, VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
+ emit_int8((unsigned char)0xD3);
+ emit_operand(as_Register(5), src, 0);
+}
+
void Assembler::shrl(Address dst, int imm8) {
InstructionMark im(this);
assert(isShiftCount(imm8), "illegal shift count");
@@ -6351,37 +6864,89 @@ void Assembler::shrl(Address dst, int imm8) {
}
}
+void Assembler::eshrl(Register dst, Address src, int imm8, bool no_flags) {
+ InstructionMark im(this);
+ assert(isShiftCount(imm8), "illegal shift count");
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_32bit);
+ evex_prefix_ndd(src, dst->encoding(), 0, VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
+ if (imm8 == 1) {
+ emit_int8((unsigned char)0xD1);
+ emit_operand(as_Register(5), src, 0);
+ }
+ else {
+ emit_int8((unsigned char)0xC1);
+ emit_operand(as_Register(5), src, 1);
+ emit_int8(imm8);
+ }
+}
void Assembler::shldl(Register dst, Register src) {
int encode = prefix_and_encode(src->encoding(), dst->encoding(), true /* is_map1 */);
emit_opcode_prefix_and_encoding((unsigned char)0xA5, 0xC0, encode);
}
+void Assembler::eshldl(Register dst, Register src1, Register src2, bool no_flags) {
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ int encode = evex_prefix_and_encode_ndd(src1->encoding(), dst->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
+ emit_int16(0xA5, (0xC0 | encode));
+}
+
void Assembler::shldl(Register dst, Register src, int8_t imm8) {
int encode = prefix_and_encode(src->encoding(), dst->encoding(), true /* is_map1 */);
emit_opcode_prefix_and_encoding((unsigned char)0xA4, 0xC0, encode, imm8);
}
+void Assembler::eshldl(Register dst, Register src1, Register src2, int8_t imm8, bool no_flags) {
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ int encode = evex_prefix_and_encode_ndd(src1->encoding(), dst->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
+ emit_int24(0x24, (0xC0 | encode), imm8);
+}
+
void Assembler::shrdl(Register dst, Register src) {
int encode = prefix_and_encode(src->encoding(), dst->encoding(), true /* is_map1 */);
emit_opcode_prefix_and_encoding((unsigned char)0xAD, 0xC0, encode);
}
+void Assembler::eshrdl(Register dst, Register src1, Register src2, bool no_flags) {
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ int encode = evex_prefix_and_encode_ndd(src1->encoding(), dst->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
+ emit_int16(0xAD, (0xC0 | encode));
+}
+
void Assembler::shrdl(Register dst, Register src, int8_t imm8) {
int encode = prefix_and_encode(src->encoding(), dst->encoding(), true /* is_map1 */);
emit_opcode_prefix_and_encoding((unsigned char)0xAC, 0xC0, encode, imm8);
}
+void Assembler::eshrdl(Register dst, Register src1, Register src2, int8_t imm8, bool no_flags) {
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ int encode = evex_prefix_and_encode_ndd(src1->encoding(), dst->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
+ emit_int24(0x2C, (0xC0 | encode), imm8);
+}
+
#ifdef _LP64
void Assembler::shldq(Register dst, Register src, int8_t imm8) {
int encode = prefixq_and_encode(src->encoding(), dst->encoding(), true /* is_map1 */);
emit_opcode_prefix_and_encoding((unsigned char)0xA4, 0xC0, encode, imm8);
}
+void Assembler::eshldq(Register dst, Register src1, Register src2, int8_t imm8, bool no_flags) {
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ int encode = evex_prefix_and_encode_ndd(src1->encoding(), dst->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
+ emit_int24(0x24, (0xC0 | encode), imm8);
+}
+
void Assembler::shrdq(Register dst, Register src, int8_t imm8) {
int encode = prefixq_and_encode(src->encoding(), dst->encoding(), true /* is_map1 */);
emit_opcode_prefix_and_encoding((unsigned char)0xAC, 0xC0, encode, imm8);
}
+
+void Assembler::eshrdq(Register dst, Register src1, Register src2, int8_t imm8, bool no_flags) {
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ int encode = evex_prefix_and_encode_ndd(src1->encoding(), dst->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
+ emit_int24(0x2C, (0xC0 | encode), imm8);
+}
#endif
// copies a single word from [esi] to [edi]
@@ -6472,6 +7037,14 @@ void Assembler::subl(Address dst, int32_t imm32) {
emit_arith_operand(0x81, rbp, dst, imm32);
}
+void Assembler::esubl(Register dst, Address src, int32_t imm32, bool no_flags) {
+ InstructionMark im(this);
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_32bit);
+ evex_prefix_ndd(src, dst->encoding(), 0, VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
+ emit_arith_operand(0x81, rbp, src, imm32);
+}
+
void Assembler::subl(Address dst, Register src) {
InstructionMark im(this);
prefix(dst, src);
@@ -6479,17 +7052,38 @@ void Assembler::subl(Address dst, Register src) {
emit_operand(src, dst, 0);
}
+void Assembler::esubl(Register dst, Address src1, Register src2, bool no_flags) {
+ InstructionMark im(this);
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_32bit);
+ evex_prefix_ndd(src1, dst->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
+ emit_int8(0x29);
+ emit_operand(src2, src1, 0);
+}
+
void Assembler::subl(Register dst, int32_t imm32) {
prefix(dst);
emit_arith(0x81, 0xE8, dst, imm32);
}
+void Assembler::esubl(Register dst, Register src, int32_t imm32, bool no_flags) {
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ (void) evex_prefix_and_encode_ndd(0, dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
+ emit_arith(0x81, 0xE8, src, imm32);
+}
+
// Force generation of a 4 byte immediate value even if it fits into 8bit
void Assembler::subl_imm32(Register dst, int32_t imm32) {
prefix(dst);
emit_arith_imm32(0x81, 0xE8, dst, imm32);
}
+void Assembler::esubl_imm32(Register dst, Register src, int32_t imm32, bool no_flags) {
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ (void) evex_prefix_and_encode_ndd(0, dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
+ emit_arith_imm32(0x81, 0xE8, src, imm32);
+}
+
void Assembler::subl(Register dst, Address src) {
InstructionMark im(this);
prefix(src, dst);
@@ -6497,11 +7091,27 @@ void Assembler::subl(Register dst, Address src) {
emit_operand(dst, src, 0);
}
+void Assembler::esubl(Register dst, Register src1, Address src2, bool no_flags) {
+ InstructionMark im(this);
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_32bit);
+ evex_prefix_ndd(src2, dst->encoding(), src1->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
+ emit_int8(0x2B);
+ emit_operand(src1, src2, 0);
+}
+
void Assembler::subl(Register dst, Register src) {
(void) prefix_and_encode(dst->encoding(), src->encoding());
emit_arith(0x2B, 0xC0, dst, src);
}
+void Assembler::esubl(Register dst, Register src2, Register src1, bool no_flags) {
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ (void) evex_prefix_and_encode_ndd(src1->encoding(), dst->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
+ // opcode matches gcc
+ emit_arith(0x29, 0xC0, src1, src2);
+}
+
void Assembler::subsd(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
@@ -6605,6 +7215,13 @@ void Assembler::tzcntl(Register dst, Register src) {
emit_opcode_prefix_and_encoding((unsigned char)0xBC, 0xC0, encode);
}
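+// APX NF (no-flags) bit-count forms: the promoted encodings use new opcodes in the
+// extended map (0xF4 tzcnt, 0xF5 lzcnt, 0x88 popcnt) rather than the legacy F3 0F ones.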
+void Assembler::etzcntl(Register dst, Register src, bool no_flags) {
+ assert(VM_Version::supports_bmi1(), "tzcnt instruction not supported");
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ int encode = evex_prefix_and_encode_nf(dst->encoding(), 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
+ emit_int16((unsigned char)0xF4, (0xC0 | encode));
+}
+
void Assembler::tzcntl(Register dst, Address src) {
assert(VM_Version::supports_bmi1(), "tzcnt instruction not supported");
InstructionMark im(this);
@@ -6614,6 +7231,16 @@ void Assembler::tzcntl(Register dst, Address src) {
emit_operand(dst, src, 0);
}
+void Assembler::etzcntl(Register dst, Address src, bool no_flags) {
+ assert(VM_Version::supports_bmi1(), "tzcnt instruction not supported");
+ InstructionMark im(this);
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_32bit);
+ evex_prefix_nf(src, 0, dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
+ emit_int8((unsigned char)0xF4);
+ emit_operand(dst, src, 0);
+}
+
void Assembler::tzcntq(Register dst, Register src) {
assert(VM_Version::supports_bmi1(), "tzcnt instruction not supported");
emit_int8((unsigned char)0xF3);
@@ -6621,6 +7248,13 @@ void Assembler::tzcntq(Register dst, Register src) {
emit_opcode_prefix_and_encoding((unsigned char)0xBC, 0xC0, encode);
}
+void Assembler::etzcntq(Register dst, Register src, bool no_flags) {
+ assert(VM_Version::supports_bmi1(), "tzcnt instruction not supported");
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ int encode = evex_prefix_and_encode_nf(dst->encoding(), 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
+ emit_int16((unsigned char)0xF4, (0xC0 | encode));
+}
+
void Assembler::tzcntq(Register dst, Address src) {
assert(VM_Version::supports_bmi1(), "tzcnt instruction not supported");
InstructionMark im(this);
@@ -6630,6 +7264,16 @@ void Assembler::tzcntq(Register dst, Address src) {
emit_operand(dst, src, 0);
}
+void Assembler::etzcntq(Register dst, Address src, bool no_flags) {
+ assert(VM_Version::supports_bmi1(), "tzcnt instruction not supported");
+ InstructionMark im(this);
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_64bit);
+ evex_prefix_nf(src, 0, dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
+ emit_int8((unsigned char)0xF4);
+ emit_operand(dst, src, 0);
+}
+
void Assembler::ucomisd(XMMRegister dst, Address src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
InstructionMark im(this);
@@ -6749,11 +7393,25 @@ void Assembler::xorl(Address dst, int32_t imm32) {
emit_arith_operand(0x81, as_Register(6), dst, imm32);
}
+void Assembler::exorl(Register dst, Address src, int32_t imm32, bool no_flags) {
+ InstructionMark im(this);
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_32bit);
+ evex_prefix_ndd(src, dst->encoding(), 0, VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
+ emit_arith_operand(0x81, as_Register(6), src, imm32);
+}
+
void Assembler::xorl(Register dst, int32_t imm32) {
prefix(dst);
emit_arith(0x81, 0xF0, dst, imm32);
}
+void Assembler::exorl(Register dst, Register src, int32_t imm32, bool no_flags) {
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ evex_prefix_and_encode_ndd(0, dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
+ emit_arith(0x81, 0xF0, src, imm32);
+}
+
void Assembler::xorl(Register dst, Address src) {
InstructionMark im(this);
prefix(src, dst);
@@ -6761,11 +7419,27 @@ void Assembler::xorl(Register dst, Address src) {
emit_operand(dst, src, 0);
}
+void Assembler::exorl(Register dst, Register src1, Address src2, bool no_flags) {
+ InstructionMark im(this);
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_32bit);
+ evex_prefix_ndd(src2, dst->encoding(), src1->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
+ emit_int8(0x33);
+ emit_operand(src1, src2, 0);
+}
+
void Assembler::xorl(Register dst, Register src) {
(void) prefix_and_encode(dst->encoding(), src->encoding());
emit_arith(0x33, 0xC0, dst, src);
}
+void Assembler::exorl(Register dst, Register src1, Register src2, bool no_flags) {
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ (void) evex_prefix_and_encode_ndd(src1->encoding(), dst->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
+ // opcode matches gcc
+ emit_arith(0x31, 0xC0, src1, src2);
+}
+
void Assembler::xorl(Address dst, Register src) {
InstructionMark im(this);
prefix(dst, src);
@@ -6773,6 +7447,15 @@ void Assembler::xorl(Address dst, Register src) {
emit_operand(src, dst, 0);
}
+void Assembler::exorl(Register dst, Address src1, Register src2, bool no_flags) {
+ InstructionMark im(this);
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_32bit);
+ evex_prefix_ndd(src1, dst->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
+ emit_int8(0x31);
+ emit_operand(src2, src1, 0);
+}
+
void Assembler::xorb(Register dst, Address src) {
InstructionMark im(this);
prefix(src, dst);
@@ -6780,6 +7463,15 @@ void Assembler::xorb(Register dst, Address src) {
emit_operand(dst, src, 0);
}
+void Assembler::exorb(Register dst, Register src1, Address src2, bool no_flags) {
+ InstructionMark im(this);
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_8bit);
+ evex_prefix_ndd(src2, dst->encoding(), src1->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
+ emit_int8(0x32);
+ emit_operand(src1, src2, 0);
+}
+
void Assembler::xorb(Address dst, Register src) {
InstructionMark im(this);
prefix(dst, src, true);
@@ -6787,6 +7479,15 @@ void Assembler::xorb(Address dst, Register src) {
emit_operand(src, dst, 0);
}
+void Assembler::exorb(Register dst, Address src1, Register src2, bool no_flags) {
+ InstructionMark im(this);
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_8bit);
+ evex_prefix_ndd(src1, dst->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
+ emit_int8(0x30);
+ emit_operand(src2, src1, 0);
+}
+
void Assembler::xorw(Register dst, Address src) {
InstructionMark im(this);
emit_int8(0x66);
@@ -6795,6 +7496,16 @@ void Assembler::xorw(Register dst, Address src) {
emit_operand(dst, src, 0);
}
+void Assembler::exorw(Register dst, Register src1, Address src2, bool no_flags) {
+ InstructionMark im(this);
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_16bit);
+ // The operand-size override is carried by VEX_SIMD_66 in the EVEX payload, not emitted separately.
+ evex_prefix_ndd(src2, dst->encoding(), src1->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3C, &attributes, no_flags);
+ emit_int8(0x33);
+ emit_operand(src1, src2, 0);
+}
+
// AVX 3-operands scalar float-point arithmetic instructions
void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, Address src) {
@@ -7342,6 +8053,14 @@ void Assembler::andpd(XMMRegister dst, XMMRegister src) {
emit_int16(0x54, (0xC0 | encode));
}
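+// Packed-double bitwise and-not: dst = ~dst & src.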
+void Assembler::andnpd(XMMRegister dst, XMMRegister src) {
+ NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ true);
+ attributes.set_rex_vex_w_reverted();
+ int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+ emit_int16(0x55, (0xC0 | encode));
+}
+
void Assembler::andps(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse(), ""));
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ true);
@@ -8953,6 +9672,15 @@ void Assembler::vinserti64x4(XMMRegister dst, XMMRegister nds, XMMRegister src,
emit_int24(0x3A, (0xC0 | encode), imm8 & 0x01);
}
+void Assembler::evinserti64x2(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8, int vector_len) {
+ assert(VM_Version::supports_avx512dq(), "");
+ assert(vector_len == AVX_512bit || VM_Version::supports_avx512vl(), "");
+ InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ attributes.set_is_evex_instruction();
+ int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
+ emit_int24(0x38, (0xC0 | encode), imm8 & 0x03);
+}
+
// vinsertf forms
@@ -11017,6 +11745,21 @@ void Assembler::vbroadcastf128(XMMRegister dst, Address src, int vector_len) {
emit_operand(dst, src, 0);
}
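+// Broadcasts a 128-bit (2 x 64-bit) memory operand to all 128-bit lanes of dst;
+// requires AVX512DQ, plus AVX512VL for vector lengths below 512 bits.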
+void Assembler::evbroadcastf64x2(XMMRegister dst, Address src, int vector_len) {
+ assert(VM_Version::supports_avx512dq(), "");
+ assert(vector_len == AVX_512bit || VM_Version::supports_avx512vl(), "");
+ assert(dst != xnoreg, "sanity");
+ InstructionMark im(this);
+ InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ attributes.set_address_attributes(/* tuple_type */ EVEX_T2, /* input_size_in_bits */ EVEX_64bit);
+ attributes.set_is_evex_instruction();
+ // swap src<->dst for encoding
+ vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+ emit_int8(0x1A);
+ emit_operand(dst, src, 0);
+}
+
// gpr source broadcast forms
// duplicate 1-byte integer data from src into programmed locations in dest : requires AVX512BW and AVX512VL
@@ -11378,6 +12121,12 @@ void Assembler::decl(Register dst) {
emit_int8(0x48 | dst->encoding());
}
+void Assembler::edecl(Register dst, Register src, bool no_flags) {
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ int encode = evex_prefix_and_encode_ndd(0, dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
+ emit_int16((unsigned char)0xFF, (0xC8 | encode));
+}
+
// 64bit doesn't use the x87
void Assembler::fabs() {
@@ -11816,7 +12565,7 @@ void Assembler::vex_prefix(bool vex_r, bool vex_b, bool vex_x, int nds_enc, VexS
// This is a 4 byte encoding
void Assembler::evex_prefix(bool vex_r, bool vex_b, bool vex_x, bool evex_r, bool eevex_b, bool evex_v,
- bool eevex_x, int nds_enc, VexSimdPrefix pre, VexOpcode opc) {
+ bool eevex_x, int nds_enc, VexSimdPrefix pre, VexOpcode opc, bool no_flags) {
// EVEX 0x62 prefix
// byte1 = EVEX_4bytes;
@@ -11842,11 +12591,17 @@ void Assembler::evex_prefix(bool vex_r, bool vex_b, bool vex_x, bool evex_r, boo
// of form {66, F3, F2}
byte3 |= pre;
- // P2: byte 4 as zL'Lbv'aaa
- // kregs are implemented in the low 3 bits as aaa
- int byte4 = (_attributes->is_no_reg_mask()) ?
- 0 :
- _attributes->get_embedded_opmask_register_specifier();
+ // P2: byte 4 as zL'Lbv'aaa or 00LXVF00 where V = V4, X(extended context) = ND and F = NF (no flags)
+ int byte4 = 0;
+ if (no_flags) {
+ assert(_attributes->is_no_reg_mask(), "mask register not supported with no_flags");
+ byte4 |= 0x4;
+ } else {
+ // kregs are implemented in the low 3 bits as aaa
+ byte4 = (_attributes->is_no_reg_mask()) ?
+ 0 :
+ _attributes->get_embedded_opmask_register_specifier();
+ }
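+ // e.g. an NDD/NF arithmetic op with no opmask and no_flags set gets byte4 = 0x04
+ // (just the NF bit); a masked EVEX op instead carries k1..k7 in the low three bits.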
// EVEX.v` for extending EVEX.vvvv or VIDX
byte4 |= (evex_v ? 0: EVEX_V);
// third EXEC.b for broadcast actions
@@ -11861,11 +12616,12 @@ void Assembler::evex_prefix(bool vex_r, bool vex_b, bool vex_x, bool evex_r, boo
emit_int32(EVEX_4bytes, byte2, byte3, byte4);
}
-void Assembler::vex_prefix(Address adr, int nds_enc, int xreg_enc, VexSimdPrefix pre, VexOpcode opc, InstructionAttr *attributes) {
- if (adr.base_needs_rex2() || adr.index_needs_rex2()) {
+void Assembler::vex_prefix(Address adr, int nds_enc, int xreg_enc, VexSimdPrefix pre, VexOpcode opc, InstructionAttr *attributes, bool nds_is_ndd, bool no_flags) {
+ if (adr.base_needs_rex2() || adr.index_needs_rex2() || nds_is_ndd || no_flags) {
assert(UseAPX, "APX features not enabled");
}
- bool is_extended = adr.base_needs_rex2() || adr.index_needs_rex2() || nds_enc >= 16 || xreg_enc >= 16;
+ if (nds_is_ndd) attributes->set_extended_context();
+ bool is_extended = adr.base_needs_rex2() || adr.index_needs_rex2() || nds_enc >= 16 || xreg_enc >= 16 || nds_is_ndd;
bool vex_r = (xreg_enc & 8) == 8;
bool vex_b = adr.base_needs_rex();
bool vex_x;
@@ -11908,7 +12664,7 @@ void Assembler::vex_prefix(Address adr, int nds_enc, int xreg_enc, VexSimdPrefix
bool eevex_x = adr.index_needs_rex2();
bool eevex_b = adr.base_needs_rex2();
attributes->set_is_evex_instruction();
- evex_prefix(vex_r, vex_b, vex_x, evex_r, eevex_b, evex_v, eevex_x, nds_enc, pre, opc);
+ evex_prefix(vex_r, vex_b, vex_x, evex_r, eevex_b, evex_v, eevex_x, nds_enc, pre, opc, no_flags);
} else {
if (UseAVX > 2 && attributes->is_rex_vex_w_reverted()) {
attributes->set_rex_vex_w(false);
@@ -11917,10 +12673,21 @@ void Assembler::vex_prefix(Address adr, int nds_enc, int xreg_enc, VexSimdPrefix
}
}
-int Assembler::vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc, InstructionAttr *attributes, bool src_is_gpr) {
- if (src_is_gpr && src_enc >= 16) {
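+// APX helpers for memory operands: _ndd routes the extra destination through
+// EVEX.vvvv (nds_is_ndd), _nf only requests the EVEX.NF bit; both force EVEX encoding.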
+void Assembler::evex_prefix_ndd(Address adr, int ndd_enc, int xreg_enc, VexSimdPrefix pre, VexOpcode opc, InstructionAttr *attributes, bool no_flags) {
+ attributes->set_is_evex_instruction();
+ vex_prefix(adr, ndd_enc, xreg_enc, pre, opc, attributes, /* nds_is_ndd */ true, no_flags);
+}
+
+void Assembler::evex_prefix_nf(Address adr, int ndd_enc, int xreg_enc, VexSimdPrefix pre, VexOpcode opc, InstructionAttr *attributes, bool no_flags) {
+ attributes->set_is_evex_instruction();
+ vex_prefix(adr, ndd_enc, xreg_enc, pre, opc, attributes, /* nds_is_ndd */ false, no_flags);
+}
+
+int Assembler::vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc, InstructionAttr *attributes, bool src_is_gpr, bool nds_is_ndd, bool no_flags) {
+ if (nds_is_ndd || no_flags || (src_is_gpr && src_enc >= 16)) {
assert(UseAPX, "APX features not enabled");
}
+ if (nds_is_ndd) attributes->set_extended_context();
bool is_extended = dst_enc >= 16 || nds_enc >= 16 || src_enc >=16;
bool vex_r = (dst_enc & 8) == 8;
bool vex_b = (src_enc & 8) == 8;
@@ -11962,7 +12729,7 @@ int Assembler::vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc, VexS
// can use vex_x as bank extender on rm encoding
vex_x = (src_enc >= 16) && !src_is_gpr;
attributes->set_is_evex_instruction();
- evex_prefix(vex_r, vex_b, vex_x, evex_r, evex_b, evex_v, false /*eevex_x*/, nds_enc, pre, opc);
+ evex_prefix(vex_r, vex_b, vex_x, evex_r, evex_b, evex_v, false /*eevex_x*/, nds_enc, pre, opc, no_flags);
} else {
if (UseAVX > 2 && attributes->is_rex_vex_w_reverted()) {
attributes->set_rex_vex_w(false);
@@ -11974,6 +12741,18 @@ int Assembler::vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc, VexS
return (((dst_enc & 7) << 3) | (src_enc & 7));
}
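+// Register-form counterparts of the helpers above; the returned byte is the
+// ModRM reg/rm fragment that callers OR into 0xC0 when emitting the opcode.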
+int Assembler::evex_prefix_and_encode_ndd(int dst_enc, int nds_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc,
+ InstructionAttr *attributes, bool no_flags) {
+ attributes->set_is_evex_instruction();
+ return vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, opc, attributes, /* src_is_gpr */ true, /* nds_is_ndd */ true, no_flags);
+}
+
+int Assembler::evex_prefix_and_encode_nf(int dst_enc, int nds_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc,
+ InstructionAttr *attributes, bool no_flags) {
+ attributes->set_is_evex_instruction();
+ return vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, opc, attributes, /* src_is_gpr */ true, /* nds_is_ndd */ false, no_flags);
+}
+
void Assembler::simd_prefix(XMMRegister xreg, XMMRegister nds, Address adr, VexSimdPrefix pre,
VexOpcode opc, InstructionAttr *attributes) {
if (UseAVX > 0) {
@@ -12818,6 +13597,12 @@ void Assembler::incl(Register dst) {
emit_int8(0x40 | dst->encoding());
}
+void Assembler::eincl(Register dst, Register src, bool no_flags) {
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ int encode = evex_prefix_and_encode_ndd(0, dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
+ emit_int16((unsigned char)0xFF, (0xC0 | encode));
+}
+
void Assembler::lea(Register dst, Address src) {
leal(dst, src);
}
@@ -13442,28 +14227,67 @@ void Assembler::addq(Address dst, int32_t imm32) {
emit_arith_operand(0x81, rax, dst, imm32);
}
+void Assembler::eaddq(Register dst, Address src, int32_t imm32, bool no_flags) {
+ InstructionMark im(this);
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_64bit);
+ evex_prefix_ndd(src, dst->encoding(), 0, VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
+ emit_arith_operand(0x81, rax, src, imm32);
+}
+
void Assembler::addq(Address dst, Register src) {
InstructionMark im(this);
emit_prefix_and_int8(get_prefixq(dst, src), 0x01);
emit_operand(src, dst, 0);
}
+void Assembler::eaddq(Register dst, Address src1, Register src2, bool no_flags) {
+ InstructionMark im(this);
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_64bit);
+ evex_prefix_ndd(src1, dst->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
+ emit_int8(0x01);
+ emit_operand(src2, src1, 0);
+}
+
void Assembler::addq(Register dst, int32_t imm32) {
(void) prefixq_and_encode(dst->encoding());
emit_arith(0x81, 0xC0, dst, imm32);
}
+void Assembler::eaddq(Register dst, Register src, int32_t imm32, bool no_flags) {
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ (void) evex_prefix_and_encode_ndd(0, dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
+ emit_arith(0x81, 0xC0, src, imm32);
+}
+
void Assembler::addq(Register dst, Address src) {
InstructionMark im(this);
emit_prefix_and_int8(get_prefixq(src, dst), 0x03);
emit_operand(dst, src, 0);
}
+void Assembler::eaddq(Register dst, Register src1, Address src2, bool no_flags) {
+ InstructionMark im(this);
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_64bit);
+ evex_prefix_ndd(src2, dst->encoding(), src1->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
+ emit_int8(0x03);
+ emit_operand(src1, src2, 0);
+}
+
void Assembler::addq(Register dst, Register src) {
(void) prefixq_and_encode(dst->encoding(), src->encoding());
emit_arith(0x03, 0xC0, dst, src);
}
+void Assembler::eaddq(Register dst, Register src1, Register src2, bool no_flags) {
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ (void) evex_prefix_and_encode_ndd(src1->encoding(), dst->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
+ // opcode matches gcc
+ emit_arith(0x01, 0xC0, src1, src2);
+}
+
void Assembler::adcxq(Register dst, Register src) {
//assert(VM_Version::supports_adx(), "adx instructions not supported");
if (needs_rex2(dst, src)) {
@@ -13480,6 +14304,12 @@ void Assembler::adcxq(Register dst, Register src) {
}
}
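+// APX NDD forms of ADCX/ADOX: both use opcode 0x66 in the extended map and differ
+// only in the 0x66 vs. 0xF3 SIMD prefix selection.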
+void Assembler::eadcxq(Register dst, Register src1, Register src2) {
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ int encode = evex_prefix_and_encode_ndd(src1->encoding(), dst->encoding(), src2->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3C, &attributes);
+ emit_int16((unsigned char)0x66, (0xC0 | encode));
+}
+
void Assembler::adoxq(Register dst, Register src) {
//assert(VM_Version::supports_adx(), "adx instructions not supported");
if (needs_rex2(dst, src)) {
@@ -13495,34 +14325,80 @@ void Assembler::adoxq(Register dst, Register src) {
(0xC0 | encode));
}
}
+
+void Assembler::eadoxq(Register dst, Register src1, Register src2) {
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ int encode = evex_prefix_and_encode_ndd(src1->encoding(), dst->encoding(), src2->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F_3C, &attributes);
+ emit_int16((unsigned char)0x66, (0xC0 | encode));
+}
+
void Assembler::andq(Address dst, int32_t imm32) {
InstructionMark im(this);
prefixq(dst);
emit_arith_operand(0x81, as_Register(4), dst, imm32);
}
+void Assembler::eandq(Register dst, Address src, int32_t imm32, bool no_flags) {
+ InstructionMark im(this);
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_64bit);
+ evex_prefix_ndd(src, dst->encoding(), 0, VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
+ emit_arith_operand(0x81, as_Register(4), src, imm32);
+}
+
void Assembler::andq(Register dst, int32_t imm32) {
(void) prefixq_and_encode(dst->encoding());
emit_arith(0x81, 0xE0, dst, imm32);
}
+void Assembler::eandq(Register dst, Register src, int32_t imm32, bool no_flags) {
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ evex_prefix_and_encode_ndd(0, dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
+ emit_arith(0x81, 0xE0, src, imm32);
+}
+
void Assembler::andq(Register dst, Address src) {
InstructionMark im(this);
emit_prefix_and_int8(get_prefixq(src, dst), 0x23);
emit_operand(dst, src, 0);
}
+void Assembler::eandq(Register dst, Register src1, Address src2, bool no_flags) {
+ InstructionMark im(this);
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_64bit);
+ evex_prefix_ndd(src2, dst->encoding(), src1->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
+ emit_int8(0x23);
+ emit_operand(src1, src2, 0);
+}
+
void Assembler::andq(Register dst, Register src) {
(void) prefixq_and_encode(dst->encoding(), src->encoding());
emit_arith(0x23, 0xC0, dst, src);
}
+void Assembler::eandq(Register dst, Register src1, Register src2, bool no_flags) {
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ (void) evex_prefix_and_encode_ndd(src1->encoding(), dst->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
+ // opcode matches gcc
+ emit_arith(0x21, 0xC0, src1, src2);
+}
+
void Assembler::andq(Address dst, Register src) {
InstructionMark im(this);
emit_prefix_and_int8(get_prefixq(dst, src), 0x21);
emit_operand(src, dst, 0);
}
+void Assembler::eandq(Register dst, Address src1, Register src2, bool no_flags) {
+ InstructionMark im(this);
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_64bit);
+ evex_prefix_ndd(src1, dst->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
+ emit_int8(0x21);
+ emit_operand(src2, src1, 0);
+}
+
void Assembler::andnq(Register dst, Register src1, Register src2) {
assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
@@ -13656,6 +14532,12 @@ void Assembler::cmovq(Condition cc, Register dst, Register src) {
emit_opcode_prefix_and_encoding((0x40 | cc), 0xC0, encode);
}
+void Assembler::ecmovq(Condition cc, Register dst, Register src1, Register src2) {
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ int encode = evex_prefix_and_encode_ndd(src1->encoding(), dst->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes);
+ emit_int16((0x40 | cc), (0xC0 | encode));
+}
+
void Assembler::cmovq(Condition cc, Register dst, Address src) {
InstructionMark im(this);
int prefix = get_prefixq(src, dst, true /* is_map1 */);
@@ -13663,6 +14545,15 @@ void Assembler::cmovq(Condition cc, Register dst, Address src) {
emit_operand(dst, src, 0);
}
+void Assembler::ecmovq(Condition cc, Register dst, Register src1, Address src2) {
+ InstructionMark im(this);
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_64bit);
+ evex_prefix_ndd(src2, dst->encoding(), src1->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes);
+ emit_int8((0x40 | cc));
+ emit_operand(src1, src2, 0);
+}
+
void Assembler::cmpq(Address dst, int32_t imm32) {
InstructionMark im(this);
prefixq(dst);
@@ -13764,6 +14655,12 @@ void Assembler::decl(Register dst) {
emit_int16((unsigned char)0xFF, (0xC8 | encode));
}
+void Assembler::edecl(Register dst, Register src, bool no_flags) {
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ int encode = evex_prefix_and_encode_ndd(0, dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
+ emit_int16((unsigned char)0xFF, (0xC8 | encode));
+}
+
void Assembler::decq(Register dst) {
// Don't use it directly. Use MacroAssembler::decrementq() instead.
// Use two-byte form (one-byte from is a REX prefix in 64-bit mode)
@@ -13771,6 +14668,12 @@ void Assembler::decq(Register dst) {
emit_int16((unsigned char)0xFF, 0xC8 | encode);
}
+void Assembler::edecq(Register dst, Register src, bool no_flags) {
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ int encode = evex_prefix_and_encode_ndd(0, dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
+ emit_int16((unsigned char)0xFF, (0xC8 | encode));
+}
+
void Assembler::decq(Address dst) {
// Don't use it directly. Use MacroAssembler::decrementq() instead.
InstructionMark im(this);
@@ -13778,6 +14681,15 @@ void Assembler::decq(Address dst) {
emit_operand(rcx, dst, 0);
}
+void Assembler::edecq(Register dst, Address src, bool no_flags) {
+ InstructionMark im(this);
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_64bit);
+ evex_prefix_ndd(src, dst->encoding(), 0, VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
+ emit_int8((unsigned char)0xFF);
+ emit_operand(rcx, src, 0);
+}
+
// can't use REX2
void Assembler::fxrstor(Address src) {
InstructionMark im(this);
@@ -13811,21 +14723,51 @@ void Assembler::idivq(Register src) {
emit_int16((unsigned char)0xF7, (0xF8 | encode));
}
+void Assembler::eidivq(Register src, bool no_flags) {
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ int encode = evex_prefix_and_encode_nf(0, 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
+ emit_int16((unsigned char)0xF7, (0xF8 | encode));
+}
+
void Assembler::divq(Register src) {
int encode = prefixq_and_encode(src->encoding());
emit_int16((unsigned char)0xF7, (0xF0 | encode));
}
+void Assembler::edivq(Register src, bool no_flags) {
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ int encode = evex_prefix_and_encode_nf(0, 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
+ emit_int16((unsigned char)0xF7, (0xF0 | encode));
+}
+
void Assembler::imulq(Register dst, Register src) {
int encode = prefixq_and_encode(dst->encoding(), src->encoding(), true /* is_map1 */);
emit_opcode_prefix_and_encoding((unsigned char)0xAF, 0xC0, encode);
}
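+// IMUL variants: the one- and two-operand forms below are NF (flags optionally
+// suppressed), while the three-operand forms write their result via NDD.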
+void Assembler::eimulq(Register dst, Register src, bool no_flags) {
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ int encode = evex_prefix_and_encode_nf(dst->encoding(), 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
+ emit_int16((unsigned char)0xAF, (0xC0 | encode));
+}
+
+void Assembler::eimulq(Register dst, Register src1, Register src2, bool no_flags) {
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ int encode = evex_prefix_and_encode_ndd(src1->encoding(), dst->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
+ emit_int16((unsigned char)0xAF, (0xC0 | encode));
+}
+
void Assembler::imulq(Register src) {
int encode = prefixq_and_encode(src->encoding());
emit_int16((unsigned char)0xF7, (0xE8 | encode));
}
+void Assembler::eimulq(Register src, bool no_flags) {
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ int encode = evex_prefix_and_encode_nf(0, 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
+ emit_int16((unsigned char)0xF7, (0xE8 | encode));
+}
+
void Assembler::imulq(Register dst, Address src, int32_t value) {
InstructionMark im(this);
prefixq(src, dst);
@@ -13840,6 +14782,22 @@ void Assembler::imulq(Register dst, Address src, int32_t value) {
}
}
+void Assembler::eimulq(Register dst, Address src, int32_t value, bool no_flags) {
+ InstructionMark im(this);
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_32bit);
+ evex_prefix_nf(src, 0, dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
+ if (is8bit(value)) {
+ emit_int8((unsigned char)0x6B);
+ emit_operand(dst, src, 1);
+ emit_int8(value);
+ } else {
+ emit_int8((unsigned char)0x69);
+ emit_operand(dst, src, 4);
+ emit_int32(value);
+ }
+}
+
void Assembler::imulq(Register dst, Register src, int value) {
int encode = prefixq_and_encode(dst->encoding(), src->encoding());
if (is8bit(value)) {
@@ -13850,6 +14808,17 @@ void Assembler::imulq(Register dst, Register src, int value) {
}
}
+void Assembler::eimulq(Register dst, Register src, int value, bool no_flags) {
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ int encode = evex_prefix_and_encode_nf(dst->encoding(), 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
+ if (is8bit(value)) {
+ emit_int24(0x6B, (0xC0 | encode), (value & 0xFF));
+ } else {
+ emit_int16(0x69, (0xC0 | encode));
+ emit_int32(value);
+ }
+}
+
void Assembler::imulq(Register dst, Address src) {
InstructionMark im(this);
int prefix = get_prefixq(src, dst, true /* is_map1 */);
@@ -13857,6 +14826,23 @@ void Assembler::imulq(Register dst, Address src) {
emit_operand(dst, src, 0);
}
+void Assembler::eimulq(Register dst, Address src, bool no_flags) {
+ InstructionMark im(this);
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_64bit);
+ evex_prefix_nf(src, 0, dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
+ emit_int8((unsigned char)0xAF);
+ emit_operand(dst, src, 0);
+}
+
+void Assembler::eimulq(Register dst, Register src1, Address src2, bool no_flags) {
+ InstructionMark im(this);
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_32bit);
+ evex_prefix_ndd(src2, dst->encoding(), src1->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
+ emit_int8((unsigned char)0xAF);
+ emit_operand(src1, src2, 0);
+}
+
void Assembler::incl(Register dst) {
// Don't use it directly. Use MacroAssembler::incrementl() instead.
// Use two-byte form (one-byte from is a REX prefix in 64-bit mode)
@@ -13864,6 +14850,15 @@ void Assembler::incl(Register dst) {
emit_int16((unsigned char)0xFF, (0xC0 | encode));
}
+void Assembler::eincl(Register dst, Register src, bool no_flags) {
+ // Don't use it directly. Use MacroAssembler::incrementl() instead.
+ // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ int encode = evex_prefix_and_encode_ndd(0, dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
+ emit_int16((unsigned char)0xFF, (0xC0 | encode));
+}
+
void Assembler::incq(Register dst) {
// Don't use it directly. Use MacroAssembler::incrementq() instead.
// Use two-byte form (one-byte from is a REX prefix in 64-bit mode)
@@ -13871,6 +14866,14 @@ void Assembler::incq(Register dst) {
emit_int16((unsigned char)0xFF, (0xC0 | encode));
}
+void Assembler::eincq(Register dst, Register src, bool no_flags) {
+ // Don't use it directly. Use MacroAssembler::incrementq() instead.
+ // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ int encode = evex_prefix_and_encode_ndd(0, dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
+ emit_int16((unsigned char)0xFF, (0xC0 | encode));
+}
+
void Assembler::incq(Address dst) {
// Don't use it directly. Use MacroAssembler::incrementq() instead.
InstructionMark im(this);
@@ -13878,6 +14881,16 @@ void Assembler::incq(Address dst) {
emit_operand(rax, dst, 0);
}
+void Assembler::eincq(Register dst, Address src, bool no_flags) {
+ // Don't use it directly. Use MacroAssembler::incrementq() instead.
+ InstructionMark im(this);
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_64bit);
+ evex_prefix_ndd(src, dst->encoding(), 0, VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
+ emit_int8((unsigned char) 0xFF);
+ emit_operand(rax, src, 0);
+}
+
void Assembler::lea(Register dst, Address src) {
leaq(dst, src);
}
@@ -13946,6 +14959,13 @@ void Assembler::lzcntq(Register dst, Register src) {
emit_opcode_prefix_and_encoding((unsigned char)0xBD, 0xC0, encode);
}
+void Assembler::elzcntq(Register dst, Register src, bool no_flags) {
+ assert(VM_Version::supports_lzcnt(), "encoding is treated as BSR");
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ int encode = evex_prefix_and_encode_nf(dst->encoding(), 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
+ emit_int16((unsigned char)0xF5, (0xC0 | encode));
+}
+
void Assembler::lzcntq(Register dst, Address src) {
assert(VM_Version::supports_lzcnt(), "encoding is treated as BSR");
InstructionMark im(this);
@@ -13955,6 +14975,16 @@ void Assembler::lzcntq(Register dst, Address src) {
emit_operand(dst, src, 0);
}
+void Assembler::elzcntq(Register dst, Address src, bool no_flags) {
+ assert(VM_Version::supports_lzcnt(), "encoding is treated as BSR");
+ InstructionMark im(this);
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_64bit);
+ evex_prefix_nf(src, 0, dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
+ emit_int8((unsigned char)0xF5);
+ emit_operand(dst, src, 0);
+}
+
void Assembler::movdq(XMMRegister dst, Register src) {
// table D-1 says MMX/SSE2
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
@@ -14077,11 +15107,26 @@ void Assembler::mulq(Address src) {
emit_operand(rsp, src, 0);
}
+void Assembler::emulq(Address src, bool no_flags) {
+ InstructionMark im(this);
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_64bit);
+ evex_prefix_nf(src, 0, 0, VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
+ emit_int8(0xF7);
+ emit_operand(rsp, src, 0);
+}
+
void Assembler::mulq(Register src) {
int encode = prefixq_and_encode(src->encoding());
emit_int16((unsigned char)0xF7, (0xE0 | encode));
}
+void Assembler::emulq(Register src, bool no_flags) {
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ int encode = evex_prefix_and_encode_nf(0, 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
+ emit_int16((unsigned char)0xF7, (0xE0 | encode));
+}
+
void Assembler::mulxq(Register dst1, Register dst2, Register src) {
assert(VM_Version::supports_bmi2(), "bit manipulation instructions not supported");
InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
@@ -14094,17 +15139,38 @@ void Assembler::negq(Register dst) {
emit_int16((unsigned char)0xF7, (0xD8 | encode));
}
+void Assembler::enegq(Register dst, Register src, bool no_flags) {
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ int encode = evex_prefix_and_encode_ndd(0, dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
+ emit_int16((unsigned char)0xF7, (0xD8 | encode));
+}
+
void Assembler::negq(Address dst) {
InstructionMark im(this);
emit_prefix_and_int8(get_prefixq(dst), (unsigned char)0xF7);
emit_operand(as_Register(3), dst, 0);
}
+void Assembler::enegq(Register dst, Address src, bool no_flags) {
+ InstructionMark im(this);
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_64bit);
+ evex_prefix_ndd(src, dst->encoding(), 0, VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
+ emit_int8((unsigned char)0xF7);
+ emit_operand(as_Register(3), src, 0);
+}
+
void Assembler::notq(Register dst) {
int encode = prefixq_and_encode(dst->encoding());
emit_int16((unsigned char)0xF7, (0xD0 | encode));
}
+void Assembler::enotq(Register dst, Register src) {
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ int encode = evex_prefix_and_encode_ndd(0, dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes);
+ emit_int16((unsigned char)0xF7, (0xD0 | encode));
+}
+
void Assembler::btq(Register dst, Register src) {
int encode = prefixq_and_encode(src->encoding(), dst->encoding(), true /* is_map1 */);
emit_opcode_prefix_and_encoding((unsigned char)0xA3, 0xC0, encode);
@@ -14141,33 +15207,78 @@ void Assembler::orq(Address dst, int32_t imm32) {
emit_arith_operand(0x81, as_Register(1), dst, imm32);
}
+void Assembler::eorq(Register dst, Address src, int32_t imm32, bool no_flags) {
+ InstructionMark im(this);
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_64bit);
+ evex_prefix_ndd(src, dst->encoding(), 0, VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
+ emit_arith_operand(0x81, as_Register(1), src, imm32);
+}
+
void Assembler::orq(Address dst, Register src) {
InstructionMark im(this);
emit_prefix_and_int8(get_prefixq(dst, src), (unsigned char)0x09);
emit_operand(src, dst, 0);
}
+void Assembler::eorq(Register dst, Address src1, Register src2, bool no_flags) {
+ InstructionMark im(this);
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_64bit);
+ evex_prefix_ndd(src1, dst->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
+ emit_int8(0x09);
+ emit_operand(src2, src1, 0);
+}
+
void Assembler::orq(Register dst, int32_t imm32) {
(void) prefixq_and_encode(dst->encoding());
emit_arith(0x81, 0xC8, dst, imm32);
}
+void Assembler::eorq(Register dst, Register src, int32_t imm32, bool no_flags) {
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ evex_prefix_and_encode_ndd(0, dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
+ emit_arith(0x81, 0xC8, src, imm32);
+}
+
void Assembler::orq_imm32(Register dst, int32_t imm32) {
(void) prefixq_and_encode(dst->encoding());
emit_arith_imm32(0x81, 0xC8, dst, imm32);
}
+void Assembler::eorq_imm32(Register dst, Register src, int32_t imm32, bool no_flags) {
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ (void) evex_prefix_and_encode_ndd(0, dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
+ emit_arith_imm32(0x81, 0xC8, src, imm32);
+}
+
void Assembler::orq(Register dst, Address src) {
InstructionMark im(this);
emit_prefix_and_int8(get_prefixq(src, dst), 0x0B);
emit_operand(dst, src, 0);
}
+void Assembler::eorq(Register dst, Register src1, Address src2, bool no_flags) {
+ InstructionMark im(this);
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_64bit);
+ evex_prefix_ndd(src2, dst->encoding(), src1->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
+ emit_int8(0x0B);
+ emit_operand(src1, src2, 0);
+}
+
void Assembler::orq(Register dst, Register src) {
(void) prefixq_and_encode(dst->encoding(), src->encoding());
emit_arith(0x0B, 0xC0, dst, src);
}
+void Assembler::eorq(Register dst, Register src1, Register src2, bool no_flags) {
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ (void) evex_prefix_and_encode_ndd(src1->encoding(), dst->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
+ // opcode matches gcc
+ emit_arith(0x09, 0xC0, src1, src2);
+}
+
void Assembler::popcntq(Register dst, Address src) {
assert(VM_Version::supports_popcnt(), "must support");
InstructionMark im(this);
@@ -14176,6 +15287,16 @@ void Assembler::popcntq(Register dst, Address src) {
emit_operand(dst, src, 0);
}
+void Assembler::epopcntq(Register dst, Address src, bool no_flags) {
+ assert(VM_Version::supports_popcnt(), "must support");
+ InstructionMark im(this);
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_64bit);
+ evex_prefix_nf(src, 0, dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
+ emit_int8((unsigned char) 0x88);
+ emit_operand(dst, src, 0);
+}
+
void Assembler::popcntq(Register dst, Register src) {
assert(VM_Version::supports_popcnt(), "must support");
emit_int8((unsigned char)0xF3);
@@ -14183,6 +15304,13 @@ void Assembler::popcntq(Register dst, Register src) {
emit_opcode_prefix_and_encoding((unsigned char)0xB8, 0xC0, encode);
}
+void Assembler::epopcntq(Register dst, Register src, bool no_flags) {
+ assert(VM_Version::supports_popcnt(), "must support");
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ int encode = evex_prefix_and_encode_nf(dst->encoding(), 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
+ emit_int16((unsigned char)0x88, (0xC0 | encode));
+}
+
void Assembler::popq(Address dst) {
InstructionMark im(this);
emit_prefix_and_int8(get_prefixq(dst), (unsigned char)0x8F);
@@ -14401,6 +15529,17 @@ void Assembler::rclq(Register dst, int imm8) {
}
}
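+// Rotate-through-carry NDD forms: RCL uses /2 (0xD0 base), RCR uses /3 (0xD8 base).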
+void Assembler::erclq(Register dst, Register src, int imm8) {
+ assert(isShiftCount(imm8 >> 1), "illegal shift count");
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ int encode = evex_prefix_and_encode_ndd(0, dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes);
+ if (imm8 == 1) {
+ emit_int16((unsigned char)0xD1, (0xD0 | encode));
+ } else {
+ emit_int24((unsigned char)0xC1, (0xD0 | encode), imm8);
+ }
+}
+
void Assembler::rcrq(Register dst, int imm8) {
assert(isShiftCount(imm8 >> 1), "illegal shift count");
int encode = prefixq_and_encode(dst->encoding());
@@ -14411,6 +15550,17 @@ void Assembler::rcrq(Register dst, int imm8) {
}
}
+void Assembler::ercrq(Register dst, Register src, int imm8) {
+ assert(isShiftCount(imm8 >> 1), "illegal shift count");
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ int encode = evex_prefix_and_encode_ndd(0, dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes);
+ if (imm8 == 1) {
+ emit_int16((unsigned char)0xD1, (0xD8 | encode));
+ } else {
+ emit_int24((unsigned char)0xC1, (0xD8 | encode), imm8);
+ }
+}
+
void Assembler::rorxl(Register dst, Register src, int imm8) {
assert(VM_Version::supports_bmi2(), "bit manipulation instructions not supported");
InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
@@ -14462,12 +15612,38 @@ void Assembler::salq(Address dst, int imm8) {
}
}
+void Assembler::esalq(Register dst, Address src, int imm8, bool no_flags) {
+ InstructionMark im(this);
+ assert(isShiftCount(imm8 >> 1), "illegal shift count");
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_64bit);
+ evex_prefix_ndd(src, dst->encoding(), 0, VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
+ if (imm8 == 1) {
+ emit_int8((unsigned char)0xD1);
+ emit_operand(as_Register(4), src, 0);
+ } else {
+ emit_int8((unsigned char)0xC1);
+ emit_operand(as_Register(4), src, 1);
+ emit_int8(imm8);
+ }
+}
+
void Assembler::salq(Address dst) {
InstructionMark im(this);
emit_prefix_and_int8(get_prefixq(dst), (unsigned char)0xD3);
emit_operand(as_Register(4), dst, 0);
}
+void Assembler::esalq(Register dst, Address src, bool no_flags) {
+ InstructionMark im(this);
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_64bit);
+ evex_prefix_ndd(src, dst->encoding(), 0, VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
+ emit_int8((unsigned char)0xD3);
+ emit_operand(as_Register(4), src, 0);
+}
+
void Assembler::salq(Register dst, int imm8) {
assert(isShiftCount(imm8 >> 1), "illegal shift count");
int encode = prefixq_and_encode(dst->encoding());
@@ -14478,11 +15654,28 @@ void Assembler::salq(Register dst, int imm8) {
}
}
+void Assembler::esalq(Register dst, Register src, int imm8, bool no_flags) {
+ assert(isShiftCount(imm8 >> 1), "illegal shift count");
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ int encode = evex_prefix_and_encode_ndd(0, dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
+ if (imm8 == 1) {
+ emit_int16((unsigned char)0xD1, (0xE0 | encode));
+ } else {
+ emit_int24((unsigned char)0xC1, (0xE0 | encode), imm8);
+ }
+}
+
void Assembler::salq(Register dst) {
int encode = prefixq_and_encode(dst->encoding());
emit_int16((unsigned char)0xD3, (0xE0 | encode));
}
+void Assembler::esalq(Register dst, Register src, bool no_flags) {
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ int encode = evex_prefix_and_encode_ndd(0, dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
+ emit_int16((unsigned char)0xD3, (0xE0 | encode));
+}
+
void Assembler::sarq(Address dst, int imm8) {
InstructionMark im(this);
assert(isShiftCount(imm8 >> 1), "illegal shift count");
@@ -14497,12 +15690,38 @@ void Assembler::sarq(Address dst, int imm8) {
}
}
+void Assembler::esarq(Register dst, Address src, int imm8, bool no_flags) {
+ assert(isShiftCount(imm8 >> 1), "illegal shift count");
+ InstructionMark im(this);
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_64bit);
+ evex_prefix_ndd(src, dst->encoding(), 0, VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
+ if (imm8 == 1) {
+ emit_int8((unsigned char)0xD1);
+ emit_operand(as_Register(7), src, 0);
+ } else {
+ emit_int8((unsigned char)0xC1);
+ emit_operand(as_Register(7), src, 1);
+ emit_int8(imm8);
+ }
+}
+
void Assembler::sarq(Address dst) {
InstructionMark im(this);
emit_prefix_and_int8(get_prefixq(dst), (unsigned char)0xD3);
emit_operand(as_Register(7), dst, 0);
}
+void Assembler::esarq(Register dst, Address src, bool no_flags) {
+ InstructionMark im(this);
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_64bit);
+ evex_prefix_ndd(src, dst->encoding(), 0, VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
+ emit_int8((unsigned char)0xD3);
+ emit_operand(as_Register(7), src, 0);
+}
+
void Assembler::sarq(Register dst, int imm8) {
assert(isShiftCount(imm8 >> 1), "illegal shift count");
int encode = prefixq_and_encode(dst->encoding());
@@ -14513,10 +15732,26 @@ void Assembler::sarq(Register dst, int imm8) {
}
}
+void Assembler::esarq(Register dst, Register src, int imm8, bool no_flags) {
+  assert(isShiftCount(imm8 >> 1), "illegal shift count");
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ int encode = evex_prefix_and_encode_ndd(0, dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
+ if (imm8 == 1) {
+ emit_int16((unsigned char)0xD1, (0xF8 | encode));
+ } else {
+ emit_int24((unsigned char)0xC1, (0xF8 | encode), imm8);
+ }
+}
+
void Assembler::sarq(Register dst) {
int encode = prefixq_and_encode(dst->encoding());
emit_int16((unsigned char)0xD3, (0xF8 | encode));
}
+
+void Assembler::esarq(Register dst, Register src, bool no_flags) {
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ int encode = evex_prefix_and_encode_ndd(0, dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
+ emit_int16((unsigned char)0xD3, (0xF8 | encode));
+}
#endif
void Assembler::sbbq(Address dst, int32_t imm32) {
@@ -14551,11 +15786,28 @@ void Assembler::shlq(Register dst, int imm8) {
}
}
+void Assembler::eshlq(Register dst, Register src, int imm8, bool no_flags) {
+ assert(isShiftCount(imm8 >> 1), "illegal shift count");
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ int encode = evex_prefix_and_encode_ndd(0, dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
+  if (imm8 == 1) {
+ emit_int16((unsigned char)0xD1, (0xE0 | encode));
+ } else {
+ emit_int24((unsigned char)0xC1, (0xE0 | encode), imm8);
+ }
+}
+
void Assembler::shlq(Register dst) {
int encode = prefixq_and_encode(dst->encoding());
emit_int16((unsigned char)0xD3, (0xE0 | encode));
}
+void Assembler::eshlq(Register dst, Register src, bool no_flags) {
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ int encode = evex_prefix_and_encode_ndd(0, dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
+ emit_int16((unsigned char)0xD3, (0xE0 | encode));
+}
+
void Assembler::shrq(Register dst, int imm8) {
assert(isShiftCount(imm8 >> 1), "illegal shift count");
int encode = prefixq_and_encode(dst->encoding());
@@ -14567,17 +15819,44 @@ void Assembler::shrq(Register dst, int imm8) {
}
}
+void Assembler::eshrq(Register dst, Register src, int imm8, bool no_flags) {
+ assert(isShiftCount(imm8 >> 1), "illegal shift count");
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ int encode = evex_prefix_and_encode_ndd(0, dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
+ if (imm8 == 1) {
+ emit_int16((unsigned char)0xD1, (0xE8 | encode));
+  } else {
+ emit_int24((unsigned char)0xC1, (0xE8 | encode), imm8);
+ }
+}
+
void Assembler::shrq(Register dst) {
int encode = prefixq_and_encode(dst->encoding());
emit_int16((unsigned char)0xD3, 0xE8 | encode);
}
+void Assembler::eshrq(Register dst, Register src, bool no_flags) {
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ int encode = evex_prefix_and_encode_ndd(0, dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
+ emit_int16((unsigned char)0xD3, (0xE8 | encode));
+}
+
void Assembler::shrq(Address dst) {
InstructionMark im(this);
emit_prefix_and_int8(get_prefixq(dst), (unsigned char)0xD3);
emit_operand(as_Register(5), dst, 0);
}
+void Assembler::eshrq(Register dst, Address src, bool no_flags) {
+ InstructionMark im(this);
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_64bit);
+ evex_prefix_ndd(src, dst->encoding(), 0, VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
+ emit_int8((unsigned char)0xD3);
+ emit_operand(as_Register(5), src, 0);
+}
+
void Assembler::shrq(Address dst, int imm8) {
InstructionMark im(this);
assert(isShiftCount(imm8 >> 1), "illegal shift count");
@@ -14592,40 +15871,102 @@ void Assembler::shrq(Address dst, int imm8) {
}
}
+void Assembler::eshrq(Register dst, Address src, int imm8, bool no_flags) {
+ InstructionMark im(this);
+ assert(isShiftCount(imm8 >> 1), "illegal shift count");
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_64bit);
+ evex_prefix_ndd(src, dst->encoding(), 0, VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
+ if (imm8 == 1) {
+ emit_int8((unsigned char)0xD1);
+ emit_operand(as_Register(5), src, 0);
+  } else {
+ emit_int8((unsigned char)0xC1);
+ emit_operand(as_Register(5), src, 1);
+ emit_int8(imm8);
+ }
+}
+
void Assembler::subq(Address dst, int32_t imm32) {
InstructionMark im(this);
prefixq(dst);
emit_arith_operand(0x81, rbp, dst, imm32);
}
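+// APX NDD form of subq: computes [src] - imm32 into dst; the memory operand is
+// only read. no_flags selects the EVEX.NF (no flags) encoding.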
+void Assembler::esubq(Register dst, Address src, int32_t imm32, bool no_flags) {
+ InstructionMark im(this);
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_64bit);
+ evex_prefix_ndd(src, dst->encoding(), 0, VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
+ emit_arith_operand(0x81, rbp, src, imm32);
+}
+
void Assembler::subq(Address dst, Register src) {
InstructionMark im(this);
emit_prefix_and_int8(get_prefixq(dst, src), 0x29);
emit_operand(src, dst, 0);
}
+void Assembler::esubq(Register dst, Address src1, Register src2, bool no_flags) {
+ InstructionMark im(this);
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_64bit);
+ evex_prefix_ndd(src1, dst->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
+ emit_int8(0x29);
+ emit_operand(src2, src1, 0);
+}
+
void Assembler::subq(Register dst, int32_t imm32) {
(void) prefixq_and_encode(dst->encoding());
emit_arith(0x81, 0xE8, dst, imm32);
}
+void Assembler::esubq(Register dst, Register src, int32_t imm32, bool no_flags) {
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ (void) evex_prefix_and_encode_ndd(0, dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
+ emit_arith(0x81, 0xE8, src, imm32);
+}
+
// Force generation of a 4 byte immediate value even if it fits into 8bit
void Assembler::subq_imm32(Register dst, int32_t imm32) {
(void) prefixq_and_encode(dst->encoding());
emit_arith_imm32(0x81, 0xE8, dst, imm32);
}
+void Assembler::esubq_imm32(Register dst, Register src, int32_t imm32, bool no_flags) {
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ (void) evex_prefix_and_encode_ndd(0, dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
+ emit_arith_imm32(0x81, 0xE8, src, imm32);
+}
+
void Assembler::subq(Register dst, Address src) {
InstructionMark im(this);
emit_prefix_and_int8(get_prefixq(src, dst), 0x2B);
emit_operand(dst, src, 0);
}
+void Assembler::esubq(Register dst, Register src1, Address src2, bool no_flags) {
+ InstructionMark im(this);
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_64bit);
+ evex_prefix_ndd(src2, dst->encoding(), src1->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
+ emit_int8(0x2B);
+ emit_operand(src1, src2, 0);
+}
+
void Assembler::subq(Register dst, Register src) {
(void) prefixq_and_encode(dst->encoding(), src->encoding());
emit_arith(0x2B, 0xC0, dst, src);
}
+void Assembler::esubq(Register dst, Register src1, Register src2, bool no_flags) {
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ (void) evex_prefix_and_encode_ndd(src1->encoding(), dst->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
+  // use opcode 0x29 (the MR form) rather than 0x2B so the encoding matches gcc's output
+ emit_arith(0x29, 0xC0, src1, src2);
+}
+
void Assembler::testq(Address dst, int32_t imm32) {
InstructionMark im(this);
emit_prefix_and_int8(get_prefixq(dst), (unsigned char)0xF7);
@@ -14683,29 +16024,77 @@ void Assembler::xorq(Register dst, Register src) {
emit_arith(0x33, 0xC0, dst, src);
}
+void Assembler::exorq(Register dst, Register src1, Register src2, bool no_flags) {
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ (void) evex_prefix_and_encode_ndd(src1->encoding(), dst->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
+  // use opcode 0x31 (the MR form) rather than 0x33 so the encoding matches gcc's output
+ emit_arith(0x31, 0xC0, src1, src2);
+}
+
void Assembler::xorq(Register dst, Address src) {
InstructionMark im(this);
emit_prefix_and_int8(get_prefixq(src, dst), 0x33);
emit_operand(dst, src, 0);
}
+void Assembler::exorq(Register dst, Register src1, Address src2, bool no_flags) {
+ InstructionMark im(this);
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_64bit);
+ evex_prefix_ndd(src2, dst->encoding(), src1->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
+ emit_int8(0x33);
+ emit_operand(src1, src2, 0);
+}
+
void Assembler::xorq(Register dst, int32_t imm32) {
(void) prefixq_and_encode(dst->encoding());
emit_arith(0x81, 0xF0, dst, imm32);
}
+void Assembler::exorq(Register dst, Register src, int32_t imm32, bool no_flags) {
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ evex_prefix_and_encode_ndd(0, dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
+ emit_arith(0x81, 0xF0, src, imm32);
+}
+
void Assembler::xorq(Address dst, int32_t imm32) {
InstructionMark im(this);
prefixq(dst);
emit_arith_operand(0x81, as_Register(6), dst, imm32);
}
+void Assembler::exorq(Register dst, Address src, int32_t imm32, bool no_flags) {
+ InstructionMark im(this);
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_64bit);
+ evex_prefix_ndd(src, dst->encoding(), 0, VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
+ emit_arith_operand(0x81, as_Register(6), src, imm32);
+}
+
void Assembler::xorq(Address dst, Register src) {
InstructionMark im(this);
emit_prefix_and_int8(get_prefixq(dst, src), 0x31);
emit_operand(src, dst, 0);
}
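+// APX SETZUcc: like setcc, but also zeroes the upper bits of dst, so no
+// separate movzbl is needed afterwards.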
+void Assembler::esetzucc(Condition cc, Register dst) {
+ assert(VM_Version::supports_apx_f(), "");
+ assert(0 <= cc && cc < 16, "illegal cc");
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ // Encoding Format : eevex_prefix (4 bytes) | opcode_cc | modrm
+ int encode = evex_prefix_and_encode_ndd(0, 0, dst->encoding(), VEX_SIMD_F2, /* MAP4 */VEX_OPCODE_0F_3C, &attributes);
+ emit_opcode_prefix_and_encoding((0x40 | cc), 0xC0, encode);
+}
+
+void Assembler::exorq(Register dst, Address src1, Register src2, bool no_flags) {
+ InstructionMark im(this);
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_64bit);
+ evex_prefix_ndd(src1, dst->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
+ emit_int8(0x31);
+ emit_operand(src2, src1, 0);
+}
+
#endif // !LP64
void InstructionAttr::set_address_attributes(int tuple_type, int input_size_in_bits) {
diff --git a/src/hotspot/cpu/x86/assembler_x86.hpp b/src/hotspot/cpu/x86/assembler_x86.hpp
index 9d1a12ca8e5c8..eace7bb9cc169 100644
--- a/src/hotspot/cpu/x86/assembler_x86.hpp
+++ b/src/hotspot/cpu/x86/assembler_x86.hpp
@@ -789,14 +789,26 @@ class Assembler : public AbstractAssembler {
void vex_prefix(bool vex_r, bool vex_b, bool vex_x, int nds_enc, VexSimdPrefix pre, VexOpcode opc);
void evex_prefix(bool vex_r, bool vex_b, bool vex_x, bool evex_v, bool evex_r, bool evex_b,
- bool eevex_x, int nds_enc, VexSimdPrefix pre, VexOpcode opc);
+ bool eevex_x, int nds_enc, VexSimdPrefix pre, VexOpcode opc, bool no_flags = false);
+
+ void evex_prefix_ndd(Address adr, int ndd_enc, int xreg_enc, VexSimdPrefix pre, VexOpcode opc,
+ InstructionAttr *attributes, bool no_flags = false);
+
+ void evex_prefix_nf(Address adr, int ndd_enc, int xreg_enc, VexSimdPrefix pre, VexOpcode opc,
+ InstructionAttr *attributes, bool no_flags = false);
void vex_prefix(Address adr, int nds_enc, int xreg_enc, VexSimdPrefix pre, VexOpcode opc,
- InstructionAttr *attributes);
+ InstructionAttr *attributes, bool nds_is_ndd = false, bool no_flags = false);
int vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc,
VexSimdPrefix pre, VexOpcode opc,
- InstructionAttr *attributes, bool src_is_gpr = false);
+ InstructionAttr *attributes, bool src_is_gpr = false, bool nds_is_ndd = false, bool no_flags = false);
+
+ int evex_prefix_and_encode_ndd(int dst_enc, int nds_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc,
+ InstructionAttr *attributes, bool no_flags = false);
+
+ int evex_prefix_and_encode_nf(int dst_enc, int nds_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc,
+ InstructionAttr *attributes, bool no_flags = false);
void simd_prefix(XMMRegister xreg, XMMRegister nds, Address adr, VexSimdPrefix pre,
VexOpcode opc, InstructionAttr *attributes);
@@ -941,13 +953,20 @@ class Assembler : public AbstractAssembler {
// the product flag UseIncDec value.
void decl(Register dst);
+ void edecl(Register dst, Register src, bool no_flags);
void decl(Address dst);
+ void edecl(Register dst, Address src, bool no_flags);
void decq(Address dst);
+ void edecq(Register dst, Address src, bool no_flags);
void incl(Register dst);
+ void eincl(Register dst, Register src, bool no_flags);
void incl(Address dst);
+ void eincl(Register dst, Address src, bool no_flags);
void incq(Register dst);
+ void eincq(Register dst, Register src, bool no_flags);
void incq(Address dst);
+ void eincq(Register dst, Address src, bool no_flags);
// New cpus require use of movsd and movss to avoid partial register stall
// when loading from memory. But for old Opteron use movlpd instead of movsd.
@@ -1020,7 +1039,7 @@ class Assembler : public AbstractAssembler {
void pusha_uncached();
void popa_uncached();
- // APX ISA extensions for register save/restore optimizations.
+ // APX ISA Extensions for register save/restore optimizations.
void push2(Register src1, Register src2, bool with_ppx = false);
void pop2(Register src1, Register src2, bool with_ppx = false);
void push2p(Register src1, Register src2);
@@ -1028,9 +1047,13 @@ class Assembler : public AbstractAssembler {
void pushp(Register src);
void popp(Register src);
+ // New Zero Upper setcc instruction.
+ void esetzucc(Condition cc, Register dst);
+
#endif
void vzeroupper_uncached();
void decq(Register dst);
+ void edecq(Register dst, Register src, bool no_flags);
void pusha();
void popa();
@@ -1072,23 +1095,35 @@ class Assembler : public AbstractAssembler {
void addw(Address dst, Register src);
void addl(Address dst, int32_t imm32);
+ void eaddl(Register dst, Address src, int32_t imm32, bool no_flags);
void addl(Address dst, Register src);
+ void eaddl(Register dst, Address src1, Register src2, bool no_flags);
void addl(Register dst, int32_t imm32);
+ void eaddl(Register dst, Register src, int32_t imm32, bool no_flags);
void addl(Register dst, Address src);
+ void eaddl(Register dst, Register src1, Address src2, bool no_flags);
void addl(Register dst, Register src);
+ void eaddl(Register dst, Register src1, Register src2, bool no_flags);
void addq(Address dst, int32_t imm32);
+ void eaddq(Register dst, Address src, int32_t imm32, bool no_flags);
void addq(Address dst, Register src);
+ void eaddq(Register dst, Address src1, Register src2, bool no_flags);
void addq(Register dst, int32_t imm32);
+ void eaddq(Register dst, Register src, int32_t imm32, bool no_flags);
void addq(Register dst, Address src);
+ void eaddq(Register dst, Register src1, Address src2, bool no_flags);
void addq(Register dst, Register src);
+ void eaddq(Register dst, Register src1, Register src2, bool no_flags);
#ifdef _LP64
//Add Unsigned Integers with Carry Flag
void adcxq(Register dst, Register src);
+ void eadcxq(Register dst, Register src1, Register src2);
//Add Unsigned Integers with Overflow Flag
void adoxq(Register dst, Register src);
+ void eadoxq(Register dst, Register src1, Register src2);
#endif
void addr_nop_4();
@@ -1122,16 +1157,25 @@ class Assembler : public AbstractAssembler {
void andb(Address dst, Register src);
void andl(Address dst, int32_t imm32);
+ void eandl(Register dst, Address src, int32_t imm32, bool no_flags);
void andl(Register dst, int32_t imm32);
+ void eandl(Register dst, Register src, int32_t imm32, bool no_flags);
void andl(Register dst, Address src);
+ void eandl(Register dst, Register src1, Address src2, bool no_flags);
void andl(Register dst, Register src);
+ void eandl(Register dst, Register src1, Register src2, bool no_flags);
void andl(Address dst, Register src);
void andq(Address dst, int32_t imm32);
+ void eandq(Register dst, Address src, int32_t imm32, bool no_flags);
void andq(Register dst, int32_t imm32);
+ void eandq(Register dst, Register src, int32_t imm32, bool no_flags);
void andq(Register dst, Address src);
+ void eandq(Register dst, Register src1, Address src2, bool no_flags);
void andq(Register dst, Register src);
+ void eandq(Register dst, Register src1, Register src2, bool no_flags);
void andq(Address dst, Register src);
+ void eandq(Register dst, Address src1, Register src2, bool no_flags);
// BMI instructions
void andnl(Register dst, Register src1, Register src2);
@@ -1182,15 +1226,20 @@ class Assembler : public AbstractAssembler {
void clwb(Address adr);
void cmovl(Condition cc, Register dst, Register src);
+ void ecmovl(Condition cc, Register dst, Register src1, Register src2);
void cmovl(Condition cc, Register dst, Address src);
+ void ecmovl(Condition cc, Register dst, Register src1, Address src2);
void cmovq(Condition cc, Register dst, Register src);
+ void ecmovq(Condition cc, Register dst, Register src1, Register src2);
void cmovq(Condition cc, Register dst, Address src);
+ void ecmovq(Condition cc, Register dst, Register src1, Address src2);
void cmpb(Address dst, int imm8);
void cmpb(Address dst, Register reg);
void cmpb(Register reg, Address dst);
+ void cmpb(Register reg, int imm8);
void cmpl(Address dst, int32_t imm32);
void cmpl(Register dst, int32_t imm32);
@@ -1488,25 +1537,41 @@ class Assembler : public AbstractAssembler {
void hlt();
void idivl(Register src);
+ void eidivl(Register src, bool no_flags);
void divl(Register src); // Unsigned division
+ void edivl(Register src, bool no_flags); // Unsigned division
#ifdef _LP64
void idivq(Register src);
+ void eidivq(Register src, bool no_flags);
void divq(Register src); // Unsigned division
+ void edivq(Register src, bool no_flags); // Unsigned division
#endif
void imull(Register src);
+ void eimull(Register src, bool no_flags);
void imull(Register dst, Register src);
+ void eimull(Register dst, Register src1, Register src2, bool no_flags);
void imull(Register dst, Register src, int value);
+ void eimull(Register dst, Register src, int value, bool no_flags);
void imull(Register dst, Address src, int value);
+ void eimull(Register dst, Address src, int value, bool no_flags);
void imull(Register dst, Address src);
+ void eimull(Register dst, Register src1, Address src2, bool no_flags);
#ifdef _LP64
void imulq(Register dst, Register src);
+ void eimulq(Register dst, Register src, bool no_flags);
+ void eimulq(Register dst, Register src1, Register src2, bool no_flags);
void imulq(Register dst, Register src, int value);
+ void eimulq(Register dst, Register src, int value, bool no_flags);
void imulq(Register dst, Address src, int value);
+ void eimulq(Register dst, Address src, int value, bool no_flags);
void imulq(Register dst, Address src);
+ void eimulq(Register dst, Address src, bool no_flags);
+ void eimulq(Register dst, Register src1, Address src2, bool no_flags);
void imulq(Register dst);
+ void eimulq(Register dst, bool no_flags);
#endif
// jcc is the generic conditional branch generator to run-
@@ -1565,11 +1630,15 @@ class Assembler : public AbstractAssembler {
void size_prefix();
void lzcntl(Register dst, Register src);
+ void elzcntl(Register dst, Register src, bool no_flags);
void lzcntl(Register dst, Address src);
+ void elzcntl(Register dst, Address src, bool no_flags);
#ifdef _LP64
void lzcntq(Register dst, Register src);
+ void elzcntq(Register dst, Register src, bool no_flags);
void lzcntq(Register dst, Address src);
+ void elzcntq(Register dst, Address src, bool no_flags);
#endif
enum Membar_mask_bits {
@@ -1785,11 +1854,15 @@ class Assembler : public AbstractAssembler {
// Unsigned multiply with RAX destination register
void mull(Address src);
+ void emull(Address src, bool no_flags);
void mull(Register src);
+ void emull(Register src, bool no_flags);
#ifdef _LP64
void mulq(Address src);
+ void emulq(Address src, bool no_flags);
void mulq(Register src);
+ void emulq(Register src, bool no_flags);
void mulxq(Register dst1, Register dst2, Register src);
#endif
@@ -1802,19 +1875,25 @@ class Assembler : public AbstractAssembler {
void mulss(XMMRegister dst, XMMRegister src);
void negl(Register dst);
+ void enegl(Register dst, Register src, bool no_flags);
void negl(Address dst);
+ void enegl(Register dst, Address src, bool no_flags);
#ifdef _LP64
void negq(Register dst);
+ void enegq(Register dst, Register src, bool no_flags);
void negq(Address dst);
+ void enegq(Register dst, Address src, bool no_flags);
#endif
void nop(uint i = 1);
void notl(Register dst);
+ void enotl(Register dst, Register src);
#ifdef _LP64
void notq(Register dst);
+ void enotq(Register dst, Register src);
void btsq(Address dst, int imm8);
void btrq(Address dst, int imm8);
@@ -1822,21 +1901,37 @@ class Assembler : public AbstractAssembler {
#endif
void btq(Register dst, Register src);
+ void orw(Register dst, Register src);
+ void eorw(Register dst, Register src1, Register src2, bool no_flags);
+
void orl(Address dst, int32_t imm32);
+ void eorl(Register dst, Address src, int32_t imm32, bool no_flags);
void orl(Register dst, int32_t imm32);
+ void eorl(Register dst, Register src, int32_t imm32, bool no_flags);
void orl(Register dst, Address src);
+ void eorl(Register dst, Register src1, Address src2, bool no_flags);
void orl(Register dst, Register src);
+ void eorl(Register dst, Register src1, Register src2, bool no_flags);
void orl(Address dst, Register src);
+ void eorl(Register dst, Address src1, Register src2, bool no_flags);
void orb(Address dst, int imm8);
+ void eorb(Register dst, Address src, int imm8, bool no_flags);
void orb(Address dst, Register src);
+ void eorb(Register dst, Address src1, Register src2, bool no_flags);
void orq(Address dst, int32_t imm32);
+ void eorq(Register dst, Address src, int32_t imm32, bool no_flags);
void orq(Address dst, Register src);
+ void eorq(Register dst, Address src1, Register src2, bool no_flags);
void orq(Register dst, int32_t imm32);
+ void eorq(Register dst, Register src, int32_t imm32, bool no_flags);
void orq_imm32(Register dst, int32_t imm32);
+ void eorq_imm32(Register dst, Register src, int32_t imm32, bool no_flags);
void orq(Register dst, Address src);
+ void eorq(Register dst, Register src1, Address src2, bool no_flags);
void orq(Register dst, Register src);
+ void eorq(Register dst, Register src1, Register src2, bool no_flags);
// Pack with signed saturation
void packsswb(XMMRegister dst, XMMRegister src);
@@ -2022,7 +2117,9 @@ class Assembler : public AbstractAssembler {
#endif
void popcntl(Register dst, Address src);
+ void epopcntl(Register dst, Address src, bool no_flags);
void popcntl(Register dst, Register src);
+ void epopcntl(Register dst, Register src, bool no_flags);
void evpopcntb(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
void evpopcntw(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
@@ -2031,7 +2128,9 @@ class Assembler : public AbstractAssembler {
#ifdef _LP64
void popcntq(Register dst, Address src);
+ void epopcntq(Register dst, Address src, bool no_flags);
void popcntq(Register dst, Register src);
+ void epopcntq(Register dst, Register src, bool no_flags);
#endif
// Prefetches (SSE, SSE2, 3DNOW only)
@@ -2131,10 +2230,13 @@ class Assembler : public AbstractAssembler {
void pushq(Address src);
void rcll(Register dst, int imm8);
+ void ercll(Register dst, Register src, int imm8);
void rclq(Register dst, int imm8);
+ void erclq(Register dst, Register src, int imm8);
void rcrq(Register dst, int imm8);
+ void ercrq(Register dst, Register src, int imm8);
void rcpps(XMMRegister dst, XMMRegister src);
@@ -2145,18 +2247,26 @@ class Assembler : public AbstractAssembler {
void ret(int imm16);
void roll(Register dst);
+ void eroll(Register dst, Register src, bool no_flags);
void roll(Register dst, int imm8);
+ void eroll(Register dst, Register src, int imm8, bool no_flags);
void rorl(Register dst);
+ void erorl(Register dst, Register src, bool no_flags);
void rorl(Register dst, int imm8);
+ void erorl(Register dst, Register src, int imm8, bool no_flags);
#ifdef _LP64
void rolq(Register dst);
+ void erolq(Register dst, Register src, bool no_flags);
void rolq(Register dst, int imm8);
+ void erolq(Register dst, Register src, int imm8, bool no_flags);
void rorq(Register dst);
+ void erorq(Register dst, Register src, bool no_flags);
void rorq(Register dst, int imm8);
+ void erorq(Register dst, Register src, int imm8, bool no_flags);
void rorxl(Register dst, Register src, int imm8);
void rorxl(Register dst, Address src, int imm8);
void rorxq(Register dst, Register src, int imm8);
@@ -2166,25 +2276,41 @@ class Assembler : public AbstractAssembler {
void sahf();
void sall(Register dst, int imm8);
+ void esall(Register dst, Register src, int imm8, bool no_flags);
void sall(Register dst);
+ void esall(Register dst, Register src, bool no_flags);
void sall(Address dst, int imm8);
+ void esall(Register dst, Address src, int imm8, bool no_flags);
void sall(Address dst);
+ void esall(Register dst, Address src, bool no_flags);
void sarl(Address dst, int imm8);
+ void esarl(Register dst, Address src, int imm8, bool no_flags);
void sarl(Address dst);
+ void esarl(Register dst, Address src, bool no_flags);
void sarl(Register dst, int imm8);
+ void esarl(Register dst, Register src, int imm8, bool no_flags);
void sarl(Register dst);
+ void esarl(Register dst, Register src, bool no_flags);
#ifdef _LP64
void salq(Register dst, int imm8);
+ void esalq(Register dst, Register src, int imm8, bool no_flags);
void salq(Register dst);
+ void esalq(Register dst, Register src, bool no_flags);
void salq(Address dst, int imm8);
+ void esalq(Register dst, Address src, int imm8, bool no_flags);
void salq(Address dst);
+ void esalq(Register dst, Address src, bool no_flags);
void sarq(Address dst, int imm8);
+ void esarq(Register dst, Address src, int imm8, bool no_flags);
void sarq(Address dst);
+ void esarq(Register dst, Address src, bool no_flags);
void sarq(Register dst, int imm8);
+ void esarq(Register dst, Register src, int imm8, bool no_flags);
void sarq(Register dst);
+ void esarq(Register dst, Register src, bool no_flags);
#endif
void sbbl(Address dst, int32_t imm32);
@@ -2216,29 +2342,47 @@ class Assembler : public AbstractAssembler {
void sha256msg2(XMMRegister dst, XMMRegister src);
void shldl(Register dst, Register src);
+ void eshldl(Register dst, Register src1, Register src2, bool no_flags);
void shldl(Register dst, Register src, int8_t imm8);
+ void eshldl(Register dst, Register src1, Register src2, int8_t imm8, bool no_flags);
void shrdl(Register dst, Register src);
+ void eshrdl(Register dst, Register src1, Register src2, bool no_flags);
void shrdl(Register dst, Register src, int8_t imm8);
+ void eshrdl(Register dst, Register src1, Register src2, int8_t imm8, bool no_flags);
#ifdef _LP64
void shldq(Register dst, Register src, int8_t imm8);
+ void eshldq(Register dst, Register src1, Register src2, int8_t imm8, bool no_flags);
void shrdq(Register dst, Register src, int8_t imm8);
+ void eshrdq(Register dst, Register src1, Register src2, int8_t imm8, bool no_flags);
#endif
void shll(Register dst, int imm8);
+ void eshll(Register dst, Register src, int imm8, bool no_flags);
void shll(Register dst);
+ void eshll(Register dst, Register src, bool no_flags);
void shlq(Register dst, int imm8);
+ void eshlq(Register dst, Register src, int imm8, bool no_flags);
void shlq(Register dst);
+ void eshlq(Register dst, Register src, bool no_flags);
void shrl(Register dst, int imm8);
+ void eshrl(Register dst, Register src, int imm8, bool no_flags);
void shrl(Register dst);
+ void eshrl(Register dst, Register src, bool no_flags);
void shrl(Address dst);
+ void eshrl(Register dst, Address src, bool no_flags);
void shrl(Address dst, int imm8);
+ void eshrl(Register dst, Address src, int imm8, bool no_flags);
void shrq(Register dst, int imm8);
+ void eshrq(Register dst, Register src, int imm8, bool no_flags);
void shrq(Register dst);
+ void eshrq(Register dst, Register src, bool no_flags);
void shrq(Address dst);
+ void eshrq(Register dst, Address src, bool no_flags);
void shrq(Address dst, int imm8);
+ void eshrq(Register dst, Address src, int imm8, bool no_flags);
void smovl(); // QQQ generic?
@@ -2258,20 +2402,32 @@ class Assembler : public AbstractAssembler {
void stmxcsr( Address dst );
void subl(Address dst, int32_t imm32);
+ void esubl(Register dst, Address src, int32_t imm32, bool no_flags);
void subl(Address dst, Register src);
+ void esubl(Register dst, Address src1, Register src2, bool no_flags);
void subl(Register dst, int32_t imm32);
+ void esubl(Register dst, Register src, int32_t imm32, bool no_flags);
void subl(Register dst, Address src);
+ void esubl(Register dst, Register src1, Address src2, bool no_flags);
void subl(Register dst, Register src);
+ void esubl(Register dst, Register src1, Register src2, bool no_flags);
void subq(Address dst, int32_t imm32);
+ void esubq(Register dst, Address src, int32_t imm32, bool no_flags);
void subq(Address dst, Register src);
+ void esubq(Register dst, Address src1, Register src2, bool no_flags);
void subq(Register dst, int32_t imm32);
+ void esubq(Register dst, Register src, int32_t imm32, bool no_flags);
void subq(Register dst, Address src);
+ void esubq(Register dst, Register src1, Address src2, bool no_flags);
void subq(Register dst, Register src);
+ void esubq(Register dst, Register src1, Register src2, bool no_flags);
// Force generation of a 4 byte immediate value even if it fits into 8bit
void subl_imm32(Register dst, int32_t imm32);
+ void esubl_imm32(Register dst, Register src, int32_t imm32, bool no_flags);
void subq_imm32(Register dst, int32_t imm32);
+ void esubq_imm32(Register dst, Register src, int32_t imm32, bool no_flags);
// Subtract Scalar Double-Precision Floating-Point Values
void subsd(XMMRegister dst, Address src);
@@ -2296,9 +2452,13 @@ class Assembler : public AbstractAssembler {
// BMI - count trailing zeros
void tzcntl(Register dst, Register src);
+ void etzcntl(Register dst, Register src, bool no_flags);
void tzcntl(Register dst, Address src);
+ void etzcntl(Register dst, Address src, bool no_flags);
void tzcntq(Register dst, Register src);
+ void etzcntq(Register dst, Register src, bool no_flags);
void tzcntq(Register dst, Address src);
+ void etzcntq(Register dst, Address src, bool no_flags);
// Unordered Compare Scalar Double-Precision Floating-Point Values and set EFLAGS
void ucomisd(XMMRegister dst, Address src);
@@ -2331,20 +2491,33 @@ class Assembler : public AbstractAssembler {
void xgetbv();
void xorl(Register dst, int32_t imm32);
+ void exorl(Register dst, Register src, int32_t imm32, bool no_flags);
void xorl(Address dst, int32_t imm32);
+ void exorl(Register dst, Address src, int32_t imm32, bool no_flags);
void xorl(Register dst, Address src);
+ void exorl(Register dst, Register src1, Address src2, bool no_flags);
void xorl(Register dst, Register src);
+ void exorl(Register dst, Register src1, Register src2, bool no_flags);
void xorl(Address dst, Register src);
+ void exorl(Register dst, Address src1, Register src2, bool no_flags);
void xorb(Address dst, Register src);
+ void exorb(Register dst, Address src1, Register src2, bool no_flags);
void xorb(Register dst, Address src);
+ void exorb(Register dst, Register src1, Address src2, bool no_flags);
void xorw(Register dst, Address src);
+ void exorw(Register dst, Register src1, Address src2, bool no_flags);
void xorq(Register dst, Address src);
+ void exorq(Register dst, Register src1, Address src2, bool no_flags);
void xorq(Address dst, int32_t imm32);
+ void exorq(Register dst, Address src, int32_t imm32, bool no_flags);
void xorq(Register dst, Register src);
+ void exorq(Register dst, Register src1, Register src2, bool no_flags);
void xorq(Register dst, int32_t imm32);
+ void exorq(Register dst, Register src, int32_t imm32, bool no_flags);
void xorq(Address dst, Register src);
+ void exorq(Register dst, Address src1, Register src2, bool no_flags);
// AVX 3-operands scalar instructions (encoded with VEX prefix)
@@ -2459,6 +2632,7 @@ class Assembler : public AbstractAssembler {
// Bitwise Logical AND of Packed Floating-Point Values
void andpd(XMMRegister dst, XMMRegister src);
+ void andnpd(XMMRegister dst, XMMRegister src);
void andps(XMMRegister dst, XMMRegister src);
void vandpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vandps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
@@ -2813,6 +2987,7 @@ class Assembler : public AbstractAssembler {
void vinserti32x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);
void vinserti32x4(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8);
void vinserti64x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);
+ void evinserti64x2(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8, int vector_len);
// vinsertf forms
void vinsertf128(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);
@@ -2862,6 +3037,7 @@ class Assembler : public AbstractAssembler {
void vbroadcastsd(XMMRegister dst, XMMRegister src, int vector_len);
void vbroadcastsd(XMMRegister dst, Address src, int vector_len);
void vbroadcastf128(XMMRegister dst, Address src, int vector_len);
+ void evbroadcastf64x2(XMMRegister dst, Address src, int vector_len);
// gpr sourced byte/word/dword/qword replicate
void evpbroadcastb(XMMRegister dst, Register src, int vector_len);
diff --git a/src/hotspot/cpu/x86/c1_CodeStubs_x86.cpp b/src/hotspot/cpu/x86/c1_CodeStubs_x86.cpp
index 7d89b148ba22f..71ca9351f86c9 100644
--- a/src/hotspot/cpu/x86/c1_CodeStubs_x86.cpp
+++ b/src/hotspot/cpu/x86/c1_CodeStubs_x86.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1999, 2023, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2024, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -110,7 +110,7 @@ void CounterOverflowStub::emit_code(LIR_Assembler* ce) {
Metadata *m = _method->as_constant_ptr()->as_metadata();
ce->store_parameter(m, 1);
ce->store_parameter(_bci, 0);
- __ call(RuntimeAddress(Runtime1::entry_for(Runtime1::counter_overflow_id)));
+ __ call(RuntimeAddress(Runtime1::entry_for(C1StubId::counter_overflow_id)));
ce->add_call_info_here(_info);
ce->verify_oop_map(_info);
__ jmp(_continuation);
@@ -119,7 +119,7 @@ void CounterOverflowStub::emit_code(LIR_Assembler* ce) {
void RangeCheckStub::emit_code(LIR_Assembler* ce) {
__ bind(_entry);
if (_info->deoptimize_on_exception()) {
- address a = Runtime1::entry_for(Runtime1::predicate_failed_trap_id);
+ address a = Runtime1::entry_for(C1StubId::predicate_failed_trap_id);
__ call(RuntimeAddress(a));
ce->add_call_info_here(_info);
ce->verify_oop_map(_info);
@@ -133,11 +133,11 @@ void RangeCheckStub::emit_code(LIR_Assembler* ce) {
} else {
ce->store_parameter(_index->as_jint(), 0);
}
- Runtime1::StubID stub_id;
+ C1StubId stub_id;
if (_throw_index_out_of_bounds_exception) {
- stub_id = Runtime1::throw_index_exception_id;
+ stub_id = C1StubId::throw_index_exception_id;
} else {
- stub_id = Runtime1::throw_range_check_failed_id;
+ stub_id = C1StubId::throw_range_check_failed_id;
ce->store_parameter(_array->as_pointer_register(), 1);
}
__ call(RuntimeAddress(Runtime1::entry_for(stub_id)));
@@ -152,7 +152,7 @@ PredicateFailedStub::PredicateFailedStub(CodeEmitInfo* info) {
void PredicateFailedStub::emit_code(LIR_Assembler* ce) {
__ bind(_entry);
- address a = Runtime1::entry_for(Runtime1::predicate_failed_trap_id);
+ address a = Runtime1::entry_for(C1StubId::predicate_failed_trap_id);
__ call(RuntimeAddress(a));
ce->add_call_info_here(_info);
ce->verify_oop_map(_info);
@@ -164,7 +164,7 @@ void DivByZeroStub::emit_code(LIR_Assembler* ce) {
ce->compilation()->implicit_exception_table()->append(_offset, __ offset());
}
__ bind(_entry);
- __ call(RuntimeAddress(Runtime1::entry_for(Runtime1::throw_div0_exception_id)));
+ __ call(RuntimeAddress(Runtime1::entry_for(C1StubId::throw_div0_exception_id)));
ce->add_call_info_here(_info);
debug_only(__ should_not_reach_here());
}
@@ -172,14 +172,14 @@ void DivByZeroStub::emit_code(LIR_Assembler* ce) {
// Implementation of NewInstanceStub
-NewInstanceStub::NewInstanceStub(LIR_Opr klass_reg, LIR_Opr result, ciInstanceKlass* klass, CodeEmitInfo* info, Runtime1::StubID stub_id) {
+NewInstanceStub::NewInstanceStub(LIR_Opr klass_reg, LIR_Opr result, ciInstanceKlass* klass, CodeEmitInfo* info, C1StubId stub_id) {
_result = result;
_klass = klass;
_klass_reg = klass_reg;
_info = new CodeEmitInfo(info);
- assert(stub_id == Runtime1::new_instance_id ||
- stub_id == Runtime1::fast_new_instance_id ||
- stub_id == Runtime1::fast_new_instance_init_check_id,
+ assert(stub_id == C1StubId::new_instance_id ||
+ stub_id == C1StubId::fast_new_instance_id ||
+ stub_id == C1StubId::fast_new_instance_init_check_id,
"need new_instance id");
_stub_id = stub_id;
}
@@ -212,7 +212,7 @@ void NewTypeArrayStub::emit_code(LIR_Assembler* ce) {
__ bind(_entry);
assert(_length->as_register() == rbx, "length must in rbx,");
assert(_klass_reg->as_register() == rdx, "klass_reg must in rdx");
- __ call(RuntimeAddress(Runtime1::entry_for(Runtime1::new_type_array_id)));
+ __ call(RuntimeAddress(Runtime1::entry_for(C1StubId::new_type_array_id)));
ce->add_call_info_here(_info);
ce->verify_oop_map(_info);
assert(_result->as_register() == rax, "result must in rax,");
@@ -235,7 +235,7 @@ void NewObjectArrayStub::emit_code(LIR_Assembler* ce) {
__ bind(_entry);
assert(_length->as_register() == rbx, "length must in rbx,");
assert(_klass_reg->as_register() == rdx, "klass_reg must in rdx");
- __ call(RuntimeAddress(Runtime1::entry_for(Runtime1::new_object_array_id)));
+ __ call(RuntimeAddress(Runtime1::entry_for(C1StubId::new_object_array_id)));
ce->add_call_info_here(_info);
ce->verify_oop_map(_info);
assert(_result->as_register() == rax, "result must in rax,");
@@ -247,11 +247,11 @@ void MonitorEnterStub::emit_code(LIR_Assembler* ce) {
__ bind(_entry);
ce->store_parameter(_obj_reg->as_register(), 1);
ce->store_parameter(_lock_reg->as_register(), 0);
- Runtime1::StubID enter_id;
+ C1StubId enter_id;
if (ce->compilation()->has_fpu_code()) {
- enter_id = Runtime1::monitorenter_id;
+ enter_id = C1StubId::monitorenter_id;
} else {
- enter_id = Runtime1::monitorenter_nofpu_id;
+ enter_id = C1StubId::monitorenter_nofpu_id;
}
__ call(RuntimeAddress(Runtime1::entry_for(enter_id)));
ce->add_call_info_here(_info);
@@ -268,11 +268,11 @@ void MonitorExitStub::emit_code(LIR_Assembler* ce) {
}
ce->store_parameter(_lock_reg->as_register(), 0);
// note: non-blocking leaf routine => no call info needed
- Runtime1::StubID exit_id;
+ C1StubId exit_id;
if (ce->compilation()->has_fpu_code()) {
- exit_id = Runtime1::monitorexit_id;
+ exit_id = C1StubId::monitorexit_id;
} else {
- exit_id = Runtime1::monitorexit_nofpu_id;
+ exit_id = C1StubId::monitorexit_nofpu_id;
}
__ call(RuntimeAddress(Runtime1::entry_for(exit_id)));
__ jmp(_continuation);
@@ -407,10 +407,10 @@ void PatchingStub::emit_code(LIR_Assembler* ce) {
address target = nullptr;
relocInfo::relocType reloc_type = relocInfo::none;
switch (_id) {
- case access_field_id: target = Runtime1::entry_for(Runtime1::access_field_patching_id); break;
- case load_klass_id: target = Runtime1::entry_for(Runtime1::load_klass_patching_id); reloc_type = relocInfo::metadata_type; break;
- case load_mirror_id: target = Runtime1::entry_for(Runtime1::load_mirror_patching_id); reloc_type = relocInfo::oop_type; break;
- case load_appendix_id: target = Runtime1::entry_for(Runtime1::load_appendix_patching_id); reloc_type = relocInfo::oop_type; break;
+ case access_field_id: target = Runtime1::entry_for(C1StubId::access_field_patching_id); break;
+ case load_klass_id: target = Runtime1::entry_for(C1StubId::load_klass_patching_id); reloc_type = relocInfo::metadata_type; break;
+ case load_mirror_id: target = Runtime1::entry_for(C1StubId::load_mirror_patching_id); reloc_type = relocInfo::oop_type; break;
+ case load_appendix_id: target = Runtime1::entry_for(C1StubId::load_appendix_patching_id); reloc_type = relocInfo::oop_type; break;
default: ShouldNotReachHere();
}
__ bind(call_patch);
@@ -440,7 +440,7 @@ void PatchingStub::emit_code(LIR_Assembler* ce) {
void DeoptimizeStub::emit_code(LIR_Assembler* ce) {
__ bind(_entry);
ce->store_parameter(_trap_request, 0);
- __ call(RuntimeAddress(Runtime1::entry_for(Runtime1::deoptimize_id)));
+ __ call(RuntimeAddress(Runtime1::entry_for(C1StubId::deoptimize_id)));
ce->add_call_info_here(_info);
DEBUG_ONLY(__ should_not_reach_here());
}
@@ -450,9 +450,9 @@ void ImplicitNullCheckStub::emit_code(LIR_Assembler* ce) {
address a;
if (_info->deoptimize_on_exception()) {
// Deoptimize, do not throw the exception, because it is probably wrong to do it here.
- a = Runtime1::entry_for(Runtime1::predicate_failed_trap_id);
+ a = Runtime1::entry_for(C1StubId::predicate_failed_trap_id);
} else {
- a = Runtime1::entry_for(Runtime1::throw_null_pointer_exception_id);
+ a = Runtime1::entry_for(C1StubId::throw_null_pointer_exception_id);
}
ce->compilation()->implicit_exception_table()->append(_offset, __ offset());
diff --git a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp
index e2fde10b98d86..c3444d5a5abce 100644
--- a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp
+++ b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp
@@ -399,7 +399,7 @@ int LIR_Assembler::emit_exception_handler() {
__ verify_not_null_oop(rax);
// search an exception handler (rax: exception oop, rdx: throwing pc)
- __ call(RuntimeAddress(Runtime1::entry_for(Runtime1::handle_exception_from_callee_id)));
+ __ call(RuntimeAddress(Runtime1::entry_for(C1StubId::handle_exception_from_callee_id)));
__ should_not_reach_here();
guarantee(code_offset() - offset <= exception_handler_size(), "overflow");
__ end_a_stub();
@@ -463,7 +463,7 @@ int LIR_Assembler::emit_unwind_handler() {
// remove the activation and dispatch to the unwind handler
__ remove_frame(initial_frame_size_in_bytes());
- __ jump(RuntimeAddress(Runtime1::entry_for(Runtime1::unwind_exception_id)));
+ __ jump(RuntimeAddress(Runtime1::entry_for(C1StubId::unwind_exception_id)));
// Emit the slow path assembly
if (stub != nullptr) {
@@ -1566,7 +1566,7 @@ void LIR_Assembler::emit_opConvert(LIR_OpConvert* op) {
// instruction sequence too long to inline it here
{
- __ call(RuntimeAddress(Runtime1::entry_for(Runtime1::fpu2long_stub_id)));
+ __ call(RuntimeAddress(Runtime1::entry_for(C1StubId::fpu2long_stub_id)));
}
break;
#endif // _LP64
@@ -1781,7 +1781,7 @@ void LIR_Assembler::emit_typecheck_helper(LIR_OpTypeCheck *op, Label* success, L
#else
__ pushklass(k->constant_encoding(), noreg);
#endif // _LP64
- __ call(RuntimeAddress(Runtime1::entry_for(Runtime1::slow_subtype_check_id)));
+ __ call(RuntimeAddress(Runtime1::entry_for(C1StubId::slow_subtype_check_id)));
__ pop(klass_RInfo);
__ pop(klass_RInfo);
// result is a boolean
@@ -1795,7 +1795,7 @@ void LIR_Assembler::emit_typecheck_helper(LIR_OpTypeCheck *op, Label* success, L
// call out-of-line instance of __ check_klass_subtype_slow_path(...):
__ push(klass_RInfo);
__ push(k_RInfo);
- __ call(RuntimeAddress(Runtime1::entry_for(Runtime1::slow_subtype_check_id)));
+ __ call(RuntimeAddress(Runtime1::entry_for(C1StubId::slow_subtype_check_id)));
__ pop(klass_RInfo);
__ pop(k_RInfo);
// result is a boolean
@@ -1874,7 +1874,7 @@ void LIR_Assembler::emit_opTypeCheck(LIR_OpTypeCheck* op) {
// call out-of-line instance of __ check_klass_subtype_slow_path(...):
__ push(klass_RInfo);
__ push(k_RInfo);
- __ call(RuntimeAddress(Runtime1::entry_for(Runtime1::slow_subtype_check_id)));
+ __ call(RuntimeAddress(Runtime1::entry_for(C1StubId::slow_subtype_check_id)));
__ pop(klass_RInfo);
__ pop(k_RInfo);
// result is a boolean
@@ -2893,7 +2893,7 @@ void LIR_Assembler::throw_op(LIR_Opr exceptionPC, LIR_Opr exceptionOop, CodeEmit
// exception object is not added to oop map by LinearScan
// (LinearScan assumes that no oops are in fixed registers)
info->add_register_oop(exceptionOop);
- Runtime1::StubID unwind_id;
+ C1StubId unwind_id;
// get current pc information
// pc is only needed if the method has an exception handler, the unwind code does not need it.
@@ -2905,9 +2905,9 @@ void LIR_Assembler::throw_op(LIR_Opr exceptionPC, LIR_Opr exceptionOop, CodeEmit
__ verify_not_null_oop(rax);
// search an exception handler (rax: exception oop, rdx: throwing pc)
if (compilation()->has_fpu_code()) {
- unwind_id = Runtime1::handle_exception_id;
+ unwind_id = C1StubId::handle_exception_id;
} else {
- unwind_id = Runtime1::handle_exception_nofpu_id;
+ unwind_id = C1StubId::handle_exception_nofpu_id;
}
__ call(RuntimeAddress(Runtime1::entry_for(unwind_id)));
@@ -3262,7 +3262,7 @@ void LIR_Assembler::emit_arraycopy(LIR_OpArrayCopy* op) {
__ push(src);
__ push(dst);
- __ call(RuntimeAddress(Runtime1::entry_for(Runtime1::slow_subtype_check_id)));
+ __ call(RuntimeAddress(Runtime1::entry_for(C1StubId::slow_subtype_check_id)));
__ pop(dst);
__ pop(src);
diff --git a/src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp b/src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp
index d3add6975b4f2..36e2021138f2e 100644
--- a/src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp
+++ b/src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp
@@ -807,7 +807,11 @@ void LIRGenerator::do_MathIntrinsic(Intrinsic* x) {
if (x->id() == vmIntrinsics::_dexp || x->id() == vmIntrinsics::_dlog ||
x->id() == vmIntrinsics::_dpow || x->id() == vmIntrinsics::_dcos ||
x->id() == vmIntrinsics::_dsin || x->id() == vmIntrinsics::_dtan ||
- x->id() == vmIntrinsics::_dlog10) {
+ x->id() == vmIntrinsics::_dlog10
+#ifdef _LP64
+ || x->id() == vmIntrinsics::_dtanh
+#endif
+ ) {
do_LibmIntrinsic(x);
return;
}
@@ -989,11 +993,17 @@ void LIRGenerator::do_LibmIntrinsic(Intrinsic* x) {
break;
case vmIntrinsics::_dtan:
if (StubRoutines::dtan() != nullptr) {
- __ call_runtime_leaf(StubRoutines::dtan(), getThreadTemp(), result_reg, cc->args());
+ __ call_runtime_leaf(StubRoutines::dtan(), getThreadTemp(), result_reg, cc->args());
} else {
__ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtan), getThreadTemp(), result_reg, cc->args());
}
break;
+ case vmIntrinsics::_dtanh:
+ assert(StubRoutines::dtanh() != nullptr, "tanh intrinsic not found");
+ if (StubRoutines::dtanh() != nullptr) {
+ __ call_runtime_leaf(StubRoutines::dtanh(), getThreadTemp(), result_reg, cc->args());
+ }
+ break;
default: ShouldNotReachHere();
}
#endif // _LP64
@@ -1430,7 +1440,7 @@ void LIRGenerator::do_NewMultiArray(NewMultiArray* x) {
args->append(rank);
args->append(varargs);
LIR_Opr reg = result_register_for(x->type());
- __ call_runtime(Runtime1::entry_for(Runtime1::new_multi_array_id),
+ __ call_runtime(Runtime1::entry_for(C1StubId::new_multi_array_id),
LIR_OprFact::illegalOpr,
reg, args, info);
@@ -1463,12 +1473,12 @@ void LIRGenerator::do_CheckCast(CheckCast* x) {
CodeStub* stub;
if (x->is_incompatible_class_change_check()) {
assert(patching_info == nullptr, "can't patch this");
- stub = new SimpleExceptionStub(Runtime1::throw_incompatible_class_change_error_id, LIR_OprFact::illegalOpr, info_for_exception);
+ stub = new SimpleExceptionStub(C1StubId::throw_incompatible_class_change_error_id, LIR_OprFact::illegalOpr, info_for_exception);
} else if (x->is_invokespecial_receiver_check()) {
assert(patching_info == nullptr, "can't patch this");
stub = new DeoptimizeStub(info_for_exception, Deoptimization::Reason_class_check, Deoptimization::Action_none);
} else {
- stub = new SimpleExceptionStub(Runtime1::throw_class_cast_exception_id, obj.result(), info_for_exception);
+ stub = new SimpleExceptionStub(C1StubId::throw_class_cast_exception_id, obj.result(), info_for_exception);
}
LIR_Opr reg = rlock_result(x);
LIR_Opr tmp3 = LIR_OprFact::illegalOpr;
diff --git a/src/hotspot/cpu/x86/c1_MacroAssembler_x86.cpp b/src/hotspot/cpu/x86/c1_MacroAssembler_x86.cpp
index 4dcacd00a6339..bf5b90db5fcb0 100644
--- a/src/hotspot/cpu/x86/c1_MacroAssembler_x86.cpp
+++ b/src/hotspot/cpu/x86/c1_MacroAssembler_x86.cpp
@@ -271,7 +271,7 @@ void C1_MacroAssembler::initialize_object(Register obj, Register klass, Register
if (CURRENT_ENV->dtrace_alloc_probes()) {
assert(obj == rax, "must be");
- call(RuntimeAddress(Runtime1::entry_for(Runtime1::dtrace_object_alloc_id)));
+ call(RuntimeAddress(Runtime1::entry_for(C1StubId::dtrace_object_alloc_id)));
}
verify_oop(obj);
@@ -309,7 +309,7 @@ void C1_MacroAssembler::allocate_array(Register obj, Register len, Register t1,
if (CURRENT_ENV->dtrace_alloc_probes()) {
assert(obj == rax, "must be");
- call(RuntimeAddress(Runtime1::entry_for(Runtime1::dtrace_object_alloc_id)));
+ call(RuntimeAddress(Runtime1::entry_for(C1StubId::dtrace_object_alloc_id)));
}
verify_oop(obj);
diff --git a/src/hotspot/cpu/x86/c1_Runtime1_x86.cpp b/src/hotspot/cpu/x86/c1_Runtime1_x86.cpp
index 11b39ce15eb1a..1ccb06df48937 100644
--- a/src/hotspot/cpu/x86/c1_Runtime1_x86.cpp
+++ b/src/hotspot/cpu/x86/c1_Runtime1_x86.cpp
@@ -60,7 +60,7 @@ int StubAssembler::call_RT(Register oop_result1, Register metadata_result, addre
#ifdef _LP64
// At a method handle call, the stack may not be properly aligned
// when returning with an exception.
- align_stack = (stub_id() == Runtime1::handle_exception_from_callee_id);
+ align_stack = (stub_id() == (int)C1StubId::handle_exception_from_callee_id);
#endif
#ifdef _LP64
@@ -124,10 +124,10 @@ int StubAssembler::call_RT(Register oop_result1, Register metadata_result, addre
if (frame_size() == no_frame_size) {
leave();
jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
- } else if (_stub_id == Runtime1::forward_exception_id) {
+ } else if (_stub_id == (int)C1StubId::forward_exception_id) {
should_not_reach_here();
} else {
- jump(RuntimeAddress(Runtime1::entry_for(Runtime1::forward_exception_id)));
+ jump(RuntimeAddress(Runtime1::entry_for(C1StubId::forward_exception_id)));
}
bind(L);
}
@@ -671,7 +671,7 @@ OopMapSet* Runtime1::generate_exception_throw(StubAssembler* sasm, address targe
}
-OopMapSet* Runtime1::generate_handle_exception(StubID id, StubAssembler *sasm) {
+OopMapSet* Runtime1::generate_handle_exception(C1StubId id, StubAssembler *sasm) {
__ block_comment("generate_handle_exception");
// incoming parameters
@@ -684,7 +684,7 @@ OopMapSet* Runtime1::generate_handle_exception(StubID id, StubAssembler *sasm) {
OopMapSet* oop_maps = new OopMapSet();
OopMap* oop_map = nullptr;
switch (id) {
- case forward_exception_id:
+ case C1StubId::forward_exception_id:
// We're handling an exception in the context of a compiled frame.
// The registers have been saved in the standard places. Perform
// an exception lookup in the caller and dispatch to the handler
@@ -703,12 +703,12 @@ OopMapSet* Runtime1::generate_handle_exception(StubID id, StubAssembler *sasm) {
__ movptr(Address(thread, JavaThread::vm_result_offset()), NULL_WORD);
__ movptr(Address(thread, JavaThread::vm_result_2_offset()), NULL_WORD);
break;
- case handle_exception_nofpu_id:
- case handle_exception_id:
+ case C1StubId::handle_exception_nofpu_id:
+ case C1StubId::handle_exception_id:
// At this point all registers MAY be live.
- oop_map = save_live_registers(sasm, 1 /*thread*/, id != handle_exception_nofpu_id);
+ oop_map = save_live_registers(sasm, 1 /*thread*/, id != C1StubId::handle_exception_nofpu_id);
break;
- case handle_exception_from_callee_id: {
+ case C1StubId::handle_exception_from_callee_id: {
// At this point all registers except exception oop (RAX) and
// exception pc (RDX) are dead.
const int frame_size = 2 /*BP, return address*/ NOT_LP64(+ 1 /*thread*/) WIN64_ONLY(+ frame::arg_reg_save_area_bytes / BytesPerWord);
@@ -775,13 +775,13 @@ OopMapSet* Runtime1::generate_handle_exception(StubID id, StubAssembler *sasm) {
__ movptr(Address(rbp, 1*BytesPerWord), rax);
switch (id) {
- case forward_exception_id:
- case handle_exception_nofpu_id:
- case handle_exception_id:
+ case C1StubId::forward_exception_id:
+ case C1StubId::handle_exception_nofpu_id:
+ case C1StubId::handle_exception_id:
// Restore the registers that were saved at the beginning.
- restore_live_registers(sasm, id != handle_exception_nofpu_id);
+ restore_live_registers(sasm, id != C1StubId::handle_exception_nofpu_id);
break;
- case handle_exception_from_callee_id:
+ case C1StubId::handle_exception_from_callee_id:
// WIN64_ONLY: No need to add frame::arg_reg_save_area_bytes to SP
// since we do a leave anyway.
@@ -935,7 +935,7 @@ OopMapSet* Runtime1::generate_patching(StubAssembler* sasm, address target) {
__ testptr(rax, rax); // have we deoptimized?
__ jump_cc(Assembler::equal,
- RuntimeAddress(Runtime1::entry_for(Runtime1::forward_exception_id)));
+ RuntimeAddress(Runtime1::entry_for(C1StubId::forward_exception_id)));
// the deopt blob expects exceptions in the special fields of
// JavaThread, so copy and clear pending exception.
@@ -1007,7 +1007,7 @@ OopMapSet* Runtime1::generate_patching(StubAssembler* sasm, address target) {
}
-OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
+OopMapSet* Runtime1::generate_code_for(C1StubId id, StubAssembler* sasm) {
// for better readability
const bool must_gc_arguments = true;
@@ -1019,7 +1019,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
// stub code & info for the different stubs
OopMapSet* oop_maps = nullptr;
switch (id) {
- case forward_exception_id:
+ case C1StubId::forward_exception_id:
{
oop_maps = generate_handle_exception(id, sasm);
__ leave();
@@ -1027,19 +1027,19 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
}
break;
- case new_instance_id:
- case fast_new_instance_id:
- case fast_new_instance_init_check_id:
+ case C1StubId::new_instance_id:
+ case C1StubId::fast_new_instance_id:
+ case C1StubId::fast_new_instance_init_check_id:
{
Register klass = rdx; // Incoming
Register obj = rax; // Result
- if (id == new_instance_id) {
+ if (id == C1StubId::new_instance_id) {
__ set_info("new_instance", dont_gc_arguments);
- } else if (id == fast_new_instance_id) {
+ } else if (id == C1StubId::fast_new_instance_id) {
__ set_info("fast new_instance", dont_gc_arguments);
} else {
- assert(id == fast_new_instance_init_check_id, "bad StubID");
+ assert(id == C1StubId::fast_new_instance_init_check_id, "bad C1StubId");
__ set_info("fast new_instance init check", dont_gc_arguments);
}
@@ -1058,7 +1058,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
break;
- case counter_overflow_id:
+ case C1StubId::counter_overflow_id:
{
Register bci = rax, method = rbx;
__ enter();
@@ -1076,14 +1076,14 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
}
break;
- case new_type_array_id:
- case new_object_array_id:
+ case C1StubId::new_type_array_id:
+ case C1StubId::new_object_array_id:
{
Register length = rbx; // Incoming
Register klass = rdx; // Incoming
Register obj = rax; // Result
- if (id == new_type_array_id) {
+ if (id == C1StubId::new_type_array_id) {
__ set_info("new_type_array", dont_gc_arguments);
} else {
__ set_info("new_object_array", dont_gc_arguments);
@@ -1096,7 +1096,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
Register t0 = obj;
__ movl(t0, Address(klass, Klass::layout_helper_offset()));
__ sarl(t0, Klass::_lh_array_tag_shift);
- int tag = ((id == new_type_array_id)
+ int tag = ((id == C1StubId::new_type_array_id)
? Klass::_lh_array_tag_type_value
: Klass::_lh_array_tag_obj_value);
__ cmpl(t0, tag);
@@ -1110,7 +1110,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
__ enter();
OopMap* map = save_live_registers(sasm, 3);
int call_offset;
- if (id == new_type_array_id) {
+ if (id == C1StubId::new_type_array_id) {
call_offset = __ call_RT(obj, noreg, CAST_FROM_FN_PTR(address, new_type_array), klass, length);
} else {
call_offset = __ call_RT(obj, noreg, CAST_FROM_FN_PTR(address, new_object_array), klass, length);
@@ -1128,7 +1128,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
}
break;
- case new_multi_array_id:
+ case C1StubId::new_multi_array_id:
{ StubFrame f(sasm, "new_multi_array", dont_gc_arguments);
// rax,: klass
// rbx,: rank
@@ -1145,7 +1145,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
}
break;
- case register_finalizer_id:
+ case C1StubId::register_finalizer_id:
{
__ set_info("register_finalizer", dont_gc_arguments);
@@ -1185,44 +1185,44 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
}
break;
- case throw_range_check_failed_id:
+ case C1StubId::throw_range_check_failed_id:
{ StubFrame f(sasm, "range_check_failed", dont_gc_arguments);
oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_range_check_exception), true);
}
break;
- case throw_index_exception_id:
+ case C1StubId::throw_index_exception_id:
{ StubFrame f(sasm, "index_range_check_failed", dont_gc_arguments);
oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_index_exception), true);
}
break;
- case throw_div0_exception_id:
+ case C1StubId::throw_div0_exception_id:
{ StubFrame f(sasm, "throw_div0_exception", dont_gc_arguments);
oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_div0_exception), false);
}
break;
- case throw_null_pointer_exception_id:
+ case C1StubId::throw_null_pointer_exception_id:
{ StubFrame f(sasm, "throw_null_pointer_exception", dont_gc_arguments);
oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_null_pointer_exception), false);
}
break;
- case handle_exception_nofpu_id:
- case handle_exception_id:
+ case C1StubId::handle_exception_nofpu_id:
+ case C1StubId::handle_exception_id:
{ StubFrame f(sasm, "handle_exception", dont_gc_arguments);
oop_maps = generate_handle_exception(id, sasm);
}
break;
- case handle_exception_from_callee_id:
+ case C1StubId::handle_exception_from_callee_id:
{ StubFrame f(sasm, "handle_exception_from_callee", dont_gc_arguments);
oop_maps = generate_handle_exception(id, sasm);
}
break;
- case unwind_exception_id:
+ case C1StubId::unwind_exception_id:
{ __ set_info("unwind_exception", dont_gc_arguments);
// note: no stubframe since we are about to leave the current
// activation and we are calling a leaf VM function only.
@@ -1230,7 +1230,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
}
break;
- case throw_array_store_exception_id:
+ case C1StubId::throw_array_store_exception_id:
{ StubFrame f(sasm, "throw_array_store_exception", dont_gc_arguments);
// tos + 0: link
// + 1: return address
@@ -1238,19 +1238,19 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
}
break;
- case throw_class_cast_exception_id:
+ case C1StubId::throw_class_cast_exception_id:
{ StubFrame f(sasm, "throw_class_cast_exception", dont_gc_arguments);
oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_class_cast_exception), true);
}
break;
- case throw_incompatible_class_change_error_id:
+ case C1StubId::throw_incompatible_class_change_error_id:
{ StubFrame f(sasm, "throw_incompatible_class_cast_exception", dont_gc_arguments);
oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_incompatible_class_change_error), false);
}
break;
- case slow_subtype_check_id:
+ case C1StubId::slow_subtype_check_id:
{
// Typical calling sequence:
// __ push(klass_RInfo); // object klass or other subclass
@@ -1303,10 +1303,10 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
}
break;
- case monitorenter_nofpu_id:
+ case C1StubId::monitorenter_nofpu_id:
save_fpu_registers = false;
// fall through
- case monitorenter_id:
+ case C1StubId::monitorenter_id:
{
StubFrame f(sasm, "monitorenter", dont_gc_arguments);
OopMap* map = save_live_registers(sasm, 3, save_fpu_registers);
@@ -1324,10 +1324,10 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
}
break;
- case monitorexit_nofpu_id:
+ case C1StubId::monitorexit_nofpu_id:
save_fpu_registers = false;
// fall through
- case monitorexit_id:
+ case C1StubId::monitorexit_id:
{
StubFrame f(sasm, "monitorexit", dont_gc_arguments);
OopMap* map = save_live_registers(sasm, 2, save_fpu_registers);
@@ -1347,7 +1347,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
}
break;
- case deoptimize_id:
+ case C1StubId::deoptimize_id:
{
StubFrame f(sasm, "deoptimize", dont_gc_arguments);
const int num_rt_args = 2; // thread, trap_request
@@ -1364,35 +1364,35 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
}
break;
- case access_field_patching_id:
+ case C1StubId::access_field_patching_id:
{ StubFrame f(sasm, "access_field_patching", dont_gc_arguments);
// we should set up register map
oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, access_field_patching));
}
break;
- case load_klass_patching_id:
+ case C1StubId::load_klass_patching_id:
{ StubFrame f(sasm, "load_klass_patching", dont_gc_arguments);
// we should set up register map
oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_klass_patching));
}
break;
- case load_mirror_patching_id:
+ case C1StubId::load_mirror_patching_id:
{ StubFrame f(sasm, "load_mirror_patching", dont_gc_arguments);
// we should set up register map
oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_mirror_patching));
}
break;
- case load_appendix_patching_id:
+ case C1StubId::load_appendix_patching_id:
{ StubFrame f(sasm, "load_appendix_patching", dont_gc_arguments);
// we should set up register map
oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_appendix_patching));
}
break;
- case dtrace_object_alloc_id:
+ case C1StubId::dtrace_object_alloc_id:
{ // rax,: object
StubFrame f(sasm, "dtrace_object_alloc", dont_gc_arguments);
// we can't gc here so skip the oopmap but make sure that all
@@ -1407,7 +1407,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
}
break;
- case fpu2long_stub_id:
+ case C1StubId::fpu2long_stub_id:
{
#ifdef _LP64
Label done;
@@ -1496,7 +1496,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
}
break;
- case predicate_failed_trap_id:
+ case C1StubId::predicate_failed_trap_id:
{
StubFrame f(sasm, "predicate_failed_trap", dont_gc_arguments);
diff --git a/src/hotspot/cpu/x86/c2_CodeStubs_x86.cpp b/src/hotspot/cpu/x86/c2_CodeStubs_x86.cpp
index 1990488d8a0df..44f897529e7ce 100644
--- a/src/hotspot/cpu/x86/c2_CodeStubs_x86.cpp
+++ b/src/hotspot/cpu/x86/c2_CodeStubs_x86.cpp
@@ -80,8 +80,6 @@ int C2FastUnlockLightweightStub::max_size() const {
void C2FastUnlockLightweightStub::emit(C2_MacroAssembler& masm) {
assert(_t == rax, "must be");
- Label restore_held_monitor_count_and_slow_path;
-
{ // Restore lock-stack and handle the unlock in runtime.
__ bind(_push_and_slow_path);
@@ -91,61 +89,9 @@ void C2FastUnlockLightweightStub::emit(C2_MacroAssembler& masm) {
__ movptr(Address(_thread, _t), _obj);
#endif
__ addl(Address(_thread, JavaThread::lock_stack_top_offset()), oopSize);
- }
-
- { // Restore held monitor count and slow path.
-
- __ bind(restore_held_monitor_count_and_slow_path);
- __ bind(_slow_path);
- // Restore held monitor count.
- __ increment(Address(_thread, JavaThread::held_monitor_count_offset()));
- // increment will always result in ZF = 0 (no overflows).
+ // addl will always result in ZF = 0 (no overflows).
__ jmp(slow_path_continuation());
}
-
- { // Handle monitor medium path.
-
- __ bind(_check_successor);
-
- Label fix_zf_and_unlocked;
- const Register monitor = _mark;
-
-#ifndef _LP64
- __ jmpb(restore_held_monitor_count_and_slow_path);
-#else // _LP64
- const ByteSize monitor_tag = in_ByteSize(UseObjectMonitorTable ? 0 : checked_cast<int>(markWord::monitor_value));
- const Address succ_address(monitor, ObjectMonitor::succ_offset() - monitor_tag);
- const Address owner_address(monitor, ObjectMonitor::owner_offset() - monitor_tag);
-
- // successor null check.
- __ cmpptr(succ_address, NULL_WORD);
- __ jccb(Assembler::equal, restore_held_monitor_count_and_slow_path);
-
- // Release lock.
- __ movptr(owner_address, NULL_WORD);
-
- // Fence.
- // Instead of MFENCE we use a dummy locked add of 0 to the top-of-stack.
- __ lock(); __ addl(Address(rsp, 0), 0);
-
- // Recheck successor.
- __ cmpptr(succ_address, NULL_WORD);
- // Observed a successor after the release -> fence we have handed off the monitor
- __ jccb(Assembler::notEqual, fix_zf_and_unlocked);
-
- // Try to relock, if it fails the monitor has been handed over
- // TODO: Caveat, this may fail due to deflation, which does
- // not handle the monitor handoff. Currently only works
- // due to the responsible thread.
- __ xorptr(rax, rax);
- __ lock(); __ cmpxchgptr(_thread, owner_address);
- __ jccb (Assembler::equal, restore_held_monitor_count_and_slow_path);
-#endif
-
- __ bind(fix_zf_and_unlocked);
- __ xorl(rax, rax);
- __ jmp(unlocked_continuation());
- }
}
#undef __
diff --git a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp
index c2801a791cb5a..839745f76ec6a 100644
--- a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp
+++ b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp
@@ -459,87 +459,43 @@ void C2_MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register t
// IA32's memory-model is SPO, so STs are ordered with respect to
// each other and there's no need for an explicit barrier (fence).
// See also http://gee.cs.oswego.edu/dl/jmm/cookbook.html.
-#ifndef _LP64
- // Note that we could employ various encoding schemes to reduce
- // the number of loads below (currently 4) to just 2 or 3.
- // Refer to the comments in synchronizer.cpp.
- // In practice the chain of fetches doesn't seem to impact performance, however.
- xorptr(boxReg, boxReg);
- orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)));
- jccb (Assembler::notZero, DONE_LABEL);
- movptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)));
- orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)));
- jccb (Assembler::notZero, DONE_LABEL);
- movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), NULL_WORD);
- jmpb (DONE_LABEL);
-#else // _LP64
- // It's inflated
- Label CheckSucc, LNotRecursive, LSuccess, LGoSlowPath;
+ Label LSuccess, LNotRecursive;
cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)), 0);
jccb(Assembler::equal, LNotRecursive);
// Recursive inflated unlock
- decq(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)));
+ decrement(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)));
jmpb(LSuccess);
bind(LNotRecursive);
- movptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)));
- orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)));
- jccb (Assembler::notZero, CheckSucc);
- // Without cast to int32_t this style of movptr will destroy r10 which is typically obj.
+
+ // Set owner to null.
+ // Release to satisfy the JMM
movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), NULL_WORD);
- jmpb (DONE_LABEL);
+ // We need a full fence after clearing owner to avoid stranding.
+ // StoreLoad achieves this.
+ membar(StoreLoad);
- // Try to avoid passing control into the slow_path ...
- bind (CheckSucc);
+ // Check if the entry lists are empty.
+ movptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)));
+ orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)));
+ jccb(Assembler::zero, LSuccess); // If so we are done.
- // The following optional optimization can be elided if necessary
- // Effectively: if (succ == null) goto slow path
- // The code reduces the window for a race, however,
- // and thus benefits performance.
+ // Check if there is a successor.
cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), NULL_WORD);
- jccb (Assembler::zero, LGoSlowPath);
-
- xorptr(boxReg, boxReg);
- // Without cast to int32_t this style of movptr will destroy r10 which is typically obj.
- movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), NULL_WORD);
+ jccb(Assembler::notZero, LSuccess); // If so we are done.
- // Memory barrier/fence
- // Dekker pivot point -- fulcrum : ST Owner; MEMBAR; LD Succ
- // Instead of MFENCE we use a dummy locked add of 0 to the top-of-stack.
- // This is faster on Nehalem and AMD Shanghai/Barcelona.
- // See https://blogs.oracle.com/dave/entry/instruction_selection_for_volatile_fences
- // We might also restructure (ST Owner=0;barrier;LD _Succ) to
- // (mov box,0; xchgq box, &m->Owner; LD _succ) .
- lock(); addl(Address(rsp, 0), 0);
+ // Save the monitor pointer in the current thread, so we can try to
+ // reacquire the lock in SharedRuntime::monitor_exit_helper().
+ andptr(tmpReg, ~(int32_t)markWord::monitor_value);
+#ifndef _LP64
+ get_thread(boxReg);
+ movptr(Address(boxReg, JavaThread::unlocked_inflated_monitor_offset()), tmpReg);
+#else // _LP64
+ movptr(Address(r15_thread, JavaThread::unlocked_inflated_monitor_offset()), tmpReg);
+#endif
- cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), NULL_WORD);
- jccb (Assembler::notZero, LSuccess);
-
- // Rare inopportune interleaving - race.
- // The successor vanished in the small window above.
- // The lock is contended -- (cxq|EntryList) != null -- and there's no apparent successor.
- // We need to ensure progress and succession.
- // Try to reacquire the lock.
- // If that fails then the new owner is responsible for succession and this
- // thread needs to take no further action and can exit via the fast path (success).
- // If the re-acquire succeeds then pass control into the slow path.
- // As implemented, this latter mode is horrible because we generated more
- // coherence traffic on the lock *and* artificially extended the critical section
- // length while by virtue of passing control into the slow path.
-
- // box is really RAX -- the following CMPXCHG depends on that binding
- // cmpxchg R,[M] is equivalent to rax = CAS(M,rax,R)
- lock();
- cmpxchgptr(r15_thread, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
- // There's no successor so we tried to regrab the lock.
- // If that didn't work, then another thread grabbed the
- // lock so we're done (and exit was a success).
- jccb (Assembler::notEqual, LSuccess);
- // Intentional fall-through into slow path
-
- bind (LGoSlowPath);
orl (boxReg, 1); // set ICC.ZF=0 to indicate failure
jmpb (DONE_LABEL);
@@ -547,7 +503,6 @@ void C2_MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register t
testl (boxReg, 0); // set ICC.ZF=1 to indicate success
jmpb (DONE_LABEL);
-#endif
if (LockingMode == LM_LEGACY) {
bind (Stacked);
movptr(tmpReg, Address (boxReg, 0)); // re-fetch
@@ -744,10 +699,7 @@ void C2_MacroAssembler::fast_unlock_lightweight(Register obj, Register reg_rax,
// Handle inflated monitor.
Label inflated, inflated_check_lock_stack;
// Finish fast unlock successfully. MUST jump with ZF == 1
- Label unlocked;
-
- // Assume success.
- decrement(Address(thread, JavaThread::held_monitor_count_offset()));
+ Label unlocked, slow_path;
const Register mark = t;
const Register monitor = t;
@@ -763,8 +715,6 @@ void C2_MacroAssembler::fast_unlock_lightweight(Register obj, Register reg_rax,
}
Label& push_and_slow_path = stub == nullptr ? dummy : stub->push_and_slow_path();
- Label& check_successor = stub == nullptr ? dummy : stub->check_successor();
- Label& slow_path = stub == nullptr ? dummy : stub->slow_path();
{ // Lightweight Unlock
@@ -839,6 +789,7 @@ void C2_MacroAssembler::fast_unlock_lightweight(Register obj, Register reg_rax,
const ByteSize monitor_tag = in_ByteSize(UseObjectMonitorTable ? 0 : checked_cast<int>(markWord::monitor_value));
const Address recursions_address{monitor, ObjectMonitor::recursions_offset() - monitor_tag};
const Address cxq_address{monitor, ObjectMonitor::cxq_offset() - monitor_tag};
+ const Address succ_address{monitor, ObjectMonitor::succ_offset() - monitor_tag};
const Address EntryList_address{monitor, ObjectMonitor::EntryList_offset() - monitor_tag};
const Address owner_address{monitor, ObjectMonitor::owner_offset() - monitor_tag};
@@ -846,27 +797,42 @@ void C2_MacroAssembler::fast_unlock_lightweight(Register obj, Register reg_rax,
// Check if recursive.
cmpptr(recursions_address, 0);
- jccb(Assembler::notEqual, recursive);
+ jccb(Assembler::notZero, recursive);
+
+ // Set owner to null.
+ // Release to satisfy the JMM
+ movptr(owner_address, NULL_WORD);
+ // We need a full fence after clearing owner to avoid stranding.
+ // StoreLoad achieves this.
+ membar(StoreLoad);
// Check if the entry lists are empty.
movptr(reg_rax, cxq_address);
orptr(reg_rax, EntryList_address);
- jcc(Assembler::notZero, check_successor);
+ jccb(Assembler::zero, unlocked); // If so we are done.
- // Release lock.
- movptr(owner_address, NULL_WORD);
- jmpb(unlocked);
+ // Check if there is a successor.
+ cmpptr(succ_address, NULL_WORD);
+ jccb(Assembler::notZero, unlocked); // If so we are done.
+
+ // Save the monitor pointer in the current thread, so we can try to
+ // reacquire the lock in SharedRuntime::monitor_exit_helper().
+ if (!UseObjectMonitorTable) {
+ andptr(monitor, ~(int32_t)markWord::monitor_value);
+ }
+ movptr(Address(thread, JavaThread::unlocked_inflated_monitor_offset()), monitor);
+
+ testl(monitor, monitor); // Fast Unlock ZF = 0
+ jmpb(slow_path);
// Recursive unlock.
bind(recursive);
decrement(recursions_address);
- xorl(t, t);
}
bind(unlocked);
- if (stub != nullptr) {
- bind(stub->unlocked_continuation());
- }
+ decrement(Address(thread, JavaThread::held_monitor_count_offset()));
+ xorl(t, t); // Fast Unlock ZF = 1
#ifdef ASSERT
// Check that unlocked label is reached with ZF set.
@@ -875,6 +841,7 @@ void C2_MacroAssembler::fast_unlock_lightweight(Register obj, Register reg_rax,
stop("Fast Unlock ZF != 1");
#endif
+ bind(slow_path);
if (stub != nullptr) {
bind(stub->slow_path_continuation());
}
diff --git a/src/hotspot/cpu/x86/gc/g1/g1BarrierSetAssembler_x86.cpp b/src/hotspot/cpu/x86/gc/g1/g1BarrierSetAssembler_x86.cpp
index b52be627776b8..b6be4012519a0 100644
--- a/src/hotspot/cpu/x86/gc/g1/g1BarrierSetAssembler_x86.cpp
+++ b/src/hotspot/cpu/x86/gc/g1/g1BarrierSetAssembler_x86.cpp
@@ -38,7 +38,10 @@
#include "c1/c1_LIRAssembler.hpp"
#include "c1/c1_MacroAssembler.hpp"
#include "gc/g1/c1/g1BarrierSetC1.hpp"
-#endif
+#endif // COMPILER1
+#ifdef COMPILER2
+#include "gc/g1/c2/g1BarrierSetC2.hpp"
+#endif // COMPILER2
#define __ masm->
@@ -160,6 +163,56 @@ void G1BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorator
}
}
+static void generate_queue_insertion(MacroAssembler* masm, ByteSize index_offset, ByteSize buffer_offset, Label& runtime,
+ const Register thread, const Register value, const Register temp) {
+ // This code assumes that buffer index is pointer sized.
+ STATIC_ASSERT(in_bytes(SATBMarkQueue::byte_width_of_index()) == sizeof(intptr_t));
+ // Can we store a value in the given thread's buffer?
+ // (The index field is typed as size_t.)
+ __ movptr(temp, Address(thread, in_bytes(index_offset))); // temp := *(index address)
+ __ testptr(temp, temp); // index == 0?
+ __ jcc(Assembler::zero, runtime); // jump to runtime if index == 0 (full buffer)
+ // The buffer is not full, store value into it.
+ __ subptr(temp, wordSize); // temp := next index
+ __ movptr(Address(thread, in_bytes(index_offset)), temp); // *(index address) := next index
+ __ addptr(temp, Address(thread, in_bytes(buffer_offset))); // temp := buffer address + next index
+ __ movptr(Address(temp, 0), value); // *(buffer address + next index) := value
+}
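+// In rough pseudo-C, the insertion above amounts to (illustrative sketch only):
+//   if (thread->index == 0) goto runtime;          // buffer full, call into runtime
+//   thread->index -= wordSize;
+//   *(thread->buffer + thread->index) = value;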
+
+static void generate_pre_barrier_fast_path(MacroAssembler* masm,
+ const Register thread) {
+ Address in_progress(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset()));
+ // Is marking active?
+ if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) {
+ __ cmpl(in_progress, 0);
+ } else {
+ assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption");
+ __ cmpb(in_progress, 0);
+ }
+}
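+// Note: the fast path only sets the condition flags; each caller decides where
+// to branch (the inline barrier falls through to the slow path, the C2 barrier
+// jumps to its stub).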
+
+static void generate_pre_barrier_slow_path(MacroAssembler* masm,
+ const Register obj,
+ const Register pre_val,
+ const Register thread,
+ const Register tmp,
+ Label& done,
+ Label& runtime) {
+ // Do we need to load the previous value?
+ if (obj != noreg) {
+ __ load_heap_oop(pre_val, Address(obj, 0), noreg, noreg, AS_RAW);
+ }
+ // Is the previous value null?
+ __ cmpptr(pre_val, NULL_WORD);
+ __ jcc(Assembler::equal, done);
+ generate_queue_insertion(masm,
+ G1ThreadLocalData::satb_mark_queue_index_offset(),
+ G1ThreadLocalData::satb_mark_queue_buffer_offset(),
+ runtime,
+ thread, pre_val, tmp);
+ __ jmp(done);
+}
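+// 'done' and 'runtime' are supplied by the caller: the inline barrier and the
+// C2 stub share this slow path but each emits its own runtime call.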
+
void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm,
Register obj,
Register pre_val,
@@ -185,43 +238,10 @@ void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm,
assert(pre_val != rax, "check this code");
}
- Address in_progress(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset()));
- Address index(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset()));
- Address buffer(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset()));
-
- // Is marking active?
- if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) {
- __ cmpl(in_progress, 0);
- } else {
- assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption");
- __ cmpb(in_progress, 0);
- }
- __ jcc(Assembler::equal, done);
-
- // Do we need to load the previous value?
- if (obj != noreg) {
- __ load_heap_oop(pre_val, Address(obj, 0), noreg, noreg, AS_RAW);
- }
-
- // Is the previous value null?
- __ cmpptr(pre_val, NULL_WORD);
+ generate_pre_barrier_fast_path(masm, thread);
+ // If marking is not active (*(mark queue active address) == 0), jump to done
__ jcc(Assembler::equal, done);
-
- // Can we store original value in the thread's buffer?
- // Is index == 0?
- // (The index field is typed as size_t.)
-
- __ movptr(tmp, index); // tmp := *index_adr
- __ cmpptr(tmp, 0); // tmp == 0?
- __ jcc(Assembler::equal, runtime); // If yes, goto runtime
-
- __ subptr(tmp, wordSize); // tmp := tmp - wordSize
- __ movptr(index, tmp); // *index_adr := tmp
- __ addptr(tmp, buffer); // tmp := tmp + *buffer_adr
-
- // Record the previous value
- __ movptr(Address(tmp, 0), pre_val);
- __ jmp(done);
+ generate_pre_barrier_slow_path(masm, obj, pre_val, thread, tmp, done, runtime);
__ bind(runtime);
@@ -263,6 +283,54 @@ void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm,
__ bind(done);
}
+static void generate_post_barrier_fast_path(MacroAssembler* masm,
+ const Register store_addr,
+ const Register new_val,
+ const Register tmp,
+ const Register tmp2,
+ Label& done,
+ bool new_val_may_be_null) {
+ CardTableBarrierSet* ct = barrier_set_cast<CardTableBarrierSet*>(BarrierSet::barrier_set());
+ // Does store cross heap regions?
+ __ movptr(tmp, store_addr); // tmp := store address
+ __ xorptr(tmp, new_val); // tmp := store address ^ new value
+ __ shrptr(tmp, G1HeapRegion::LogOfHRGrainBytes); // ((store address ^ new value) >> LogOfHRGrainBytes) == 0?
+ __ jcc(Assembler::equal, done);
+ // Crosses regions, storing null?
+ if (new_val_may_be_null) {
+ __ cmpptr(new_val, NULL_WORD); // new value == null?
+ __ jcc(Assembler::equal, done);
+ }
+ // Storing region crossing non-null, is card young?
+ __ movptr(tmp, store_addr); // tmp := store address
+ __ shrptr(tmp, CardTable::card_shift()); // tmp := card address relative to card table base
+ // Do not use ExternalAddress to load 'byte_map_base', since 'byte_map_base' is NOT
+ // a valid address and therefore is not properly handled by the relocation code.
+ __ movptr(tmp2, (intptr_t)ct->card_table()->byte_map_base()); // tmp2 := card table base address
+ __ addptr(tmp, tmp2); // tmp := card address
+ __ cmpb(Address(tmp, 0), G1CardTable::g1_young_card_val()); // *(card address) == young_card_val?
+}
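+// Roughly (illustrative sketch): done if the store does not cross regions or
+// stores null; otherwise tmp is left holding the card address and ZF is set
+// iff the card is still young. The caller consumes the flags.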
+
+static void generate_post_barrier_slow_path(MacroAssembler* masm,
+ const Register thread,
+ const Register tmp,
+ const Register tmp2,
+ Label& done,
+ Label& runtime) {
+ __ membar(Assembler::Membar_mask_bits(Assembler::StoreLoad)); // StoreLoad membar
+ __ cmpb(Address(tmp, 0), G1CardTable::dirty_card_val()); // *(card address) == dirty_card_val?
+ __ jcc(Assembler::equal, done);
+ // Storing a region crossing, non-null oop, card is clean.
+ // Dirty card and log.
+ __ movb(Address(tmp, 0), G1CardTable::dirty_card_val()); // *(card address) := dirty_card_val
+ generate_queue_insertion(masm,
+ G1ThreadLocalData::dirty_card_queue_index_offset(),
+ G1ThreadLocalData::dirty_card_queue_buffer_offset(),
+ runtime,
+ thread, tmp, tmp2);
+ __ jmp(done);
+}
+
void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm,
Register store_addr,
Register new_val,
@@ -273,74 +341,125 @@ void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm,
assert(thread == r15_thread, "must be");
#endif // _LP64
- Address queue_index(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset()));
- Address buffer(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset()));
-
- CardTableBarrierSet* ct =
- barrier_set_cast<CardTableBarrierSet*>(BarrierSet::barrier_set());
-
Label done;
Label runtime;
- // Does store cross heap regions?
-
- __ movptr(tmp, store_addr);
- __ xorptr(tmp, new_val);
- __ shrptr(tmp, G1HeapRegion::LogOfHRGrainBytes);
+ generate_post_barrier_fast_path(masm, store_addr, new_val, tmp, tmp2, done, true /* new_val_may_be_null */);
+ // If card is young, jump to done
__ jcc(Assembler::equal, done);
+ generate_post_barrier_slow_path(masm, thread, tmp, tmp2, done, runtime);
- // crosses regions, storing null?
+ __ bind(runtime);
+ // save the live input values
+ RegSet saved = RegSet::of(store_addr NOT_LP64(COMMA thread));
+ __ push_set(saved);
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), tmp, thread);
+ __ pop_set(saved);
- __ cmpptr(new_val, NULL_WORD);
- __ jcc(Assembler::equal, done);
+ __ bind(done);
+}
- // storing region crossing non-null, is card already dirty?
+#if defined(COMPILER2)
- const Register card_addr = tmp;
- const Register cardtable = tmp2;
+static void generate_c2_barrier_runtime_call(MacroAssembler* masm, G1BarrierStubC2* stub, const Register arg, const address runtime_path) {
+#ifdef _LP64
+ SaveLiveRegisters save_registers(masm, stub);
+ if (c_rarg0 != arg) {
+ __ mov(c_rarg0, arg);
+ }
+ __ mov(c_rarg1, r15_thread);
+ // rax is a caller-saved, non-argument-passing register, so it does not
+ // interfere with c_rarg0 or c_rarg1. If it contained any live value before
+ // entering this stub, it is saved at this point, and restored after the
+ // call. If it did not contain any live value, it is free to be used. In
+ // either case, it is safe to use it here as a call scratch register.
+ __ call(RuntimeAddress(runtime_path), rax);
+#else
+ Unimplemented();
+#endif // _LP64
+}
- __ movptr(card_addr, store_addr);
- __ shrptr(card_addr, CardTable::card_shift());
- // Do not use ExternalAddress to load 'byte_map_base', since 'byte_map_base' is NOT
- // a valid address and therefore is not properly handled by the relocation code.
- __ movptr(cardtable, (intptr_t)ct->card_table()->byte_map_base());
- __ addptr(card_addr, cardtable);
+void G1BarrierSetAssembler::g1_write_barrier_pre_c2(MacroAssembler* masm,
+ Register obj,
+ Register pre_val,
+ Register thread,
+ Register tmp,
+ G1PreBarrierStubC2* stub) {
+#ifdef _LP64
+ assert(thread == r15_thread, "must be");
+#endif // _LP64
+ assert(pre_val != noreg, "check this code");
+ if (obj != noreg) {
+ assert_different_registers(obj, pre_val, tmp);
+ }
- __ cmpb(Address(card_addr, 0), G1CardTable::g1_young_card_val());
- __ jcc(Assembler::equal, done);
+ stub->initialize_registers(obj, pre_val, thread, tmp);
- __ membar(Assembler::Membar_mask_bits(Assembler::StoreLoad));
- __ cmpb(Address(card_addr, 0), G1CardTable::dirty_card_val());
- __ jcc(Assembler::equal, done);
+ generate_pre_barrier_fast_path(masm, thread);
+ // If marking is active (*(mark queue active address) != 0), jump to stub (slow path)
+ __ jcc(Assembler::notEqual, *stub->entry());
+ __ bind(*stub->continuation());
+}
- // storing a region crossing, non-null oop, card is clean.
- // dirty card and log.
+void G1BarrierSetAssembler::generate_c2_pre_barrier_stub(MacroAssembler* masm,
+ G1PreBarrierStubC2* stub) const {
+ Assembler::InlineSkippedInstructionsCounter skip_counter(masm);
+ Label runtime;
+ Register obj = stub->obj();
+ Register pre_val = stub->pre_val();
+ Register thread = stub->thread();
+ Register tmp = stub->tmp1();
+ assert(stub->tmp2() == noreg, "not needed in this platform");
- __ movb(Address(card_addr, 0), G1CardTable::dirty_card_val());
+ __ bind(*stub->entry());
+ generate_pre_barrier_slow_path(masm, obj, pre_val, thread, tmp, *stub->continuation(), runtime);
- // The code below assumes that buffer index is pointer sized.
- STATIC_ASSERT(in_bytes(G1DirtyCardQueue::byte_width_of_index()) == sizeof(intptr_t));
+ __ bind(runtime);
+ generate_c2_barrier_runtime_call(masm, stub, pre_val, CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry));
+ __ jmp(*stub->continuation());
+}
- __ movptr(tmp2, queue_index);
- __ testptr(tmp2, tmp2);
- __ jcc(Assembler::zero, runtime);
- __ subptr(tmp2, wordSize);
- __ movptr(queue_index, tmp2);
- __ addptr(tmp2, buffer);
- __ movptr(Address(tmp2, 0), card_addr);
- __ jmp(done);
+void G1BarrierSetAssembler::g1_write_barrier_post_c2(MacroAssembler* masm,
+ Register store_addr,
+ Register new_val,
+ Register thread,
+ Register tmp,
+ Register tmp2,
+ G1PostBarrierStubC2* stub) {
+#ifdef _LP64
+ assert(thread == r15_thread, "must be");
+#endif // _LP64
- __ bind(runtime);
- // save the live input values
- RegSet saved = RegSet::of(store_addr NOT_LP64(COMMA thread));
- __ push_set(saved);
- __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), card_addr, thread);
- __ pop_set(saved);
+ stub->initialize_registers(thread, tmp, tmp2);
- __ bind(done);
+ bool new_val_may_be_null = (stub->barrier_data() & G1C2BarrierPostNotNull) == 0;
+ generate_post_barrier_fast_path(masm, store_addr, new_val, tmp, tmp2, *stub->continuation(), new_val_may_be_null);
+ // If card is not young, jump to stub (slow path)
+ __ jcc(Assembler::notEqual, *stub->entry());
+
+ __ bind(*stub->continuation());
+}
+
+void G1BarrierSetAssembler::generate_c2_post_barrier_stub(MacroAssembler* masm,
+ G1PostBarrierStubC2* stub) const {
+ Assembler::InlineSkippedInstructionsCounter skip_counter(masm);
+ Label runtime;
+ Register thread = stub->thread();
+ Register tmp = stub->tmp1(); // tmp holds the card address.
+ Register tmp2 = stub->tmp2();
+ assert(stub->tmp3() == noreg, "not needed in this platform");
+
+ __ bind(*stub->entry());
+ generate_post_barrier_slow_path(masm, thread, tmp, tmp2, *stub->continuation(), runtime);
+
+ __ bind(runtime);
+ generate_c2_barrier_runtime_call(masm, stub, tmp, CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry));
+ __ jmp(*stub->continuation());
}
+#endif // COMPILER2
+
void G1BarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
Address dst, Register val, Register tmp1, Register tmp2, Register tmp3) {
bool in_heap = (decorators & IN_HEAP) != 0;
diff --git a/src/hotspot/cpu/x86/gc/g1/g1BarrierSetAssembler_x86.hpp b/src/hotspot/cpu/x86/gc/g1/g1BarrierSetAssembler_x86.hpp
index a5695f5657a4a..4dbb1efd885ea 100644
--- a/src/hotspot/cpu/x86/gc/g1/g1BarrierSetAssembler_x86.hpp
+++ b/src/hotspot/cpu/x86/gc/g1/g1BarrierSetAssembler_x86.hpp
@@ -32,6 +32,9 @@ class LIR_Assembler;
class StubAssembler;
class G1PreBarrierStub;
class G1PostBarrierStub;
+class G1BarrierStubC2;
+class G1PreBarrierStubC2;
+class G1PostBarrierStubC2;
class G1BarrierSetAssembler: public ModRefBarrierSetAssembler {
protected:
@@ -65,6 +68,26 @@ class G1BarrierSetAssembler: public ModRefBarrierSetAssembler {
virtual void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
Register dst, Address src, Register tmp1, Register tmp_thread);
+
+#ifdef COMPILER2
+ void g1_write_barrier_pre_c2(MacroAssembler* masm,
+ Register obj,
+ Register pre_val,
+ Register thread,
+ Register tmp,
+ G1PreBarrierStubC2* c2_stub);
+ void generate_c2_pre_barrier_stub(MacroAssembler* masm,
+ G1PreBarrierStubC2* stub) const;
+ void g1_write_barrier_post_c2(MacroAssembler* masm,
+ Register store_addr,
+ Register new_val,
+ Register thread,
+ Register tmp,
+ Register tmp2,
+ G1PostBarrierStubC2* c2_stub);
+ void generate_c2_post_barrier_stub(MacroAssembler* masm,
+ G1PostBarrierStubC2* stub) const;
+#endif // COMPILER2
};
#endif // CPU_X86_GC_G1_G1BARRIERSETASSEMBLER_X86_HPP
diff --git a/src/hotspot/cpu/x86/gc/g1/g1_x86_64.ad b/src/hotspot/cpu/x86/gc/g1/g1_x86_64.ad
new file mode 100644
index 0000000000000..8c1559f90f46d
--- /dev/null
+++ b/src/hotspot/cpu/x86/gc/g1/g1_x86_64.ad
@@ -0,0 +1,371 @@
+//
+// Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+//
+// This code is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License version 2 only, as
+// published by the Free Software Foundation.
+//
+// This code is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+// version 2 for more details (a copy is included in the LICENSE file that
+// accompanied this code).
+//
+// You should have received a copy of the GNU General Public License version
+// 2 along with this work; if not, write to the Free Software Foundation,
+// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+//
+// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+// or visit www.oracle.com if you need additional information or have any
+// questions.
+//
+
+source_hpp %{
+
+#include "gc/g1/c2/g1BarrierSetC2.hpp"
+#include "gc/shared/gc_globals.hpp"
+
+%}
+
+source %{
+
+#include "gc/g1/g1BarrierSetAssembler_x86.hpp"
+#include "gc/g1/g1BarrierSetRuntime.hpp"
+
+static void write_barrier_pre(MacroAssembler* masm,
+ const MachNode* node,
+ Register obj,
+ Register pre_val,
+ Register tmp,
+ RegSet preserve = RegSet(),
+ RegSet no_preserve = RegSet()) {
+ if (!G1PreBarrierStubC2::needs_barrier(node)) {
+ return;
+ }
+ Assembler::InlineSkippedInstructionsCounter skip_counter(masm);
+ G1BarrierSetAssembler* g1_asm = static_cast<G1BarrierSetAssembler*>(BarrierSet::barrier_set()->barrier_set_assembler());
+ G1PreBarrierStubC2* const stub = G1PreBarrierStubC2::create(node);
+ for (RegSetIterator<Register> reg = preserve.begin(); *reg != noreg; ++reg) {
+ stub->preserve(*reg);
+ }
+ for (RegSetIterator<Register> reg = no_preserve.begin(); *reg != noreg; ++reg) {
+ stub->dont_preserve(*reg);
+ }
+ g1_asm->g1_write_barrier_pre_c2(masm, obj, pre_val, r15_thread, tmp, stub);
+}
+
+static void write_barrier_post(MacroAssembler* masm,
+ const MachNode* node,
+ Register store_addr,
+ Register new_val,
+ Register tmp1,
+ Register tmp2) {
+ if (!G1PostBarrierStubC2::needs_barrier(node)) {
+ return;
+ }
+ Assembler::InlineSkippedInstructionsCounter skip_counter(masm);
+ G1BarrierSetAssembler* g1_asm = static_cast<G1BarrierSetAssembler*>(BarrierSet::barrier_set()->barrier_set_assembler());
+ G1PostBarrierStubC2* const stub = G1PostBarrierStubC2::create(node);
+ g1_asm->g1_write_barrier_post_c2(masm, store_addr, new_val, r15_thread, tmp1, tmp2, stub);
+}
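+
+// Both helpers above return immediately when the node carries no barrier data;
+// otherwise they emit the inline fast path plus an out-of-line barrier stub,
+// so the instructs below only select registers and place the store between them.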
+
+%}
+
+instruct g1StoreP(memory mem, any_RegP src, rRegP tmp1, rRegP tmp2, rRegP tmp3, rFlagsReg cr)
+%{
+ predicate(UseG1GC && n->as_Store()->barrier_data() != 0);
+ match(Set mem (StoreP mem src));
+ effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);
+ ins_cost(125); // XXX
+ format %{ "movq $mem, $src\t# ptr" %}
+ ins_encode %{
+ // Materialize the store address internally (as opposed to defining 'mem' as
+ // an indirect memory operand) to reduce the overhead of LCM when processing
+ // large basic blocks with many stores. Such basic blocks arise, for
+ // instance, from static initializations of large String arrays.
+ // The same holds for g1StoreN and g1EncodePAndStoreN.
+ __ lea($tmp1$$Register, $mem$$Address);
+ write_barrier_pre(masm, this,
+ $tmp1$$Register /* obj */,
+ $tmp2$$Register /* pre_val */,
+ $tmp3$$Register /* tmp */,
+ RegSet::of($tmp1$$Register, $src$$Register) /* preserve */);
+ __ movq(Address($tmp1$$Register, 0), $src$$Register);
+ write_barrier_post(masm, this,
+ $tmp1$$Register /* store_addr */,
+ $src$$Register /* new_val */,
+ $tmp3$$Register /* tmp1 */,
+ $tmp2$$Register /* tmp2 */);
+ %}
+ ins_pipe(ialu_mem_reg);
+%}
+
+instruct g1StoreN(memory mem, rRegN src, rRegP tmp1, rRegP tmp2, rRegP tmp3, rFlagsReg cr)
+%{
+ predicate(UseG1GC && n->as_Store()->barrier_data() != 0);
+ match(Set mem (StoreN mem src));
+ effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);
+ ins_cost(125); // XXX
+ format %{ "movl $mem, $src\t# ptr" %}
+ ins_encode %{
+ __ lea($tmp1$$Register, $mem$$Address);
+ write_barrier_pre(masm, this,
+ $tmp1$$Register /* obj */,
+ $tmp2$$Register /* pre_val */,
+ $tmp3$$Register /* tmp */,
+ RegSet::of($tmp1$$Register, $src$$Register) /* preserve */);
+ __ movl(Address($tmp1$$Register, 0), $src$$Register);
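+    // The post barrier expects an uncompressed oop, so decode a copy of $src,
+    // but only when a post barrier is actually going to be emitted.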
+ if ((barrier_data() & G1C2BarrierPost) != 0) {
+ __ movl($tmp2$$Register, $src$$Register);
+ if ((barrier_data() & G1C2BarrierPostNotNull) == 0) {
+ __ decode_heap_oop($tmp2$$Register);
+ } else {
+ __ decode_heap_oop_not_null($tmp2$$Register);
+ }
+ }
+ write_barrier_post(masm, this,
+ $tmp1$$Register /* store_addr */,
+ $tmp2$$Register /* new_val */,
+ $tmp3$$Register /* tmp1 */,
+ $tmp2$$Register /* tmp2 */);
+ %}
+ ins_pipe(ialu_mem_reg);
+%}
+
+instruct g1EncodePAndStoreN(memory mem, any_RegP src, rRegP tmp1, rRegP tmp2, rRegP tmp3, rFlagsReg cr)
+%{
+ predicate(UseG1GC && n->as_Store()->barrier_data() != 0);
+ match(Set mem (StoreN mem (EncodeP src)));
+ effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);
+ ins_cost(125); // XXX
+ format %{ "encode_heap_oop $src\n\t"
+ "movl $mem, $src\t# ptr" %}
+ ins_encode %{
+ __ lea($tmp1$$Register, $mem$$Address);
+ write_barrier_pre(masm, this,
+ $tmp1$$Register /* obj */,
+ $tmp2$$Register /* pre_val */,
+ $tmp3$$Register /* tmp */,
+ RegSet::of($tmp1$$Register, $src$$Register) /* preserve */);
+ __ movq($tmp2$$Register, $src$$Register);
+ if ((barrier_data() & G1C2BarrierPostNotNull) == 0) {
+ __ encode_heap_oop($tmp2$$Register);
+ } else {
+ __ encode_heap_oop_not_null($tmp2$$Register);
+ }
+ __ movl(Address($tmp1$$Register, 0), $tmp2$$Register);
+ write_barrier_post(masm, this,
+ $tmp1$$Register /* store_addr */,
+ $src$$Register /* new_val */,
+ $tmp3$$Register /* tmp1 */,
+ $tmp2$$Register /* tmp2 */);
+ %}
+ ins_pipe(ialu_mem_reg);
+%}
+
+instruct g1CompareAndExchangeP(indirect mem, rRegP newval, rRegP tmp1, rRegP tmp2, rRegP tmp3, rax_RegP oldval, rFlagsReg cr)
+%{
+ predicate(UseG1GC && n->as_LoadStore()->barrier_data() != 0);
+ match(Set oldval (CompareAndExchangeP mem (Binary oldval newval)));
+ effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);
+ format %{ "lock\n\t"
+ "cmpxchgq $newval, $mem" %}
+ ins_encode %{
+ assert_different_registers($oldval$$Register, $mem$$Register);
+ // Pass $oldval to the pre-barrier (instead of loading from $mem), because
+ // $oldval is the only value that can be overwritten.
+ // The same holds for g1CompareAndSwapP.
+ write_barrier_pre(masm, this,
+ noreg /* obj */,
+ $oldval$$Register /* pre_val */,
+ $tmp3$$Register /* tmp */,
+ RegSet::of($mem$$Register, $newval$$Register, $oldval$$Register) /* preserve */);
+ __ movq($tmp1$$Register, $newval$$Register);
+ __ lock();
+ __ cmpxchgq($tmp1$$Register, Address($mem$$Register, 0));
+ write_barrier_post(masm, this,
+ $mem$$Register /* store_addr */,
+ $tmp1$$Register /* new_val */,
+ $tmp2$$Register /* tmp1 */,
+ $tmp3$$Register /* tmp2 */);
+ %}
+ ins_pipe(pipe_cmpxchg);
+%}
+
+instruct g1CompareAndExchangeN(indirect mem, rRegN newval, rRegP tmp1, rRegP tmp2, rRegP tmp3, rax_RegN oldval, rFlagsReg cr)
+%{
+ predicate(UseG1GC && n->as_LoadStore()->barrier_data() != 0);
+ match(Set oldval (CompareAndExchangeN mem (Binary oldval newval)));
+ effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);
+ format %{ "lock\n\t"
+ "cmpxchgq $newval, $mem" %}
+ ins_encode %{
+ assert_different_registers($oldval$$Register, $mem$$Register);
+ write_barrier_pre(masm, this,
+ $mem$$Register /* obj */,
+ $tmp2$$Register /* pre_val */,
+ $tmp3$$Register /* tmp */,
+ RegSet::of($mem$$Register, $newval$$Register, $oldval$$Register) /* preserve */);
+ __ movl($tmp1$$Register, $newval$$Register);
+ __ lock();
+ __ cmpxchgl($tmp1$$Register, Address($mem$$Register, 0));
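+    // The post barrier below expects an uncompressed oop, hence the decode.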
+ __ decode_heap_oop($tmp1$$Register);
+ write_barrier_post(masm, this,
+ $mem$$Register /* store_addr */,
+ $tmp1$$Register /* new_val */,
+ $tmp2$$Register /* tmp1 */,
+ $tmp3$$Register /* tmp2 */);
+ %}
+ ins_pipe(pipe_cmpxchg);
+%}
+
+instruct g1CompareAndSwapP(rRegI res, indirect mem, rRegP newval, rRegP tmp1, rRegP tmp2, rRegP tmp3, rax_RegP oldval, rFlagsReg cr)
+%{
+ predicate(UseG1GC && n->as_LoadStore()->barrier_data() != 0);
+ match(Set res (CompareAndSwapP mem (Binary oldval newval)));
+ match(Set res (WeakCompareAndSwapP mem (Binary oldval newval)));
+ effect(TEMP res, TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL oldval, KILL cr);
+ format %{ "lock\n\t"
+ "cmpxchgq $newval, $mem\n\t"
+ "sete $res\n\t"
+ "movzbl $res, $res" %}
+ ins_encode %{
+ assert_different_registers($oldval$$Register, $mem$$Register);
+ write_barrier_pre(masm, this,
+ noreg /* obj */,
+ $oldval$$Register /* pre_val */,
+ $tmp3$$Register /* tmp */,
+ RegSet::of($mem$$Register, $newval$$Register, $oldval$$Register) /* preserve */,
+ RegSet::of($res$$Register) /* no_preserve */);
+ __ movq($tmp1$$Register, $newval$$Register);
+ __ lock();
+ __ cmpxchgq($tmp1$$Register, Address($mem$$Register, 0));
+ __ setb(Assembler::equal, $res$$Register);
+ __ movzbl($res$$Register, $res$$Register);
+ write_barrier_post(masm, this,
+ $mem$$Register /* store_addr */,
+ $tmp1$$Register /* new_val */,
+ $tmp2$$Register /* tmp1 */,
+ $tmp3$$Register /* tmp2 */);
+ %}
+ ins_pipe(pipe_cmpxchg);
+%}
+
+instruct g1CompareAndSwapN(rRegI res, indirect mem, rRegN newval, rRegP tmp1, rRegP tmp2, rRegP tmp3, rax_RegN oldval, rFlagsReg cr)
+%{
+ predicate(UseG1GC && n->as_LoadStore()->barrier_data() != 0);
+ match(Set res (CompareAndSwapN mem (Binary oldval newval)));
+ match(Set res (WeakCompareAndSwapN mem (Binary oldval newval)));
+ effect(TEMP res, TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL oldval, KILL cr);
+ format %{ "lock\n\t"
+ "cmpxchgq $newval, $mem\n\t"
+ "sete $res\n\t"
+ "movzbl $res, $res" %}
+ ins_encode %{
+ assert_different_registers($oldval$$Register, $mem$$Register);
+ write_barrier_pre(masm, this,
+ $mem$$Register /* obj */,
+ $tmp2$$Register /* pre_val */,
+ $tmp3$$Register /* tmp */,
+ RegSet::of($mem$$Register, $newval$$Register, $oldval$$Register) /* preserve */,
+ RegSet::of($res$$Register) /* no_preserve */);
+ __ movl($tmp1$$Register, $newval$$Register);
+ __ lock();
+ __ cmpxchgl($tmp1$$Register, Address($mem$$Register, 0));
+ __ setb(Assembler::equal, $res$$Register);
+ __ movzbl($res$$Register, $res$$Register);
+ __ decode_heap_oop($tmp1$$Register);
+ write_barrier_post(masm, this,
+ $mem$$Register /* store_addr */,
+ $tmp1$$Register /* new_val */,
+ $tmp2$$Register /* tmp1 */,
+ $tmp3$$Register /* tmp2 */);
+ %}
+ ins_pipe(pipe_cmpxchg);
+%}
+
+instruct g1GetAndSetP(indirect mem, rRegP newval, rRegP tmp1, rRegP tmp2, rRegP tmp3, rFlagsReg cr)
+%{
+ predicate(UseG1GC && n->as_LoadStore()->barrier_data() != 0);
+ match(Set newval (GetAndSetP mem newval));
+ effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);
+ format %{ "xchgq $newval, $mem" %}
+ ins_encode %{
+ assert_different_registers($mem$$Register, $newval$$Register);
+ write_barrier_pre(masm, this,
+ $mem$$Register /* obj */,
+ $tmp2$$Register /* pre_val */,
+ $tmp3$$Register /* tmp */,
+ RegSet::of($mem$$Register, $newval$$Register) /* preserve */);
+ __ movq($tmp1$$Register, $newval$$Register);
+ __ xchgq($newval$$Register, Address($mem$$Register, 0));
+ write_barrier_post(masm, this,
+ $mem$$Register /* store_addr */,
+ $tmp1$$Register /* new_val */,
+ $tmp2$$Register /* tmp1 */,
+ $tmp3$$Register /* tmp2 */);
+ %}
+ ins_pipe(pipe_cmpxchg);
+%}
+
+instruct g1GetAndSetN(indirect mem, rRegN newval, rRegP tmp1, rRegP tmp2, rRegP tmp3, rFlagsReg cr)
+%{
+ predicate(UseG1GC && n->as_LoadStore()->barrier_data() != 0);
+ match(Set newval (GetAndSetN mem newval));
+ effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);
+ format %{ "xchgq $newval, $mem" %}
+ ins_encode %{
+ assert_different_registers($mem$$Register, $newval$$Register);
+ write_barrier_pre(masm, this,
+ $mem$$Register /* obj */,
+ $tmp2$$Register /* pre_val */,
+ $tmp3$$Register /* tmp */,
+ RegSet::of($mem$$Register, $newval$$Register) /* preserve */);
+ __ movl($tmp1$$Register, $newval$$Register);
+ __ decode_heap_oop($tmp1$$Register);
+ __ xchgl($newval$$Register, Address($mem$$Register, 0));
+ write_barrier_post(masm, this,
+ $mem$$Register /* store_addr */,
+ $tmp1$$Register /* new_val */,
+ $tmp2$$Register /* tmp1 */,
+ $tmp3$$Register /* tmp2 */);
+ %}
+ ins_pipe(pipe_cmpxchg);
+%}
+
+instruct g1LoadP(rRegP dst, memory mem, rRegP tmp, rFlagsReg cr)
+%{
+ predicate(UseG1GC && n->as_Load()->barrier_data() != 0);
+ match(Set dst (LoadP mem));
+ effect(TEMP dst, TEMP tmp, KILL cr);
+ ins_cost(125); // XXX
+ format %{ "movq $dst, $mem\t# ptr" %}
+ ins_encode %{
+ __ movq($dst$$Register, $mem$$Address);
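+    // A load with barrier data is a reference load (e.g. through Reference.get);
+    // the loaded oop is passed as pre_val so the SATB pre-barrier keeps it alive.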
+ write_barrier_pre(masm, this,
+ noreg /* obj */,
+ $dst$$Register /* pre_val */,
+ $tmp$$Register /* tmp */);
+ %}
+ ins_pipe(ialu_reg_mem); // XXX
+%}
+
+instruct g1LoadN(rRegN dst, memory mem, rRegP tmp1, rRegP tmp2, rFlagsReg cr)
+%{
+ predicate(UseG1GC && n->as_Load()->barrier_data() != 0);
+ match(Set dst (LoadN mem));
+ effect(TEMP dst, TEMP tmp1, TEMP tmp2, KILL cr);
+ ins_cost(125); // XXX
+ format %{ "movl $dst, $mem\t# compressed ptr" %}
+ ins_encode %{
+ __ movl($dst$$Register, $mem$$Address);
+ __ movl($tmp1$$Register, $dst$$Register);
+ __ decode_heap_oop($tmp1$$Register);
+ write_barrier_pre(masm, this,
+ noreg /* obj */,
+ $tmp1$$Register /* pre_val */,
+ $tmp2$$Register /* tmp */);
+ %}
+ ins_pipe(ialu_reg_mem); // XXX
+%}
diff --git a/src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.cpp b/src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.cpp
index 47078dff90738..a7682fe0c3879 100644
--- a/src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.cpp
+++ b/src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.cpp
@@ -163,12 +163,12 @@ void ShenandoahBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, Dec
assert(dst == rsi, "expected");
assert(count == rdx, "expected");
if (UseCompressedOops) {
- __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::arraycopy_barrier_narrow_oop_entry),
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::arraycopy_barrier_narrow_oop),
src, dst, count);
} else
#endif
{
- __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::arraycopy_barrier_oop_entry),
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::arraycopy_barrier_oop),
src, dst, count);
}
@@ -296,9 +296,9 @@ void ShenandoahBarrierSetAssembler::satb_write_barrier_pre(MacroAssembler* masm,
__ push(thread);
__ push(pre_val);
#endif
- __ MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), 2);
+ __ MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre), 2);
} else {
- __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), LP64_ONLY(c_rarg0) NOT_LP64(pre_val), thread);
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre), LP64_ONLY(c_rarg0) NOT_LP64(pre_val), thread);
}
NOT_LP64( __ pop(thread); )
@@ -925,7 +925,7 @@ void ShenandoahBarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAss
// load the pre-value
__ load_parameter(0, rcx);
- __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), rcx, thread);
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre), rcx, thread);
__ restore_live_registers(true);
diff --git a/src/hotspot/cpu/x86/gc/x/x_x86_64.ad b/src/hotspot/cpu/x86/gc/x/x_x86_64.ad
index 116fb3cbc6d5e..ba4b3cb6df05f 100644
--- a/src/hotspot/cpu/x86/gc/x/x_x86_64.ad
+++ b/src/hotspot/cpu/x86/gc/x/x_x86_64.ad
@@ -126,8 +126,7 @@ instruct xCompareAndSwapP(rRegI res, indirect mem, rRegP newval, rRegP tmp, rFla
format %{ "lock\n\t"
"cmpxchgq $newval, $mem\n\t"
- "sete $res\n\t"
- "movzbl $res, $res" %}
+ "setcc $res \t# emits sete + movzbl or setzue for APX" %}
ins_encode %{
precond($oldval$$Register == rax);
@@ -135,8 +134,7 @@ instruct xCompareAndSwapP(rRegI res, indirect mem, rRegP newval, rRegP tmp, rFla
if (barrier_data() != XLoadBarrierElided) {
__ cmpptr($tmp$$Register, rax);
}
- __ setb(Assembler::equal, $res$$Register);
- __ movzbl($res$$Register, $res$$Register);
+ __ setcc(Assembler::equal, $res$$Register);
%}
ins_pipe(pipe_cmpxchg);
diff --git a/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.cpp b/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.cpp
index f5f0d6c884198..65d7c1e3303ba 100644
--- a/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.cpp
+++ b/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.cpp
@@ -636,7 +636,7 @@ void ZBarrierSetAssembler::copy_load_at(MacroAssembler* masm,
// Remove metadata bits so that the store side (vectorized or non-vectorized) can
// inject the store-good color with an or instruction.
- __ andq(dst, _zpointer_address_mask);
+ __ andq(dst, ZPointerAddressMask);
if ((decorators & ARRAYCOPY_CHECKCAST) != 0) {
// The checkcast arraycopy needs to be able to dereference the oops in order to perform a typecheck.
diff --git a/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.hpp b/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.hpp
index 7c3716ba0da9f..5fbc7ea1be16e 100644
--- a/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.hpp
+++ b/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.hpp
@@ -64,7 +64,7 @@ class ZBarrierSetAssembler : public ZBarrierSetAssemblerBase {
GrowableArrayCHeap _store_good_relocations;
public:
- static const int32_t _zpointer_address_mask = 0xFFFF0000;
+ static const int32_t ZPointerAddressMask = 0xFFFF0000;
ZBarrierSetAssembler();
diff --git a/src/hotspot/cpu/x86/gc/z/z_x86_64.ad b/src/hotspot/cpu/x86/gc/z/z_x86_64.ad
index 1a4499c3d447d..455d622acdf17 100644
--- a/src/hotspot/cpu/x86/gc/z/z_x86_64.ad
+++ b/src/hotspot/cpu/x86/gc/z/z_x86_64.ad
@@ -141,7 +141,7 @@ instruct zLoadPNullCheck(rFlagsReg cr, memory op, immP0 zero)
ins_encode %{
// A null pointer will have all address bits 0. This mask sign extends
// all address bits, so we can test if the address is 0.
- __ testq($op$$Address, ZBarrierSetAssembler::_zpointer_address_mask);
+ __ testq($op$$Address, ZBarrierSetAssembler::ZPointerAddressMask);
%}
ins_pipe(ialu_cr_reg_imm);
%}
@@ -212,8 +212,7 @@ instruct zCompareAndSwapP(rRegI res, indirect mem, rRegP newval, rRegP tmp, rax_
format %{ "lock\n\t"
"cmpxchgq $newval, $mem\n\t"
- "sete $res\n\t"
- "movzbl $res, $res" %}
+ "setcc $res \t# emits sete + movzbl or setzue for APX" %}
ins_encode %{
assert_different_registers($oldval$$Register, $mem$$Register);
@@ -222,8 +221,7 @@ instruct zCompareAndSwapP(rRegI res, indirect mem, rRegP newval, rRegP tmp, rax_
z_color(masm, this, $oldval$$Register);
__ lock();
__ cmpxchgptr($tmp$$Register, mem_addr);
- __ setb(Assembler::equal, $res$$Register);
- __ movzbl($res$$Register, $res$$Register);
+ __ setcc(Assembler::equal, $res$$Register);
%}
ins_pipe(pipe_cmpxchg);
diff --git a/src/hotspot/cpu/x86/macroAssembler_x86.cpp b/src/hotspot/cpu/x86/macroAssembler_x86.cpp
index ba337751d19d1..893ae4e844ba4 100644
--- a/src/hotspot/cpu/x86/macroAssembler_x86.cpp
+++ b/src/hotspot/cpu/x86/macroAssembler_x86.cpp
@@ -5756,7 +5756,7 @@ void MacroAssembler::verify_heapbase(const char* msg) {
assert (Universe::heap() != nullptr, "java heap should be initialized");
if (CheckCompressedOops) {
Label ok;
- ExternalAddress src2(CompressedOops::ptrs_base_addr());
+ ExternalAddress src2(CompressedOops::base_addr());
const bool is_src2_reachable = reachable(src2);
if (!is_src2_reachable) {
push(rscratch1); // cmpptr trashes rscratch1
@@ -6047,10 +6047,10 @@ void MacroAssembler::reinit_heapbase() {
if (CompressedOops::base() == nullptr) {
MacroAssembler::xorptr(r12_heapbase, r12_heapbase);
} else {
- mov64(r12_heapbase, (int64_t)CompressedOops::ptrs_base());
+ mov64(r12_heapbase, (int64_t)CompressedOops::base());
}
} else {
- movptr(r12_heapbase, ExternalAddress(CompressedOops::ptrs_base_addr()));
+ movptr(r12_heapbase, ExternalAddress(CompressedOops::base_addr()));
}
}
}
@@ -10421,4 +10421,13 @@ void MacroAssembler::restore_legacy_gprs() {
movq(rax, Address(rsp, 15 * wordSize));
addq(rsp, 16 * wordSize);
}
+
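+// Materialize a flag condition into dst as 0/1: one ESETZUCC on APX-capable
+// CPUs, otherwise the classic sete + movzbl pair.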
+void MacroAssembler::setcc(Assembler::Condition comparison, Register dst) {
+ if (VM_Version::supports_apx_f()) {
+ esetzucc(comparison, dst);
+ } else {
+ setb(comparison, dst);
+ movzbl(dst, dst);
+ }
+}
#endif
diff --git a/src/hotspot/cpu/x86/macroAssembler_x86.hpp b/src/hotspot/cpu/x86/macroAssembler_x86.hpp
index 594f0b95ca3e2..2ce4fc40e90e0 100644
--- a/src/hotspot/cpu/x86/macroAssembler_x86.hpp
+++ b/src/hotspot/cpu/x86/macroAssembler_x86.hpp
@@ -2154,6 +2154,7 @@ class MacroAssembler: public Assembler {
#ifdef _LP64
void save_legacy_gprs();
void restore_legacy_gprs();
+ void setcc(Assembler::Condition comparison, Register dst);
#endif
};
diff --git a/src/hotspot/cpu/x86/macroAssembler_x86_md5.cpp b/src/hotspot/cpu/x86/macroAssembler_x86_md5.cpp
index 439c17b10d37a..09d379a4296d4 100644
--- a/src/hotspot/cpu/x86/macroAssembler_x86_md5.cpp
+++ b/src/hotspot/cpu/x86/macroAssembler_x86_md5.cpp
@@ -81,8 +81,8 @@ void MacroAssembler::fast_md5(Register buf, Address state, Address ofs, Address
notl(rsi); \
andl(rdi, r2); \
andl(rsi, r3); \
- orl(rsi, rdi); \
addl(r1, rsi); \
+ addl(r1, rdi); \
roll(r1, s); \
addl(r1, r2);
diff --git a/src/hotspot/cpu/x86/sharedRuntime_x86_64.cpp b/src/hotspot/cpu/x86/sharedRuntime_x86_64.cpp
index 4bd91f640fca7..174e2e0277903 100644
--- a/src/hotspot/cpu/x86/sharedRuntime_x86_64.cpp
+++ b/src/hotspot/cpu/x86/sharedRuntime_x86_64.cpp
@@ -2674,7 +2674,7 @@ void SharedRuntime::generate_deopt_blob() {
int reexecute_offset = __ pc() - start;
#if INCLUDE_JVMCI && !defined(COMPILER1)
- if (EnableJVMCI && UseJVMCICompiler) {
+ if (UseJVMCICompiler) {
// JVMCI does not use this kind of deoptimization
__ should_not_reach_here();
}
diff --git a/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp b/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp
index 2bc4a0a9cba94..835bfc770fe90 100644
--- a/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp
+++ b/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp
@@ -24,6 +24,7 @@
#include "precompiled.hpp"
#include "asm/macroAssembler.hpp"
+#include "classfile/javaClasses.hpp"
#include "classfile/vmIntrinsics.hpp"
#include "compiler/oopMap.hpp"
#include "gc/shared/barrierSet.hpp"
@@ -3573,6 +3574,9 @@ void StubGenerator::generate_libm_stubs() {
if (vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dtan)) {
StubRoutines::_dtan = generate_libmTan(); // from stubGenerator_x86_64_tan.cpp
}
+ if (vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dtanh)) {
+ StubRoutines::_dtanh = generate_libmTanh(); // from stubGenerator_x86_64_tanh.cpp
+ }
if (vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dexp)) {
StubRoutines::_dexp = generate_libmExp(); // from stubGenerator_x86_64_exp.cpp
}
@@ -3793,6 +3797,28 @@ address StubGenerator::generate_upcall_stub_exception_handler() {
return start;
}
+// load Method* target of MethodHandle
+// j_rarg0 = jobject receiver
+// rbx = result
+address StubGenerator::generate_upcall_stub_load_target() {
+ StubCodeMark mark(this, "StubRoutines", "upcall_stub_load_target");
+ address start = __ pc();
+
+ __ resolve_global_jobject(j_rarg0, r15_thread, rscratch1);
+ // Load target method from receiver
+ __ load_heap_oop(rbx, Address(j_rarg0, java_lang_invoke_MethodHandle::form_offset()), rscratch1);
+ __ load_heap_oop(rbx, Address(rbx, java_lang_invoke_LambdaForm::vmentry_offset()), rscratch1);
+ __ load_heap_oop(rbx, Address(rbx, java_lang_invoke_MemberName::method_offset()), rscratch1);
+ __ access_load_at(T_ADDRESS, IN_HEAP, rbx,
+ Address(rbx, java_lang_invoke_ResolvedMethodName::vmtarget_offset()),
+ noreg, noreg);
+ __ movptr(Address(r15_thread, JavaThread::callee_target_offset()), rbx); // just in case callee is deoptimized
+
+ __ ret(0);
+
+ return start;
+}
+
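(For readability, the field chain the new upcall stub walks can be modelled with plain structs. The types below are stand-ins for the java.lang.invoke objects whose offsets the stub uses, not the real JVM layouts; this is a hedged sketch only.)

#include <cstddef>

// Illustrative stand-ins: receiver.form.vmentry.method.vmtarget, mirroring the
// load_heap_oop / access_load_at sequence in generate_upcall_stub_load_target().
struct ResolvedMethodName { void* vmtarget; };            // stands in for the Method*
struct MemberName         { ResolvedMethodName* method; };
struct LambdaForm         { MemberName* vmentry; };
struct MethodHandle       { LambdaForm* form; };

void* load_upcall_target(const MethodHandle* receiver) {
  return receiver->form->vmentry->method->vmtarget;
}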
address StubGenerator::generate_lookup_secondary_supers_table_stub(u1 super_klass_index) {
StubCodeMark mark(this, "StubRoutines", "lookup_secondary_supers_table");
@@ -3952,6 +3978,7 @@ void StubGenerator::generate_final_stubs() {
}
StubRoutines::_upcall_stub_exception_handler = generate_upcall_stub_exception_handler();
+ StubRoutines::_upcall_stub_load_target = generate_upcall_stub_load_target();
}
void StubGenerator::generate_compiler_stubs() {
diff --git a/src/hotspot/cpu/x86/stubGenerator_x86_64.hpp b/src/hotspot/cpu/x86/stubGenerator_x86_64.hpp
index d65c681585d6d..7280e9fbe957e 100644
--- a/src/hotspot/cpu/x86/stubGenerator_x86_64.hpp
+++ b/src/hotspot/cpu/x86/stubGenerator_x86_64.hpp
@@ -376,11 +376,22 @@ class StubGenerator: public StubCodeGenerator {
void roundDec(XMMRegister key, int rnum);
void lastroundDec(XMMRegister key, int rnum);
void gfmul_avx512(XMMRegister ghash, XMMRegister hkey);
- void generateHtbl_48_block_zmm(Register htbl, Register avx512_subkeyHtbl, Register rscratch);
- void ghash16_encrypt16_parallel(Register key, Register subkeyHtbl, XMMRegister ctr_blockx,
- XMMRegister aad_hashx, Register in, Register out, Register data, Register pos, bool reduction,
- XMMRegister addmask, bool no_ghash_input, Register rounds, Register ghash_pos,
- bool final_reduction, int index, XMMRegister counter_inc_mask);
+ void ghash16_encrypt_parallel16_avx512(Register in, Register out, Register ct, Register pos, Register avx512_subkeyHtbl,
+ Register CTR_CHECK, Register NROUNDS, Register key, XMMRegister CTR, XMMRegister GHASH,
+ XMMRegister ADDBE_4x4, XMMRegister ADDBE_1234, XMMRegister ADD_1234, XMMRegister SHUF_MASK,
+ bool hk_broadcast, bool is_hash_start, bool do_hash_reduction, bool do_hash_hxor,
+ bool no_ghash_in, int ghashin_offset, int aesout_offset, int hashkey_offset);
+ void generateHtbl_32_blocks_avx512(Register htbl, Register avx512_htbl);
+ void initial_blocks_16_avx512(Register in, Register out, Register ct, Register pos, Register key, Register avx512_subkeyHtbl,
+ Register CTR_CHECK, Register rounds, XMMRegister CTR, XMMRegister GHASH, XMMRegister ADDBE_4x4,
+ XMMRegister ADDBE_1234, XMMRegister ADD_1234, XMMRegister SHUF_MASK, int stack_offset);
+ void gcm_enc_dec_last_avx512(Register len, Register in, Register pos, XMMRegister HASH, XMMRegister SHUFM, Register subkeyHtbl,
+ int ghashin_offset, int hashkey_offset, bool start_ghash, bool do_reduction);
+ void ghash16_avx512(bool start_ghash, bool do_reduction, bool uload_shuffle, bool hk_broadcast, bool do_hxor,
+ Register in, Register pos, Register subkeyHtbl, XMMRegister HASH, XMMRegister SHUFM, int in_offset,
+ int in_disp, int displacement, int hashkey_offset);
+ void aesgcm_avx512(Register in, Register len, Register ct, Register out, Register key,
+ Register state, Register subkeyHtbl, Register avx512_subkeyHtbl, Register counter);
// AVX2 AES-GCM related functions
void initial_blocks_avx2(XMMRegister ctr, Register rounds, Register key, Register len,
Register in, Register out, Register ct, XMMRegister aad_hashx, Register pos);
@@ -546,6 +557,7 @@ class StubGenerator: public StubCodeGenerator {
address generate_libmSin();
address generate_libmCos();
address generate_libmTan();
+ address generate_libmTanh();
address generate_libmExp();
address generate_libmPow();
address generate_libmLog();
@@ -608,6 +620,7 @@ class StubGenerator: public StubCodeGenerator {
// shared exception handler for FFM upcall stubs
address generate_upcall_stub_exception_handler();
+ address generate_upcall_stub_load_target();
// Specialized stub implementations for UseSecondarySupersTable.
address generate_lookup_secondary_supers_table_stub(u1 super_klass_index);
diff --git a/src/hotspot/cpu/x86/stubGenerator_x86_64_aes.cpp b/src/hotspot/cpu/x86/stubGenerator_x86_64_aes.cpp
index 9744169498c8b..f14d368c376e1 100644
--- a/src/hotspot/cpu/x86/stubGenerator_x86_64_aes.cpp
+++ b/src/hotspot/cpu/x86/stubGenerator_x86_64_aes.cpp
@@ -1,5 +1,5 @@
/*
-* Copyright (c) 2019, 2023, Intel Corporation. All rights reserved.
+* Copyright (c) 2019, 2024, Intel Corporation. All rights reserved.
*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@@ -172,6 +172,38 @@ static address ghash_polynomial_two_one_addr() {
return (address)GHASH_POLYNOMIAL_TWO_ONE;
}
+// This mask is used for incrementing counter value
+ATTRIBUTE_ALIGNED(64) static const uint64_t COUNTER_MASK_ADDBE_4444[] = {
+ 0x0000000000000000ULL, 0x0400000000000000ULL,
+ 0x0000000000000000ULL, 0x0400000000000000ULL,
+ 0x0000000000000000ULL, 0x0400000000000000ULL,
+ 0x0000000000000000ULL, 0x0400000000000000ULL,
+};
+static address counter_mask_addbe_4444_addr() {
+ return (address)COUNTER_MASK_ADDBE_4444;
+}
+
+// This mask is used for incrementing counter value
+ATTRIBUTE_ALIGNED(64) static const uint64_t COUNTER_MASK_ADDBE_1234[] = {
+ 0x0000000000000000ULL, 0x0100000000000000ULL,
+ 0x0000000000000000ULL, 0x0200000000000000ULL,
+ 0x0000000000000000ULL, 0x0300000000000000ULL,
+ 0x0000000000000000ULL, 0x0400000000000000ULL,
+};
+static address counter_mask_addbe_1234_addr() {
+ return (address)COUNTER_MASK_ADDBE_1234;
+}
+
+// This mask is used for incrementing counter value
+ATTRIBUTE_ALIGNED(64) static const uint64_t COUNTER_MASK_ADD_1234[] = {
+ 0x0000000000000001ULL, 0x0000000000000000ULL,
+ 0x0000000000000002ULL, 0x0000000000000000ULL,
+ 0x0000000000000003ULL, 0x0000000000000000ULL,
+ 0x0000000000000004ULL, 0x0000000000000000ULL,
+};
+static address counter_mask_add_1234_addr() {
+ return (address)COUNTER_MASK_ADD_1234;
+}
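(A note on these constants: the counter block keeps its 32-bit counter big-endian in the last four bytes, so the ADDBE masks bump that counter by adding to the byte at the highest address, avoiding a byte swap on every iteration. This is only safe while the low counter byte cannot wrap, which is why the stub later compares CTR_CHECK against (256 - 16) and falls back to a shuffle plus the little-endian ADD_1234 mask. A small sketch of the byte-level effect; it assumes a little-endian host, as x86 is, and the function name is illustrative.)

#include <cassert>
#include <cstdint>
#include <cstring>

// Add 1 to a counter block whose 32-bit counter is stored big-endian in
// bytes 12..15, using the same constant as lane 0 of COUNTER_MASK_ADDBE_1234.
void addbe_one(uint8_t block[16]) {
  uint64_t hi;
  std::memcpy(&hi, block + 8, 8);      // little-endian load of bytes 8..15
  hi += 0x0100000000000000ULL;         // touches only byte 15 (no wrap assumed)
  std::memcpy(block + 8, &hi, 8);
}

int main() {
  uint8_t block[16] = {};
  block[15] = 5;                       // big-endian counter value 5
  addbe_one(block);
  assert(block[15] == 6);              // incremented without shuffling the block
  return 0;
}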
// AES intrinsic stubs
@@ -209,10 +241,10 @@ void StubGenerator::generate_aes_stubs() {
// len = rdx (c_rarg1) | rdi (c_rarg1)
// ct = r8 (c_rarg2) | rdx (c_rarg2)
// out = r9 (c_rarg3) | rcx (c_rarg3)
-// key = r10 | r8 (c_rarg4)
-// state = r13 | r9 (c_rarg5)
-// subkeyHtbl = r14 | r11
-// counter = rsi | r12
+// key = rsi | r8 (c_rarg4)
+// state = rdi | r9 (c_rarg5)
+// subkeyHtbl = r10 | r10
+// counter = r11 | r11
//
// Output:
// rax - number of processed bytes
@@ -230,31 +262,31 @@ address StubGenerator::generate_galoisCounterMode_AESCrypt() {
const Register key = c_rarg4;
const Register state = c_rarg5;
const Address subkeyH_mem(rbp, 2 * wordSize);
- const Register subkeyHtbl = r11;
- const Register avx512_subkeyHtbl = r13;
+ const Register subkeyHtbl = r10;
+ const Register avx512_subkeyHtbl = r12;
const Address counter_mem(rbp, 3 * wordSize);
- const Register counter = r12;
+ const Register counter = r11;
#else
const Address key_mem(rbp, 6 * wordSize);
- const Register key = r10;
+ const Register key = rsi;
const Address state_mem(rbp, 7 * wordSize);
- const Register state = r13;
+ const Register state = rdi;
const Address subkeyH_mem(rbp, 8 * wordSize);
- const Register subkeyHtbl = r14;
+ const Register subkeyHtbl = r10;
const Register avx512_subkeyHtbl = r12;
const Address counter_mem(rbp, 9 * wordSize);
- const Register counter = rsi;
+ const Register counter = r11;
#endif
__ enter();
// Save state before entering routine
- __ push(r12);
- __ push(r13);
- __ push(r14);
- __ push(r15);
- __ push(rbx);
+ __ push(r12); //holds pointer to avx512_subkeyHtbl
+ __ push(r14); //holds CTR_CHECK value to check for overflow
+ __ push(r15); //holds number of rounds
+ __ push(rbx); //scratch register
#ifdef _WIN64
// on win64, fill len_reg from stack position
__ push(rsi);
+ __ push(rdi);
__ movptr(key, key_mem);
__ movptr(state, state_mem);
#endif
@@ -262,24 +294,24 @@ address StubGenerator::generate_galoisCounterMode_AESCrypt() {
__ movptr(counter, counter_mem);
// Align stack
__ andq(rsp, -64);
- __ subptr(rsp, 96 * longSize); // Create space on the stack for htbl entries
+ __ subptr(rsp, 200 * longSize); // Create space on the stack for 64 htbl entries and 8 zmm AES entries
__ movptr(avx512_subkeyHtbl, rsp);
- aesgcm_encrypt(in, len, ct, out, key, state, subkeyHtbl, avx512_subkeyHtbl, counter);
+ aesgcm_avx512(in, len, ct, out, key, state, subkeyHtbl, avx512_subkeyHtbl, counter);
__ vzeroupper();
// Restore state before leaving routine
#ifdef _WIN64
__ lea(rsp, Address(rbp, -6 * wordSize));
+ __ pop(rdi);
__ pop(rsi);
#else
- __ lea(rsp, Address(rbp, -5 * wordSize));
+ __ lea(rsp, Address(rbp, -4 * wordSize));
#endif
__ pop(rbx);
__ pop(r15);
__ pop(r14);
- __ pop(r13);
__ pop(r12);
__ leave(); // required for proper stackwalking of RuntimeStub frame
@@ -2708,87 +2740,100 @@ void StubGenerator::gfmul_avx512(XMMRegister GH, XMMRegister HK) {
__ vpternlogq(GH, 0x96, TMP1, TMP2, Assembler::AVX_512bit);
}
-void StubGenerator::generateHtbl_48_block_zmm(Register htbl, Register avx512_htbl, Register rscratch) {
+// Holds 64 Htbl entries: 32 HashKey values and 32 HashKeyK values (derived from HashKey)
+void StubGenerator::generateHtbl_32_blocks_avx512(Register htbl, Register avx512_htbl) {
const XMMRegister HK = xmm6;
- const XMMRegister ZT5 = xmm4;
- const XMMRegister ZT7 = xmm7;
- const XMMRegister ZT8 = xmm8;
-
- Label GFMUL_AVX512;
+ const XMMRegister ZT1 = xmm0, ZT2 = xmm1, ZT3 = xmm2, ZT4 = xmm3;
+ const XMMRegister ZT5 = xmm4, ZT6 = xmm5, ZT7 = xmm7, ZT8 = xmm8;
+ const XMMRegister ZT10 = xmm10, ZT11 = xmm11, ZT12 = xmm12;
__ movdqu(HK, Address(htbl, 0));
- __ movdqu(xmm10, ExternalAddress(ghash_long_swap_mask_addr()), rscratch);
- __ vpshufb(HK, HK, xmm10, Assembler::AVX_128bit);
-
- __ movdqu(xmm11, ExternalAddress(ghash_polynomial_addr()), rscratch);
- __ movdqu(xmm12, ExternalAddress(ghash_polynomial_two_one_addr()), rscratch);
+ __ movdqu(ZT10, ExternalAddress(ghash_long_swap_mask_addr()), r15);
+ __ vpshufb(HK, HK, ZT10, Assembler::AVX_128bit);
+ __ movdqu(ZT11, ExternalAddress(ghash_polynomial_addr()), r15);
+ __ movdqu(ZT12, ExternalAddress(ghash_polynomial_two_one_addr()), r15);
// Compute H ^ 2 from the input subkeyH
- __ movdqu(xmm2, xmm6);
- __ vpsllq(xmm6, xmm6, 1, Assembler::AVX_128bit);
- __ vpsrlq(xmm2, xmm2, 63, Assembler::AVX_128bit);
- __ movdqu(xmm1, xmm2);
- __ vpslldq(xmm2, xmm2, 8, Assembler::AVX_128bit);
- __ vpsrldq(xmm1, xmm1, 8, Assembler::AVX_128bit);
- __ vpor(xmm6, xmm6, xmm2, Assembler::AVX_128bit);
+ __ movdqu(ZT3, HK);
+ __ vpsllq(HK, HK, 1, Assembler::AVX_128bit);
+ __ vpsrlq(ZT3, ZT3, 63, Assembler::AVX_128bit);
+ __ movdqu(ZT2, ZT3);
+ __ vpslldq(ZT3, ZT3, 8, Assembler::AVX_128bit);
+ __ vpsrldq(ZT2, ZT2, 8, Assembler::AVX_128bit);
+ __ vpor(HK, HK, ZT3, Assembler::AVX_128bit);
+ __ vpshufd(ZT3, ZT2, 0x24, Assembler::AVX_128bit);
+ __ vpcmpeqd(ZT3, ZT3, ZT12, Assembler::AVX_128bit);
+ __ vpand(ZT3, ZT3, ZT11, Assembler::AVX_128bit);
+ __ vpxor(HK, HK, ZT3, Assembler::AVX_128bit);
+ __ movdqu(Address(avx512_htbl, 16 * 31), HK); // H ^ 2
- __ vpshufd(xmm2, xmm1, 0x24, Assembler::AVX_128bit);
- __ vpcmpeqd(xmm2, xmm2, xmm12, Assembler::AVX_128bit);
- __ vpand(xmm2, xmm2, xmm11, Assembler::AVX_128bit);
- __ vpxor(xmm6, xmm6, xmm2, Assembler::AVX_128bit);
- __ movdqu(Address(avx512_htbl, 16 * 47), xmm6); // H ^ 2
- // Compute the remaining three powers of H using XMM registers and all following powers using ZMM
__ movdqu(ZT5, HK);
- __ vinserti32x4(ZT7, ZT7, HK, 3);
+ __ evinserti64x2(ZT7, ZT7, HK, 3, Assembler::AVX_512bit);
+ //calculate HashKey ^ 2 << 1 mod poly
gfmul_avx512(ZT5, HK);
- __ movdqu(Address(avx512_htbl, 16 * 46), ZT5); // H ^ 2 * 2
- __ vinserti32x4(ZT7, ZT7, ZT5, 2);
+ __ movdqu(Address(avx512_htbl, 16 * 30), ZT5);
+ __ evinserti64x2(ZT7, ZT7, ZT5, 2, Assembler::AVX_512bit);
+ //calculate HashKey ^ 3 << 1 mod poly
gfmul_avx512(ZT5, HK);
- __ movdqu(Address(avx512_htbl, 16 * 45), ZT5); // H ^ 2 * 3
- __ vinserti32x4(ZT7, ZT7, ZT5, 1);
+ __ movdqu(Address(avx512_htbl, 16 * 29), ZT5);
+ __ evinserti64x2(ZT7, ZT7, ZT5, 1, Assembler::AVX_512bit);
+ //calculate HashKey ^ 4 << 1 mod poly
gfmul_avx512(ZT5, HK);
- __ movdqu(Address(avx512_htbl, 16 * 44), ZT5); // H ^ 2 * 4
- __ vinserti32x4(ZT7, ZT7, ZT5, 0);
-
- __ evshufi64x2(ZT5, ZT5, ZT5, 0x00, Assembler::AVX_512bit);
- __ evmovdquq(ZT8, ZT7, Assembler::AVX_512bit);
- gfmul_avx512(ZT7, ZT5);
- __ evmovdquq(Address(avx512_htbl, 16 * 40), ZT7, Assembler::AVX_512bit);
- __ evshufi64x2(ZT5, ZT7, ZT7, 0x00, Assembler::AVX_512bit);
- gfmul_avx512(ZT8, ZT5);
- __ evmovdquq(Address(avx512_htbl, 16 * 36), ZT8, Assembler::AVX_512bit);
+ __ movdqu(Address(avx512_htbl, 16 * 28), ZT5);
+ __ evinserti64x2(ZT7, ZT7, ZT5, 0, Assembler::AVX_512bit);
+ // ZT5 and ZT7 to be cleared (hash key)
+ //calculate HashKeyK = HashKey x POLY
+ __ evmovdquq(xmm11, ExternalAddress(ghash_polynomial_addr()), Assembler::AVX_512bit, r15);
+ __ evpclmulqdq(ZT1, ZT7, xmm11, 0x10, Assembler::AVX_512bit);
+ __ vpshufd(ZT2, ZT7, 78, Assembler::AVX_512bit);
+ __ evpxorq(ZT1, ZT1, ZT2, Assembler::AVX_512bit);
+ __ evmovdquq(Address(avx512_htbl, 16 * 60), ZT1, Assembler::AVX_512bit);
+ //**ZT1 and ZT2 to be cleared (hash key)
+
+ //switch to 4 x 128-bit computations now
+ __ evshufi64x2(ZT5, ZT5, ZT5, 0x00, Assembler::AVX_512bit); // broadcast HashKey ^ 4 across all ZT5
+ __ evmovdquq(ZT8, ZT7, Assembler::AVX_512bit); // save HashKey ^ 4 to HashKey ^ 1 in ZT8
+ //**ZT8 to be cleared (hash key)
+
+ //calculate HashKey ^ 5 << 1 mod poly, HashKey ^ 6 << 1 mod poly, ... HashKey ^ 8 << 1 mod poly
gfmul_avx512(ZT7, ZT5);
- __ evmovdquq(Address(avx512_htbl, 16 * 32), ZT7, Assembler::AVX_512bit);
- gfmul_avx512(ZT8, ZT5);
- __ evmovdquq(Address(avx512_htbl, 16 * 28), ZT8, Assembler::AVX_512bit);
- gfmul_avx512(ZT7, ZT5);
- __ evmovdquq(Address(avx512_htbl, 16 * 24), ZT7, Assembler::AVX_512bit);
- gfmul_avx512(ZT8, ZT5);
- __ evmovdquq(Address(avx512_htbl, 16 * 20), ZT8, Assembler::AVX_512bit);
- gfmul_avx512(ZT7, ZT5);
- __ evmovdquq(Address(avx512_htbl, 16 * 16), ZT7, Assembler::AVX_512bit);
- gfmul_avx512(ZT8, ZT5);
- __ evmovdquq(Address(avx512_htbl, 16 * 12), ZT8, Assembler::AVX_512bit);
- gfmul_avx512(ZT7, ZT5);
- __ evmovdquq(Address(avx512_htbl, 16 * 8), ZT7, Assembler::AVX_512bit);
- gfmul_avx512(ZT8, ZT5);
- __ evmovdquq(Address(avx512_htbl, 16 * 4), ZT8, Assembler::AVX_512bit);
- gfmul_avx512(ZT7, ZT5);
- __ evmovdquq(Address(avx512_htbl, 16 * 0), ZT7, Assembler::AVX_512bit);
- __ ret(0);
-}
-
-#define vclmul_reduce(out, poly, hi128, lo128, tmp0, tmp1) \
-__ evpclmulqdq(tmp0, poly, lo128, 0x01, Assembler::AVX_512bit); \
-__ vpslldq(tmp0, tmp0, 8, Assembler::AVX_512bit); \
-__ evpxorq(tmp0, lo128, tmp0, Assembler::AVX_512bit); \
-__ evpclmulqdq(tmp1, poly, tmp0, 0x00, Assembler::AVX_512bit); \
-__ vpsrldq(tmp1, tmp1, 4, Assembler::AVX_512bit); \
-__ evpclmulqdq(out, poly, tmp0, 0x10, Assembler::AVX_512bit); \
-__ vpslldq(out, out, 4, Assembler::AVX_512bit); \
-__ vpternlogq(out, 0x96, tmp1, hi128, Assembler::AVX_512bit); \
+ __ evmovdquq(Address(avx512_htbl, 16 * 24), ZT7, Assembler::AVX_512bit);//; HashKey ^ 8 to HashKey ^ 5 in ZT7 now
+
+ //calculate HashKeyK = HashKey x POLY
+ __ evpclmulqdq(ZT1, ZT7, xmm11, 0x10, Assembler::AVX_512bit);
+ __ vpshufd(ZT2, ZT7, 78, Assembler::AVX_512bit);
+ __ evpxorq(ZT1, ZT1, ZT2, Assembler::AVX_512bit);
+ __ evmovdquq(Address(avx512_htbl, 16 * 56), ZT1, Assembler::AVX_512bit);
+
+ __ evshufi64x2(ZT5, ZT7, ZT7, 0x00, Assembler::AVX_512bit); // broadcast HashKey ^ 8 across all ZT5
+
+ for (int i = 20, j = 52; i > 0;) {
+ gfmul_avx512(ZT8, ZT5);
+ __ evmovdquq(Address(avx512_htbl, 16 * i), ZT8, Assembler::AVX_512bit);
+ //calculate HashKeyK = HashKey x POLY
+ __ evpclmulqdq(ZT1, ZT8, xmm11, 0x10, Assembler::AVX_512bit);
+ __ vpshufd(ZT2, ZT8, 78, Assembler::AVX_512bit);
+ __ evpxorq(ZT1, ZT1, ZT2, Assembler::AVX_512bit);
+ __ evmovdquq(Address(avx512_htbl, 16 * j), ZT1, Assembler::AVX_512bit);
+
+ i -= 4;
+ j -= 4;
+ //compute HashKey ^ (8 + n), HashKey ^ (7 + n), ... HashKey ^ (5 + n)
+ gfmul_avx512(ZT7, ZT5);
+ __ evmovdquq(Address(avx512_htbl, 16 * i), ZT7, Assembler::AVX_512bit);
+
+ //calculate HashKeyK = HashKey x POLY
+ __ evpclmulqdq(ZT1, ZT7, xmm11, 0x10, Assembler::AVX_512bit);
+ __ vpshufd(ZT2, ZT7, 78, Assembler::AVX_512bit);
+ __ evpxorq(ZT1, ZT1, ZT2, Assembler::AVX_512bit);
+ __ evmovdquq(Address(avx512_htbl, 16 * j), ZT1, Assembler::AVX_512bit);
+
+ i -= 4;
+ j -= 4;
+ }
+ }
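(The indexing implied by this generation routine, and by the table layout comment further down in the patch, is that HashKey^n is stored at byte offset 16 * (32 - n) and its folded companion HashKeyK^n sits one half-table, 16 * 32 bytes, later. A few compile-time checks of that mapping, as a sketch; the helper names are invented for illustration.)

// Offsets into avx512_subkeyHtbl for HashKey^n and HashKeyK^n, n in [1, 32].
constexpr int hashkey_offset(int n)  { return 16 * (32 - n); }
constexpr int hashkeyK_offset(int n) { return hashkey_offset(n) + 16 * 32; }

static_assert(hashkey_offset(1)  == 16 * 31, "HashKey^1 is the last plain entry");
static_assert(hashkey_offset(32) == 16 * 0,  "HashKey^32 is the first entry");
static_assert(hashkeyK_offset(4) == 16 * 60, "matches the evmovdquq store to 16 * 60 above");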
#define vhpxori4x128(reg, tmp) \
__ vextracti64x4(tmp, reg, 1); \
@@ -2820,21 +2865,17 @@ __ evmovdquq(dst2, Address(src, position, Address::times_1, 1 * 64), Assembler::
__ evmovdquq(dst3, Address(src, position, Address::times_1, 2 * 64), Assembler::AVX_512bit); \
__ evmovdquq(dst4, Address(src, position, Address::times_1, 3 * 64), Assembler::AVX_512bit); \
-#define carrylessMultiply(dst00, dst01, dst10, dst11, ghdata, hkey) \
-__ evpclmulqdq(dst00, ghdata, hkey, 0x00, Assembler::AVX_512bit); \
-__ evpclmulqdq(dst01, ghdata, hkey, 0x01, Assembler::AVX_512bit); \
-__ evpclmulqdq(dst10, ghdata, hkey, 0x10, Assembler::AVX_512bit); \
-__ evpclmulqdq(dst11, ghdata, hkey, 0x11, Assembler::AVX_512bit); \
-
-#define shuffleExorRnd1Key(dst0, dst1, dst2, dst3, shufmask, rndkey) \
-__ vpshufb(dst0, dst0, shufmask, Assembler::AVX_512bit); \
-__ evpxorq(dst0, dst0, rndkey, Assembler::AVX_512bit); \
-__ vpshufb(dst1, dst1, shufmask, Assembler::AVX_512bit); \
-__ evpxorq(dst1, dst1, rndkey, Assembler::AVX_512bit); \
-__ vpshufb(dst2, dst2, shufmask, Assembler::AVX_512bit); \
-__ evpxorq(dst2, dst2, rndkey, Assembler::AVX_512bit); \
-__ vpshufb(dst3, dst3, shufmask, Assembler::AVX_512bit); \
-__ evpxorq(dst3, dst3, rndkey, Assembler::AVX_512bit); \
+#define carrylessMultiply(dst00, dst01, dst10, dst11, ghdata, hkey2, hkey1) \
+__ evpclmulqdq(dst00, ghdata, hkey2, 0x00, Assembler::AVX_512bit); \
+__ evpclmulqdq(dst01, ghdata, hkey2, 0x10, Assembler::AVX_512bit); \
+__ evpclmulqdq(dst10, ghdata, hkey1, 0x01, Assembler::AVX_512bit); \
+__ evpclmulqdq(dst11, ghdata, hkey1, 0x11, Assembler::AVX_512bit); \
+
+#define shuffle(dst0, dst1, dst2, dst3, src0, src1, src2, src3, shufmask) \
+__ vpshufb(dst0, src0, shufmask, Assembler::AVX_512bit); \
+__ vpshufb(dst1, src1, shufmask, Assembler::AVX_512bit); \
+__ vpshufb(dst2, src2, shufmask, Assembler::AVX_512bit); \
+__ vpshufb(dst3, src3, shufmask, Assembler::AVX_512bit); \
#define xorBeforeStore(dst0, dst1, dst2, dst3, src0, src1, src2, src3) \
__ evpxorq(dst0, dst0, src0, Assembler::AVX_512bit); \
@@ -2848,211 +2889,462 @@ __ vpternlogq(dst1, 0x96, src12, src13, Assembler::AVX_512bit); \
__ vpternlogq(dst2, 0x96, src22, src23, Assembler::AVX_512bit); \
__ vpternlogq(dst3, 0x96, src32, src33, Assembler::AVX_512bit); \
-void StubGenerator::ghash16_encrypt16_parallel(Register key, Register subkeyHtbl, XMMRegister ctr_blockx, XMMRegister aad_hashx,
- Register in, Register out, Register data, Register pos, bool first_time_reduction, XMMRegister addmask, bool ghash_input, Register rounds,
- Register ghash_pos, bool final_reduction, int i, XMMRegister counter_inc_mask) {
- Label AES_192, AES_256, LAST_AES_RND;
+//schoolbook multiply of 16 blocks (16 x 16 bytes)
+//it is assumed that the data read is already shuffled
+void StubGenerator::ghash16_avx512(bool start_ghash, bool do_reduction, bool uload_shuffle, bool hk_broadcast, bool do_hxor,
+ Register in, Register pos, Register subkeyHtbl, XMMRegister HASH, XMMRegister SHUFM, int in_offset,
+ int in_disp, int displacement, int hashkey_offset) {
const XMMRegister ZTMP0 = xmm0;
const XMMRegister ZTMP1 = xmm3;
const XMMRegister ZTMP2 = xmm4;
const XMMRegister ZTMP3 = xmm5;
+ const XMMRegister ZTMP4 = xmm6;
const XMMRegister ZTMP5 = xmm7;
const XMMRegister ZTMP6 = xmm10;
const XMMRegister ZTMP7 = xmm11;
const XMMRegister ZTMP8 = xmm12;
const XMMRegister ZTMP9 = xmm13;
- const XMMRegister ZTMP10 = xmm15;
- const XMMRegister ZTMP11 = xmm16;
- const XMMRegister ZTMP12 = xmm17;
+ const XMMRegister ZTMPA = xmm26;
+ const XMMRegister ZTMPB = xmm23;
+ const XMMRegister GH = xmm24;
+ const XMMRegister GL = xmm25;
+ const int hkey_gap = 16 * 32;
+
+ if (uload_shuffle) {
+ __ evmovdquq(ZTMP9, Address(subkeyHtbl, in_offset * 16 + in_disp), Assembler::AVX_512bit);
+ __ vpshufb(ZTMP9, ZTMP9, SHUFM, Assembler::AVX_512bit);
+ } else {
+ __ evmovdquq(ZTMP9, Address(subkeyHtbl, in_offset * 16 + in_disp), Assembler::AVX_512bit);
+ }
- const XMMRegister ZTMP13 = xmm19;
- const XMMRegister ZTMP14 = xmm20;
- const XMMRegister ZTMP15 = xmm21;
- const XMMRegister ZTMP16 = xmm30;
- const XMMRegister ZTMP17 = xmm31;
- const XMMRegister ZTMP18 = xmm1;
- const XMMRegister ZTMP19 = xmm2;
- const XMMRegister ZTMP20 = xmm8;
- const XMMRegister ZTMP21 = xmm22;
- const XMMRegister ZTMP22 = xmm23;
+ if (start_ghash) {
+ __ evpxorq(ZTMP9, ZTMP9, HASH, Assembler::AVX_512bit);
+ }
+ if (hk_broadcast) {
+ __ evbroadcastf64x2(ZTMP8, Address(subkeyHtbl, hashkey_offset + displacement + 0 * 64), Assembler::AVX_512bit);
+ __ evbroadcastf64x2(ZTMPA, Address(subkeyHtbl, hashkey_offset + displacement + hkey_gap + 0 * 64), Assembler::AVX_512bit);
+ } else {
+ __ evmovdquq(ZTMP8, Address(subkeyHtbl, hashkey_offset + displacement + 0 * 64), Assembler::AVX_512bit);
+ __ evmovdquq(ZTMPA, Address(subkeyHtbl, hashkey_offset + displacement + hkey_gap + 0 * 64), Assembler::AVX_512bit);
+ }
+
+ carrylessMultiply(ZTMP0, ZTMP1, ZTMP2, ZTMP3, ZTMP9, ZTMPA, ZTMP8);
+
+ //ghash blocks 4 - 7
+ if (uload_shuffle) {
+ __ evmovdquq(ZTMP9, Address(subkeyHtbl, in_offset * 16 + in_disp + 64), Assembler::AVX_512bit);
+ __ vpshufb(ZTMP9, ZTMP9, SHUFM, Assembler::AVX_512bit);
+ } else {
+ __ evmovdquq(ZTMP9, Address(subkeyHtbl, in_offset * 16 + in_disp + 64), Assembler::AVX_512bit);
+ }
+
+ if (hk_broadcast) {
+ __ evbroadcastf64x2(ZTMP8, Address(subkeyHtbl, hashkey_offset + displacement + 1 * 64), Assembler::AVX_512bit);
+ __ evbroadcastf64x2(ZTMPA, Address(subkeyHtbl, hashkey_offset + displacement + hkey_gap + 1 * 64), Assembler::AVX_512bit);
+ } else {
+ __ evmovdquq(ZTMP8, Address(subkeyHtbl, hashkey_offset + displacement + 1 * 64), Assembler::AVX_512bit);
+ __ evmovdquq(ZTMPA, Address(subkeyHtbl, hashkey_offset + displacement + hkey_gap + 1 * 64), Assembler::AVX_512bit);
+ }
+
+ carrylessMultiply(ZTMP4, ZTMP5, ZTMP6, ZTMP7, ZTMP9, ZTMPA, ZTMP8);
+
+ //update sums
+ if (start_ghash) {
+ __ evpxorq(GL, ZTMP0, ZTMP2, Assembler::AVX_512bit);//T2 = THL + TLL
+ __ evpxorq(GH, ZTMP1, ZTMP3, Assembler::AVX_512bit);//T1 = THH + TLH
+ } else { //mid, end, end_reduce
+ __ vpternlogq(GL, 0x96, ZTMP0, ZTMP2, Assembler::AVX_512bit);//T2 = THL + TLL
+ __ vpternlogq(GH, 0x96, ZTMP1, ZTMP3, Assembler::AVX_512bit);//T1 = THH + TLH
+ }
+ //ghash blocks 8 - 11
+ if (uload_shuffle) {
+ __ evmovdquq(ZTMP9, Address(subkeyHtbl, in_offset * 16 + in_disp + 128), Assembler::AVX_512bit);
+ __ vpshufb(ZTMP9, ZTMP9, SHUFM, Assembler::AVX_512bit);
+ } else {
+ __ evmovdquq(ZTMP9, Address(subkeyHtbl, in_offset * 16 + in_disp + 128), Assembler::AVX_512bit);
+ }
+ if (hk_broadcast) {
+ __ evbroadcastf64x2(ZTMP8, Address(subkeyHtbl, hashkey_offset + displacement + 2 * 64), Assembler::AVX_512bit);
+ __ evbroadcastf64x2(ZTMPA, Address(subkeyHtbl, hashkey_offset + displacement + hkey_gap + 2 * 64), Assembler::AVX_512bit);
+ } else {
+ __ evmovdquq(ZTMP8, Address(subkeyHtbl, hashkey_offset + displacement + 2 * 64), Assembler::AVX_512bit);
+ __ evmovdquq(ZTMPA, Address(subkeyHtbl, hashkey_offset + displacement + hkey_gap + 2 * 64), Assembler::AVX_512bit);
+ }
+
+ carrylessMultiply(ZTMP0, ZTMP1, ZTMP2, ZTMP3, ZTMP9, ZTMPA, ZTMP8);
+
+ //update sums
+ __ vpternlogq(GL, 0x96, ZTMP6, ZTMP4, Assembler::AVX_512bit);//T2 = THL + TLL
+ __ vpternlogq(GH, 0x96, ZTMP7, ZTMP5, Assembler::AVX_512bit);//T1 = THH + TLH
+ //ghash blocks 12 - 15
+ if (uload_shuffle) {
+ __ evmovdquq(ZTMP9, Address(subkeyHtbl, in_offset * 16 + in_disp + 192), Assembler::AVX_512bit);
+ __ vpshufb(ZTMP9, ZTMP9, SHUFM, Assembler::AVX_512bit);
+ } else {
+ __ evmovdquq(ZTMP9, Address(subkeyHtbl, in_offset * 16 + in_disp + 192), Assembler::AVX_512bit);
+ }
+
+ if (hk_broadcast) {
+ __ evbroadcastf64x2(ZTMP8, Address(subkeyHtbl, hashkey_offset + displacement + 3 * 64), Assembler::AVX_512bit);
+ __ evbroadcastf64x2(ZTMPA, Address(subkeyHtbl, hashkey_offset + displacement + hkey_gap + 3 * 64), Assembler::AVX_512bit);
+ } else {
+ __ evmovdquq(ZTMP8, Address(subkeyHtbl, hashkey_offset + displacement + 3 * 64), Assembler::AVX_512bit);
+ __ evmovdquq(ZTMPA, Address(subkeyHtbl, hashkey_offset + displacement + hkey_gap + 3 * 64), Assembler::AVX_512bit);
+ }
+ carrylessMultiply(ZTMP4, ZTMP5, ZTMP6, ZTMP7, ZTMP9, ZTMPA, ZTMP8);
+
+ //update sums
+ xorGHASH(GL, GH, GL, GH, ZTMP0, ZTMP2, ZTMP1, ZTMP3, ZTMP6, ZTMP4, ZTMP7, ZTMP5);
+
+ if (do_reduction) {
+ //new reduction
+ __ evmovdquq(ZTMPB, ExternalAddress(ghash_polynomial_addr()), Assembler::AVX_512bit, rbx /*rscratch*/);
+ __ evpclmulqdq(HASH, GL, ZTMPB, 0x10, Assembler::AVX_512bit);
+ __ vpshufd(ZTMP0, GL, 78, Assembler::AVX_512bit);
+ __ vpternlogq(HASH, 0x96, GH, ZTMP0, Assembler::AVX_512bit);
+ if (do_hxor) {
+ vhpxori4x128(HASH, ZTMP0);
+ }
+ }
+}
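(One detail worth calling out in the accumulation above: vpternlogq with immediate 0x96 computes a three-input XOR, since 0x96 is exactly the truth table of a ^ b ^ c, which is how two fresh carryless products are folded into the running GH/GL sums in a single instruction. A tiny standalone check of that truth-table claim, illustrative only:)

#include <cassert>

int main() {
  for (int a = 0; a <= 1; ++a) {
    for (int b = 0; b <= 1; ++b) {
      for (int c = 0; c <= 1; ++c) {
        int row = (a << 2) | (b << 1) | c;   // row index into the imm8 truth table
        int out = (0x96 >> row) & 1;         // bit vpternlogq would produce
        assert(out == (a ^ b ^ c));          // 0x96 == three-way XOR
      }
    }
  }
  return 0;
}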
- // Pre increment counters
- __ vpaddd(ZTMP0, ctr_blockx, counter_inc_mask, Assembler::AVX_512bit);
- __ vpaddd(ZTMP1, ZTMP0, counter_inc_mask, Assembler::AVX_512bit);
- __ vpaddd(ZTMP2, ZTMP1, counter_inc_mask, Assembler::AVX_512bit);
- __ vpaddd(ZTMP3, ZTMP2, counter_inc_mask, Assembler::AVX_512bit);
- // Save counter value
- __ evmovdquq(ctr_blockx, ZTMP3, Assembler::AVX_512bit);
-
- // Reuse ZTMP17 / ZTMP18 for loading AES Keys
- // Pre-load AES round keys
- ev_load_key(ZTMP17, key, 0, xmm29);
- ev_load_key(ZTMP18, key, 1 * 16, xmm29);
-
- // ZTMP19 & ZTMP20 used for loading hash key
- // Pre-load hash key
- __ evmovdquq(ZTMP19, Address(subkeyHtbl, i * 64), Assembler::AVX_512bit);
- __ evmovdquq(ZTMP20, Address(subkeyHtbl, ++i * 64), Assembler::AVX_512bit);
- // Load data for computing ghash
- __ evmovdquq(ZTMP21, Address(data, ghash_pos, Address::times_1, 0 * 64), Assembler::AVX_512bit);
- __ vpshufb(ZTMP21, ZTMP21, xmm24, Assembler::AVX_512bit);
-
- // Xor cipher block 0 with input ghash, if available
- if (ghash_input) {
- __ evpxorq(ZTMP21, ZTMP21, aad_hashx, Assembler::AVX_512bit);
+//Stitched GHASH of 16 blocks (with reduction) with encryption of 0 blocks
+void StubGenerator::gcm_enc_dec_last_avx512(Register len, Register in, Register pos, XMMRegister HASH, XMMRegister SHUFM, Register subkeyHtbl,
+ int ghashin_offset, int hashkey_offset, bool start_ghash, bool do_reduction) {
+ //there are no blocks to cipher, so there are only 16 blocks for ghash and reduction
+ ghash16_avx512(start_ghash, do_reduction, false, false, true, in, pos, subkeyHtbl, HASH, SHUFM, ghashin_offset, 0, 0, hashkey_offset);
+}
+
+//Main GCM macro stitching the cipher with GHASH
+//encrypts 16 blocks at a time
+//ghashes the 16 previously encrypted ciphertext blocks
+void StubGenerator::ghash16_encrypt_parallel16_avx512(Register in, Register out, Register ct, Register pos, Register avx512_subkeyHtbl,
+ Register CTR_CHECK, Register NROUNDS, Register key, XMMRegister CTR_BE, XMMRegister GHASH_IN,
+ XMMRegister ADDBE_4x4, XMMRegister ADDBE_1234, XMMRegister ADD_1234, XMMRegister SHFMSK,
+ bool hk_broadcast, bool is_hash_start, bool do_hash_reduction, bool do_hash_hxor,
+ bool no_ghash_in, int ghashin_offset, int aesout_offset, int hashkey_offset) {
+ const XMMRegister B00_03 = xmm0;
+ const XMMRegister B04_07 = xmm3;
+ const XMMRegister B08_11 = xmm4;
+ const XMMRegister B12_15 = xmm5;
+ const XMMRegister THH1 = xmm6;
+ const XMMRegister THL1 = xmm7;
+ const XMMRegister TLH1 = xmm10;
+ const XMMRegister TLL1 = xmm11, THH2 = xmm12, THL2 = xmm13, TLH2 = xmm15;
+ const XMMRegister TLL2 = xmm16, THH3 = xmm17, THL3 = xmm19, TLH3 = xmm20;
+ const XMMRegister TLL3 = xmm21, DATA1 = xmm17, DATA2 = xmm19, DATA3 = xmm20, DATA4 = xmm21;
+ const XMMRegister AESKEY1 = xmm30, AESKEY2 = xmm31;
+ const XMMRegister GHKEY1 = xmm1, GHKEY2 = xmm18, GHDAT1 = xmm8, GHDAT2 = xmm22;
+ const XMMRegister ZT = xmm23, TO_REDUCE_L = xmm25, TO_REDUCE_H = xmm24;
+ const int hkey_gap = 16 * 32;
+
+ Label blocks_overflow, blocks_ok, skip_shuffle, cont, aes_256, aes_192, last_aes_rnd;
+
+ __ cmpb(CTR_CHECK, (256 - 16));
+ __ jcc(Assembler::aboveEqual, blocks_overflow);
+ __ vpaddd(B00_03, CTR_BE, ADDBE_1234, Assembler::AVX_512bit);
+ __ vpaddd(B04_07, B00_03, ADDBE_4x4, Assembler::AVX_512bit);
+ __ vpaddd(B08_11, B04_07, ADDBE_4x4, Assembler::AVX_512bit);
+ __ vpaddd(B12_15, B08_11, ADDBE_4x4, Assembler::AVX_512bit);
+ __ jmp(blocks_ok);
+ __ bind(blocks_overflow);
+ __ vpshufb(CTR_BE, CTR_BE, SHFMSK, Assembler::AVX_512bit);
+ __ evmovdquq(B12_15, ExternalAddress(counter_mask_linc4_addr()), Assembler::AVX_512bit, rbx /*rscratch*/);
+ __ vpaddd(B00_03, CTR_BE, ADD_1234, Assembler::AVX_512bit);
+ __ vpaddd(B04_07, B00_03, B12_15, Assembler::AVX_512bit);
+ __ vpaddd(B08_11, B04_07, B12_15, Assembler::AVX_512bit);
+ __ vpaddd(B12_15, B08_11, B12_15, Assembler::AVX_512bit);
+ shuffle(B00_03, B04_07, B08_11, B12_15, B00_03, B04_07, B08_11, B12_15, SHFMSK);
+
+ __ bind(blocks_ok);
+
+ //pre-load constants
+ ev_load_key(AESKEY1, key, 0, rbx);
+ if (!no_ghash_in) {
+ __ evpxorq(GHDAT1, GHASH_IN, Address(avx512_subkeyHtbl, 16 * ghashin_offset), Assembler::AVX_512bit);
+ } else {
+ __ evmovdquq(GHDAT1, Address(avx512_subkeyHtbl, 16 * ghashin_offset), Assembler::AVX_512bit);
+ }
+
+ if (hk_broadcast) {
+ __ evbroadcastf64x2(GHKEY1, Address(avx512_subkeyHtbl, hashkey_offset + 0 * 64), Assembler::AVX_512bit);
+ __ evbroadcastf64x2(GHKEY2, Address(avx512_subkeyHtbl, hashkey_offset + hkey_gap + 0 * 64), Assembler::AVX_512bit);
+ } else {
+ __ evmovdquq(GHKEY1, Address(avx512_subkeyHtbl, hashkey_offset + 0 * 64), Assembler::AVX_512bit);
+ __ evmovdquq(GHKEY2, Address(avx512_subkeyHtbl, hashkey_offset + hkey_gap + 0 * 64), Assembler::AVX_512bit);
+ }
+
+ //save counter for the next round
+ //increment counter overflow check register
+ __ evshufi64x2(CTR_BE, B12_15, B12_15, 255, Assembler::AVX_512bit);
+ __ addb(CTR_CHECK, 16);
+
+ //pre-load constants
+ ev_load_key(AESKEY2, key, 1 * 16, rbx);
+ __ evmovdquq(GHDAT2, Address(avx512_subkeyHtbl, 16 * (ghashin_offset +4)), Assembler::AVX_512bit);
+
+ //stitch AES rounds with GHASH
+ //AES round 0
+ __ evpxorq(B00_03, B00_03, AESKEY1, Assembler::AVX_512bit);
+ __ evpxorq(B04_07, B04_07, AESKEY1, Assembler::AVX_512bit);
+ __ evpxorq(B08_11, B08_11, AESKEY1, Assembler::AVX_512bit);
+ __ evpxorq(B12_15, B12_15, AESKEY1, Assembler::AVX_512bit);
+ ev_load_key(AESKEY1, key, 2 * 16, rbx);
+
+ //GHASH 4 blocks(15 to 12)
+ carrylessMultiply(TLL1, TLH1, THL1, THH1, GHDAT1, GHKEY2, GHKEY1);
+
+ if (hk_broadcast) {
+ __ evbroadcastf64x2(GHKEY1, Address(avx512_subkeyHtbl, hashkey_offset + 1 * 64), Assembler::AVX_512bit);
+ __ evbroadcastf64x2(GHKEY2, Address(avx512_subkeyHtbl, hashkey_offset + hkey_gap + 1 * 64), Assembler::AVX_512bit);
+ } else {
+ __ evmovdquq(GHKEY1, Address(avx512_subkeyHtbl, hashkey_offset + 1 * 64), Assembler::AVX_512bit);
+ __ evmovdquq(GHKEY2, Address(avx512_subkeyHtbl, hashkey_offset + hkey_gap + 1 * 64), Assembler::AVX_512bit);
+ }
+
+ __ evmovdquq(GHDAT1, Address(avx512_subkeyHtbl, 16 * (ghashin_offset + 8)), Assembler::AVX_512bit);
+
+ //AES round 1
+ roundEncode(AESKEY2, B00_03, B04_07, B08_11, B12_15);
+
+ ev_load_key(AESKEY2, key, 3 * 16, rbx);
+
+ //GHASH 4 blocks(11 to 8)
+ carrylessMultiply(TLL2, TLH2, THL2, THH2, GHDAT2, GHKEY2, GHKEY1);
+
+ if (hk_broadcast) {
+ __ evbroadcastf64x2(GHKEY1, Address(avx512_subkeyHtbl, hashkey_offset + 2 * 64), Assembler::AVX_512bit);
+ __ evbroadcastf64x2(GHKEY2, Address(avx512_subkeyHtbl, hashkey_offset + hkey_gap + 2 * 64), Assembler::AVX_512bit);
+ } else {
+ __ evmovdquq(GHKEY1, Address(avx512_subkeyHtbl, hashkey_offset + 2 * 64 ), Assembler::AVX_512bit);
+ __ evmovdquq(GHKEY2, Address(avx512_subkeyHtbl, hashkey_offset + hkey_gap + 2 * 64), Assembler::AVX_512bit);
}
- // Load data for computing ghash
- __ evmovdquq(ZTMP22, Address(data, ghash_pos, Address::times_1, 1 * 64), Assembler::AVX_512bit);
- __ vpshufb(ZTMP22, ZTMP22, xmm24, Assembler::AVX_512bit);
-
- // stitch AES rounds with GHASH
- // AES round 0, xmm24 has shuffle mask
- shuffleExorRnd1Key(ZTMP0, ZTMP1, ZTMP2, ZTMP3, xmm24, ZTMP17);
- // Reuse ZTMP17 / ZTMP18 for loading remaining AES Keys
- ev_load_key(ZTMP17, key, 2 * 16, xmm29);
- // GHASH 4 blocks
- carrylessMultiply(ZTMP6, ZTMP7, ZTMP8, ZTMP5, ZTMP21, ZTMP19);
- // Load the next hkey and Ghash data
- __ evmovdquq(ZTMP19, Address(subkeyHtbl, ++i * 64), Assembler::AVX_512bit);
- __ evmovdquq(ZTMP21, Address(data, ghash_pos, Address::times_1, 2 * 64), Assembler::AVX_512bit);
- __ vpshufb(ZTMP21, ZTMP21, xmm24, Assembler::AVX_512bit);
-
- // AES round 1
- roundEncode(ZTMP18, ZTMP0, ZTMP1, ZTMP2, ZTMP3);
- ev_load_key(ZTMP18, key, 3 * 16, xmm29);
-
- // GHASH 4 blocks(11 to 8)
- carrylessMultiply(ZTMP10, ZTMP12, ZTMP11, ZTMP9, ZTMP22, ZTMP20);
- // Load the next hkey and GDATA
- __ evmovdquq(ZTMP20, Address(subkeyHtbl, ++i * 64), Assembler::AVX_512bit);
- __ evmovdquq(ZTMP22, Address(data, ghash_pos, Address::times_1, 3 * 64), Assembler::AVX_512bit);
- __ vpshufb(ZTMP22, ZTMP22, xmm24, Assembler::AVX_512bit);
-
- // AES round 2
- roundEncode(ZTMP17, ZTMP0, ZTMP1, ZTMP2, ZTMP3);
- ev_load_key(ZTMP17, key, 4 * 16, xmm29);
-
- // GHASH 4 blocks(7 to 4)
- carrylessMultiply(ZTMP14, ZTMP16, ZTMP15, ZTMP13, ZTMP21, ZTMP19);
- // AES rounds 3
- roundEncode(ZTMP18, ZTMP0, ZTMP1, ZTMP2, ZTMP3);
- ev_load_key(ZTMP18, key, 5 * 16, xmm29);
-
- // Gather(XOR) GHASH for 12 blocks
- xorGHASH(ZTMP5, ZTMP6, ZTMP8, ZTMP7, ZTMP9, ZTMP13, ZTMP10, ZTMP14, ZTMP12, ZTMP16, ZTMP11, ZTMP15);
-
- // AES rounds 4
- roundEncode(ZTMP17, ZTMP0, ZTMP1, ZTMP2, ZTMP3);
- ev_load_key(ZTMP17, key, 6 * 16, xmm29);
-
- // load plain / cipher text(recycle registers)
- loadData(in, pos, ZTMP13, ZTMP14, ZTMP15, ZTMP16);
-
- // AES rounds 5
- roundEncode(ZTMP18, ZTMP0, ZTMP1, ZTMP2, ZTMP3);
- ev_load_key(ZTMP18, key, 7 * 16, xmm29);
- // GHASH 4 blocks(3 to 0)
- carrylessMultiply(ZTMP10, ZTMP12, ZTMP11, ZTMP9, ZTMP22, ZTMP20);
-
- // AES round 6
- roundEncode(ZTMP17, ZTMP0, ZTMP1, ZTMP2, ZTMP3);
- ev_load_key(ZTMP17, key, 8 * 16, xmm29);
-
- // gather GHASH in ZTMP6(low) and ZTMP5(high)
- if (first_time_reduction) {
- __ vpternlogq(ZTMP7, 0x96, ZTMP8, ZTMP12, Assembler::AVX_512bit);
- __ evpxorq(xmm25, ZTMP7, ZTMP11, Assembler::AVX_512bit);
- __ evpxorq(xmm27, ZTMP5, ZTMP9, Assembler::AVX_512bit);
- __ evpxorq(xmm26, ZTMP6, ZTMP10, Assembler::AVX_512bit);
- } else if (!first_time_reduction && !final_reduction) {
- xorGHASH(ZTMP7, xmm25, xmm27, xmm26, ZTMP8, ZTMP12, ZTMP7, ZTMP11, ZTMP5, ZTMP9, ZTMP6, ZTMP10);
+ __ evmovdquq(GHDAT2, Address(avx512_subkeyHtbl, 16 * (ghashin_offset + 12)), Assembler::AVX_512bit);
+
+ //AES round 2
+ roundEncode(AESKEY1, B00_03, B04_07, B08_11, B12_15);
+ ev_load_key(AESKEY1, key, 4 * 16, rbx);
+
+ //GHASH 4 blocks(7 to 4)
+ carrylessMultiply(TLL3, TLH3, THL3, THH3, GHDAT1, GHKEY2, GHKEY1);
+
+ if (hk_broadcast) {
+ __ evbroadcastf64x2(GHKEY1, Address(avx512_subkeyHtbl, hashkey_offset + 3 * 64), Assembler::AVX_512bit);
+ __ evbroadcastf64x2(GHKEY2, Address(avx512_subkeyHtbl, hashkey_offset + hkey_gap + 3 * 64), Assembler::AVX_512bit);
+ } else {
+ __ evmovdquq(GHKEY1, Address(avx512_subkeyHtbl, hashkey_offset + 3 * 64), Assembler::AVX_512bit);
+ __ evmovdquq(GHKEY2, Address(avx512_subkeyHtbl, hashkey_offset + hkey_gap + 3 * 64), Assembler::AVX_512bit);
}
- if (final_reduction) {
- // Phase one: Add mid products together
- // Also load polynomial constant for reduction
- __ vpternlogq(ZTMP7, 0x96, ZTMP8, ZTMP12, Assembler::AVX_512bit);
- __ vpternlogq(ZTMP7, 0x96, xmm25, ZTMP11, Assembler::AVX_512bit);
- __ vpsrldq(ZTMP11, ZTMP7, 8, Assembler::AVX_512bit);
- __ vpslldq(ZTMP7, ZTMP7, 8, Assembler::AVX_512bit);
- __ evmovdquq(ZTMP12, ExternalAddress(ghash_polynomial_reduction_addr()), Assembler::AVX_512bit, rbx /*rscratch*/);
+ //AES rounds 3
+ roundEncode(AESKEY2, B00_03, B04_07, B08_11, B12_15);
+ ev_load_key(AESKEY2, key, 5 * 16, rbx);
+
+ //Gather(XOR) GHASH for 12 blocks
+ xorGHASH(TLL1, TLH1, THL1, THH1, TLL2, TLL3, TLH2, TLH3, THL2, THL3, THH2, THH3);
+
+ //AES rounds 4
+ roundEncode(AESKEY1, B00_03, B04_07, B08_11, B12_15);
+ ev_load_key(AESKEY1, key, 6 * 16, rbx);
+
+ //load plain / cipher text(recycle GH3xx registers)
+ loadData(in, pos, DATA1, DATA2, DATA3, DATA4);
+
+ //AES rounds 5
+ roundEncode(AESKEY2, B00_03, B04_07, B08_11, B12_15);
+ ev_load_key(AESKEY2, key, 7 * 16, rbx);
+
+ //GHASH 4 blocks(3 to 0)
+ carrylessMultiply(TLL2, TLH2, THL2, THH2, GHDAT2, GHKEY2, GHKEY1);
+
+ //AES round 6
+ roundEncode(AESKEY1, B00_03, B04_07, B08_11, B12_15);
+ ev_load_key(AESKEY1, key, 8 * 16, rbx);
+
+ //gather GHASH in TO_REDUCE_H / L
+ if (is_hash_start) {
+ __ evpxorq(TO_REDUCE_L, TLL2, THL2, Assembler::AVX_512bit);
+ __ evpxorq(TO_REDUCE_H, THH2, TLH2, Assembler::AVX_512bit);
+ __ vpternlogq(TO_REDUCE_L, 0x96, TLL1, THL1, Assembler::AVX_512bit);
+ __ vpternlogq(TO_REDUCE_H, 0x96, THH1, TLH1, Assembler::AVX_512bit);
+ } else {
+ //not the first round so sums need to be updated
+ xorGHASH(TO_REDUCE_L, TO_REDUCE_H, TO_REDUCE_L, TO_REDUCE_H, TLL2, THL2, THH2, TLH2, TLL1, THL1, THH1, TLH1);
}
- // AES round 7
- roundEncode(ZTMP18, ZTMP0, ZTMP1, ZTMP2, ZTMP3);
- ev_load_key(ZTMP18, key, 9 * 16, xmm29);
- if (final_reduction) {
- __ vpternlogq(ZTMP5, 0x96, ZTMP9, ZTMP11, Assembler::AVX_512bit);
- __ evpxorq(ZTMP5, ZTMP5, xmm27, Assembler::AVX_512bit);
- __ vpternlogq(ZTMP6, 0x96, ZTMP10, ZTMP7, Assembler::AVX_512bit);
- __ evpxorq(ZTMP6, ZTMP6, xmm26, Assembler::AVX_512bit);
+
+ //AES round 7
+ roundEncode(AESKEY2, B00_03, B04_07, B08_11, B12_15);
+ ev_load_key(AESKEY2, key, 9 * 16, rbx);
+
+ //new reduction
+ if (do_hash_reduction) {
+ __ evmovdquq(ZT, ExternalAddress(ghash_polynomial_reduction_addr()), Assembler::AVX_512bit, rbx /*rscratch*/);
+ __ evpclmulqdq(THH1, TO_REDUCE_L, ZT, 0x10, Assembler::AVX_512bit);
+ __ vpshufd(TO_REDUCE_L, TO_REDUCE_L, 78, Assembler::AVX_512bit);
+ __ vpternlogq(THH1, 0x96, TO_REDUCE_H, TO_REDUCE_L, Assembler::AVX_512bit);
}
- // AES round 8
- roundEncode(ZTMP17, ZTMP0, ZTMP1, ZTMP2, ZTMP3);
- ev_load_key(ZTMP17, key, 10 * 16, xmm29);
-
- // Horizontal xor of low and high 4*128
- if (final_reduction) {
- vhpxori4x128(ZTMP5, ZTMP9);
- vhpxori4x128(ZTMP6, ZTMP10);
+
+ //AES round 8
+ roundEncode(AESKEY1, B00_03, B04_07, B08_11, B12_15);
+ ev_load_key(AESKEY1, key, 10 * 16, rbx);
+
+ //horizontal xor of 4 reduced hashes
+ if (do_hash_hxor) {
+ vhpxori4x128(THH1, TLL1);
}
- // AES round 9
- roundEncode(ZTMP18, ZTMP0, ZTMP1, ZTMP2, ZTMP3);
- // First phase of reduction
- if (final_reduction) {
- __ evpclmulqdq(ZTMP10, ZTMP12, ZTMP6, 0x01, Assembler::AVX_128bit);
- __ vpslldq(ZTMP10, ZTMP10, 8, Assembler::AVX_128bit);
- __ evpxorq(ZTMP10, ZTMP6, ZTMP10, Assembler::AVX_128bit);
+
+ //AES round 9
+ roundEncode(AESKEY2, B00_03, B04_07, B08_11, B12_15);
+ ev_load_key(AESKEY2, key, 11 * 16, rbx);
+ //AES rounds up to 11 (AES192) or 13 (AES256)
+ //AES128 is done
+ __ cmpl(NROUNDS, 52);
+ __ jcc(Assembler::less, last_aes_rnd);
+ __ bind(aes_192);
+ roundEncode(AESKEY1, B00_03, B04_07, B08_11, B12_15);
+ ev_load_key(AESKEY1, key, 12 * 16, rbx);
+ roundEncode(AESKEY2, B00_03, B04_07, B08_11, B12_15);
+ __ cmpl(NROUNDS, 60);
+ __ jcc(Assembler::less, last_aes_rnd);
+ __ bind(aes_256);
+ ev_load_key(AESKEY2, key, 13 * 16, rbx);
+ roundEncode(AESKEY1, B00_03, B04_07, B08_11, B12_15);
+ ev_load_key(AESKEY1, key, 14 * 16, rbx);
+ roundEncode(AESKEY2, B00_03, B04_07, B08_11, B12_15);
+
+ __ bind(last_aes_rnd);
+ //the last AES round
+ lastroundEncode(AESKEY1, B00_03, B04_07, B08_11, B12_15);
+ //AESKEY1 and AESKEY2 contain AES round keys
+
+ //XOR against plain / cipher text
+ xorBeforeStore(B00_03, B04_07, B08_11, B12_15, DATA1, DATA2, DATA3, DATA4);
+
+ //store cipher / plain text
+ storeData(out, pos, B00_03, B04_07, B08_11, B12_15);
+ //**B00_03, B04_07, B08_11, B12_15 may contain sensitive data
+
+ //shuffle cipher text blocks for GHASH computation
+ __ cmpptr(ct, out);
+ __ jcc(Assembler::notEqual, skip_shuffle);
+ shuffle(B00_03, B04_07, B08_11, B12_15, B00_03, B04_07, B08_11, B12_15, SHFMSK);
+ __ jmp(cont);
+ __ bind(skip_shuffle);
+ shuffle(B00_03, B04_07, B08_11, B12_15, DATA1, DATA2, DATA3, DATA4, SHFMSK);
+
+ //**B00_03, B04_07, B08_11, B12_15 overwritten with shuffled cipher text
+ __ bind(cont);
+ //store shuffled cipher text for ghashing
+ __ evmovdquq(Address(avx512_subkeyHtbl, 16 * aesout_offset), B00_03, Assembler::AVX_512bit);
+ __ evmovdquq(Address(avx512_subkeyHtbl, 16 * (aesout_offset + 4)), B04_07, Assembler::AVX_512bit);
+ __ evmovdquq(Address(avx512_subkeyHtbl, 16 * (aesout_offset + 8)), B08_11, Assembler::AVX_512bit);
+ __ evmovdquq(Address(avx512_subkeyHtbl, 16 * (aesout_offset + 12)), B12_15, Assembler::AVX_512bit);
+}
+
+
+//Encrypt / decrypt the initial 16 blocks
+void StubGenerator::initial_blocks_16_avx512(Register in, Register out, Register ct, Register pos, Register key, Register avx512_subkeyHtbl,
+ Register CTR_CHECK, Register rounds, XMMRegister CTR, XMMRegister GHASH, XMMRegister ADDBE_4x4,
+ XMMRegister ADDBE_1234, XMMRegister ADD_1234, XMMRegister SHUF_MASK, int stack_offset) {
+ const XMMRegister B00_03 = xmm7;
+ const XMMRegister B04_07 = xmm10;
+ const XMMRegister B08_11 = xmm11;
+ const XMMRegister B12_15 = xmm12;
+ const XMMRegister T0 = xmm0;
+ const XMMRegister T1 = xmm3;
+ const XMMRegister T2 = xmm4;
+ const XMMRegister T3 = xmm5;
+ const XMMRegister T4 = xmm6;
+ const XMMRegister T5 = xmm30;
+
+ Label next_16_overflow, next_16_ok, cont, skip_shuffle, aes_256, aes_192, last_aes_rnd;
+ //prepare counter blocks
+ __ cmpb(CTR_CHECK, (256 - 16));
+ __ jcc(Assembler::aboveEqual, next_16_overflow);
+ __ vpaddd(B00_03, CTR, ADDBE_1234, Assembler::AVX_512bit);
+ __ vpaddd(B04_07, B00_03, ADDBE_4x4, Assembler::AVX_512bit);
+ __ vpaddd(B08_11, B04_07, ADDBE_4x4, Assembler::AVX_512bit);
+ __ vpaddd(B12_15, B08_11, ADDBE_4x4, Assembler::AVX_512bit);
+ __ jmp(next_16_ok);
+ __ bind(next_16_overflow);
+ __ vpshufb(CTR, CTR, SHUF_MASK, Assembler::AVX_512bit);
+ __ evmovdquq(B12_15, ExternalAddress(counter_mask_linc4_addr()), Assembler::AVX_512bit, rbx);
+ __ vpaddd(B00_03, CTR, ADD_1234, Assembler::AVX_512bit);
+ __ vpaddd(B04_07, B00_03, B12_15, Assembler::AVX_512bit);
+ __ vpaddd(B08_11, B04_07, B12_15, Assembler::AVX_512bit);
+ __ vpaddd(B12_15, B08_11, B12_15, Assembler::AVX_512bit);
+ shuffle(B00_03, B04_07, B08_11, B12_15, B00_03, B04_07, B08_11, B12_15, SHUF_MASK);
+ __ bind(next_16_ok);
+ __ evshufi64x2(CTR, B12_15, B12_15, 255, Assembler::AVX_512bit);
+ __ addb(CTR_CHECK, 16);
+
+ //load 16 blocks of data
+ loadData(in, pos, T0, T1, T2, T3);
+
+ //move to AES encryption rounds
+ __ movdqu(T5, ExternalAddress(key_shuffle_mask_addr()), rbx /*rscratch*/);
+ ev_load_key(T4, key, 0, T5);
+ __ evpxorq(B00_03, B00_03, T4, Assembler::AVX_512bit);
+ __ evpxorq(B04_07, B04_07, T4, Assembler::AVX_512bit);
+ __ evpxorq(B08_11, B08_11, T4, Assembler::AVX_512bit);
+ __ evpxorq(B12_15, B12_15, T4, Assembler::AVX_512bit);
+
+ for (int i = 1; i < 10; i++) {
+ ev_load_key(T4, key, i * 16, T5);
+ roundEncode(T4, B00_03, B04_07, B08_11, B12_15);
}
+
+ ev_load_key(T4, key, 10 * 16, T5);
__ cmpl(rounds, 52);
- __ jcc(Assembler::greaterEqual, AES_192);
- __ jmp(LAST_AES_RND);
- // AES rounds up to 11 (AES192) or 13 (AES256)
- __ bind(AES_192);
- roundEncode(ZTMP17, ZTMP0, ZTMP1, ZTMP2, ZTMP3);
- ev_load_key(ZTMP18, key, 11 * 16, xmm29);
- roundEncode(ZTMP18, ZTMP0, ZTMP1, ZTMP2, ZTMP3);
- ev_load_key(ZTMP17, key, 12 * 16, xmm29);
+ __ jcc(Assembler::less, last_aes_rnd);
+ __ bind(aes_192);
+ roundEncode(T4, B00_03, B04_07, B08_11, B12_15);
+ ev_load_key(T4, key, 16 * 11, T5);
+ roundEncode(T4, B00_03, B04_07, B08_11, B12_15);
+ ev_load_key(T4, key, 16 * 12, T5);
__ cmpl(rounds, 60);
- __ jcc(Assembler::aboveEqual, AES_256);
- __ jmp(LAST_AES_RND);
-
- __ bind(AES_256);
- roundEncode(ZTMP17, ZTMP0, ZTMP1, ZTMP2, ZTMP3);
- ev_load_key(ZTMP18, key, 13 * 16, xmm29);
- roundEncode(ZTMP18, ZTMP0, ZTMP1, ZTMP2, ZTMP3);
- ev_load_key(ZTMP17, key, 14 * 16, xmm29);
-
- __ bind(LAST_AES_RND);
- // Second phase of reduction
- if (final_reduction) {
- __ evpclmulqdq(ZTMP9, ZTMP12, ZTMP10, 0x00, Assembler::AVX_128bit);
- __ vpsrldq(ZTMP9, ZTMP9, 4, Assembler::AVX_128bit); // Shift-R 1-DW to obtain 2-DWs shift-R
- __ evpclmulqdq(ZTMP11, ZTMP12, ZTMP10, 0x10, Assembler::AVX_128bit);
- __ vpslldq(ZTMP11, ZTMP11, 4, Assembler::AVX_128bit); // Shift-L 1-DW for result
- // ZTMP5 = ZTMP5 X ZTMP11 X ZTMP9
- __ vpternlogq(ZTMP5, 0x96, ZTMP11, ZTMP9, Assembler::AVX_128bit);
- }
- // Last AES round
- lastroundEncode(ZTMP17, ZTMP0, ZTMP1, ZTMP2, ZTMP3);
- // XOR against plain / cipher text
- xorBeforeStore(ZTMP0, ZTMP1, ZTMP2, ZTMP3, ZTMP13, ZTMP14, ZTMP15, ZTMP16);
- // store cipher / plain text
- storeData(out, pos, ZTMP0, ZTMP1, ZTMP2, ZTMP3);
+ __ jcc(Assembler::less, last_aes_rnd);
+ __ bind(aes_256);
+ roundEncode(T4, B00_03, B04_07, B08_11, B12_15);
+ ev_load_key(T4, key, 16 * 13, T5);
+ roundEncode(T4, B00_03, B04_07, B08_11, B12_15);
+ ev_load_key(T4, key, 16 * 14, T5);
+
+ __ bind(last_aes_rnd);
+ lastroundEncode(T4, B00_03, B04_07, B08_11, B12_15);
+
+ //xor against text
+ xorBeforeStore(B00_03, B04_07, B08_11, B12_15, T0, T1, T2, T3);
+
+ //store
+ storeData(out, pos, B00_03, B04_07, B08_11, B12_15);
+
+ __ cmpptr(ct, out);
+ __ jcc(Assembler::equal, skip_shuffle);
+ //decryption - cipher text needs to go to GHASH phase
+ shuffle(B00_03, B04_07, B08_11, B12_15, T0, T1, T2, T3, SHUF_MASK);
+ __ jmp(cont);
+ __ bind(skip_shuffle);
+ shuffle(B00_03, B04_07, B08_11, B12_15, B00_03, B04_07, B08_11, B12_15, SHUF_MASK);
+
+ //B00_03, B04_07, B08_11, B12_15 overwritten with shuffled cipher text
+ __ bind(cont);
+ __ evmovdquq(Address(avx512_subkeyHtbl, 16 * stack_offset), B00_03, Assembler::AVX_512bit);
+ __ evmovdquq(Address(avx512_subkeyHtbl, 16 * (stack_offset + 4)), B04_07, Assembler::AVX_512bit);
+ __ evmovdquq(Address(avx512_subkeyHtbl, 16 * (stack_offset + 8)), B08_11, Assembler::AVX_512bit);
+ __ evmovdquq(Address(avx512_subkeyHtbl, 16 * (stack_offset + 12)), B12_15, Assembler::AVX_512bit);
}
-void StubGenerator::aesgcm_encrypt(Register in, Register len, Register ct, Register out, Register key,
- Register state, Register subkeyHtbl, Register avx512_subkeyHtbl, Register counter) {
- Label ENC_DEC_DONE, GENERATE_HTBL_48_BLKS, AES_192, AES_256, STORE_CT, GHASH_LAST_32,
- AES_32_BLOCKS, GHASH_AES_PARALLEL, LOOP, ACCUMULATE, GHASH_16_AES_16;
- const XMMRegister CTR_BLOCKx = xmm9;
+void StubGenerator::aesgcm_avx512(Register in, Register len, Register ct, Register out, Register key, Register state,
+ Register subkeyHtbl, Register avx512_subkeyHtbl, Register counter) {
+ Label ENC_DEC_DONE, MESG_BELOW_32_BLKS, NO_BIG_BLKS, ENCRYPT_BIG_BLKS_NO_HXOR,
+ ENCRYPT_BIG_NBLKS, ENCRYPT_16_BLKS, ENCRYPT_N_GHASH_32_N_BLKS, GHASH_DONE;
+ const XMMRegister CTR_BLOCKx = xmm2;
const XMMRegister AAD_HASHx = xmm14;
- const Register pos = rax;
- const Register rounds = r15;
- const Register ghash_pos = NOT_WIN64( r14) WIN64_ONLY( r11 );
const XMMRegister ZTMP0 = xmm0;
- const XMMRegister ZTMP1 = xmm3;
- const XMMRegister ZTMP2 = xmm4;
- const XMMRegister ZTMP3 = xmm5;
+ const XMMRegister ZTMP1 = xmm3; //**sensitive
+ const XMMRegister ZTMP2 = xmm4; //**sensitive(small data)
+ const XMMRegister ZTMP3 = xmm5; //**sensitive(small data)
const XMMRegister ZTMP4 = xmm6;
const XMMRegister ZTMP5 = xmm7;
const XMMRegister ZTMP6 = xmm10;
@@ -3066,235 +3358,170 @@ void StubGenerator::aesgcm_encrypt(Register in, Register len, Register ct, Regis
const XMMRegister ZTMP14 = xmm20;
const XMMRegister ZTMP15 = xmm21;
const XMMRegister ZTMP16 = xmm30;
- const XMMRegister COUNTER_INC_MASK = xmm18;
-
- __ movl(pos, 0); // Total length processed
- // Min data size processed = 768 bytes
- __ cmpl(len, 768);
- __ jcc(Assembler::less, ENC_DEC_DONE);
+ const XMMRegister ZTMP17 = xmm31;
+ const XMMRegister ZTMP18 = xmm1;
+ const XMMRegister ZTMP19 = xmm18;
+ const XMMRegister ZTMP20 = xmm8;
+ const XMMRegister ZTMP21 = xmm22;
+ const XMMRegister ZTMP22 = xmm23;
+ const XMMRegister ZTMP23 = xmm26;
+ const XMMRegister GH = xmm24;
+ const XMMRegister GL = xmm25;
+ const XMMRegister SHUF_MASK = xmm29;
+ const XMMRegister ADDBE_4x4 = xmm27;
+ const XMMRegister ADDBE_1234 = xmm28;
+ const XMMRegister ADD_1234 = xmm9;
+ const KRegister MASKREG = k1;
+ const Register pos = rax;
+ const Register rounds = r15;
+ const Register CTR_CHECK = r14;
- // Generate 48 constants for htbl
- __ call(GENERATE_HTBL_48_BLKS, relocInfo::none);
- int index = 0; // Index for choosing subkeyHtbl entry
- __ movl(ghash_pos, 0); // Pointer for ghash read and store operations
+ const int stack_offset = 64;
+ const int ghashin_offset = 64;
+ const int aesout_offset = 64;
+ const int hashkey_offset = 0;
+ const int hashkey_gap = 16 * 32;
+ const int HashKey_32 = 0;
+ const int HashKey_16 = 16 * 16;
- // Move initial counter value and STATE value into variables
+ __ movl(pos, 0);
+ __ cmpl(len, 256);
+ __ jcc(Assembler::lessEqual, ENC_DEC_DONE);
+
+ /* Structure of the Htbl is as follows:
+ * Entries 0 - 31 hold the 32 HashKey values and entries 32 - 63 hold the 32 HashKeyK values (derived from HashKey).
+ * The rest of the table stores the two sets of CTR values post AES rounds.
+ * ----------------------------------------------------------------------------------------
+ Hashkey32 -> 16 * 0
+ Hashkey31 -> 16 * 1
+ Hashkey30 -> 16 * 2
+ ........
+ Hashkey1 -> 16 * 31
+ ---------------------
+ HashkeyK32 -> 16 * 32
+ HashkeyK31 -> 16 * 33
+ .........
+ HashkeyK1 -> 16 * 63
+ ---------------------
+ 1st set of AES Entries
+ B00_03 -> 16 * 64
+ B04_07 -> 16 * 68
+ B08_11 -> 16 * 72
+ B12_15 -> 16 * 76
+ ---------------------
+ 2nd set of AES Entries
+ B00_03 -> 16 * 80
+ B04_07 -> 16 * 84
+ B08_11 -> 16 * 88
+ B12_15 -> 16 * 92
+ ---------------------*/
+ generateHtbl_32_blocks_avx512(subkeyHtbl, avx512_subkeyHtbl);
+
+ //Move initial counter value and STATE value into variables
__ movdqu(CTR_BLOCKx, Address(counter, 0));
__ movdqu(AAD_HASHx, Address(state, 0));
- // Load lswap mask for ghash
+
+ //Load lswap mask for ghash
__ movdqu(xmm24, ExternalAddress(ghash_long_swap_mask_addr()), rbx /*rscratch*/);
- // Shuffle input state using lswap mask
+ //Shuffle input state using lswap mask
__ vpshufb(AAD_HASHx, AAD_HASHx, xmm24, Assembler::AVX_128bit);
// Compute #rounds for AES based on the length of the key array
__ movl(rounds, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
- // Broadcast counter value to 512 bit register
+ __ evmovdquq(ADDBE_4x4, ExternalAddress(counter_mask_addbe_4444_addr()), Assembler::AVX_512bit, rbx /*rscratch*/);
+ __ evmovdquq(ADDBE_1234, ExternalAddress(counter_mask_addbe_1234_addr()), Assembler::AVX_512bit, rbx /*rscratch*/);
+ __ evmovdquq(SHUF_MASK, ExternalAddress(counter_shuffle_mask_addr()), Assembler::AVX_512bit, rbx /*rscratch*/);
+ __ evmovdquq(ADD_1234, ExternalAddress(counter_mask_add_1234_addr()), Assembler::AVX_512bit, rbx /*rscratch*/);
+
+ //Shuffle counter, subtract 1 from the pre-incremented counter value and broadcast counter value to 512 bit register
+ __ vpshufb(CTR_BLOCKx, CTR_BLOCKx, SHUF_MASK, Assembler::AVX_128bit);
+ __ vpsubd(CTR_BLOCKx, CTR_BLOCKx, ADD_1234, Assembler::AVX_128bit);
__ evshufi64x2(CTR_BLOCKx, CTR_BLOCKx, CTR_BLOCKx, 0, Assembler::AVX_512bit);
- // Load counter shuffle mask
- __ evmovdquq(xmm24, ExternalAddress(counter_shuffle_mask_addr()), Assembler::AVX_512bit, rbx /*rscratch*/);
- // Shuffle counter
- __ vpshufb(CTR_BLOCKx, CTR_BLOCKx, xmm24, Assembler::AVX_512bit);
-
- // Load mask for incrementing counter
- __ evmovdquq(COUNTER_INC_MASK, ExternalAddress(counter_mask_linc4_addr()), Assembler::AVX_512bit, rbx /*rscratch*/);
- // Pre-increment counter
- __ vpaddd(ZTMP5, CTR_BLOCKx, ExternalAddress(counter_mask_linc0_addr()), Assembler::AVX_512bit, rbx /*rscratch*/);
- __ vpaddd(ZTMP6, ZTMP5, COUNTER_INC_MASK, Assembler::AVX_512bit);
- __ vpaddd(ZTMP7, ZTMP6, COUNTER_INC_MASK, Assembler::AVX_512bit);
- __ vpaddd(ZTMP8, ZTMP7, COUNTER_INC_MASK, Assembler::AVX_512bit);
-
- // Begin 32 blocks of AES processing
- __ bind(AES_32_BLOCKS);
- // Save incremented counter before overwriting it with AES data
- __ evmovdquq(CTR_BLOCKx, ZTMP8, Assembler::AVX_512bit);
-
- // Move 256 bytes of data
- loadData(in, pos, ZTMP0, ZTMP1, ZTMP2, ZTMP3);
- // Load key shuffle mask
- __ movdqu(xmm29, ExternalAddress(key_shuffle_mask_addr()), rbx /*rscratch*/);
- // Load 0th AES round key
- ev_load_key(ZTMP4, key, 0, xmm29);
- // AES-ROUND0, xmm24 has the shuffle mask
- shuffleExorRnd1Key(ZTMP5, ZTMP6, ZTMP7, ZTMP8, xmm24, ZTMP4);
-
- for (int j = 1; j < 10; j++) {
- ev_load_key(ZTMP4, key, j * 16, xmm29);
- roundEncode(ZTMP4, ZTMP5, ZTMP6, ZTMP7, ZTMP8);
- }
- ev_load_key(ZTMP4, key, 10 * 16, xmm29);
- // AES rounds up to 11 (AES192) or 13 (AES256)
- __ cmpl(rounds, 52);
- __ jcc(Assembler::greaterEqual, AES_192);
- lastroundEncode(ZTMP4, ZTMP5, ZTMP6, ZTMP7, ZTMP8);
- __ jmp(STORE_CT);
-
- __ bind(AES_192);
- roundEncode(ZTMP4, ZTMP5, ZTMP6, ZTMP7, ZTMP8);
- ev_load_key(ZTMP4, key, 11 * 16, xmm29);
- roundEncode(ZTMP4, ZTMP5, ZTMP6, ZTMP7, ZTMP8);
- __ cmpl(rounds, 60);
- __ jcc(Assembler::aboveEqual, AES_256);
- ev_load_key(ZTMP4, key, 12 * 16, xmm29);
- lastroundEncode(ZTMP4, ZTMP5, ZTMP6, ZTMP7, ZTMP8);
- __ jmp(STORE_CT);
-
- __ bind(AES_256);
- ev_load_key(ZTMP4, key, 12 * 16, xmm29);
- roundEncode(ZTMP4, ZTMP5, ZTMP6, ZTMP7, ZTMP8);
- ev_load_key(ZTMP4, key, 13 * 16, xmm29);
- roundEncode(ZTMP4, ZTMP5, ZTMP6, ZTMP7, ZTMP8);
- ev_load_key(ZTMP4, key, 14 * 16, xmm29);
- // Last AES round
- lastroundEncode(ZTMP4, ZTMP5, ZTMP6, ZTMP7, ZTMP8);
-
- __ bind(STORE_CT);
- // Xor the encrypted key with PT to obtain CT
- xorBeforeStore(ZTMP5, ZTMP6, ZTMP7, ZTMP8, ZTMP0, ZTMP1, ZTMP2, ZTMP3);
- storeData(out, pos, ZTMP5, ZTMP6, ZTMP7, ZTMP8);
- // 16 blocks encryption completed
- __ addl(pos, 256);
- __ cmpl(pos, 512);
- __ jcc(Assembler::aboveEqual, GHASH_AES_PARALLEL);
- __ vpaddd(ZTMP5, CTR_BLOCKx, COUNTER_INC_MASK, Assembler::AVX_512bit);
- __ vpaddd(ZTMP6, ZTMP5, COUNTER_INC_MASK, Assembler::AVX_512bit);
- __ vpaddd(ZTMP7, ZTMP6, COUNTER_INC_MASK, Assembler::AVX_512bit);
- __ vpaddd(ZTMP8, ZTMP7, COUNTER_INC_MASK, Assembler::AVX_512bit);
- __ jmp(AES_32_BLOCKS);
-
- __ bind(GHASH_AES_PARALLEL);
- // Ghash16_encrypt16_parallel takes place in the order with three reduction values:
- // 1) First time -> cipher xor input ghash
- // 2) No reduction -> accumulate multiplication values
- // 3) Final reduction post 48 blocks -> new ghash value is computed for the next round
- // Reduction value = first time
- ghash16_encrypt16_parallel(key, avx512_subkeyHtbl, CTR_BLOCKx, AAD_HASHx, in, out, ct, pos, true, xmm24, true, rounds, ghash_pos, false, index, COUNTER_INC_MASK);
- __ addl(pos, 256);
- __ addl(ghash_pos, 256);
- index += 4;
-
- // At this point we have processed 768 bytes of AES and 256 bytes of GHASH.
- // If the remaining length is less than 768, process remaining 512 bytes of ghash in GHASH_LAST_32 code
- __ subl(len, 768);
- __ cmpl(len, 768);
- __ jcc(Assembler::less, GHASH_LAST_32);
-
- // AES 16 blocks and GHASH 16 blocks in parallel
- // For multiples of 48 blocks we will do ghash16_encrypt16 interleaved multiple times
- // Reduction value = no reduction means that the carryless multiplication values are accumulated for further calculations
- // Each call uses 4 subkeyHtbl values, so increment the index by 4.
- __ bind(GHASH_16_AES_16);
- // Reduction value = no reduction
- ghash16_encrypt16_parallel(key, avx512_subkeyHtbl, CTR_BLOCKx, AAD_HASHx, in, out, ct, pos, false, xmm24, false, rounds, ghash_pos, false, index, COUNTER_INC_MASK);
- __ addl(pos, 256);
- __ addl(ghash_pos, 256);
- index += 4;
- // Reduction value = final reduction means that the accumulated values have to be reduced as we have completed 48 blocks of ghash
- ghash16_encrypt16_parallel(key, avx512_subkeyHtbl, CTR_BLOCKx, AAD_HASHx, in, out, ct, pos, false, xmm24, false, rounds, ghash_pos, true, index, COUNTER_INC_MASK);
- __ addl(pos, 256);
- __ addl(ghash_pos, 256);
- // Calculated ghash value needs to be __ moved to AAD_HASHX so that we can restart the ghash16-aes16 pipeline
- __ movdqu(AAD_HASHx, ZTMP5);
- index = 0; // Reset subkeyHtbl index
-
- // Restart the pipeline
- // Reduction value = first time
- ghash16_encrypt16_parallel(key, avx512_subkeyHtbl, CTR_BLOCKx, AAD_HASHx, in, out, ct, pos, true, xmm24, true, rounds, ghash_pos, false, index, COUNTER_INC_MASK);
- __ addl(pos, 256);
- __ addl(ghash_pos, 256);
- index += 4;
-
- __ subl(len, 768);
- __ cmpl(len, 768);
- __ jcc(Assembler::greaterEqual, GHASH_16_AES_16);
-
- // GHASH last 32 blocks processed here
- // GHASH products accumulated in ZMM27, ZMM25 and ZMM26 during GHASH16-AES16 operation is used
- __ bind(GHASH_LAST_32);
- // Use rbx as a pointer to the htbl; For last 32 blocks of GHASH, use key# 4-11 entry in subkeyHtbl
- __ movl(rbx, 256);
- // Load cipher blocks
- __ evmovdquq(ZTMP13, Address(ct, ghash_pos, Address::times_1, 0 * 64), Assembler::AVX_512bit);
- __ evmovdquq(ZTMP14, Address(ct, ghash_pos, Address::times_1, 1 * 64), Assembler::AVX_512bit);
- __ vpshufb(ZTMP13, ZTMP13, xmm24, Assembler::AVX_512bit);
- __ vpshufb(ZTMP14, ZTMP14, xmm24, Assembler::AVX_512bit);
- // Load ghash keys
- __ evmovdquq(ZTMP15, Address(avx512_subkeyHtbl, rbx, Address::times_1, 0 * 64), Assembler::AVX_512bit);
- __ evmovdquq(ZTMP16, Address(avx512_subkeyHtbl, rbx, Address::times_1, 1 * 64), Assembler::AVX_512bit);
-
- // Ghash blocks 0 - 3
- carrylessMultiply(ZTMP2, ZTMP3, ZTMP4, ZTMP1, ZTMP13, ZTMP15);
- // Ghash blocks 4 - 7
- carrylessMultiply(ZTMP6, ZTMP7, ZTMP8, ZTMP5, ZTMP14, ZTMP16);
-
- __ vpternlogq(ZTMP1, 0x96, ZTMP5, xmm27, Assembler::AVX_512bit); // ZTMP1 = ZTMP1 + ZTMP5 + zmm27
- __ vpternlogq(ZTMP2, 0x96, ZTMP6, xmm26, Assembler::AVX_512bit); // ZTMP2 = ZTMP2 + ZTMP6 + zmm26
- __ vpternlogq(ZTMP3, 0x96, ZTMP7, xmm25, Assembler::AVX_512bit); // ZTMP3 = ZTMP3 + ZTMP7 + zmm25
- __ evpxorq(ZTMP4, ZTMP4, ZTMP8, Assembler::AVX_512bit); // ZTMP4 = ZTMP4 + ZTMP8
-
- __ addl(ghash_pos, 128);
- __ addl(rbx, 128);
-
- // Ghash remaining blocks
- __ bind(LOOP);
- __ cmpl(ghash_pos, pos);
- __ jcc(Assembler::aboveEqual, ACCUMULATE);
- // Load next cipher blocks and corresponding ghash keys
- __ evmovdquq(ZTMP13, Address(ct, ghash_pos, Address::times_1, 0 * 64), Assembler::AVX_512bit);
- __ evmovdquq(ZTMP14, Address(ct, ghash_pos, Address::times_1, 1 * 64), Assembler::AVX_512bit);
- __ vpshufb(ZTMP13, ZTMP13, xmm24, Assembler::AVX_512bit);
- __ vpshufb(ZTMP14, ZTMP14, xmm24, Assembler::AVX_512bit);
- __ evmovdquq(ZTMP15, Address(avx512_subkeyHtbl, rbx, Address::times_1, 0 * 64), Assembler::AVX_512bit);
- __ evmovdquq(ZTMP16, Address(avx512_subkeyHtbl, rbx, Address::times_1, 1 * 64), Assembler::AVX_512bit);
-
- // ghash blocks 0 - 3
- carrylessMultiply(ZTMP6, ZTMP7, ZTMP8, ZTMP5, ZTMP13, ZTMP15);
-
- // ghash blocks 4 - 7
- carrylessMultiply(ZTMP10, ZTMP11, ZTMP12, ZTMP9, ZTMP14, ZTMP16);
-
- // update sums
- // ZTMP1 = ZTMP1 + ZTMP5 + ZTMP9
- // ZTMP2 = ZTMP2 + ZTMP6 + ZTMP10
- // ZTMP3 = ZTMP3 + ZTMP7 xor ZTMP11
- // ZTMP4 = ZTMP4 + ZTMP8 xor ZTMP12
- xorGHASH(ZTMP1, ZTMP2, ZTMP3, ZTMP4, ZTMP5, ZTMP9, ZTMP6, ZTMP10, ZTMP7, ZTMP11, ZTMP8, ZTMP12);
- __ addl(ghash_pos, 128);
- __ addl(rbx, 128);
- __ jmp(LOOP);
- // Integrate ZTMP3/ZTMP4 into ZTMP1 and ZTMP2
- __ bind(ACCUMULATE);
- __ evpxorq(ZTMP3, ZTMP3, ZTMP4, Assembler::AVX_512bit);
- __ vpsrldq(ZTMP7, ZTMP3, 8, Assembler::AVX_512bit);
- __ vpslldq(ZTMP8, ZTMP3, 8, Assembler::AVX_512bit);
- __ evpxorq(ZTMP1, ZTMP1, ZTMP7, Assembler::AVX_512bit);
- __ evpxorq(ZTMP2, ZTMP2, ZTMP8, Assembler::AVX_512bit);
-
- // Add ZTMP1 and ZTMP2 128 - bit words horizontally
- vhpxori4x128(ZTMP1, ZTMP11);
- vhpxori4x128(ZTMP2, ZTMP12);
- // Load reduction polynomial and compute final reduction
- __ evmovdquq(ZTMP15, ExternalAddress(ghash_polynomial_reduction_addr()), Assembler::AVX_512bit, rbx /*rscratch*/);
- vclmul_reduce(AAD_HASHx, ZTMP15, ZTMP1, ZTMP2, ZTMP3, ZTMP4);
-
- // Pre-increment counter for next operation
- __ vpaddd(CTR_BLOCKx, CTR_BLOCKx, xmm18, Assembler::AVX_128bit);
- // Shuffle counter and save the updated value
- __ vpshufb(CTR_BLOCKx, CTR_BLOCKx, xmm24, Assembler::AVX_512bit);
+ __ movdl(CTR_CHECK, CTR_BLOCKx);
+ __ andl(CTR_CHECK, 255);
+
+ // Reshuffle counter
+ __ vpshufb(CTR_BLOCKx, CTR_BLOCKx, SHUF_MASK, Assembler::AVX_512bit);
+
+ initial_blocks_16_avx512(in, out, ct, pos, key, avx512_subkeyHtbl, CTR_CHECK, rounds, CTR_BLOCKx, AAD_HASHx, ADDBE_4x4, ADDBE_1234, ADD_1234, SHUF_MASK, stack_offset);
+ __ addl(pos, 16 * 16);
+ __ cmpl(len, 32 * 16);
+ __ jcc(Assembler::below, MESG_BELOW_32_BLKS);
+
+ initial_blocks_16_avx512(in, out, ct, pos, key, avx512_subkeyHtbl, CTR_CHECK, rounds, CTR_BLOCKx, AAD_HASHx, ADDBE_4x4, ADDBE_1234, ADD_1234, SHUF_MASK, stack_offset + 16);
+ __ addl(pos, 16 * 16);
+ __ subl(len, 32 * 16);
+
+ __ cmpl(len, 32 * 16);
+ __ jcc(Assembler::below, NO_BIG_BLKS);
+
+ __ bind(ENCRYPT_BIG_BLKS_NO_HXOR);
+ __ cmpl(len, 2 * 32 * 16);
+ __ jcc(Assembler::below, ENCRYPT_BIG_NBLKS);
+ ghash16_encrypt_parallel16_avx512(in, out, ct, pos, avx512_subkeyHtbl, CTR_CHECK, rounds, key, CTR_BLOCKx, AAD_HASHx, ADDBE_4x4, ADDBE_1234, ADD_1234, SHUF_MASK,
+ true, true, false, false, false, ghashin_offset, aesout_offset, HashKey_32);
+ __ addl(pos, 16 * 16);
+
+ ghash16_encrypt_parallel16_avx512(in, out, ct, pos, avx512_subkeyHtbl, CTR_CHECK, rounds, key, CTR_BLOCKx, AAD_HASHx, ADDBE_4x4, ADDBE_1234, ADD_1234, SHUF_MASK,
+ true, false, true, false, true, ghashin_offset + 16, aesout_offset + 16, HashKey_16);
+ __ evmovdquq(AAD_HASHx, ZTMP4, Assembler::AVX_512bit);
+ __ addl(pos, 16 * 16);
+ __ subl(len, 32 * 16);
+ __ jmp(ENCRYPT_BIG_BLKS_NO_HXOR);
+
+ __ bind(ENCRYPT_BIG_NBLKS);
+ ghash16_encrypt_parallel16_avx512(in, out, ct, pos, avx512_subkeyHtbl, CTR_CHECK, rounds, key, CTR_BLOCKx, AAD_HASHx, ADDBE_4x4, ADDBE_1234, ADD_1234, SHUF_MASK,
+ false, true, false, false, false, ghashin_offset, aesout_offset, HashKey_32);
+ __ addl(pos, 16 * 16);
+ ghash16_encrypt_parallel16_avx512(in, out, ct, pos, avx512_subkeyHtbl, CTR_CHECK, rounds, key, CTR_BLOCKx, AAD_HASHx, ADDBE_4x4, ADDBE_1234, ADD_1234, SHUF_MASK,
+ false, false, true, true, true, ghashin_offset + 16, aesout_offset + 16, HashKey_16);
+
+ __ movdqu(AAD_HASHx, ZTMP4);
+ __ addl(pos, 16 * 16);
+ __ subl(len, 32 * 16);
+
+ __ bind(NO_BIG_BLKS);
+ __ cmpl(len, 16 * 16);
+ __ jcc(Assembler::aboveEqual, ENCRYPT_16_BLKS);
+
+ __ bind(ENCRYPT_N_GHASH_32_N_BLKS);
+ ghash16_avx512(true, false, false, false, true, in, pos, avx512_subkeyHtbl, AAD_HASHx, SHUF_MASK, stack_offset, 0, 0, HashKey_32);
+ gcm_enc_dec_last_avx512(len, in, pos, AAD_HASHx, SHUF_MASK, avx512_subkeyHtbl, ghashin_offset + 16, HashKey_16, false, true);
+ __ jmp(GHASH_DONE);
+
+ __ bind(ENCRYPT_16_BLKS);
+ ghash16_encrypt_parallel16_avx512(in, out, ct, pos, avx512_subkeyHtbl, CTR_CHECK, rounds, key, CTR_BLOCKx, AAD_HASHx, ADDBE_4x4, ADDBE_1234, ADD_1234, SHUF_MASK,
+ false, true, false, false, false, ghashin_offset, aesout_offset, HashKey_32);
+
+ ghash16_avx512(false, true, false, false, true, in, pos, avx512_subkeyHtbl, AAD_HASHx, SHUF_MASK, stack_offset, 16 * 16, 0, HashKey_16);
+
+ __ bind(MESG_BELOW_32_BLKS);
+ __ subl(len, 16 * 16);
+ __ addl(pos, 16 * 16);
+ gcm_enc_dec_last_avx512(len, in, pos, AAD_HASHx, SHUF_MASK, avx512_subkeyHtbl, ghashin_offset, HashKey_16, true, true);
+
+ __ bind(GHASH_DONE);
+ // Pre-increment counter for the next operation; make sure the counter value is incremented on the LSB
+ __ vpshufb(CTR_BLOCKx, CTR_BLOCKx, SHUF_MASK, Assembler::AVX_128bit);
+ __ vpaddd(CTR_BLOCKx, CTR_BLOCKx, ADD_1234, Assembler::AVX_128bit);
+ __ vpshufb(CTR_BLOCKx, CTR_BLOCKx, SHUF_MASK, Assembler::AVX_128bit);
__ movdqu(Address(counter, 0), CTR_BLOCKx);
- // Load ghash lswap mask
+ //Load ghash lswap mask
__ movdqu(xmm24, ExternalAddress(ghash_long_swap_mask_addr()), rbx /*rscratch*/);
- // Shuffle ghash using lbswap_mask and store it
+ //Shuffle ghash using lbswap_mask and store it
__ vpshufb(AAD_HASHx, AAD_HASHx, xmm24, Assembler::AVX_128bit);
__ movdqu(Address(state, 0), AAD_HASHx);
- __ jmp(ENC_DEC_DONE);
- __ bind(GENERATE_HTBL_48_BLKS);
- generateHtbl_48_block_zmm(subkeyHtbl, avx512_subkeyHtbl, rbx /*rscratch*/);
+ // Zero out sensitive data
+ __ evpxorq(ZTMP21, ZTMP21, ZTMP21, Assembler::AVX_512bit);
+ __ evpxorq(ZTMP0, ZTMP0, ZTMP0, Assembler::AVX_512bit);
+ __ evpxorq(ZTMP1, ZTMP1, ZTMP1, Assembler::AVX_512bit);
+ __ evpxorq(ZTMP2, ZTMP2, ZTMP2, Assembler::AVX_512bit);
+ __ evpxorq(ZTMP3, ZTMP3, ZTMP3, Assembler::AVX_512bit);
__ bind(ENC_DEC_DONE);
- __ movq(rax, pos);
}
//Implements data * hashkey mod (128, 127, 126, 121, 0)
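
For orientation: the new counter handling above keeps the GCM counter big-endian in the register and uses the shuffle/add/shuffle idiom to increment it on its least-significant side. A minimal C++ analogue of that idiom, offered purely as an illustration (not code from this patch; it assumes a little-endian host such as x86):

#include <cstdint>
#include <cstring>

static uint32_t byteswap32(uint32_t v) {
  return (v >> 24) | ((v >> 8) & 0x0000ff00u) |
         ((v << 8) & 0x00ff0000u) | (v << 24);
}

// Increment the big-endian 32-bit counter word held in the last 4 bytes of a
// 16-byte GCM counter block by 'by' blocks (little-endian host assumed).
static void increment_gcm_counter(uint8_t block[16], uint32_t by) {
  uint32_t ctr;
  std::memcpy(&ctr, block + 12, sizeof(ctr)); // counter bytes are stored big-endian
  ctr = byteswap32(byteswap32(ctr) + by);     // swap to native, add, swap back
  std::memcpy(block + 12, &ctr, sizeof(ctr));
}
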
diff --git a/src/hotspot/cpu/x86/stubGenerator_x86_64_arraycopy.cpp b/src/hotspot/cpu/x86/stubGenerator_x86_64_arraycopy.cpp
index 5b316881d0346..c72c32e796d2d 100644
--- a/src/hotspot/cpu/x86/stubGenerator_x86_64_arraycopy.cpp
+++ b/src/hotspot/cpu/x86/stubGenerator_x86_64_arraycopy.cpp
@@ -2627,7 +2627,6 @@ address StubGenerator::generate_unsafe_setmemory(const char *name,
// Fill words
{
- Label L_wordsTail, L_wordsLoop, L_wordsTailLoop;
UnsafeMemoryAccessMark umam(this, true, true);
// At this point, we know the lower bit of size is zero and a
@@ -2641,7 +2640,6 @@ address StubGenerator::generate_unsafe_setmemory(const char *name,
// Fill QUADWORDs
{
- Label L_qwordLoop, L_qwordsTail, L_qwordsTailLoop;
UnsafeMemoryAccessMark umam(this, true, true);
// At this point, we know the lower 3 bits of size are zero and a
@@ -2658,7 +2656,6 @@ address StubGenerator::generate_unsafe_setmemory(const char *name,
// Fill DWORDs
{
- Label L_dwordLoop, L_dwordsTail, L_dwordsTailLoop;
UnsafeMemoryAccessMark umam(this, true, true);
// At this point, we know the lower 2 bits of size are zero and a
diff --git a/src/hotspot/cpu/x86/stubGenerator_x86_64_ghash.cpp b/src/hotspot/cpu/x86/stubGenerator_x86_64_ghash.cpp
index 2056fa057654e..5a9b084841376 100644
--- a/src/hotspot/cpu/x86/stubGenerator_x86_64_ghash.cpp
+++ b/src/hotspot/cpu/x86/stubGenerator_x86_64_ghash.cpp
@@ -1,5 +1,5 @@
/*
-* Copyright (c) 2019, 2021, Intel Corporation. All rights reserved.
+* Copyright (c) 2019, 2024, Intel Corporation. All rights reserved.
*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@@ -57,7 +57,10 @@ address StubGenerator::ghash_byte_swap_mask_addr() {
// Polynomial x^128+x^127+x^126+x^121+1
ATTRIBUTE_ALIGNED(16) static const uint64_t GHASH_POLYNOMIAL[] = {
- 0x0000000000000001UL, 0xC200000000000000UL,
+ 0x0000000000000001ULL, 0xC200000000000000ULL,
+ 0x0000000000000001ULL, 0xC200000000000000ULL,
+ 0x0000000000000001ULL, 0xC200000000000000ULL,
+ 0x0000000000000001ULL, 0xC200000000000000ULL
};
address StubGenerator::ghash_polynomial_addr() {
return (address)GHASH_POLYNOMIAL;
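
The widened table above repeats the 128-bit reduction constant for x^128 + x^127 + x^126 + x^121 + 1 four times, so a full 512-bit register can be filled with a single load rather than a broadcast. A small stand-alone illustration of that layout (an assumption-level sketch, not patch code):

#include <cstdint>
#include <cstdio>

int main() {
  const uint64_t poly128[2] = { 0x0000000000000001ULL, 0xC200000000000000ULL };
  uint64_t poly512[8];
  for (int i = 0; i < 8; i++) {
    poly512[i] = poly128[i % 2];  // replicate the constant across four 128-bit lanes
  }
  for (int i = 0; i < 8; i++) {
    printf("qword %d: 0x%016llx\n", i, (unsigned long long) poly512[i]);
  }
  return 0;
}
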
diff --git a/src/hotspot/cpu/x86/stubGenerator_x86_64_tanh.cpp b/src/hotspot/cpu/x86/stubGenerator_x86_64_tanh.cpp
new file mode 100644
index 0000000000000..92ac78e15cba9
--- /dev/null
+++ b/src/hotspot/cpu/x86/stubGenerator_x86_64_tanh.cpp
@@ -0,0 +1,502 @@
+/*
+* Copyright (c) 2024, Intel Corporation. All rights reserved.
+* Intel Math Library (LIBM) Source Code
+*
+* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+*
+* This code is free software; you can redistribute it and/or modify it
+* under the terms of the GNU General Public License version 2 only, as
+* published by the Free Software Foundation.
+*
+* This code is distributed in the hope that it will be useful, but WITHOUT
+* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+* version 2 for more details (a copy is included in the LICENSE file that
+* accompanied this code).
+*
+* You should have received a copy of the GNU General Public License version
+* 2 along with this work; if not, write to the Free Software Foundation,
+* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+*
+* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+* or visit www.oracle.com if you need additional information or have any
+* questions.
+*
+*/
+
+#include "precompiled.hpp"
+#include "macroAssembler_x86.hpp"
+#include "stubGenerator_x86_64.hpp"
+
+/******************************************************************************/
+// ALGORITHM DESCRIPTION
+// ---------------------
+//
+// tanh(x)=(exp(x)-exp(-x))/(exp(x)+exp(-x))=(1-exp(-2*x))/(1+exp(-2*x))
+//
+// Let |x|=xH+xL (upper 26 bits, lower 27 bits)
+// log2(e) rounded to 26 bits (high part) plus a double precision low part is
+// L2EH+L2EL (upper 26, lower 53 bits)
+//
+// Let xH*L2EH=k+f+r`, where (k+f)*2^8*2=int(xH*L2EH*2^9),
+// f=0.b1 b2 ... b8, k integer
+// 2^{-f} is approximated as Tn[f]+Dn[f]
+// Tn stores the high 53 bits, Dn stores (2^{-f}-Tn[f]) rounded to double precision
+//
+// r=r`+xL*L2EH+|x|*L2EL, |r|<2^{-9}+2^{-14},
+// for |x| in [23/64,3*2^7)
+// e^{-2*|x|}=2^{-k-f}*2^{-r} ~ 2^{-k}*(Tn+Dn)*(1+p)=(T0+D0)*(1+p)
+//
+// For |x| in [2^{-4},2^5):
+// 2^{-r}-1 ~ p=c1*r+c2*r^2+..+c5*r^5
+// Let R=1/(1+T0+p*T0), truncated to 35 significant bits
+// R=1/(1+T0+D0+p*(T0+D0))*(1+eps), |eps|<2^{-33}
+// 1+T0+D0+p*(T0+D0)=KH+KL, where
+// KH=(1+T0+c1*r*T0)_high (leading 17 bits)
+// KL=T0_low+D0+(c1*r*T0)_low+c1*r*D0+(c2*r^2+..c5*r^5)*T0
+// eps ~ (R*KH-1)+R*KL
+// 1/(1+T0+D0+p*(T0+D0)) ~ R-R*eps
+// The result is approximated as (1-T0-D0-(T0+D0)*p)*(R-R*eps)
+// 1-T0-D0-(T0+D0)*p=-((KH-2)+KL)
+// The result is formed as
+// (KH-2)*R+(-(KH-2)*R*eps+(KL*R-KL*R*eps)), with the correct sign
+// set at the end
+//
+// For |x| in [2^{-64},2^{-4}):
+// A Taylor series expansion is used (x+p3*x^3+..+p13*x^{13})
+//
+// For |x|<2^{-64}: x is returned
+//
+// For |x|>=2^32: return +/-1
+//
+// Special cases:
+// tanh(NaN) = quiet NaN, and raise invalid exception
+// tanh(INF) = that INF
+// tanh(+/-0) = +/-0
+//
+/******************************************************************************/
+
+ATTRIBUTE_ALIGNED(4) static const juint _HALFMASK[] =
+{
+ 4160749568, 2147483647
+};
+
+ATTRIBUTE_ALIGNED(4) static const juint _ONEMASK[] =
+{
+ 0, 1072693248
+};
+
+ATTRIBUTE_ALIGNED(4) static const juint _TWOMASK[] =
+{
+ 0, 1073741824
+};
+
+ATTRIBUTE_ALIGNED(16) static const juint _MASK3[] =
+{
+ 0, 4294967280, 0, 4294967280
+};
+
+ATTRIBUTE_ALIGNED(16) static const juint _RMASK[] =
+{
+ 4294705152, 4294967295, 4294705152, 4294967295
+};
+
+ATTRIBUTE_ALIGNED(16) static const juint _L2E[] =
+{
+ 1610612736, 1082594631, 4166901572, 1055174155
+};
+
+ATTRIBUTE_ALIGNED(16) static const juint _Shifter[] =
+{
+ 0, 1127743488, 0, 3275227136
+};
+
+ATTRIBUTE_ALIGNED(16) static const juint _cv[] =
+{
+ 3884607281, 3168131199, 3607404735, 3190582024, 1874480759,
+ 1032041131, 4286760334, 1053736893, 4277811695, 3211144770,
+ 0, 0
+};
+
+ATTRIBUTE_ALIGNED(4) static const juint _pv[] =
+{
+ 236289503, 1064135997, 463583772, 3215696314, 1441186365,
+ 3212977891, 286331153, 1069617425, 2284589306, 1066820852,
+ 1431655765, 3218429269
+};
+
+ATTRIBUTE_ALIGNED(16) static const juint _T2_neg_f[] =
+{
+ 0, 1072693248, 0, 0, 1797923801, 1072687577,
+ 1950547427, 1013229059, 730821105, 1072681922, 2523232743, 1012067188,
+ 915592468, 1072676282, 352947894, 3161024371, 2174652632, 1072670657,
+ 4087714590, 1014450259, 35929225, 1072665048, 2809788041, 3159436968,
+ 2912730644, 1072659453, 3490067722, 3163405074, 2038973688, 1072653874,
+ 892941374, 1016046459, 1533953344, 1072648310, 769171851, 1015665633,
+ 1222472308, 1072642761, 1054357470, 3161021018, 929806999, 1072637227,
+ 3205336643, 1015259557, 481706282, 1072631708, 1696079173, 3162710528,
+ 3999357479, 1072626203, 2258941616, 1015924724, 2719515920, 1072620714,
+ 2760332941, 1015137933, 764307441, 1072615240, 3021057420, 3163329523,
+ 2256325230, 1072609780, 580117746, 1015317295, 2728693978, 1072604335,
+ 396109971, 3163462691, 2009970496, 1072598905, 2159039665, 3162572948,
+ 4224142467, 1072593489, 3389820386, 1015207202, 610758006, 1072588089,
+ 1965209397, 3161866232, 3884662774, 1072582702, 2158611599, 1014210185,
+ 991358482, 1072577331, 838715019, 3163157668, 351641897, 1072571974,
+ 2172261526, 3163010599, 1796832535, 1072566631, 3176955716, 3160585513,
+ 863738719, 1072561303, 1326992220, 3162613197, 1679558232, 1072555989,
+ 2390342287, 3163333970, 4076975200, 1072550689, 2029000899, 1015208535,
+ 3594158869, 1072545404, 2456521700, 3163256561, 64696965, 1072540134,
+ 1768797490, 1015816960, 1912561781, 1072534877, 3147495102, 1015678253,
+ 382305176, 1072529635, 2347622376, 3162578625, 3898795731, 1072524406,
+ 1249994144, 1011869818, 3707479175, 1072519192, 3613079303, 1014164738,
+ 3939148246, 1072513992, 3210352148, 1015274323, 135105010, 1072508807,
+ 1906148728, 3163375739, 721996136, 1072503635, 563754734, 1015371318,
+ 1242007932, 1072498477, 1132034716, 3163339831, 1532734324, 1072493333,
+ 3094216535, 3163162857, 1432208378, 1072488203, 1401068914, 3162363963,
+ 778901109, 1072483087, 2248183955, 3161268751, 3706687593, 1072477984,
+ 3521726940, 1013253067, 1464976603, 1072472896, 3507292405, 3161977534,
+ 2483480501, 1072467821, 1216371780, 1013034172, 2307442995, 1072462760,
+ 3190117721, 3162404539, 777507147, 1072457713, 4282924205, 1015187533,
+ 2029714210, 1072452679, 613660079, 1015099143, 1610600570, 1072447659,
+ 3766732298, 1015760183, 3657065772, 1072442652, 399025623, 3162957078,
+ 3716502172, 1072437659, 2303740125, 1014042725, 1631695677, 1072432680,
+ 2717633076, 3162344026, 1540824585, 1072427714, 1064017011, 3163487690,
+ 3287523847, 1072422761, 1625971539, 3157009955, 2420883922, 1072417822,
+ 2049810052, 1014119888, 3080351519, 1072412896, 3379126788, 3157218001,
+ 815859274, 1072407984, 240396590, 3163487443, 4062661092, 1072403084,
+ 1422616006, 3163255318, 4076559943, 1072398198, 2119478331, 3160758351,
+ 703710506, 1072393326, 1384660846, 1015195891, 2380618042, 1072388466,
+ 3149557219, 3163320799, 364333489, 1072383620, 3923737744, 3161421373,
+ 3092190715, 1072378786, 814012168, 3159523422, 1822067026, 1072373966,
+ 1241994956, 1015340290, 697153126, 1072369159, 1283515429, 3163283189,
+ 3861050111, 1072364364, 254893773, 3162813180, 2572866477, 1072359583,
+ 878562433, 1015521741, 977020788, 1072354815, 3065100517, 1015541563,
+ 3218338682, 1072350059, 3404164304, 3162477108, 557149882, 1072345317,
+ 3672720709, 1014537265, 1434058175, 1072340587, 251133233, 1015085769,
+ 1405169241, 1072335870, 2998539689, 3162830951, 321958744, 1072331166,
+ 3401933767, 1015794558, 2331271250, 1072326474, 812057446, 1012207446,
+ 2990417245, 1072321795, 3683467745, 3163369326, 2152073944, 1072317129,
+ 1486860576, 3163203456, 3964284211, 1072312475, 2111583915, 1015427164,
+ 3985553595, 1072307834, 4002146062, 1015834136, 2069751141, 1072303206,
+ 1562170675, 3162724681, 2366108318, 1072298590, 2867985102, 3161762254,
+ 434316067, 1072293987, 2028358766, 1013458122, 424392917, 1072289396,
+ 2749202995, 3162838718, 2191782032, 1072284817, 2960257726, 1013742662,
+ 1297350157, 1072280251, 1308022040, 3163412558, 1892288442, 1072275697,
+ 2446255666, 3162600381, 3833209506, 1072271155, 2722920684, 1013754842,
+ 2682146384, 1072266626, 2082178513, 3163363419, 2591453363, 1072262109,
+ 2132396182, 3159074198, 3418903055, 1072257604, 2527457337, 3160820604,
+ 727685349, 1072253112, 2038246809, 3162358742, 2966275557, 1072248631,
+ 2176155324, 3159842759, 1403662306, 1072244163, 2788809599, 3161671007,
+ 194117574, 1072239707, 777528612, 3163412089, 3492293770, 1072235262,
+ 2248032210, 1015386826, 2568320822, 1072230830, 2732824428, 1014352915,
+ 1577608921, 1072226410, 1875489510, 3162968394, 380978316, 1072222002,
+ 854188970, 3160462686, 3134592888, 1072217605, 4232266862, 1015991134,
+ 1110089947, 1072213221, 1451641639, 1015474673, 2759350287, 1072208848,
+ 1148526634, 1015894933, 3649726105, 1072204487, 4085036346, 1015649474,
+ 3643909174, 1072200138, 3537586109, 1014354647, 2604962541, 1072195801,
+ 2614425274, 3163539192, 396319521, 1072191476, 4172420816, 3159074632,
+ 1176749997, 1072187162, 2738998779, 3162035844, 515457527, 1072182860,
+ 836709333, 1015651226, 2571947539, 1072178569, 3558159064, 3163376669,
+ 2916157145, 1072174290, 219487565, 1015309367, 1413356050, 1072170023,
+ 1651349291, 3162668166, 2224145553, 1072165767, 3482522030, 3161489169,
+ 919555682, 1072161523, 3121969534, 1012948226, 1660913392, 1072157290,
+ 4218599604, 1015135707, 19972402, 1072153069, 3507899862, 1016009292,
+ 158781403, 1072148859, 2221464712, 3163286453, 1944781191, 1072144660,
+ 3993278767, 3161724279, 950803702, 1072140473, 1655364926, 1015237032,
+ 1339972927, 1072136297, 167908909, 1015572152, 2980802057, 1072132132,
+ 378619896, 1015773303, 1447192521, 1072127979, 1462857171, 3162514521,
+ 903334909, 1072123837, 1636462108, 1015039997, 1218806132, 1072119706,
+ 1818613052, 3162548441, 2263535754, 1072115586, 752233586, 3162639008,
+ 3907805044, 1072111477, 2257091225, 3161550407, 1727278727, 1072107380,
+ 3562710623, 1011471940, 4182873220, 1072103293, 629542646, 3161996303,
+ 2555984613, 1072099218, 2652555442, 3162552692, 1013258799, 1072095154,
+ 1748797611, 3160129082, 3721688645, 1072091100, 3069276937, 1015839401,
+ 1963711167, 1072087058, 1744767757, 3160574294, 4201977662, 1072083026,
+ 748330254, 1013594357, 1719614413, 1072079006, 330458198, 3163282740,
+ 2979960120, 1072074996, 2599109725, 1014498493, 3561793907, 1072070997,
+ 1157054053, 1011890350, 3339203574, 1072067009, 1483497780, 3162408754,
+ 2186617381, 1072063032, 2270764084, 3163272713, 4273770423, 1072059065,
+ 3383180809, 3163218901, 885834528, 1072055110, 1973258547, 3162261564,
+ 488188413, 1072051165, 3199821029, 1015564048, 2956612997, 1072047230,
+ 2118169751, 3162735553, 3872257780, 1072043306, 1253592103, 1015958334,
+ 3111574537, 1072039393, 2606161479, 3162759746, 551349105, 1072035491,
+ 3821916050, 3162106589, 363667784, 1072031599, 813753950, 1015785209,
+ 2425981843, 1072027717, 2830390851, 3163346599, 2321106615, 1072023846,
+ 2171176610, 1009535771, 4222122499, 1072019985, 1277378074, 3163256737,
+ 3712504873, 1072016135, 88491949, 1015427660, 671025100, 1072012296,
+ 3832014351, 3163022030, 3566716925, 1072008466, 1536826856, 1014142433,
+ 3689071823, 1072004647, 2321004996, 3162552716, 917841882, 1072000839,
+ 18715565, 1015659308, 3723038930, 1071997040, 378465264, 3162569582,
+ 3395129871, 1071993252, 4025345435, 3162335388, 4109806887, 1071989474,
+ 422403966, 1014469229, 1453150082, 1071985707, 498154669, 3161488062,
+ 3896463087, 1071981949, 1139797873, 3161233805, 2731501122, 1071978202,
+ 1774031855, 3162470021, 2135241198, 1071974465, 1236747871, 1013589147,
+ 1990012071, 1071970738, 3529070563, 3162813193, 2178460671, 1071967021,
+ 777878098, 3162842493, 2583551245, 1071963314, 3161094195, 1015606491,
+ 3088564500, 1071959617, 1762311517, 1015045673, 3577096743, 1071955930,
+ 2951496418, 1013793687, 3933059031, 1071952253, 2133366768, 3161531832,
+ 4040676318, 1071948586, 4090609238, 1015663458, 3784486610, 1071944929,
+ 1581883040, 3161698953, 3049340112, 1071941282, 3062915824, 1013170595,
+ 1720398391, 1071937645, 3980678963, 3163300080, 3978100823, 1071934017,
+ 3513027190, 1015845963, 1118294578, 1071930400, 2197495694, 3159909401,
+ 1617004845, 1071926792, 82804944, 1010342778, 1065662932, 1071923194,
+ 2533670915, 1014530238, 3645941911, 1071919605, 3814685081, 3161573341,
+ 654919306, 1071916027, 3232961757, 3163047469, 569847338, 1071912458,
+ 472945272, 3159290729, 3278348324, 1071908898, 3069497416, 1014750712,
+ 78413852, 1071905349, 4183226867, 3163017251, 3743175029, 1071901808,
+ 2072812490, 3162175075, 1276261410, 1071898278, 300981948, 1014684169,
+ 1156440435, 1071894757, 2351451249, 1013967056, 3272845541, 1071891245,
+ 928852419, 3163488248, 3219942644, 1071887743, 3798990616, 1015368806,
+ 887463927, 1071884251, 3596744163, 3160794166, 460407023, 1071880768,
+ 4237175092, 3163138469, 1829099622, 1071877294, 1016661181, 3163461005,
+ 589198666, 1071873830, 2664346172, 3163157962, 926591435, 1071870375,
+ 3208833762, 3162913514, 2732492859, 1071866929, 2691479646, 3162255684,
+ 1603444721, 1071863493, 1548633640, 3162201326, 1726216749, 1071860066,
+ 2466808228, 3161676405, 2992903935, 1071856648, 2218154406, 1015228193,
+ 1000925746, 1071853240, 1018491672, 3163309544, 4232894513, 1071849840,
+ 2383938684, 1014668519, 3991843581, 1071846450, 4092853457, 1014585763,
+ 171030293, 1071843070, 3526460132, 1014428778, 1253935211, 1071839698,
+ 1395382931, 3159702613, 2839424854, 1071836335, 1171596163, 1013041679,
+ 526652809, 1071832982, 4223459736, 1015879375, 2799960843, 1071829637,
+ 1423655381, 1015022151, 964107055, 1071826302, 2800439588, 3162833221,
+ 3504003472, 1071822975, 3594001060, 3157330652, 1724976915, 1071819658,
+ 420909223, 3163117379, 4112506593, 1071816349, 2947355221, 1014371048,
+ 1972484976, 1071813050, 675290301, 3161640050, 3790955393, 1071809759,
+ 2352942462, 3163180090, 874372905, 1071806478, 100263788, 1015940732,
+ 1709341917, 1071803205, 2571168217, 1014152499, 1897844341, 1071799941,
+ 1254300460, 1015275938, 1337108031, 1071796686, 3203724452, 1014677845,
+ 4219606026, 1071793439, 2434574742, 1014681548, 1853186616, 1071790202,
+ 3066496371, 1015656574, 2725843665, 1071786973, 1433917087, 1014838523,
+ 2440944790, 1071783753, 2492769774, 1014147454, 897099801, 1071780542,
+ 754756297, 1015241005, 2288159958, 1071777339, 2169144469, 1014876021,
+ 2218315341, 1071774145, 2694295388, 3163288868, 586995997, 1071770960,
+ 41662348, 3162627992, 1588871207, 1071767783, 143439582, 3162963416,
+ 828946858, 1071764615, 10642492, 1015939438, 2502433899, 1071761455,
+ 2148595913, 1015023991, 2214878420, 1071758304, 892270087, 3163116422,
+ 4162030108, 1071755161, 2763428480, 1015529349, 3949972341, 1071752027,
+ 2068408548, 1014913868, 1480023343, 1071748902, 2247196168, 1015327453,
+ 948735466, 1071745785, 3516338028, 3162574883, 2257959872, 1071742676,
+ 3802946148, 1012964927, 1014845819, 1071739576, 3117910646, 3161559105,
+ 1416741826, 1071736484, 2196380210, 1011413563, 3366293073, 1071733400,
+ 3119426314, 1014120554, 2471440686, 1071730325, 968836267, 3162214888,
+ 2930322912, 1071727258, 2599499422, 3162714047, 351405227, 1071724200,
+ 3125337328, 3159822479, 3228316108, 1071721149, 3010241991, 3158422804,
+ 2875075254, 1071718107, 4144233330, 3163333716, 3490863953, 1071715073,
+ 960797498, 3162948880, 685187902, 1071712048, 378731989, 1014843115,
+ 2952712987, 1071709030, 3293494651, 3160120301, 1608493509, 1071706021,
+ 3159622171, 3162807737, 852742562, 1071703020, 667253586, 1009793559,
+ 590962156, 1071700027, 3829346666, 3163275597, 728909815, 1071697042,
+ 383930225, 1015029468, 1172597893, 1071694065, 114433263, 1015347593,
+ 1828292879, 1071691096, 1255956747, 1015588398, 2602514713, 1071688135,
+ 2268929336, 1014354284, 3402036099, 1071685182, 405889334, 1015105656,
+ 4133881824, 1071682237, 2148155345, 3162931299, 410360776, 1071679301,
+ 1269990655, 1011975870, 728934454, 1071676372, 1413842688, 1014178612,
+ 702412510, 1071673451, 3803266087, 3162280415, 238821257, 1071670538,
+ 1469694871, 3162884987, 3541402996, 1071667632, 2759177317, 1014854626,
+ 1928746161, 1071664735, 983617676, 1014285177, 3899555717, 1071661845,
+ 427280750, 3162546972, 772914124, 1071658964, 4004372762, 1012230161,
+ 1048019041, 1071656090, 1398474845, 3160510595, 339411585, 1071653224,
+ 264588982, 3161636657, 2851812149, 1071650365, 2595802551, 1015767337,
+ 4200250559, 1071647514, 2808127345, 3161781938
+};
+
+#define __ _masm->
+
+address StubGenerator::generate_libmTanh() {
+ StubCodeMark mark(this, "StubRoutines", "libmTanh");
+ address start = __ pc();
+
+ Label L_2TAG_PACKET_0_0_1, L_2TAG_PACKET_1_0_1, L_2TAG_PACKET_2_0_1, L_2TAG_PACKET_3_0_1;
+ Label L_2TAG_PACKET_4_0_1, L_2TAG_PACKET_5_0_1;
+ Label B1_2, B1_4;
+
+ address HALFMASK = (address)_HALFMASK;
+ address ONEMASK = (address)_ONEMASK;
+ address TWOMASK = (address)_TWOMASK;
+ address MASK3 = (address)_MASK3;
+ address RMASK = (address)_RMASK;
+ address L2E = (address)_L2E;
+ address Shifter = (address)_Shifter;
+ address cv = (address)_cv;
+ address pv = (address)_pv;
+ address T2_neg_f = (address) _T2_neg_f;
+
+ __ enter(); // required for proper stackwalking of RuntimeStub frame
+
+ __ bind(B1_2);
+ __ movsd(xmm3, ExternalAddress(HALFMASK), r11 /*rscratch*/);
+ __ xorpd(xmm4, xmm4);
+ __ movsd(xmm1, ExternalAddress(L2E), r11 /*rscratch*/);
+ __ movsd(xmm2, ExternalAddress(L2E + 8), r11 /*rscratch*/);
+ __ movl(rax, 32768);
+ __ pinsrw(xmm4, rax, 3);
+ __ movsd(xmm6, ExternalAddress(Shifter), r11 /*rscratch*/);
+ __ pextrw(rcx, xmm0, 3);
+ __ andpd(xmm3, xmm0);
+ __ andnpd(xmm4, xmm0);
+ __ pshufd(xmm5, xmm4, 68);
+ __ movl(rdx, 32768);
+ __ andl(rdx, rcx);
+ __ andl(rcx, 32767);
+ __ subl(rcx, 16304);
+ __ cmpl(rcx, 144);
+ __ jcc(Assembler::aboveEqual, L_2TAG_PACKET_0_0_1);
+ __ subsd(xmm4, xmm3);
+ __ mulsd(xmm3, xmm1);
+ __ mulsd(xmm2, xmm5);
+ __ cvtsd2siq(rax, xmm3);
+ __ movq(xmm7, xmm3);
+ __ addsd(xmm3, xmm6);
+ __ mulsd(xmm1, xmm4);
+ __ movsd(xmm4, ExternalAddress(ONEMASK), r11 /*rscratch*/);
+ __ subsd(xmm3, xmm6);
+ __ xorpd(xmm0, xmm0);
+ __ addsd(xmm2, xmm1);
+ __ subsd(xmm7, xmm3);
+ __ movdqu(xmm6, ExternalAddress(cv), r11 /*rscratch*/);
+ __ addsd(xmm2, xmm7);
+ __ movl(rcx, 255);
+ __ andl(rcx, rax);
+ __ addl(rcx, rcx);
+ __ lea(r8, ExternalAddress(T2_neg_f));
+ __ movdqu(xmm5, Address(r8, rcx, Address::times(8)));
+ __ shrl(rax, 4);
+ __ andl(rax, 65520);
+ __ subl(rax, 16368);
+ __ negl(rax);
+ __ pinsrw(xmm0, rax, 3);
+ __ movdqu(xmm1, ExternalAddress(cv + 16), r11 /*rscratch*/);
+ __ pshufd(xmm0, xmm0, 68);
+ __ mulpd(xmm0, xmm5);
+ __ movsd(xmm7, ExternalAddress(cv + 32), r11 /*rscratch*/);
+ __ pshufd(xmm2, xmm2, 68);
+ __ movq(xmm5, xmm4);
+ __ addsd(xmm4, xmm0);
+ __ mulpd(xmm6, xmm2);
+ __ mulsd(xmm7, xmm2);
+ __ mulpd(xmm2, xmm2);
+ __ addpd(xmm1, xmm6);
+ __ mulsd(xmm2, xmm2);
+ __ movsd(xmm3, ExternalAddress(ONEMASK), r11 /*rscratch*/);
+ __ mulpd(xmm1, xmm2);
+ __ pshufd(xmm6, xmm1, 78);
+ __ addsd(xmm1, xmm6);
+ __ movq(xmm6, xmm1);
+ __ addsd(xmm1, xmm7);
+ __ mulsd(xmm1, xmm0);
+ __ addsd(xmm1, xmm4);
+ __ andpd(xmm4, ExternalAddress(MASK3), r11 /*rscratch*/);
+ __ divsd(xmm5, xmm1);
+ __ subsd(xmm3, xmm4);
+ __ pshufd(xmm1, xmm0, 238);
+ __ addsd(xmm3, xmm0);
+ __ movq(xmm2, xmm4);
+ __ addsd(xmm3, xmm1);
+ __ mulsd(xmm1, xmm7);
+ __ mulsd(xmm7, xmm0);
+ __ addsd(xmm3, xmm1);
+ __ addsd(xmm4, xmm7);
+ __ movsd(xmm1, ExternalAddress(RMASK), r11 /*rscratch*/);
+ __ mulsd(xmm6, xmm0);
+ __ andpd(xmm4, ExternalAddress(MASK3), r11 /*rscratch*/);
+ __ addsd(xmm3, xmm6);
+ __ movq(xmm6, xmm4);
+ __ subsd(xmm2, xmm4);
+ __ addsd(xmm2, xmm7);
+ __ movsd(xmm7, ExternalAddress(ONEMASK), r11 /*rscratch*/);
+ __ andpd(xmm5, xmm1);
+ __ addsd(xmm3, xmm2);
+ __ mulsd(xmm4, xmm5);
+ __ xorpd(xmm2, xmm2);
+ __ mulsd(xmm3, xmm5);
+ __ subsd(xmm6, ExternalAddress(TWOMASK), r11 /*rscratch*/);
+ __ subsd(xmm4, xmm7);
+ __ xorl(rdx, 32768);
+ __ pinsrw(xmm2, rdx, 3);
+ __ addsd(xmm4, xmm3);
+ __ mulsd(xmm6, xmm5);
+ __ movq(xmm1, xmm3);
+ __ mulsd(xmm3, xmm4);
+ __ movq(xmm0, xmm6);
+ __ mulsd(xmm6, xmm4);
+ __ subsd(xmm1, xmm3);
+ __ subsd(xmm1, xmm6);
+ __ addsd(xmm0, xmm1);
+ __ xorpd(xmm0, xmm2);
+ __ jmp(B1_4);
+
+ __ bind(L_2TAG_PACKET_0_0_1);
+ __ addl(rcx, 960);
+ __ cmpl(rcx, 1104);
+ __ jcc(Assembler::aboveEqual, L_2TAG_PACKET_1_0_1);
+ __ movdqu(xmm2, ExternalAddress(pv), r11 /*rscratch*/);
+ __ pshufd(xmm1, xmm0, 68);
+ __ movdqu(xmm3, ExternalAddress(pv + 16), r11 /*rscratch*/);
+ __ mulpd(xmm1, xmm1);
+ __ movdqu(xmm4, ExternalAddress(pv + 32), r11 /*rscratch*/);
+ __ mulpd(xmm2, xmm1);
+ __ pshufd(xmm5, xmm1, 68);
+ __ addpd(xmm2, xmm3);
+ __ mulsd(xmm5, xmm5);
+ __ mulpd(xmm2, xmm1);
+ __ mulsd(xmm5, xmm5);
+ __ addpd(xmm2, xmm4);
+ __ mulpd(xmm2, xmm5);
+ __ pshufd(xmm5, xmm2, 238);
+ __ addsd(xmm2, xmm5);
+ __ mulsd(xmm2, xmm0);
+ __ addsd(xmm0, xmm2);
+ __ jmp(B1_4);
+
+ __ bind(L_2TAG_PACKET_1_0_1);
+ __ addl(rcx, 15344);
+ __ cmpl(rcx, 16448);
+ __ jcc(Assembler::aboveEqual, L_2TAG_PACKET_2_0_1);
+ __ cmpl(rcx, 16);
+ __ jcc(Assembler::below, L_2TAG_PACKET_3_0_1);
+ __ xorpd(xmm2, xmm2);
+ __ movl(rax, 17392);
+ __ pinsrw(xmm2, rax, 3);
+ __ mulsd(xmm2, xmm0);
+ __ addsd(xmm2, xmm0);
+ __ jmp(B1_4);
+
+ __ bind(L_2TAG_PACKET_3_0_1);
+ __ movq(xmm2, xmm0);
+ __ mulsd(xmm2, xmm2);
+ __ jmp(B1_4);
+
+ __ bind(L_2TAG_PACKET_2_0_1);
+ __ cmpl(rcx, 32752);
+ __ jcc(Assembler::aboveEqual, L_2TAG_PACKET_4_0_1);
+ __ xorpd(xmm2, xmm2);
+ __ movl(rcx, 15344);
+ __ pinsrw(xmm2, rcx, 3);
+ __ movq(xmm3, xmm2);
+ __ mulsd(xmm2, xmm2);
+ __ addsd(xmm2, xmm3);
+
+ __ bind(L_2TAG_PACKET_5_0_1);
+ __ xorpd(xmm0, xmm0);
+ __ orl(rdx, 16368);
+ __ pinsrw(xmm0, rdx, 3);
+ __ jmp(B1_4);
+
+ __ bind(L_2TAG_PACKET_4_0_1);
+ __ movq(xmm2, xmm0);
+ __ movdl(rax, xmm0);
+ __ psrlq(xmm2, 20);
+ __ movdl(rcx, xmm2);
+ __ orl(rcx, rax);
+ __ cmpl(rcx, 0);
+ __ jcc(Assembler::equal, L_2TAG_PACKET_5_0_1);
+ __ addsd(xmm0, xmm0);
+
+ __ bind(B1_4);
+ __ leave(); // required for proper stackwalking of RuntimeStub frame
+ __ ret(0);
+
+ return start;
+}
+
+#undef __
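
As a reading aid for the algorithm description above, here is a scalar C++ sketch of the same range split. It is a simplified analogue using libm calls, not the stub's table-driven evaluation, and the coefficients shown in the small-argument branch are placeholders rather than the stub's polynomial:

#include <cmath>

static double tanh_by_ranges(double x) {
  const double ax = std::fabs(x);
  if (ax < 0x1.0p-64) {
    return x;                          // |x| < 2^-64: tanh(x) ~ x
  }
  if (ax >= 0x1.0p32) {
    return std::copysign(1.0, x);      // |x| >= 2^32 (incl. infinity): +/-1
  }
  if (ax < 0x1.0p-4) {
    const double x2 = x * x;           // small |x|: odd Taylor polynomial
    return x * (1.0 + x2 * (-1.0 / 3.0 + x2 * (2.0 / 15.0)));
  }
  // main range: tanh(x) = (1 - e^{-2|x|}) / (1 + e^{-2|x|}), sign restored
  const double e = std::exp(-2.0 * ax);
  return std::copysign((1.0 - e) / (1.0 + e), x);
}
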
diff --git a/src/hotspot/cpu/x86/templateInterpreterGenerator_x86_32.cpp b/src/hotspot/cpu/x86/templateInterpreterGenerator_x86_32.cpp
index ba9eb32e8c13e..75611524e3b0a 100644
--- a/src/hotspot/cpu/x86/templateInterpreterGenerator_x86_32.cpp
+++ b/src/hotspot/cpu/x86/templateInterpreterGenerator_x86_32.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2023, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2024, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -373,6 +373,10 @@ address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::M
// [ lo(arg) ]
// [ hi(arg) ]
//
+ if (kind == Interpreter::java_lang_math_tanh) {
+ return nullptr;
+ }
+
if (kind == Interpreter::java_lang_math_fmaD) {
if (!UseFMA) {
return nullptr; // Generate a vanilla entry
diff --git a/src/hotspot/cpu/x86/templateInterpreterGenerator_x86_64.cpp b/src/hotspot/cpu/x86/templateInterpreterGenerator_x86_64.cpp
index 26eea4c1d6a5f..5ea2d8eba259b 100644
--- a/src/hotspot/cpu/x86/templateInterpreterGenerator_x86_64.cpp
+++ b/src/hotspot/cpu/x86/templateInterpreterGenerator_x86_64.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2003, 2023, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2024, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -465,6 +465,10 @@ address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::M
} else {
__ call_VM_leaf0(CAST_FROM_FN_PTR(address, SharedRuntime::dtan));
}
+ } else if (kind == Interpreter::java_lang_math_tanh) {
+ assert(StubRoutines::dtanh() != nullptr, "not initialized");
+ __ movdbl(xmm0, Address(rsp, wordSize));
+ __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dtanh())));
} else if (kind == Interpreter::java_lang_math_abs) {
assert(StubRoutines::x86::double_sign_mask() != nullptr, "not initialized");
__ movdbl(xmm0, Address(rsp, wordSize));
diff --git a/src/hotspot/cpu/x86/upcallLinker_x86_32.cpp b/src/hotspot/cpu/x86/upcallLinker_x86_32.cpp
index e5075e180d9d6..d795c751d02b5 100644
--- a/src/hotspot/cpu/x86/upcallLinker_x86_32.cpp
+++ b/src/hotspot/cpu/x86/upcallLinker_x86_32.cpp
@@ -24,7 +24,7 @@
#include "precompiled.hpp"
#include "prims/upcallLinker.hpp"
-address UpcallLinker::make_upcall_stub(jobject receiver, Method* entry,
+address UpcallLinker::make_upcall_stub(jobject receiver, Symbol* signature,
BasicType* out_sig_bt, int total_out_args,
BasicType ret_type,
jobject jabi, jobject jconv,
diff --git a/src/hotspot/cpu/x86/upcallLinker_x86_64.cpp b/src/hotspot/cpu/x86/upcallLinker_x86_64.cpp
index 82179f9022e92..bc261bfd93f44 100644
--- a/src/hotspot/cpu/x86/upcallLinker_x86_64.cpp
+++ b/src/hotspot/cpu/x86/upcallLinker_x86_64.cpp
@@ -23,7 +23,7 @@
#include "precompiled.hpp"
#include "asm/macroAssembler.hpp"
-#include "code/codeBlob.hpp"
+#include "classfile/javaClasses.hpp"
#include "code/codeBlob.hpp"
#include "code/vmreg.inline.hpp"
#include "compiler/disassembler.hpp"
@@ -169,10 +169,10 @@ static void restore_callee_saved_registers(MacroAssembler* _masm, const ABIDescr
__ block_comment("} restore_callee_saved_regs ");
}
-static const int upcall_stub_code_base_size = 1024;
+static const int upcall_stub_code_base_size = 1200;
static const int upcall_stub_size_per_arg = 16;
-address UpcallLinker::make_upcall_stub(jobject receiver, Method* entry,
+address UpcallLinker::make_upcall_stub(jobject receiver, Symbol* signature,
BasicType* out_sig_bt, int total_out_args,
BasicType ret_type,
jobject jabi, jobject jconv,
@@ -281,7 +281,6 @@ address UpcallLinker::make_upcall_stub(jobject receiver, Method* entry,
__ block_comment("{ on_entry");
__ vzeroupper();
__ lea(c_rarg0, Address(rsp, frame_data_offset));
- __ movptr(c_rarg1, (intptr_t)receiver);
// stack already aligned
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, UpcallLinker::on_entry)));
__ movptr(r15_thread, rax);
@@ -297,12 +296,10 @@ address UpcallLinker::make_upcall_stub(jobject receiver, Method* entry,
arg_shuffle.generate(_masm, shuffle_reg, abi._shadow_space_bytes, 0);
__ block_comment("} argument shuffle");
- __ block_comment("{ receiver ");
- __ get_vm_result(j_rarg0, r15_thread);
- __ block_comment("} receiver ");
-
- __ mov_metadata(rbx, entry);
- __ movptr(Address(r15_thread, JavaThread::callee_target_offset()), rbx); // just in case callee is deoptimized
+ __ block_comment("{ load target ");
+ __ movptr(j_rarg0, (intptr_t)receiver);
+ __ call(RuntimeAddress(StubRoutines::upcall_stub_load_target())); // puts target Method* in rbx
+ __ block_comment("} load target ");
__ push_cont_fastpath();
@@ -377,7 +374,7 @@ address UpcallLinker::make_upcall_stub(jobject receiver, Method* entry,
#ifndef PRODUCT
stringStream ss;
- ss.print("upcall_stub_%s", entry->signature()->as_C_string());
+ ss.print("upcall_stub_%s", signature->as_C_string());
const char* name = _masm->code_string(ss.freeze());
#else // PRODUCT
const char* name = "upcall_stub";
diff --git a/src/hotspot/cpu/x86/vm_version_x86.cpp b/src/hotspot/cpu/x86/vm_version_x86.cpp
index 6216cf44b887a..2549feb8a4069 100644
--- a/src/hotspot/cpu/x86/vm_version_x86.cpp
+++ b/src/hotspot/cpu/x86/vm_version_x86.cpp
@@ -1045,6 +1045,10 @@ void VM_Version::get_processor_features() {
FLAG_SET_DEFAULT(UseAPX, apx_supported ? true : false);
}
+ if (!UseAPX) {
+ _features &= ~CPU_APX_F;
+ }
+
if (UseAVX < 2) {
_features &= ~CPU_AVX2;
_features &= ~CPU_AVX_IFMA;
diff --git a/src/hotspot/cpu/x86/x86.ad b/src/hotspot/cpu/x86/x86.ad
index 2b29dd14e4b27..b55a1208cf2df 100644
--- a/src/hotspot/cpu/x86/x86.ad
+++ b/src/hotspot/cpu/x86/x86.ad
@@ -2457,6 +2457,10 @@ bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) {
mstack.push(m, Visit); // m = ShiftCntV
return true;
}
+ if (is_encode_and_store_pattern(n, m)) {
+ mstack.push(m, Visit);
+ return true;
+ }
return false;
}
diff --git a/src/hotspot/cpu/x86/x86_64.ad b/src/hotspot/cpu/x86/x86_64.ad
index e0740ad7f3124..fee265473befe 100644
--- a/src/hotspot/cpu/x86/x86_64.ad
+++ b/src/hotspot/cpu/x86/x86_64.ad
@@ -657,8 +657,7 @@ static void emit_cmpfp3(MacroAssembler* masm, Register dst) {
__ movl(dst, -1);
__ jcc(Assembler::parity, done);
__ jcc(Assembler::below, done);
- __ setb(Assembler::notEqual, dst);
- __ movzbl(dst, dst);
+ __ setcc(Assembler::notEqual, dst);
__ bind(done);
}
@@ -4342,6 +4341,7 @@ instruct loadP(rRegP dst, memory mem)
// Load Compressed Pointer
instruct loadN(rRegN dst, memory mem)
%{
+ predicate(n->as_Load()->barrier_data() == 0);
match(Set dst (LoadN mem));
ins_cost(125); // XXX
@@ -5127,6 +5127,7 @@ instruct storeImmP(memory mem, immP31 src)
// Store Compressed Pointer
instruct storeN(memory mem, rRegN src)
%{
+ predicate(n->as_Store()->barrier_data() == 0);
match(Set mem (StoreN mem src));
ins_cost(125); // XXX
@@ -5151,7 +5152,7 @@ instruct storeNKlass(memory mem, rRegN src)
instruct storeImmN0(memory mem, immN0 zero)
%{
- predicate(CompressedOops::base() == nullptr);
+ predicate(CompressedOops::base() == nullptr && n->as_Store()->barrier_data() == 0);
match(Set mem (StoreN mem zero));
ins_cost(125); // XXX
@@ -5164,6 +5165,7 @@ instruct storeImmN0(memory mem, immN0 zero)
instruct storeImmN(memory mem, immN src)
%{
+ predicate(n->as_Store()->barrier_data() == 0);
match(Set mem (StoreN mem src));
ins_cost(150); // XXX
@@ -7070,13 +7072,11 @@ instruct compareAndSwapP(rRegI res,
format %{ "cmpxchgq $mem_ptr,$newval\t# "
"If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
- "sete $res\n\t"
- "movzbl $res, $res" %}
+ "setcc $res \t# emits sete + movzbl or setzue for APX" %}
ins_encode %{
__ lock();
__ cmpxchgq($newval$$Register, $mem_ptr$$Address);
- __ setb(Assembler::equal, $res$$Register);
- __ movzbl($res$$Register, $res$$Register);
+ __ setcc(Assembler::equal, $res$$Register);
%}
ins_pipe( pipe_cmpxchg );
%}
@@ -7092,13 +7092,11 @@ instruct compareAndSwapL(rRegI res,
format %{ "cmpxchgq $mem_ptr,$newval\t# "
"If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
- "sete $res\n\t"
- "movzbl $res, $res" %}
+ "setcc $res \t# emits sete + movzbl or setzue for APX" %}
ins_encode %{
__ lock();
__ cmpxchgq($newval$$Register, $mem_ptr$$Address);
- __ setb(Assembler::equal, $res$$Register);
- __ movzbl($res$$Register, $res$$Register);
+ __ setcc(Assembler::equal, $res$$Register);
%}
ins_pipe( pipe_cmpxchg );
%}
@@ -7114,13 +7112,11 @@ instruct compareAndSwapI(rRegI res,
format %{ "cmpxchgl $mem_ptr,$newval\t# "
"If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
- "sete $res\n\t"
- "movzbl $res, $res" %}
+ "setcc $res \t# emits sete + movzbl or setzue for APX" %}
ins_encode %{
__ lock();
__ cmpxchgl($newval$$Register, $mem_ptr$$Address);
- __ setb(Assembler::equal, $res$$Register);
- __ movzbl($res$$Register, $res$$Register);
+ __ setcc(Assembler::equal, $res$$Register);
%}
ins_pipe( pipe_cmpxchg );
%}
@@ -7136,13 +7132,11 @@ instruct compareAndSwapB(rRegI res,
format %{ "cmpxchgb $mem_ptr,$newval\t# "
"If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
- "sete $res\n\t"
- "movzbl $res, $res" %}
+ "setcc $res \t# emits sete + movzbl or setzue for APX" %}
ins_encode %{
__ lock();
__ cmpxchgb($newval$$Register, $mem_ptr$$Address);
- __ setb(Assembler::equal, $res$$Register);
- __ movzbl($res$$Register, $res$$Register);
+ __ setcc(Assembler::equal, $res$$Register);
%}
ins_pipe( pipe_cmpxchg );
%}
@@ -7158,13 +7152,11 @@ instruct compareAndSwapS(rRegI res,
format %{ "cmpxchgw $mem_ptr,$newval\t# "
"If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
- "sete $res\n\t"
- "movzbl $res, $res" %}
+ "setcc $res \t# emits sete + movzbl or setzue for APX" %}
ins_encode %{
__ lock();
__ cmpxchgw($newval$$Register, $mem_ptr$$Address);
- __ setb(Assembler::equal, $res$$Register);
- __ movzbl($res$$Register, $res$$Register);
+ __ setcc(Assembler::equal, $res$$Register);
%}
ins_pipe( pipe_cmpxchg );
%}
@@ -7173,19 +7165,18 @@ instruct compareAndSwapN(rRegI res,
memory mem_ptr,
rax_RegN oldval, rRegN newval,
rFlagsReg cr) %{
+ predicate(n->as_LoadStore()->barrier_data() == 0);
match(Set res (CompareAndSwapN mem_ptr (Binary oldval newval)));
match(Set res (WeakCompareAndSwapN mem_ptr (Binary oldval newval)));
effect(KILL cr, KILL oldval);
format %{ "cmpxchgl $mem_ptr,$newval\t# "
"If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
- "sete $res\n\t"
- "movzbl $res, $res" %}
+ "setcc $res \t# emits sete + movzbl or setzue for APX" %}
ins_encode %{
__ lock();
__ cmpxchgl($newval$$Register, $mem_ptr$$Address);
- __ setb(Assembler::equal, $res$$Register);
- __ movzbl($res$$Register, $res$$Register);
+ __ setcc(Assembler::equal, $res$$Register);
%}
ins_pipe( pipe_cmpxchg );
%}
@@ -7262,6 +7253,7 @@ instruct compareAndExchangeN(
memory mem_ptr,
rax_RegN oldval, rRegN newval,
rFlagsReg cr) %{
+ predicate(n->as_LoadStore()->barrier_data() == 0);
match(Set oldval (CompareAndExchangeN mem_ptr (Binary oldval newval)));
effect(KILL cr);
@@ -7483,6 +7475,7 @@ instruct xchgP( memory mem, rRegP newval) %{
%}
instruct xchgN( memory mem, rRegN newval) %{
+ predicate(n->as_LoadStore()->barrier_data() == 0);
match(Set newval (GetAndSetN mem newval));
format %{ "XCHGL $newval,$mem]" %}
ins_encode %{
@@ -9729,13 +9722,11 @@ instruct cmpLTMask(rRegI dst, rRegI p, rRegI q, rFlagsReg cr)
ins_cost(400);
format %{ "cmpl $p, $q\t# cmpLTMask\n\t"
- "setlt $dst\n\t"
- "movzbl $dst, $dst\n\t"
+ "setcc $dst \t# emits setlt + movzbl or setzul for APX\n\t"
"negl $dst" %}
ins_encode %{
__ cmpl($p$$Register, $q$$Register);
- __ setb(Assembler::less, $dst$$Register);
- __ movzbl($dst$$Register, $dst$$Register);
+ __ setcc(Assembler::less, $dst$$Register);
__ negl($dst$$Register);
%}
ins_pipe(pipe_slow);
@@ -11674,6 +11665,7 @@ instruct compN_rReg(rFlagsRegU cr, rRegN op1, rRegN op2)
instruct compN_rReg_mem(rFlagsRegU cr, rRegN src, memory mem)
%{
+ predicate(n->in(2)->as_Load()->barrier_data() == 0);
match(Set cr (CmpN src (LoadN mem)));
format %{ "cmpl $src, $mem\t# compressed ptr" %}
@@ -11695,6 +11687,7 @@ instruct compN_rReg_imm(rFlagsRegU cr, rRegN op1, immN op2) %{
instruct compN_mem_imm(rFlagsRegU cr, memory mem, immN src)
%{
+ predicate(n->in(2)->as_Load()->barrier_data() == 0);
match(Set cr (CmpN src (LoadN mem)));
format %{ "cmpl $mem, $src\t# compressed ptr" %}
@@ -11735,7 +11728,8 @@ instruct testN_reg(rFlagsReg cr, rRegN src, immN0 zero) %{
instruct testN_mem(rFlagsReg cr, memory mem, immN0 zero)
%{
- predicate(CompressedOops::base() != nullptr);
+ predicate(CompressedOops::base() != nullptr &&
+ n->in(1)->as_Load()->barrier_data() == 0);
match(Set cr (CmpN (LoadN mem) zero));
ins_cost(500); // XXX
@@ -11748,7 +11742,8 @@ instruct testN_mem(rFlagsReg cr, memory mem, immN0 zero)
instruct testN_mem_reg0(rFlagsReg cr, memory mem, immN0 zero)
%{
- predicate(CompressedOops::base() == nullptr);
+ predicate(CompressedOops::base() == nullptr &&
+ n->in(1)->as_Load()->barrier_data() == 0);
match(Set cr (CmpN (LoadN mem) zero));
format %{ "cmpl R12, $mem\t# compressed ptr (R12_heapbase==0)" %}
@@ -11860,16 +11855,14 @@ instruct cmpU3_reg_reg(rRegI dst, rRegI src1, rRegI src2, rFlagsReg flags)
format %{ "cmpl $src1, $src2\t# CmpL3\n\t"
"movl $dst, -1\n\t"
"jb,u done\n\t"
- "setne $dst\n\t"
- "movzbl $dst, $dst\n\t"
+ "setcc $dst \t# emits setne + movzbl or setzune for APX\n\t"
"done:" %}
ins_encode %{
Label done;
__ cmpl($src1$$Register, $src2$$Register);
__ movl($dst$$Register, -1);
__ jccb(Assembler::below, done);
- __ setb(Assembler::notZero, $dst$$Register);
- __ movzbl($dst$$Register, $dst$$Register);
+ __ setcc(Assembler::notZero, $dst$$Register);
__ bind(done);
%}
ins_pipe(pipe_slow);
@@ -11886,16 +11879,14 @@ instruct cmpL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
format %{ "cmpq $src1, $src2\t# CmpL3\n\t"
"movl $dst, -1\n\t"
"jl,s done\n\t"
- "setne $dst\n\t"
- "movzbl $dst, $dst\n\t"
+ "setcc $dst \t# emits setne + movzbl or setzune for APX\n\t"
"done:" %}
ins_encode %{
Label done;
__ cmpq($src1$$Register, $src2$$Register);
__ movl($dst$$Register, -1);
__ jccb(Assembler::less, done);
- __ setb(Assembler::notZero, $dst$$Register);
- __ movzbl($dst$$Register, $dst$$Register);
+ __ setcc(Assembler::notZero, $dst$$Register);
__ bind(done);
%}
ins_pipe(pipe_slow);
@@ -11912,16 +11903,14 @@ instruct cmpUL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
format %{ "cmpq $src1, $src2\t# CmpL3\n\t"
"movl $dst, -1\n\t"
"jb,u done\n\t"
- "setne $dst\n\t"
- "movzbl $dst, $dst\n\t"
+ "setcc $dst \t# emits setne + movzbl or setzune for APX\n\t"
"done:" %}
ins_encode %{
Label done;
__ cmpq($src1$$Register, $src2$$Register);
__ movl($dst$$Register, -1);
__ jccb(Assembler::below, done);
- __ setb(Assembler::notZero, $dst$$Register);
- __ movzbl($dst$$Register, $dst$$Register);
+ __ setcc(Assembler::notZero, $dst$$Register);
__ bind(done);
%}
ins_pipe(pipe_slow);
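
Note that the setb/movzbl-to-setcc rewrites in this file only change how the boolean result is materialized (two instructions, or a single zero-upper set form on APX hardware); the value handed back to Java is unchanged. A behavioral sketch of that result, assuming GCC/Clang atomic builtins for illustration only:

#include <cstdint>

// Returns 1 if the exchange happened, 0 otherwise -- the same zero-extended
// flag that "sete + movzbl" (or the APX zero-upper set form) leaves in the
// result register after "lock cmpxchgq".
static uint32_t cas_result_flag(int64_t* addr, int64_t expected, int64_t desired) {
  const bool success = __sync_bool_compare_and_swap(addr, expected, desired);
  return success ? 1u : 0u;
}
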
diff --git a/src/hotspot/cpu/zero/upcallLinker_zero.cpp b/src/hotspot/cpu/zero/upcallLinker_zero.cpp
index 6447dac86c915..408ebc328205d 100644
--- a/src/hotspot/cpu/zero/upcallLinker_zero.cpp
+++ b/src/hotspot/cpu/zero/upcallLinker_zero.cpp
@@ -24,7 +24,7 @@
#include "precompiled.hpp"
#include "prims/upcallLinker.hpp"
-address UpcallLinker::make_upcall_stub(jobject mh, Method* entry,
+address UpcallLinker::make_upcall_stub(jobject mh, Symbol* signature,
BasicType* out_sig_bt, int total_out_args,
BasicType ret_type,
jobject jabi, jobject jconv,
diff --git a/src/hotspot/cpu/zero/vm_version_zero.cpp b/src/hotspot/cpu/zero/vm_version_zero.cpp
index 1fcf4b1086253..7312dd116468c 100644
--- a/src/hotspot/cpu/zero/vm_version_zero.cpp
+++ b/src/hotspot/cpu/zero/vm_version_zero.cpp
@@ -116,11 +116,6 @@ void VM_Version::initialize() {
FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false);
}
- if ((LockingMode != LM_LEGACY) && (LockingMode != LM_MONITOR)) {
- warning("Unsupported locking mode for this CPU.");
- FLAG_SET_DEFAULT(LockingMode, LM_LEGACY);
- }
-
// Enable error context decoding on known platforms
#if defined(IA32) || defined(AMD64) || defined(ARM) || \
defined(AARCH64) || defined(PPC) || defined(RISCV) || \
diff --git a/src/hotspot/cpu/zero/zeroInterpreter_zero.cpp b/src/hotspot/cpu/zero/zeroInterpreter_zero.cpp
index 2b53042ef1017..aab43e733964e 100644
--- a/src/hotspot/cpu/zero/zeroInterpreter_zero.cpp
+++ b/src/hotspot/cpu/zero/zeroInterpreter_zero.cpp
@@ -485,26 +485,30 @@ int ZeroInterpreter::native_entry(Method* method, intptr_t UNUSED, TRAPS) {
// Unlock if necessary
if (monitor) {
- BasicLock *lock = monitor->lock();
- markWord header = lock->displaced_header();
- oop rcvr = monitor->obj();
- monitor->set_obj(nullptr);
-
- bool dec_monitor_count = true;
- if (header.to_pointer() != nullptr) {
- markWord old_header = markWord::encode(lock);
- if (rcvr->cas_set_mark(header, old_header) != old_header) {
- monitor->set_obj(rcvr);
- dec_monitor_count = false;
- InterpreterRuntime::monitorexit(monitor);
+ bool success = false;
+ if (LockingMode == LM_LEGACY) {
+ BasicLock* lock = monitor->lock();
+ oop rcvr = monitor->obj();
+ monitor->set_obj(nullptr);
+ success = true;
+ markWord header = lock->displaced_header();
+ if (header.to_pointer() != nullptr) { // Check for recursive lock
+ markWord old_header = markWord::encode(lock);
+ if (rcvr->cas_set_mark(header, old_header) != old_header) {
+ monitor->set_obj(rcvr);
+ success = false;
+ }
+ }
+ if (success) {
+ THREAD->dec_held_monitor_count();
}
}
- if (dec_monitor_count) {
- THREAD->dec_held_monitor_count();
+ if (!success) {
+ InterpreterRuntime::monitorexit(monitor);
}
}
- unwind_and_return:
+ unwind_and_return:
// Unwind the current activation
thread->pop_zero_frame();
diff --git a/src/hotspot/os/bsd/gc/z/zPhysicalMemoryBacking_bsd.cpp b/src/hotspot/os/bsd/gc/z/zPhysicalMemoryBacking_bsd.cpp
index 29825a9eab291..2e56c092a79b5 100644
--- a/src/hotspot/os/bsd/gc/z/zPhysicalMemoryBacking_bsd.cpp
+++ b/src/hotspot/os/bsd/gc/z/zPhysicalMemoryBacking_bsd.cpp
@@ -22,10 +22,10 @@
*/
#include "precompiled.hpp"
-#include "gc/shared/gcLogPrecious.hpp"
#include "gc/z/zAddress.inline.hpp"
#include "gc/z/zErrno.hpp"
#include "gc/z/zGlobals.hpp"
+#include "gc/z/zInitialize.hpp"
#include "gc/z/zLargePages.inline.hpp"
#include "gc/z/zPhysicalMemory.inline.hpp"
#include "gc/z/zPhysicalMemoryBacking_bsd.hpp"
@@ -82,7 +82,7 @@ ZPhysicalMemoryBacking::ZPhysicalMemoryBacking(size_t max_capacity)
_base = (uintptr_t)os::reserve_memory(max_capacity);
if (_base == 0) {
// Failed
- log_error_pd(gc)("Failed to reserve address space for backing memory");
+ ZInitialize::error("Failed to reserve address space for backing memory");
return;
}
diff --git a/src/hotspot/os/linux/gc/z/zPhysicalMemoryBacking_linux.cpp b/src/hotspot/os/linux/gc/z/zPhysicalMemoryBacking_linux.cpp
index b80124cc34e43..b648876ac602c 100644
--- a/src/hotspot/os/linux/gc/z/zPhysicalMemoryBacking_linux.cpp
+++ b/src/hotspot/os/linux/gc/z/zPhysicalMemoryBacking_linux.cpp
@@ -27,6 +27,7 @@
#include "gc/z/zArray.inline.hpp"
#include "gc/z/zErrno.hpp"
#include "gc/z/zGlobals.hpp"
+#include "gc/z/zInitialize.hpp"
#include "gc/z/zLargePages.inline.hpp"
#include "gc/z/zMountPoint_linux.hpp"
#include "gc/z/zNUMA.inline.hpp"
@@ -103,14 +104,14 @@
#define ZFILENAME_HEAP "java_heap"
// Preferred tmpfs mount points, ordered by priority
-static const char* z_preferred_tmpfs_mountpoints[] = {
+static const char* ZPreferredTmpfsMountpoints[] = {
"/dev/shm",
"/run/shm",
nullptr
};
// Preferred hugetlbfs mount points, ordered by priority
-static const char* z_preferred_hugetlbfs_mountpoints[] = {
+static const char* ZPreferredHugetlbfsMountpoints[] = {
"/dev/hugepages",
"/hugepages",
nullptr
@@ -129,6 +130,7 @@ ZPhysicalMemoryBacking::ZPhysicalMemoryBacking(size_t max_capacity)
// Create backing file
_fd = create_fd(ZFILENAME_HEAP);
if (_fd == -1) {
+ ZInitialize::error("Failed to create heap backing file");
return;
}
@@ -136,7 +138,7 @@ ZPhysicalMemoryBacking::ZPhysicalMemoryBacking(size_t max_capacity)
while (ftruncate(_fd, max_capacity) == -1) {
if (errno != EINTR) {
ZErrno err;
- log_error_p(gc)("Failed to truncate backing file (%s)", err.to_string());
+ ZInitialize::error("Failed to truncate backing file (%s)", err.to_string());
return;
}
}
@@ -145,7 +147,7 @@ ZPhysicalMemoryBacking::ZPhysicalMemoryBacking(size_t max_capacity)
struct statfs buf;
if (fstatfs(_fd, &buf) == -1) {
ZErrno err;
- log_error_p(gc)("Failed to determine filesystem type for backing file (%s)", err.to_string());
+ ZInitialize::error("Failed to determine filesystem type for backing file (%s)", err.to_string());
return;
}
@@ -158,39 +160,39 @@ ZPhysicalMemoryBacking::ZPhysicalMemoryBacking(size_t max_capacity)
// Make sure the filesystem type matches requested large page type
if (ZLargePages::is_transparent() && !is_tmpfs()) {
- log_error_p(gc)("-XX:+UseTransparentHugePages can only be enabled when using a %s filesystem",
- ZFILESYSTEM_TMPFS);
+ ZInitialize::error("-XX:+UseTransparentHugePages can only be enabled when using a %s filesystem",
+ ZFILESYSTEM_TMPFS);
return;
}
if (ZLargePages::is_transparent() && !tmpfs_supports_transparent_huge_pages()) {
- log_error_p(gc)("-XX:+UseTransparentHugePages on a %s filesystem not supported by kernel",
- ZFILESYSTEM_TMPFS);
+ ZInitialize::error("-XX:+UseTransparentHugePages on a %s filesystem not supported by kernel",
+ ZFILESYSTEM_TMPFS);
return;
}
if (ZLargePages::is_explicit() && !is_hugetlbfs()) {
- log_error_p(gc)("-XX:+UseLargePages (without -XX:+UseTransparentHugePages) can only be enabled "
- "when using a %s filesystem", ZFILESYSTEM_HUGETLBFS);
+ ZInitialize::error("-XX:+UseLargePages (without -XX:+UseTransparentHugePages) can only be enabled "
+ "when using a %s filesystem", ZFILESYSTEM_HUGETLBFS);
return;
}
if (!ZLargePages::is_explicit() && is_hugetlbfs()) {
- log_error_p(gc)("-XX:+UseLargePages must be enabled when using a %s filesystem",
- ZFILESYSTEM_HUGETLBFS);
+ ZInitialize::error("-XX:+UseLargePages must be enabled when using a %s filesystem",
+ ZFILESYSTEM_HUGETLBFS);
return;
}
// Make sure the filesystem block size is compatible
if (ZGranuleSize % _block_size != 0) {
- log_error_p(gc)("Filesystem backing the heap has incompatible block size (" SIZE_FORMAT ")",
- _block_size);
+ ZInitialize::error("Filesystem backing the heap has incompatible block size (" SIZE_FORMAT ")",
+ _block_size);
return;
}
if (is_hugetlbfs() && _block_size != ZGranuleSize) {
- log_error_p(gc)("%s filesystem has unexpected block size " SIZE_FORMAT " (expected " SIZE_FORMAT ")",
- ZFILESYSTEM_HUGETLBFS, _block_size, ZGranuleSize);
+ ZInitialize::error("%s filesystem has unexpected block size " SIZE_FORMAT " (expected " SIZE_FORMAT ")",
+ ZFILESYSTEM_HUGETLBFS, _block_size, ZGranuleSize);
return;
}
@@ -226,8 +228,8 @@ int ZPhysicalMemoryBacking::create_file_fd(const char* name) const {
? ZFILESYSTEM_HUGETLBFS
: ZFILESYSTEM_TMPFS;
const char** const preferred_mountpoints = ZLargePages::is_explicit()
- ? z_preferred_hugetlbfs_mountpoints
- : z_preferred_tmpfs_mountpoints;
+ ? ZPreferredHugetlbfsMountpoints
+ : ZPreferredTmpfsMountpoints;
// Find mountpoint
ZMountPoint mountpoint(filesystem, preferred_mountpoints);
diff --git a/src/hotspot/os/linux/os_linux.cpp b/src/hotspot/os/linux/os_linux.cpp
index d4699567733b2..609317df45fc2 100644
--- a/src/hotspot/os/linux/os_linux.cpp
+++ b/src/hotspot/os/linux/os_linux.cpp
@@ -4602,7 +4602,7 @@ static void workaround_expand_exec_shield_cs_limit() {
return; // No matter, we tried, best effort.
}
- MemTracker::record_virtual_memory_type((address)codebuf, mtInternal);
+ MemTracker::record_virtual_memory_tag((address)codebuf, mtInternal);
log_info(os)("[CS limit NX emulation work-around, exec code at: %p]", codebuf);
diff --git a/src/hotspot/os/posix/os_posix.cpp b/src/hotspot/os/posix/os_posix.cpp
index 26bff6c8bd4e6..60efdeb2ef59a 100644
--- a/src/hotspot/os/posix/os_posix.cpp
+++ b/src/hotspot/os/posix/os_posix.cpp
@@ -367,7 +367,7 @@ bool os::dir_is_empty(const char* path) {
return result;
}
-static char* reserve_mmapped_memory(size_t bytes, char* requested_addr, MEMFLAGS flag) {
+static char* reserve_mmapped_memory(size_t bytes, char* requested_addr, MemTag mem_tag) {
char * addr;
int flags = MAP_PRIVATE NOT_AIX( | MAP_NORESERVE ) | MAP_ANONYMOUS;
if (requested_addr != nullptr) {
@@ -382,7 +382,7 @@ static char* reserve_mmapped_memory(size_t bytes, char* requested_addr, MEMFLAGS
flags, -1, 0);
if (addr != MAP_FAILED) {
- MemTracker::record_virtual_memory_reserve((address)addr, bytes, CALLER_PC, flag);
+ MemTracker::record_virtual_memory_reserve((address)addr, bytes, CALLER_PC, mem_tag);
return addr;
}
return nullptr;
@@ -495,7 +495,7 @@ char* os::reserve_memory_aligned(size_t size, size_t alignment, bool exec) {
return chop_extra_memory(size, alignment, extra_base, extra_size);
}
-char* os::map_memory_to_file_aligned(size_t size, size_t alignment, int file_desc, MEMFLAGS flag) {
+char* os::map_memory_to_file_aligned(size_t size, size_t alignment, int file_desc, MemTag mem_tag) {
size_t extra_size = calculate_aligned_extra_size(size, alignment);
// For file mapping, we do not call os:map_memory_to_file(size,fd) since:
// - we later chop away parts of the mapping using os::release_memory and that could fail if the
@@ -503,7 +503,7 @@ char* os::map_memory_to_file_aligned(size_t size, size_t alignment, int file_des
// - The memory API os::reserve_memory uses is an implementation detail. It may (and usually is)
// mmap but it also may System V shared memory which cannot be uncommitted as a whole, so
// chopping off and unmapping excess bits back and front (see below) would not work.
- char* extra_base = reserve_mmapped_memory(extra_size, nullptr, flag);
+ char* extra_base = reserve_mmapped_memory(extra_size, nullptr, mem_tag);
if (extra_base == nullptr) {
return nullptr;
}
diff --git a/src/hotspot/os/windows/memMapPrinter_windows.cpp b/src/hotspot/os/windows/memMapPrinter_windows.cpp
new file mode 100644
index 0000000000000..eb6b24a9d139a
--- /dev/null
+++ b/src/hotspot/os/windows/memMapPrinter_windows.cpp
@@ -0,0 +1,253 @@
+/*
+ * Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2024, Red Hat, Inc. and/or its affiliates.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "nmt/memMapPrinter.hpp"
+#include "os_windows.hpp"
+#include "runtime/vm_version.hpp"
+
+#include
+#include
+#include
+#include
+
+/* maximum number of mapping records returned */
+static const int MAX_REGIONS_RETURNED = 1000000;
+
+class MappingInfo {
+public:
+ stringStream _ap_buffer;
+ stringStream _state_buffer;
+ stringStream _protect_buffer;
+ stringStream _type_buffer;
+ char _file_name[MAX_PATH];
+
+ MappingInfo() {}
+
+ void process(MEMORY_BASIC_INFORMATION& mem_info) {
+ _ap_buffer.reset();
+ _state_buffer.reset();
+ _protect_buffer.reset();
+ _type_buffer.reset();
+ get_protect_string(_ap_buffer, mem_info.AllocationProtect);
+ get_state_string(_state_buffer, mem_info);
+ get_protect_string(_protect_buffer, mem_info.Protect);
+ get_type_string(_type_buffer, mem_info);
+ _file_name[0] = 0;
+ if (mem_info.Type == MEM_IMAGE) {
+ HMODULE hModule = 0;
+ if (GetModuleHandleEx(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS, static_cast<LPCSTR>(mem_info.AllocationBase), &hModule)) {
+ GetModuleFileName(hModule, _file_name, sizeof(_file_name));
+ }
+ }
+ }
+
+ void get_protect_string(outputStream& out, DWORD prot) {
+ const char read_c = prot & (PAGE_READONLY | PAGE_READWRITE | PAGE_EXECUTE_READ | PAGE_EXECUTE_READWRITE | PAGE_WRITECOPY | PAGE_EXECUTE_WRITECOPY) ? 'r' : '-';
+ const char write_c = prot & (PAGE_READWRITE | PAGE_WRITECOPY | PAGE_EXECUTE_READWRITE | PAGE_EXECUTE_WRITECOPY) ? 'w' : '-';
+ const char execute_c = prot & (PAGE_EXECUTE | PAGE_EXECUTE_READ | PAGE_EXECUTE_READWRITE | PAGE_EXECUTE_WRITECOPY) ? 'x' : '-';
+ out.print("%c%c%c", read_c, write_c, execute_c);
+ if (prot & (PAGE_WRITECOPY | PAGE_EXECUTE_WRITECOPY)) {
+ out.put('c');
+ }
+ if (prot & PAGE_GUARD) {
+ out.put('g');
+ }
+ if (prot & PAGE_NOCACHE) {
+ out.put('n');
+ }
+ if (prot & PAGE_WRITECOMBINE) {
+ out.put('W');
+ }
+ const DWORD bits = PAGE_NOACCESS | PAGE_READONLY | PAGE_READWRITE | PAGE_EXECUTE_READ | PAGE_EXECUTE_READWRITE
+ | PAGE_WRITECOPY | PAGE_EXECUTE_WRITECOPY | PAGE_EXECUTE
+ | PAGE_GUARD | PAGE_NOCACHE | PAGE_WRITECOMBINE;
+ if ((prot & bits) != prot) {
+ out.print_cr("Unknown Windows memory protection value: 0x%x unknown bits: 0x%x", prot, prot & ~bits);
+ assert(false, "Unknown Windows memory protection value: 0x%x unknown bits: 0x%x", prot, prot & ~bits);
+ }
+ }
+
+ void get_state_string(outputStream& out, MEMORY_BASIC_INFORMATION& mem_info) {
+ if (mem_info.State == MEM_COMMIT) {
+ out.put('c');
+ } else if (mem_info.State == MEM_FREE) {
+ out.put('f');
+ } else if (mem_info.State == MEM_RESERVE) {
+ out.put('r');
+ } else {
+ out.print_cr("Unknown Windows memory state value: 0x%x", mem_info.State);
+ assert(false, "Unknown Windows memory state value: 0x%x", mem_info.State);
+ }
+ }
+
+ void get_type_string(outputStream& out, MEMORY_BASIC_INFORMATION& mem_info) {
+ if (mem_info.Type == MEM_IMAGE) {
+ out.print("img");
+ } else if (mem_info.Type == MEM_MAPPED) {
+ out.print("map");
+ } else if (mem_info.Type == MEM_PRIVATE) {
+ out.print("pvt");
+ } else if (mem_info.Type == 0 && mem_info.State == MEM_FREE) {
+ out.print("---");
+ } else {
+ out.print_cr("Unknown Windows memory type 0x%x", mem_info.Type);
+ assert(false, "Unknown Windows memory type 0x%x", mem_info.Type);
+ }
+ }
+};
+
+class MappingInfoSummary {
+ unsigned _num_mappings;
+ size_t _total_region_size; // combined resident set size
+ size_t _total_committed; // combined committed size
+ class WinOsInfo : public os::win32 {
+ public:
+ static void printOsInfo(outputStream* st) {
+ st->print("OS:");
+ os::win32::print_windows_version(st);
+ os::win32::print_uptime_info(st);
+ VM_Version::print_platform_virtualization_info(st);
+ os::print_memory_info(st);
+ }
+ };
+public:
+ MappingInfoSummary() : _num_mappings(0), _total_region_size(0),
+ _total_committed(0) {}
+
+ void add_mapping(const MEMORY_BASIC_INFORMATION& mem_info, const MappingInfo& mapping_info) {
+ if (mem_info.State != MEM_FREE) {
+ _num_mappings++;
+ _total_region_size += mem_info.RegionSize;
+ _total_committed += mem_info.State == MEM_COMMIT ? mem_info.RegionSize : 0;
+ }
+ }
+
+ void print_on(const MappingPrintSession& session) const {
+ outputStream* st = session.out();
+ WinOsInfo::printOsInfo(st);
+ st->print_cr("current process reserved memory: " PROPERFMT, PROPERFMTARGS(_total_region_size));
+ st->print_cr("current process committed memory: " PROPERFMT, PROPERFMTARGS(_total_committed));
+ st->print_cr("current process region count: " PROPERFMT, PROPERFMTARGS(_num_mappings));
+ }
+};
+
+class MappingInfoPrinter {
+ const MappingPrintSession& _session;
+public:
+ MappingInfoPrinter(const MappingPrintSession& session) :
+ _session(session)
+ {}
+
+ void print_single_mapping(const MEMORY_BASIC_INFORMATION& mem_info, const MappingInfo& mapping_info) const {
+ outputStream* st = _session.out();
+#define INDENT_BY(n) \
+ if (st->fill_to(n) == 0) { \
+ st->print(" "); \
+ }
+ st->print(PTR_FORMAT "-" PTR_FORMAT, mem_info.BaseAddress, static_cast(mem_info.BaseAddress) + mem_info.RegionSize);
+ INDENT_BY(38);
+ st->print("%12zu", mem_info.RegionSize);
+ INDENT_BY(51);
+ st->print("%s", mapping_info._protect_buffer.base());
+ INDENT_BY(57);
+ st->print("%s-%s", mapping_info._state_buffer.base(), mapping_info._type_buffer.base());
+ INDENT_BY(63);
+ st->print("%#11llx", reinterpret_cast(mem_info.BaseAddress) - reinterpret_cast(mem_info.AllocationBase));
+ INDENT_BY(72);
+ if (_session.print_nmt_info_for_region(mem_info.BaseAddress, static_cast(mem_info.BaseAddress) + mem_info.RegionSize)) {
+ st->print(" ");
+ }
+ st->print_raw(mapping_info._file_name);
+ #undef INDENT_BY
+ st->cr();
+ }
+
+ void print_legend() const {
+ outputStream* st = _session.out();
+ st->print_cr("from, to, vsize: address range and size");
+ st->print_cr("prot: protection:");
+ st->print_cr(" rwx: read / write / execute");
+ st->print_cr(" c: copy on write");
+ st->print_cr(" g: guard");
+ st->print_cr(" n: no cache");
+ st->print_cr(" W: write combine");
+ st->print_cr("state: region state and type:");
+ st->print_cr(" state: committed / reserved");
+ st->print_cr(" type: image / mapped / private");
+ st->print_cr("file: file mapped, if mapping is not anonymous");
+ st->print_cr("vm info: VM information (requires NMT)");
+ {
+ streamIndentor si(st, 16);
+ _session.print_nmt_flag_legend();
+ }
+ }
+
+ void print_header() const {
+ outputStream* st = _session.out();
+ // 0 1 2 3 4 5 6 7 8 9 0 1 2 3
+ // 01234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789
+ // 0x00007ffb24565000-0x00007ffb24a7e000 5345280 r-- c-img 0x1155000 C:\work\jdk\build\fastdebug\jdk\bin\server\jvm.dll
+ st->print_cr("from to vsize prot state offset vminfo/file");
+ st->print_cr("===========================================================================================");
+ }
+};
+
+void MemMapPrinter::pd_print_all_mappings(const MappingPrintSession& session) {
+
+ HANDLE hProcess = GetCurrentProcess();
+
+ MappingInfoPrinter printer(session);
+ MappingInfoSummary summary;
+
+ outputStream* const st = session.out();
+
+ printer.print_legend();
+ st->cr();
+ printer.print_header();
+
+ MEMORY_BASIC_INFORMATION mem_info;
+ MappingInfo mapping_info;
+
+ int region_count = 0;
+ ::memset(&mem_info, 0, sizeof(mem_info));
+ for (char* ptr = 0; VirtualQueryEx(hProcess, ptr, &mem_info, sizeof(mem_info)) == sizeof(mem_info); ) {
+ assert(mem_info.RegionSize > 0, "RegionSize is not greater than zero");
+ if (++region_count > MAX_REGIONS_RETURNED) {
+ st->print_cr("limit of %d regions reached (results inaccurate)", region_count);
+ break;
+ }
+ mapping_info.process(mem_info);
+ if (mem_info.State != MEM_FREE) {
+ printer.print_single_mapping(mem_info, mapping_info);
+ summary.add_mapping(mem_info, mapping_info);
+ }
+ ptr += mem_info.RegionSize;
+ ::memset(&mem_info, 0, sizeof(mem_info));
+ }
+ st->cr();
+ summary.print_on(session);
+ st->cr();
+}
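
For reference, the new printer above boils down to a VirtualQueryEx region walk over the process address space. A minimal standalone sketch of that loop (separate from this patch, with simplified output and no NMT integration) could look like:

#include <windows.h>
#include <cstdio>

// Walk the address space of the current process one region at a time, the way
// pd_print_all_mappings does above: each VirtualQueryEx call describes the
// region containing `ptr`, and advancing by RegionSize moves to the next one.
int main() {
  HANDLE process = GetCurrentProcess();
  MEMORY_BASIC_INFORMATION info;
  for (char* ptr = nullptr;
       VirtualQueryEx(process, ptr, &info, sizeof(info)) == sizeof(info);
       ptr += info.RegionSize) {
    if (info.State == MEM_FREE) {
      continue;  // skip unallocated ranges, as the printer does
    }
    char* base = static_cast<char*>(info.BaseAddress);
    std::printf("%p-%p %12llu %s\n",
                static_cast<void*>(base),
                static_cast<void*>(base + info.RegionSize),
                static_cast<unsigned long long>(info.RegionSize),
                info.State == MEM_COMMIT ? "committed" : "reserved");
  }
  return 0;
}
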
diff --git a/src/hotspot/os/windows/os_windows.cpp b/src/hotspot/os/windows/os_windows.cpp
index a1e0a78837f74..817757f1ac6a2 100644
--- a/src/hotspot/os/windows/os_windows.cpp
+++ b/src/hotspot/os/windows/os_windows.cpp
@@ -1947,7 +1947,10 @@ void os::win32::print_windows_version(outputStream* st) {
// - 2016 GA 10/2016 build: 14393
// - 2019 GA 11/2018 build: 17763
// - 2022 GA 08/2021 build: 20348
- if (build_number > 20347) {
+ // - 2025 Preview build : 26040
+ if (build_number > 26039) {
+ st->print("Server 2025");
+ } else if (build_number > 20347) {
st->print("Server 2022");
} else if (build_number > 17762) {
st->print("Server 2019");
@@ -3428,7 +3431,7 @@ char* os::replace_existing_mapping_with_file_mapping(char* base, size_t size, in
// Multiple threads can race in this code but it's not possible to unmap small sections of
// virtual space to get requested alignment, like posix-like os's.
// Windows prevents multiple threads from remapping over each other so this loop is thread-safe.
-static char* map_or_reserve_memory_aligned(size_t size, size_t alignment, int file_desc, MEMFLAGS flag = mtNone) {
+static char* map_or_reserve_memory_aligned(size_t size, size_t alignment, int file_desc, MemTag mem_tag = mtNone) {
assert(is_aligned(alignment, os::vm_allocation_granularity()),
"Alignment must be a multiple of allocation granularity (page size)");
assert(is_aligned(size, os::vm_allocation_granularity()),
@@ -3441,8 +3444,8 @@ static char* map_or_reserve_memory_aligned(size_t size, size_t alignment, int fi
static const int max_attempts = 20;
for (int attempt = 0; attempt < max_attempts && aligned_base == nullptr; attempt ++) {
- char* extra_base = file_desc != -1 ? os::map_memory_to_file(extra_size, file_desc, flag) :
- os::reserve_memory(extra_size, false, flag);
+ char* extra_base = file_desc != -1 ? os::map_memory_to_file(extra_size, file_desc, mem_tag) :
+ os::reserve_memory(extra_size, false, mem_tag);
if (extra_base == nullptr) {
return nullptr;
}
@@ -3458,8 +3461,8 @@ static char* map_or_reserve_memory_aligned(size_t size, size_t alignment, int fi
// Attempt to map, into the just vacated space, the slightly smaller aligned area.
// Which may fail, hence the loop.
- aligned_base = file_desc != -1 ? os::attempt_map_memory_to_file_at(aligned_base, size, file_desc, flag) :
- os::attempt_reserve_memory_at(aligned_base, size, false, flag);
+ aligned_base = file_desc != -1 ? os::attempt_map_memory_to_file_at(aligned_base, size, file_desc, mem_tag) :
+ os::attempt_reserve_memory_at(aligned_base, size, false, mem_tag);
}
assert(aligned_base != nullptr,
@@ -3473,8 +3476,8 @@ char* os::reserve_memory_aligned(size_t size, size_t alignment, bool exec) {
return map_or_reserve_memory_aligned(size, alignment, -1 /* file_desc */);
}
-char* os::map_memory_to_file_aligned(size_t size, size_t alignment, int fd, MEMFLAGS flag) {
- return map_or_reserve_memory_aligned(size, alignment, fd, flag);
+char* os::map_memory_to_file_aligned(size_t size, size_t alignment, int fd, MemTag mem_tag) {
+ return map_or_reserve_memory_aligned(size, alignment, fd, mem_tag);
}
char* os::pd_reserve_memory(size_t bytes, bool exec) {
@@ -4090,6 +4093,39 @@ int os::win32::_build_minor = 0;
bool os::win32::_processor_group_warning_displayed = false;
bool os::win32::_job_object_processor_group_warning_displayed = false;
+void getWindowsInstallationType(char* buffer, int bufferSize) {
+ HKEY hKey;
+ const char* subKey = "SOFTWARE\\Microsoft\\Windows NT\\CurrentVersion";
+ const char* valueName = "InstallationType";
+
+ DWORD valueLength = bufferSize;
+
+ // Initialize buffer with empty string
+ buffer[0] = '\0';
+
+ // Open the registry key
+ if (RegOpenKeyExA(HKEY_LOCAL_MACHINE, subKey, 0, KEY_READ, &hKey) != ERROR_SUCCESS) {
+ // Return empty buffer if key cannot be opened
+ return;
+ }
+
+ // Query the value
+ if (RegQueryValueExA(hKey, valueName, NULL, NULL, (LPBYTE)buffer, &valueLength) != ERROR_SUCCESS) {
+ RegCloseKey(hKey);
+ buffer[0] = '\0';
+ return;
+ }
+
+ RegCloseKey(hKey);
+}
+
+bool isNanoServer() {
+ const int BUFFER_SIZE = 256;
+ char installationType[BUFFER_SIZE];
+ getWindowsInstallationType(installationType, BUFFER_SIZE);
+ return (strcmp(installationType, "Nano Server") == 0);
+}
+
void os::win32::initialize_windows_version() {
assert(_major_version == 0, "windows version already initialized.");
@@ -4107,7 +4143,13 @@ void os::win32::initialize_windows_version() {
warning("Attempt to determine system directory failed: %s", buf_len != 0 ? error_msg_buffer : "");
return;
}
- strncat(kernel32_path, "\\kernel32.dll", MAX_PATH - ret);
+
+ if (isNanoServer()) {
+ // On Windows Nanoserver the kernel32.dll is located in the forwarders subdirectory
+ strncat(kernel32_path, "\\forwarders\\kernel32.dll", MAX_PATH - ret);
+ } else {
+ strncat(kernel32_path, "\\kernel32.dll", MAX_PATH - ret);
+ }
DWORD version_size = GetFileVersionInfoSize(kernel32_path, nullptr);
if (version_size == 0) {
diff --git a/src/hotspot/os_cpu/linux_riscv/orderAccess_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/orderAccess_linux_riscv.hpp
index a7dc84770f84c..368d6c971fae0 100644
--- a/src/hotspot/os_cpu/linux_riscv/orderAccess_linux_riscv.hpp
+++ b/src/hotspot/os_cpu/linux_riscv/orderAccess_linux_riscv.hpp
@@ -54,6 +54,24 @@ inline void OrderAccess::fence() {
}
inline void OrderAccess::cross_modify_fence_impl() {
+ // From 3 “Zifencei” Instruction-Fetch Fence, Version 2.0
+ // "RISC-V does not guarantee that stores to instruction memory will be made
+ // visible to instruction fetches on a RISC-V hart until that hart executes a
+ // FENCE.I instruction. A FENCE.I instruction ensures that a subsequent
+ // instruction fetch on a RISC-V hart will see any previous data stores
+ // already visible to the same RISC-V hart. FENCE.I does not ensure that other
+ // RISC-V harts’ instruction fetches will observe the local hart’s stores in a
+ // multiprocessor system."
+ //
+  // Hence, to be able to use fence.i directly, we need a kernel that supports
+  // PR_RISCV_CTX_SW_FENCEI_ON. Then, if we context switch to another hart, we are
+  // ensured that instruction fetches will see any previous data stores.
+  //
+  // The alternative is using a full system IPI (system-wide icache sync), in which
+  // case this barrier is not strictly needed. As this is emitted in a runtime
+  // slow-path, we will just always emit it, typically after a safepoint.
+  guarantee(VM_Version::supports_fencei_barrier(), "Linux kernel requires fence.i");
+ __asm__ volatile("fence.i" : : : "memory");
}
#endif // OS_CPU_LINUX_RISCV_ORDERACCESS_LINUX_RISCV_HPP
diff --git a/src/hotspot/os_cpu/linux_riscv/vm_version_linux_riscv.cpp b/src/hotspot/os_cpu/linux_riscv/vm_version_linux_riscv.cpp
index 3f9f26b525ba5..a3a226502f6fc 100644
--- a/src/hotspot/os_cpu/linux_riscv/vm_version_linux_riscv.cpp
+++ b/src/hotspot/os_cpu/linux_riscv/vm_version_linux_riscv.cpp
@@ -35,6 +35,7 @@
#include
#include
#include
+#include <sys/prctl.h>
#ifndef HWCAP_ISA_I
#define HWCAP_ISA_I nth_bit('I' - 'A')
@@ -82,6 +83,23 @@
__v; \
})
+// prctl PR_RISCV_SET_ICACHE_FLUSH_CTX is from Linux 6.9
+#ifndef PR_RISCV_SET_ICACHE_FLUSH_CTX
+#define PR_RISCV_SET_ICACHE_FLUSH_CTX 71
+#endif
+#ifndef PR_RISCV_CTX_SW_FENCEI_ON
+#define PR_RISCV_CTX_SW_FENCEI_ON 0
+#endif
+#ifndef PR_RISCV_CTX_SW_FENCEI_OFF
+#define PR_RISCV_CTX_SW_FENCEI_OFF 1
+#endif
+#ifndef PR_RISCV_SCOPE_PER_PROCESS
+#define PR_RISCV_SCOPE_PER_PROCESS 0
+#endif
+#ifndef PR_RISCV_SCOPE_PER_THREAD
+#define PR_RISCV_SCOPE_PER_THREAD 1
+#endif
+
uint32_t VM_Version::cpu_vector_length() {
assert(ext_V.enabled(), "should not call this");
return (uint32_t)read_csr(CSR_VLENB);
@@ -102,6 +120,7 @@ void VM_Version::setup_cpu_available_features() {
if (!RiscvHwprobe::probe_features()) {
os_aux_features();
}
+
char* uarch = os_uarch_additional_features();
vendor_features();
@@ -155,6 +174,24 @@ void VM_Version::setup_cpu_available_features() {
i++;
}
+  // Linux kernel requires Zifencei
+ if (!ext_Zifencei.enabled()) {
+ log_info(os, cpu)("Zifencei not found, required by Linux, enabling.");
+ ext_Zifencei.enable_feature();
+ }
+
+ if (UseCtxFencei) {
+    // Note that we can set this up only for affected threads
+    // via PR_RISCV_SCOPE_PER_THREAD, i.e. on VM attach/detach.
+ int ret = prctl(PR_RISCV_SET_ICACHE_FLUSH_CTX, PR_RISCV_CTX_SW_FENCEI_ON, PR_RISCV_SCOPE_PER_PROCESS);
+ if (ret == 0) {
+ log_debug(os, cpu)("UseCtxFencei (PR_RISCV_CTX_SW_FENCEI_ON) enabled.");
+ } else {
+ FLAG_SET_ERGO(UseCtxFencei, false);
+ log_info(os, cpu)("UseCtxFencei (PR_RISCV_CTX_SW_FENCEI_ON) disabled, unsupported by kernel.");
+ }
+ }
+
_features_string = os::strdup(buf);
}
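
For reference, the UseCtxFencei setup added above relies on the prctl interface introduced in Linux 6.9. A minimal standalone sketch of the same probe-and-fallback pattern (separate from this patch; the constant values are the ones shown in the guards above) could look like:

#include <sys/prctl.h>
#include <cstdio>

// prctl values from Linux 6.9; defined here as fallbacks for older headers,
// mirroring the #ifndef guards in the hunk above.
#ifndef PR_RISCV_SET_ICACHE_FLUSH_CTX
#define PR_RISCV_SET_ICACHE_FLUSH_CTX 71
#endif
#ifndef PR_RISCV_CTX_SW_FENCEI_ON
#define PR_RISCV_CTX_SW_FENCEI_ON 0
#endif
#ifndef PR_RISCV_SCOPE_PER_PROCESS
#define PR_RISCV_SCOPE_PER_PROCESS 0
#endif

// Ask the kernel to execute fence.i on every context switch for this process.
// If the kernel predates the feature, prctl fails and the caller has to fall
// back to another icache-sync strategy (the VM clears UseCtxFencei in that case).
static bool enable_ctx_fencei() {
  return prctl(PR_RISCV_SET_ICACHE_FLUSH_CTX,
               PR_RISCV_CTX_SW_FENCEI_ON,
               PR_RISCV_SCOPE_PER_PROCESS) == 0;
}

int main() {
  std::printf("per-process fence.i on context switch: %s\n",
              enable_ctx_fencei() ? "enabled" : "not supported by this kernel");
  return 0;
}
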
diff --git a/src/hotspot/share/adlc/adlArena.cpp b/src/hotspot/share/adlc/adlArena.cpp
index d5a1dd500fa66..ebd1f74911d57 100644
--- a/src/hotspot/share/adlc/adlArena.cpp
+++ b/src/hotspot/share/adlc/adlArena.cpp
@@ -63,8 +63,6 @@ void AdlChunk::chop() {
AdlChunk *k = this;
while( k ) {
AdlChunk *tmp = k->_next;
- // clear out this chunk (to detect allocation bugs)
- memset(k, 0xBE, k->_len);
free(k); // Free chunk (was malloc'd)
k = tmp;
}
diff --git a/src/hotspot/share/adlc/formssel.cpp b/src/hotspot/share/adlc/formssel.cpp
index e7df38ff221a9..15bc7ddc67d60 100644
--- a/src/hotspot/share/adlc/formssel.cpp
+++ b/src/hotspot/share/adlc/formssel.cpp
@@ -4357,7 +4357,7 @@ bool MatchRule::is_vector() const {
"RoundDoubleModeV","RotateLeftV" , "RotateRightV", "LoadVector","StoreVector",
"LoadVectorGather", "StoreVectorScatter", "LoadVectorGatherMasked", "StoreVectorScatterMasked",
"VectorTest", "VectorLoadMask", "VectorStoreMask", "VectorBlend", "VectorInsert",
- "VectorRearrange","VectorLoadShuffle", "VectorLoadConst",
+ "VectorRearrange", "VectorLoadShuffle", "VectorLoadConst",
"VectorCastB2X", "VectorCastS2X", "VectorCastI2X",
"VectorCastL2X", "VectorCastF2X", "VectorCastD2X", "VectorCastF2HF", "VectorCastHF2F",
"VectorUCastB2X", "VectorUCastS2X", "VectorUCastI2X",
diff --git a/src/hotspot/share/asm/register.hpp b/src/hotspot/share/asm/register.hpp
index 7dfc9b03f878d..6078edecb4ad1 100644
--- a/src/hotspot/share/asm/register.hpp
+++ b/src/hotspot/share/asm/register.hpp
@@ -276,19 +276,23 @@ inline constexpr bool different_registers(R first_register, Rx... more_registers
}
template <typename R, typename... Rx>
-inline void assert_different_registers(R first_register, Rx... more_registers) {
+inline void assert_different_registers_impl(const char* file, int line, R first_register, Rx... more_registers) {
#ifdef ASSERT
if (!different_registers(first_register, more_registers...)) {
const R regs[] = { first_register, more_registers... };
// Find a duplicate entry.
for (size_t i = 0; i < ARRAY_SIZE(regs) - 1; ++i) {
for (size_t j = i + 1; j < ARRAY_SIZE(regs); ++j) {
- assert(!regs[i]->is_valid() || regs[i] != regs[j],
- "Multiple uses of register: %s", regs[i]->name());
+ if (regs[i]->is_valid()) {
+ assert_with_file_and_line(regs[i] != regs[j], file, line, "regs[%zu] and regs[%zu] are both: %s",
+ i, j, regs[i]->name());
+ }
}
}
}
#endif
}
+#define assert_different_registers(...) assert_different_registers_impl(__FILE__, __LINE__, __VA_ARGS__)
+
#endif // SHARE_ASM_REGISTER_HPP
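
For reference, the change above moves the work into an _impl function and wraps it in a macro so assertion failures report the caller's file and line rather than the shared implementation. A minimal sketch of that general pattern (separate from this patch; check_positive is a made-up example, not a HotSpot API) could look like:

#include <cstdio>
#include <cstdlib>

// The real work lives in an *_impl function that takes file/line explicitly,
// and the macro stamps __FILE__/__LINE__ in at every call site, so the
// diagnostic points at the caller.
inline void check_positive_impl(const char* file, int line, int value) {
  if (value <= 0) {
    std::fprintf(stderr, "%s:%d: expected a positive value, got %d\n", file, line, value);
    std::abort();
  }
}

#define check_positive(value) check_positive_impl(__FILE__, __LINE__, (value))

int main() {
  check_positive(3);   // fine
  check_positive(-1);  // reported against this line, not against the impl
  return 0;
}
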
diff --git a/src/hotspot/share/c1/c1_CodeStubs.hpp b/src/hotspot/share/c1/c1_CodeStubs.hpp
index 04e379842e152..9abfa45785bda 100644
--- a/src/hotspot/share/c1/c1_CodeStubs.hpp
+++ b/src/hotspot/share/c1/c1_CodeStubs.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1999, 2023, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2024, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -264,10 +264,10 @@ class NewInstanceStub: public CodeStub {
LIR_Opr _klass_reg;
LIR_Opr _result;
CodeEmitInfo* _info;
- Runtime1::StubID _stub_id;
+ C1StubId _stub_id;
public:
- NewInstanceStub(LIR_Opr klass_reg, LIR_Opr result, ciInstanceKlass* klass, CodeEmitInfo* info, Runtime1::StubID stub_id);
+ NewInstanceStub(LIR_Opr klass_reg, LIR_Opr result, ciInstanceKlass* klass, CodeEmitInfo* info, C1StubId stub_id);
virtual void emit_code(LIR_Assembler* e);
virtual CodeEmitInfo* info() const { return _info; }
virtual void visit(LIR_OpVisitState* visitor) {
@@ -515,11 +515,11 @@ class DeoptimizeStub : public CodeStub {
class SimpleExceptionStub: public CodeStub {
private:
LIR_Opr _obj;
- Runtime1::StubID _stub;
+ C1StubId _stub;
CodeEmitInfo* _info;
public:
- SimpleExceptionStub(Runtime1::StubID stub, LIR_Opr obj, CodeEmitInfo* info):
+ SimpleExceptionStub(C1StubId stub, LIR_Opr obj, CodeEmitInfo* info):
_obj(obj), _stub(stub), _info(info) {
FrameMap* f = Compilation::current()->frame_map();
f->update_reserved_argument_area_size(2 * BytesPerWord);
@@ -546,7 +546,7 @@ class SimpleExceptionStub: public CodeStub {
class ArrayStoreExceptionStub: public SimpleExceptionStub {
public:
- ArrayStoreExceptionStub(LIR_Opr obj, CodeEmitInfo* info): SimpleExceptionStub(Runtime1::throw_array_store_exception_id, obj, info) {}
+ ArrayStoreExceptionStub(LIR_Opr obj, CodeEmitInfo* info): SimpleExceptionStub(C1StubId::throw_array_store_exception_id, obj, info) {}
#ifndef PRODUCT
virtual void print_name(outputStream* out) const { out->print("ArrayStoreExceptionStub"); }
#endif // PRODUCT
diff --git a/src/hotspot/share/c1/c1_Compilation.cpp b/src/hotspot/share/c1/c1_Compilation.cpp
index 48201a8605376..7e0d439aff4ef 100644
--- a/src/hotspot/share/c1/c1_Compilation.cpp
+++ b/src/hotspot/share/c1/c1_Compilation.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1999, 2023, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2024, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -274,8 +274,6 @@ void Compilation::emit_lir() {
// Assign physical registers to LIR operands using a linear scan algorithm.
allocator->do_linear_scan();
CHECK_BAILOUT();
-
- _max_spills = allocator->max_spills();
}
if (BailoutAfterLIR) {
@@ -568,7 +566,6 @@ Compilation::Compilation(AbstractCompiler* compiler, ciEnv* env, ciMethod* metho
, _method(method)
, _osr_bci(osr_bci)
, _hir(nullptr)
-, _max_spills(-1)
, _frame_map(nullptr)
, _masm(nullptr)
, _has_exception_handlers(false)
diff --git a/src/hotspot/share/c1/c1_Compilation.hpp b/src/hotspot/share/c1/c1_Compilation.hpp
index d55a8debca72c..0ac0a4d4169ab 100644
--- a/src/hotspot/share/c1/c1_Compilation.hpp
+++ b/src/hotspot/share/c1/c1_Compilation.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1999, 2023, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2024, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -73,7 +73,6 @@ class Compilation: public StackObj {
ciMethod* _method;
int _osr_bci;
IR* _hir;
- int _max_spills;
FrameMap* _frame_map;
C1_MacroAssembler* _masm;
bool _has_exception_handlers;
@@ -151,7 +150,6 @@ class Compilation: public StackObj {
int osr_bci() const { return _osr_bci; }
bool is_osr_compile() const { return osr_bci() >= 0; }
IR* hir() const { return _hir; }
- int max_spills() const { return _max_spills; }
FrameMap* frame_map() const { return _frame_map; }
CodeBuffer* code() { return &_code; }
C1_MacroAssembler* masm() const { return _masm; }
diff --git a/src/hotspot/share/c1/c1_Compiler.cpp b/src/hotspot/share/c1/c1_Compiler.cpp
index e1c4e90d0637d..a0944c864e68f 100644
--- a/src/hotspot/share/c1/c1_Compiler.cpp
+++ b/src/hotspot/share/c1/c1_Compiler.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1999, 2023, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2024, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -167,6 +167,9 @@ bool Compiler::is_intrinsic_supported(vmIntrinsics::ID id) {
case vmIntrinsics::_dsin:
case vmIntrinsics::_dcos:
case vmIntrinsics::_dtan:
+ #if defined(AMD64)
+ case vmIntrinsics::_dtanh:
+ #endif
case vmIntrinsics::_dlog:
case vmIntrinsics::_dlog10:
case vmIntrinsics::_dexp:
diff --git a/src/hotspot/share/c1/c1_GraphBuilder.cpp b/src/hotspot/share/c1/c1_GraphBuilder.cpp
index a2e903edc342f..02be6f8d49e4a 100644
--- a/src/hotspot/share/c1/c1_GraphBuilder.cpp
+++ b/src/hotspot/share/c1/c1_GraphBuilder.cpp
@@ -3339,6 +3339,7 @@ GraphBuilder::GraphBuilder(Compilation* compilation, IRScope* scope)
case vmIntrinsics::_dsin : // fall through
case vmIntrinsics::_dcos : // fall through
case vmIntrinsics::_dtan : // fall through
+ case vmIntrinsics::_dtanh : // fall through
case vmIntrinsics::_dlog : // fall through
case vmIntrinsics::_dlog10 : // fall through
case vmIntrinsics::_dexp : // fall through
diff --git a/src/hotspot/share/c1/c1_LIR.hpp b/src/hotspot/share/c1/c1_LIR.hpp
index 5d73ab5b88dba..c568caeca4b30 100644
--- a/src/hotspot/share/c1/c1_LIR.hpp
+++ b/src/hotspot/share/c1/c1_LIR.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2000, 2023, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2000, 2024, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -29,6 +29,7 @@
#include "c1/c1_ValueType.hpp"
#include "oops/method.hpp"
#include "utilities/globalDefinitions.hpp"
+#include "utilities/macros.hpp"
class BlockBegin;
class BlockList;
@@ -1122,7 +1123,7 @@ class LIR_Op: public CompilationResourceObj {
}
#endif
- virtual const char * name() const PRODUCT_RETURN0;
+ virtual const char * name() const PRODUCT_RETURN_NULL;
virtual void visit(LIR_OpVisitState* state);
int id() const { return _id; }
@@ -1400,7 +1401,7 @@ class LIR_Op1: public LIR_Op {
virtual bool is_patching() { return _patch != lir_patch_none; }
virtual void emit_code(LIR_Assembler* masm);
virtual LIR_Op1* as_Op1() { return this; }
- virtual const char * name() const PRODUCT_RETURN0;
+ virtual const char * name() const PRODUCT_RETURN_NULL;
void set_in_opr(LIR_Opr opr) { _opr = opr; }
@@ -2033,8 +2034,9 @@ class LIR_OpProfileCall : public LIR_Op {
virtual void print_instr(outputStream* out) const PRODUCT_RETURN;
bool should_profile_receiver_type() const {
bool callee_is_static = _profiled_callee->is_loaded() && _profiled_callee->is_static();
+ bool callee_is_private = _profiled_callee->is_loaded() && _profiled_callee->is_private();
Bytecodes::Code bc = _profiled_method->java_code_at_bci(_profiled_bci);
- bool call_is_virtual = (bc == Bytecodes::_invokevirtual && !_profiled_callee->can_be_statically_bound()) || bc == Bytecodes::_invokeinterface;
+ bool call_is_virtual = (bc == Bytecodes::_invokevirtual && !callee_is_private) || bc == Bytecodes::_invokeinterface;
return C1ProfileVirtualCalls && call_is_virtual && !callee_is_static;
}
};
diff --git a/src/hotspot/share/c1/c1_LIRGenerator.cpp b/src/hotspot/share/c1/c1_LIRGenerator.cpp
index 7b519804bfecd..74fdf7a5b76a3 100644
--- a/src/hotspot/share/c1/c1_LIRGenerator.cpp
+++ b/src/hotspot/share/c1/c1_LIRGenerator.cpp
@@ -659,7 +659,7 @@ void LIRGenerator::new_instance(LIR_Opr dst, ciInstanceKlass* klass, bool is_unr
if (UseFastNewInstance && klass->is_loaded()
&& !Klass::layout_helper_needs_slow_path(klass->layout_helper())) {
- Runtime1::StubID stub_id = klass->is_initialized() ? Runtime1::fast_new_instance_id : Runtime1::fast_new_instance_init_check_id;
+ C1StubId stub_id = klass->is_initialized() ? C1StubId::fast_new_instance_id : C1StubId::fast_new_instance_init_check_id;
CodeStub* slow_path = new NewInstanceStub(klass_reg, dst, klass, info, stub_id);
@@ -670,7 +670,7 @@ void LIRGenerator::new_instance(LIR_Opr dst, ciInstanceKlass* klass, bool is_unr
__ allocate_object(dst, scratch1, scratch2, scratch3, scratch4,
oopDesc::header_size(), instance_size, klass_reg, !klass->is_initialized(), slow_path);
} else {
- CodeStub* slow_path = new NewInstanceStub(klass_reg, dst, klass, info, Runtime1::new_instance_id);
+ CodeStub* slow_path = new NewInstanceStub(klass_reg, dst, klass, info, C1StubId::new_instance_id);
__ branch(lir_cond_always, slow_path);
__ branch_destination(slow_path->continuation());
}
@@ -1479,7 +1479,7 @@ void LIRGenerator::do_RegisterFinalizer(Intrinsic* x) {
args->append(receiver.result());
CodeEmitInfo* info = state_for(x, x->state());
call_runtime(&signature, args,
- CAST_FROM_FN_PTR(address, Runtime1::entry_for(Runtime1::register_finalizer_id)),
+ CAST_FROM_FN_PTR(address, Runtime1::entry_for(C1StubId::register_finalizer_id)),
voidType, info);
set_no_result(x);
@@ -2971,6 +2971,7 @@ void LIRGenerator::do_Intrinsic(Intrinsic* x) {
case vmIntrinsics::_dsqrt: // fall through
case vmIntrinsics::_dsqrt_strict: // fall through
case vmIntrinsics::_dtan: // fall through
+ case vmIntrinsics::_dtanh: // fall through
case vmIntrinsics::_dsin : // fall through
case vmIntrinsics::_dcos : // fall through
case vmIntrinsics::_dexp : // fall through
diff --git a/src/hotspot/share/c1/c1_Runtime1.cpp b/src/hotspot/share/c1/c1_Runtime1.cpp
index 8524f37177b45..915f00f77c523 100644
--- a/src/hotspot/share/c1/c1_Runtime1.cpp
+++ b/src/hotspot/share/c1/c1_Runtime1.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1999, 2023, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2024, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -109,10 +109,13 @@ void StubAssembler::set_num_rt_args(int args) {
// Implementation of Runtime1
-CodeBlob* Runtime1::_blobs[Runtime1::number_of_ids];
+CodeBlob* Runtime1::_blobs[(int)C1StubId::NUM_STUBIDS];
+
+#define C1_BLOB_NAME_DEFINE(name) "C1 Runtime " # name "_blob",
const char *Runtime1::_blob_names[] = {
- RUNTIME1_STUBS(STUB_NAME, LAST_STUB_NAME)
+ C1_STUBS_DO(C1_BLOB_NAME_DEFINE)
};
+#undef C1_BLOB_NAME_DEFINE
#ifndef PRODUCT
// statistics
@@ -190,17 +193,17 @@ static void deopt_caller(JavaThread* current) {
}
}
-class StubIDStubAssemblerCodeGenClosure: public StubAssemblerCodeGenClosure {
+class C1StubIdStubAssemblerCodeGenClosure: public StubAssemblerCodeGenClosure {
private:
- Runtime1::StubID _id;
+ C1StubId _id;
public:
- StubIDStubAssemblerCodeGenClosure(Runtime1::StubID id) : _id(id) {}
+ C1StubIdStubAssemblerCodeGenClosure(C1StubId id) : _id(id) {}
virtual OopMapSet* generate_code(StubAssembler* sasm) {
return Runtime1::generate_code_for(_id, sasm);
}
};
-CodeBlob* Runtime1::generate_blob(BufferBlob* buffer_blob, int stub_id, const char* name, bool expect_oop_map, StubAssemblerCodeGenClosure* cl) {
+CodeBlob* Runtime1::generate_blob(BufferBlob* buffer_blob, C1StubId id, const char* name, bool expect_oop_map, StubAssemblerCodeGenClosure* cl) {
ResourceMark rm;
// create code buffer for code storage
CodeBuffer code(buffer_blob);
@@ -212,7 +215,7 @@ CodeBlob* Runtime1::generate_blob(BufferBlob* buffer_blob, int stub_id, const ch
Compilation::setup_code_buffer(&code, 0);
// create assembler for code generation
- StubAssembler* sasm = new StubAssembler(&code, name, stub_id);
+ StubAssembler* sasm = new StubAssembler(&code, name, (int)id);
// generate code for runtime stub
oop_maps = cl->generate_code(sasm);
assert(oop_maps == nullptr || sasm->frame_size() != no_frame_size,
@@ -237,40 +240,41 @@ CodeBlob* Runtime1::generate_blob(BufferBlob* buffer_blob, int stub_id, const ch
return blob;
}
-void Runtime1::generate_blob_for(BufferBlob* buffer_blob, StubID id) {
- assert(0 <= id && id < number_of_ids, "illegal stub id");
+void Runtime1::generate_blob_for(BufferBlob* buffer_blob, C1StubId id) {
+ assert(C1StubId::NO_STUBID < id && id < C1StubId::NUM_STUBIDS, "illegal stub id");
bool expect_oop_map = true;
#ifdef ASSERT
// Make sure that stubs that need oopmaps have them
switch (id) {
// These stubs don't need to have an oopmap
- case dtrace_object_alloc_id:
- case slow_subtype_check_id:
- case fpu2long_stub_id:
- case unwind_exception_id:
- case counter_overflow_id:
+ case C1StubId::dtrace_object_alloc_id:
+ case C1StubId::slow_subtype_check_id:
+ case C1StubId::fpu2long_stub_id:
+ case C1StubId::unwind_exception_id:
+ case C1StubId::counter_overflow_id:
expect_oop_map = false;
break;
default:
break;
}
#endif
- StubIDStubAssemblerCodeGenClosure cl(id);
+ C1StubIdStubAssemblerCodeGenClosure cl(id);
CodeBlob* blob = generate_blob(buffer_blob, id, name_for(id), expect_oop_map, &cl);
// install blob
- _blobs[id] = blob;
+ _blobs[(int)id] = blob;
}
void Runtime1::initialize(BufferBlob* blob) {
// platform-dependent initialization
initialize_pd();
// generate stubs
- for (int id = 0; id < number_of_ids; id++) generate_blob_for(blob, (StubID)id);
+ int limit = (int)C1StubId::NUM_STUBIDS;
+ for (int id = 0; id < limit; id++) generate_blob_for(blob, (C1StubId)id);
// printing
#ifndef PRODUCT
if (PrintSimpleStubs) {
ResourceMark rm;
- for (int id = 0; id < number_of_ids; id++) {
+ for (int id = 0; id < limit; id++) {
_blobs[id]->print();
if (_blobs[id]->oop_maps() != nullptr) {
_blobs[id]->oop_maps()->print();
@@ -282,20 +286,22 @@ void Runtime1::initialize(BufferBlob* blob) {
bs->generate_c1_runtime_stubs(blob);
}
-CodeBlob* Runtime1::blob_for(StubID id) {
- assert(0 <= id && id < number_of_ids, "illegal stub id");
- return _blobs[id];
+CodeBlob* Runtime1::blob_for(C1StubId id) {
+ assert(C1StubId::NO_STUBID < id && id < C1StubId::NUM_STUBIDS, "illegal stub id");
+ return _blobs[(int)id];
}
-const char* Runtime1::name_for(StubID id) {
- assert(0 <= id && id < number_of_ids, "illegal stub id");
- return _blob_names[id];
+const char* Runtime1::name_for(C1StubId id) {
+ assert(C1StubId::NO_STUBID < id && id < C1StubId::NUM_STUBIDS, "illegal stub id");
+ return _blob_names[(int)id];
}
const char* Runtime1::name_for_address(address entry) {
- for (int id = 0; id < number_of_ids; id++) {
- if (entry == entry_for((StubID)id)) return name_for((StubID)id);
+ int limit = (int)C1StubId::NUM_STUBIDS;
+ for (int i = 0; i < limit; i++) {
+ C1StubId id = (C1StubId)i;
+ if (entry == entry_for(id)) return name_for(id);
}
#define FUNCTION_CASE(a, f) \
@@ -341,6 +347,7 @@ const char* Runtime1::name_for_address(address entry) {
FUNCTION_CASE(entry, StubRoutines::dsin());
FUNCTION_CASE(entry, StubRoutines::dcos());
FUNCTION_CASE(entry, StubRoutines::dtan());
+ FUNCTION_CASE(entry, StubRoutines::dtanh());
#undef FUNCTION_CASE
@@ -425,8 +432,8 @@ JRT_ENTRY(void, Runtime1::new_multi_array(JavaThread* current, Klass* klass, int
JRT_END
-JRT_ENTRY(void, Runtime1::unimplemented_entry(JavaThread* current, StubID id))
- tty->print_cr("Runtime1::entry_for(%d) returned unimplemented entry point", id);
+JRT_ENTRY(void, Runtime1::unimplemented_entry(JavaThread* current, C1StubId id))
+ tty->print_cr("Runtime1::entry_for(%d) returned unimplemented entry point", (int)id);
JRT_END
@@ -525,8 +532,8 @@ JRT_ENTRY_NO_ASYNC(static address, exception_handler_for_pc_helper(JavaThread* c
// This function is called when we are about to throw an exception. Therefore,
// we have to poll the stack watermark barrier to make sure that not yet safe
// stack frames are made safe before returning into them.
- if (current->last_frame().cb() == Runtime1::blob_for(Runtime1::handle_exception_from_callee_id)) {
- // The Runtime1::handle_exception_from_callee_id handler is invoked after the
+ if (current->last_frame().cb() == Runtime1::blob_for(C1StubId::handle_exception_from_callee_id)) {
+ // The C1StubId::handle_exception_from_callee_id handler is invoked after the
// frame has been unwound. It instead builds its own stub frame, to call the
// runtime. But the throwing frame has already been unwound here.
StackWatermarkSet::after_unwind(current);
@@ -922,7 +929,7 @@ static Klass* resolve_field_return_klass(const methodHandle& caller, int bci, TR
// Therefore, if there is any chance of a race condition, we try to
// patch only naturally aligned words, as single, full-word writes.
-JRT_ENTRY(void, Runtime1::patch_code(JavaThread* current, Runtime1::StubID stub_id ))
+JRT_ENTRY(void, Runtime1::patch_code(JavaThread* current, C1StubId stub_id ))
#ifndef PRODUCT
if (PrintC1Statistics) {
_patch_code_slowcase_cnt++;
@@ -959,9 +966,9 @@ JRT_ENTRY(void, Runtime1::patch_code(JavaThread* current, Runtime1::StubID stub_
Handle mirror(current, nullptr); // oop needed by load_mirror_patching code
Handle appendix(current, nullptr); // oop needed by appendix_patching code
bool load_klass_or_mirror_patch_id =
- (stub_id == Runtime1::load_klass_patching_id || stub_id == Runtime1::load_mirror_patching_id);
+ (stub_id == C1StubId::load_klass_patching_id || stub_id == C1StubId::load_mirror_patching_id);
- if (stub_id == Runtime1::access_field_patching_id) {
+ if (stub_id == C1StubId::access_field_patching_id) {
Bytecode_field field_access(caller_method, bci);
fieldDescriptor result; // initialize class if needed
@@ -1044,7 +1051,7 @@ JRT_ENTRY(void, Runtime1::patch_code(JavaThread* current, Runtime1::StubID stub_
default: fatal("unexpected bytecode for load_klass_or_mirror_patch_id");
}
load_klass = k;
- } else if (stub_id == load_appendix_patching_id) {
+ } else if (stub_id == C1StubId::load_appendix_patching_id) {
Bytecode_invoke bytecode(caller_method, bci);
Bytecodes::Code bc = bytecode.invoke_code();
@@ -1128,7 +1135,7 @@ JRT_ENTRY(void, Runtime1::patch_code(JavaThread* current, Runtime1::StubID stub_
if (TracePatching) {
ttyLocker ttyl;
tty->print_cr(" Patching %s at bci %d at address " INTPTR_FORMAT " (%s)", Bytecodes::name(code), bci,
- p2i(instr_pc), (stub_id == Runtime1::access_field_patching_id) ? "field" : "klass");
+ p2i(instr_pc), (stub_id == C1StubId::access_field_patching_id) ? "field" : "klass");
nmethod* caller_code = CodeCache::find_nmethod(caller_frame.pc());
assert(caller_code != nullptr, "nmethod not found");
@@ -1144,7 +1151,7 @@ JRT_ENTRY(void, Runtime1::patch_code(JavaThread* current, Runtime1::StubID stub_
}
// depending on the code below, do_patch says whether to copy the patch body back into the nmethod
bool do_patch = true;
- if (stub_id == Runtime1::access_field_patching_id) {
+ if (stub_id == C1StubId::access_field_patching_id) {
// The offset may not be correct if the class was not loaded at code generation time.
// Set it now.
NativeMovRegMem* n_move = nativeMovRegMem_at(copy_buff);
@@ -1170,7 +1177,7 @@ JRT_ENTRY(void, Runtime1::patch_code(JavaThread* current, Runtime1::StubID stub_
assert(n_copy->data() == 0 ||
n_copy->data() == (intptr_t)Universe::non_oop_word(),
"illegal init value");
- if (stub_id == Runtime1::load_klass_patching_id) {
+ if (stub_id == C1StubId::load_klass_patching_id) {
assert(load_klass != nullptr, "klass not set");
n_copy->set_data((intx) (load_klass));
} else {
@@ -1182,7 +1189,7 @@ JRT_ENTRY(void, Runtime1::patch_code(JavaThread* current, Runtime1::StubID stub_
Disassembler::decode(copy_buff, copy_buff + *byte_count, tty);
}
}
- } else if (stub_id == Runtime1::load_appendix_patching_id) {
+ } else if (stub_id == C1StubId::load_appendix_patching_id) {
NativeMovConstReg* n_copy = nativeMovConstReg_at(copy_buff);
assert(n_copy->data() == 0 ||
n_copy->data() == (intptr_t)Universe::non_oop_word(),
@@ -1201,7 +1208,7 @@ JRT_ENTRY(void, Runtime1::patch_code(JavaThread* current, Runtime1::StubID stub_
// first replace the tail, then the call
#ifdef ARM
if((load_klass_or_mirror_patch_id ||
- stub_id == Runtime1::load_appendix_patching_id) &&
+ stub_id == C1StubId::load_appendix_patching_id) &&
nativeMovConstReg_at(copy_buff)->is_pc_relative()) {
nmethod* nm = CodeCache::find_nmethod(instr_pc);
address addr = nullptr;
@@ -1209,13 +1216,13 @@ JRT_ENTRY(void, Runtime1::patch_code(JavaThread* current, Runtime1::StubID stub_
RelocIterator mds(nm, copy_buff, copy_buff + 1);
while (mds.next()) {
if (mds.type() == relocInfo::oop_type) {
- assert(stub_id == Runtime1::load_mirror_patching_id ||
- stub_id == Runtime1::load_appendix_patching_id, "wrong stub id");
+ assert(stub_id == C1StubId::load_mirror_patching_id ||
+ stub_id == C1StubId::load_appendix_patching_id, "wrong stub id");
oop_Relocation* r = mds.oop_reloc();
addr = (address)r->oop_addr();
break;
} else if (mds.type() == relocInfo::metadata_type) {
- assert(stub_id == Runtime1::load_klass_patching_id, "wrong stub id");
+ assert(stub_id == C1StubId::load_klass_patching_id, "wrong stub id");
metadata_Relocation* r = mds.metadata_reloc();
addr = (address)r->metadata_addr();
break;
@@ -1238,9 +1245,9 @@ JRT_ENTRY(void, Runtime1::patch_code(JavaThread* current, Runtime1::StubID stub_
NativeGeneralJump::replace_mt_safe(instr_pc, copy_buff);
if (load_klass_or_mirror_patch_id ||
- stub_id == Runtime1::load_appendix_patching_id) {
+ stub_id == C1StubId::load_appendix_patching_id) {
relocInfo::relocType rtype =
- (stub_id == Runtime1::load_klass_patching_id) ?
+ (stub_id == C1StubId::load_klass_patching_id) ?
relocInfo::metadata_type :
relocInfo::oop_type;
// update relocInfo to metadata
@@ -1278,9 +1285,9 @@ JRT_END
#else // DEOPTIMIZE_WHEN_PATCHING
-static bool is_patching_needed(JavaThread* current, Runtime1::StubID stub_id) {
- if (stub_id == Runtime1::load_klass_patching_id ||
- stub_id == Runtime1::load_mirror_patching_id) {
+static bool is_patching_needed(JavaThread* current, C1StubId stub_id) {
+ if (stub_id == C1StubId::load_klass_patching_id ||
+ stub_id == C1StubId::load_mirror_patching_id) {
// last java frame on stack
vframeStream vfst(current, true);
assert(!vfst.at_end(), "Java frame must exist");
@@ -1309,7 +1316,7 @@ static bool is_patching_needed(JavaThread* current, Runtime1::StubID stub_id) {
return true;
}
-void Runtime1::patch_code(JavaThread* current, Runtime1::StubID stub_id) {
+void Runtime1::patch_code(JavaThread* current, C1StubId stub_id) {
#ifndef PRODUCT
if (PrintC1Statistics) {
_patch_code_slowcase_cnt++;
@@ -1364,7 +1371,7 @@ int Runtime1::move_klass_patching(JavaThread* current) {
{
// Enter VM mode
ResetNoHandleMark rnhm;
- patch_code(current, load_klass_patching_id);
+ patch_code(current, C1StubId::load_klass_patching_id);
}
// Back in JAVA, use no oops DON'T safepoint
@@ -1381,7 +1388,7 @@ int Runtime1::move_mirror_patching(JavaThread* current) {
{
// Enter VM mode
ResetNoHandleMark rnhm;
- patch_code(current, load_mirror_patching_id);
+ patch_code(current, C1StubId::load_mirror_patching_id);
}
// Back in JAVA, use no oops DON'T safepoint
@@ -1398,7 +1405,7 @@ int Runtime1::move_appendix_patching(JavaThread* current) {
{
// Enter VM mode
ResetNoHandleMark rnhm;
- patch_code(current, load_appendix_patching_id);
+ patch_code(current, C1StubId::load_appendix_patching_id);
}
// Back in JAVA, use no oops DON'T safepoint
@@ -1425,7 +1432,7 @@ int Runtime1::access_field_patching(JavaThread* current) {
{
// Enter VM mode
ResetNoHandleMark rnhm;
- patch_code(current, access_field_patching_id);
+ patch_code(current, C1StubId::access_field_patching_id);
}
// Back in JAVA, use no oops DON'T safepoint
diff --git a/src/hotspot/share/c1/c1_Runtime1.hpp b/src/hotspot/share/c1/c1_Runtime1.hpp
index 2e4c9f8a73379..330c40675041b 100644
--- a/src/hotspot/share/c1/c1_Runtime1.hpp
+++ b/src/hotspot/share/c1/c1_Runtime1.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1999, 2023, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2024, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -30,6 +30,7 @@
#include "interpreter/interpreter.hpp"
#include "memory/allStatic.hpp"
#include "runtime/deoptimization.hpp"
+#include "runtime/stubDeclarations.hpp"
class StubAssembler;
@@ -37,61 +38,26 @@ class StubAssembler;
// runtime routines needed by code generated
// by the Compiler1.
-#define RUNTIME1_STUBS(stub, last_entry) \
- stub(dtrace_object_alloc) \
- stub(unwind_exception) \
- stub(forward_exception) \
- stub(throw_range_check_failed) /* throws ArrayIndexOutOfBoundsException */ \
- stub(throw_index_exception) /* throws IndexOutOfBoundsException */ \
- stub(throw_div0_exception) \
- stub(throw_null_pointer_exception) \
- stub(register_finalizer) \
- stub(new_instance) \
- stub(fast_new_instance) \
- stub(fast_new_instance_init_check) \
- stub(new_type_array) \
- stub(new_object_array) \
- stub(new_multi_array) \
- stub(handle_exception_nofpu) /* optimized version that does not preserve fpu registers */ \
- stub(handle_exception) \
- stub(handle_exception_from_callee) \
- stub(throw_array_store_exception) \
- stub(throw_class_cast_exception) \
- stub(throw_incompatible_class_change_error) \
- stub(slow_subtype_check) \
- stub(monitorenter) \
- stub(monitorenter_nofpu) /* optimized version that does not preserve fpu registers */ \
- stub(monitorexit) \
- stub(monitorexit_nofpu) /* optimized version that does not preserve fpu registers */ \
- stub(deoptimize) \
- stub(access_field_patching) \
- stub(load_klass_patching) \
- stub(load_mirror_patching) \
- stub(load_appendix_patching) \
- stub(fpu2long_stub) \
- stub(counter_overflow) \
- stub(predicate_failed_trap) \
- last_entry(number_of_ids)
-
-#define DECLARE_STUB_ID(x) x ## _id ,
-#define DECLARE_LAST_STUB_ID(x) x
-#define STUB_NAME(x) #x " Runtime1 stub",
-#define LAST_STUB_NAME(x) #x " Runtime1 stub"
-
class StubAssemblerCodeGenClosure: public Closure {
public:
virtual OopMapSet* generate_code(StubAssembler* sasm) = 0;
};
+// define C1StubId enum tags: unwind_exception_id etc
+
+#define C1_STUB_ID_ENUM_DECLARE(name) STUB_ID_NAME(name),
+enum class C1StubId :int {
+ NO_STUBID = -1,
+ C1_STUBS_DO(C1_STUB_ID_ENUM_DECLARE)
+ NUM_STUBIDS
+};
+#undef C1_STUB_ID_ENUM_DECLARE
+
class Runtime1: public AllStatic {
friend class VMStructs;
friend class ArrayCopyStub;
- public:
- enum StubID {
- RUNTIME1_STUBS(DECLARE_STUB_ID, DECLARE_LAST_STUB_ID)
- };
-
+public:
// statistics
#ifndef PRODUCT
static uint _generic_arraycopystub_cnt;
@@ -115,17 +81,17 @@ class Runtime1: public AllStatic {
#endif
private:
- static CodeBlob* _blobs[number_of_ids];
+ static CodeBlob* _blobs[(int)C1StubId::NUM_STUBIDS];
static const char* _blob_names[];
// stub generation
public:
- static CodeBlob* generate_blob(BufferBlob* buffer_blob, int stub_id, const char* name, bool expect_oop_map, StubAssemblerCodeGenClosure *cl);
- static void generate_blob_for(BufferBlob* blob, StubID id);
- static OopMapSet* generate_code_for(StubID id, StubAssembler* sasm);
+ static CodeBlob* generate_blob(BufferBlob* buffer_blob, C1StubId id, const char* name, bool expect_oop_map, StubAssemblerCodeGenClosure *cl);
+ static void generate_blob_for(BufferBlob* blob, C1StubId id);
+ static OopMapSet* generate_code_for(C1StubId id, StubAssembler* sasm);
private:
static OopMapSet* generate_exception_throw(StubAssembler* sasm, address target, bool has_argument);
- static OopMapSet* generate_handle_exception(StubID id, StubAssembler* sasm);
+ static OopMapSet* generate_handle_exception(C1StubId id, StubAssembler* sasm);
static void generate_unwind_exception(StubAssembler *sasm);
static OopMapSet* generate_patching(StubAssembler* sasm, address target);
@@ -140,7 +106,7 @@ class Runtime1: public AllStatic {
static address counter_overflow(JavaThread* current, int bci, Method* method);
- static void unimplemented_entry(JavaThread* current, StubID id);
+ static void unimplemented_entry(JavaThread* current, C1StubId id);
static address exception_handler_for_pc(JavaThread* current);
@@ -162,7 +128,7 @@ class Runtime1: public AllStatic {
static int move_mirror_patching(JavaThread* current);
static int move_appendix_patching(JavaThread* current);
- static void patch_code(JavaThread* current, StubID stub_id);
+ static void patch_code(JavaThread* current, C1StubId stub_id);
public:
// initialization
@@ -170,9 +136,9 @@ class Runtime1: public AllStatic {
static void initialize_pd();
// stubs
- static CodeBlob* blob_for (StubID id);
- static address entry_for(StubID id) { return blob_for(id)->code_begin(); }
- static const char* name_for (StubID id);
+ static CodeBlob* blob_for (C1StubId id);
+ static address entry_for(C1StubId id) { return blob_for(id)->code_begin(); }
+ static const char* name_for (C1StubId id);
static const char* name_for_address(address entry);
// platform might add runtime names.
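
For reference, the hunk above replaces the local RUNTIME1_STUBS list with the shared C1_STUBS_DO X-macro: the same stub list is expanded once to generate the C1StubId enum tags and once to generate the blob-name table. A minimal sketch of that X-macro technique (separate from this patch; EXAMPLE_STUBS_DO and the stub names below are made up, the real list lives in runtime/stubDeclarations.hpp) could look like:

#include <cstdio>

// One central list of names; each expansion passes a different per-name macro.
#define EXAMPLE_STUBS_DO(do_stub) \
  do_stub(new_instance)           \
  do_stub(unwind_exception)       \
  do_stub(counter_overflow)

// Expansion 1: an enum class with one tag per stub.
#define DECLARE_ID(name) name##_id,
enum class ExampleStubId : int {
  NO_STUBID = -1,
  EXAMPLE_STUBS_DO(DECLARE_ID)
  NUM_STUBIDS
};
#undef DECLARE_ID

// Expansion 2: a parallel table of human-readable blob names.
#define DECLARE_NAME(name) "C1 Runtime " #name "_blob",
static const char* example_blob_names[] = {
  EXAMPLE_STUBS_DO(DECLARE_NAME)
};
#undef DECLARE_NAME

int main() {
  for (int i = 0; i < (int)ExampleStubId::NUM_STUBIDS; i++) {
    std::printf("%d: %s\n", i, example_blob_names[i]);
  }
  return 0;
}
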
diff --git a/src/hotspot/share/cds/archiveBuilder.cpp b/src/hotspot/share/cds/archiveBuilder.cpp
index 76b6698a40099..3c67216d4c5bf 100644
--- a/src/hotspot/share/cds/archiveBuilder.cpp
+++ b/src/hotspot/share/cds/archiveBuilder.cpp
@@ -1104,6 +1104,17 @@ class ArchiveBuilder::CDSMapLogger : AllStatic {
LogStreamHandle(Info, cds, map) st;
+ HeapRootSegments segments = heap_info->heap_root_segments();
+ assert(segments.base_offset() == 0, "Sanity");
+
+ for (size_t seg_idx = 0; seg_idx < segments.count(); seg_idx++) {
+ address requested_start = ArchiveHeapWriter::buffered_addr_to_requested_addr(start);
+ st.print_cr(PTR_FORMAT ": Heap roots segment [%d]",
+ p2i(requested_start), segments.size_in_elems(seg_idx));
+ start += segments.size_in_bytes(seg_idx);
+ }
+ log_heap_roots();
+
while (start < end) {
size_t byte_size;
oop source_oop = ArchiveHeapWriter::buffered_addr_to_source_obj(start);
@@ -1114,12 +1125,6 @@ class ArchiveBuilder::CDSMapLogger : AllStatic {
// This is a regular oop that got archived.
print_oop_with_requested_addr_cr(&st, source_oop, false);
byte_size = source_oop->size() * BytesPerWord;
- } else if (start == ArchiveHeapWriter::buffered_heap_roots_addr()) {
- // HeapShared::roots() is copied specially, so it doesn't exist in
- // ArchiveHeapWriter::BufferOffsetToSourceObjectTable.
- // See ArchiveHeapWriter::copy_roots_to_buffer().
- st.print_cr("HeapShared::roots[%d]", HeapShared::pending_roots()->length());
- byte_size = ArchiveHeapWriter::heap_roots_word_size() * BytesPerWord;
} else if ((byte_size = ArchiveHeapWriter::get_filler_size_at(start)) > 0) {
// We have a filler oop, which also does not exist in BufferOffsetToSourceObjectTable.
st.print_cr("filler " SIZE_FORMAT " bytes", byte_size);
@@ -1132,8 +1137,6 @@ class ArchiveBuilder::CDSMapLogger : AllStatic {
if (source_oop != nullptr) {
log_oop_details(heap_info, source_oop, /*buffered_addr=*/start);
- } else if (start == ArchiveHeapWriter::buffered_heap_roots_addr()) {
- log_heap_roots();
}
start = oop_end;
}
diff --git a/src/hotspot/share/cds/archiveHeapLoader.cpp b/src/hotspot/share/cds/archiveHeapLoader.cpp
index feaf245d22c6b..0e7ef08064c37 100644
--- a/src/hotspot/share/cds/archiveHeapLoader.cpp
+++ b/src/hotspot/share/cds/archiveHeapLoader.cpp
@@ -374,8 +374,17 @@ void ArchiveHeapLoader::finish_initialization() {
if (is_in_use()) {
patch_native_pointers();
intptr_t bottom = is_loaded() ? _loaded_heap_bottom : _mapped_heap_bottom;
- intptr_t roots_oop = bottom + FileMapInfo::current_info()->heap_roots_offset();
- HeapShared::init_roots(cast_to_oop(roots_oop));
+
+ // The heap roots are stored in one or more segments that are laid out consecutively.
+ // The size of each segment (except for the last one) is max_size_in_{elems,bytes}.
+ HeapRootSegments segments = FileMapInfo::current_info()->heap_root_segments();
+ HeapShared::init_root_segment_sizes(segments.max_size_in_elems());
+ intptr_t first_segment_addr = bottom + segments.base_offset();
+ for (size_t c = 0; c < segments.count(); c++) {
+ oop segment_oop = cast_to_oop(first_segment_addr + (c * segments.max_size_in_bytes()));
+ assert(segment_oop->is_objArray(), "Must be");
+ HeapShared::add_root_segment((objArrayOop)segment_oop);
+ }
}
}
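
For reference, the loop above assumes the root segments sit back to back starting at base_offset, with every segment except possibly the last spanning max_size_in_bytes. A minimal sketch of that addressing (separate from this patch; the struct, helper, and numbers are illustrative, not the VM's real values) could look like:

#include <cstddef>
#include <cstdio>

// Root i lives in segment i / max_elems, at slot i % max_elems, and the
// segment itself starts at base + (i / max_elems) * max_bytes.
struct RootSegments {
  size_t base;       // offset of the first segment from the heap region bottom
  size_t max_elems;  // elements per full segment
  size_t max_bytes;  // byte size of a full segment
};

static size_t segment_offset(const RootSegments& s, size_t root_index) {
  return s.base + (root_index / s.max_elems) * s.max_bytes;
}

int main() {
  RootSegments s{0, 1024, 8192};
  std::printf("root 0    -> segment offset %zu, slot %zu\n",
              segment_offset(s, 0), (size_t)0 % s.max_elems);
  std::printf("root 1500 -> segment offset %zu, slot %zu\n",
              segment_offset(s, 1500), (size_t)1500 % s.max_elems);
  return 0;
}
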
diff --git a/src/hotspot/share/cds/archiveHeapWriter.cpp b/src/hotspot/share/cds/archiveHeapWriter.cpp
index bf49805658c09..710e693bfdb14 100644
--- a/src/hotspot/share/cds/archiveHeapWriter.cpp
+++ b/src/hotspot/share/cds/archiveHeapWriter.cpp
@@ -52,9 +52,9 @@ GrowableArrayCHeap* ArchiveHeapWriter::_buffer = nullptr;
// The following are offsets from buffer_bottom()
size_t ArchiveHeapWriter::_buffer_used;
-size_t ArchiveHeapWriter::_heap_roots_offset;
-size_t ArchiveHeapWriter::_heap_roots_word_size;
+// Heap root segments
+HeapRootSegments ArchiveHeapWriter::_heap_root_segments;
address ArchiveHeapWriter::_requested_bottom;
address ArchiveHeapWriter::_requested_top;
@@ -88,7 +88,6 @@ void ArchiveHeapWriter::init() {
+ _native_pointers = new GrowableArrayCHeap<NativePointerInfo, mtClassShared>(2048);
+ _source_objs = new GrowableArrayCHeap<oop, mtClassShared>(10000);
- guarantee(UseG1GC, "implementation limitation");
guarantee(MIN_GC_REGION_ALIGNMENT <= G1HeapRegion::min_region_size_in_words() * HeapWordSize, "must be");
}
}
@@ -164,10 +163,6 @@ address ArchiveHeapWriter::buffered_addr_to_requested_addr(address buffered_addr
return _requested_bottom + buffered_address_to_offset(buffered_addr);
}
-oop ArchiveHeapWriter::heap_roots_requested_address() {
- return cast_to_oop(_requested_bottom + _heap_roots_offset);
-}
-
address ArchiveHeapWriter::requested_address() {
assert(_buffer != nullptr, "must be initialized");
return _requested_bottom;
@@ -186,54 +181,85 @@ void ArchiveHeapWriter::ensure_buffer_space(size_t min_bytes) {
_buffer->at_grow(to_array_index(min_bytes));
}
-void ArchiveHeapWriter::copy_roots_to_buffer(GrowableArrayCHeap<oop, mtClassShared>* roots) {
- Klass* k = Universe::objectArrayKlass(); // already relocated to point to archived klass
- int length = roots->length();
- _heap_roots_word_size = objArrayOopDesc::object_size(length);
- size_t byte_size = _heap_roots_word_size * HeapWordSize;
- if (byte_size >= MIN_GC_REGION_ALIGNMENT) {
- log_error(cds, heap)("roots array is too large. Please reduce the number of classes");
- vm_exit(1);
- }
-
- maybe_fill_gc_region_gap(byte_size);
+objArrayOop ArchiveHeapWriter::allocate_root_segment(size_t offset, int element_count) {
+ HeapWord* mem = offset_to_buffered_address(offset);
+ memset(mem, 0, objArrayOopDesc::object_size(element_count));
- size_t new_used = _buffer_used + byte_size;
- ensure_buffer_space(new_used);
+ // The initialization code is copied from MemAllocator::finish and ObjArrayAllocator::initialize.
+ oopDesc::set_mark(mem, markWord::prototype());
+ oopDesc::release_set_klass(mem, Universe::objectArrayKlass());
+ arrayOopDesc::set_length(mem, element_count);
+ return objArrayOop(cast_to_oop(mem));
+}
- HeapWord* mem = offset_to_buffered_address(_buffer_used);
- memset(mem, 0, byte_size);
- {
- // This is copied from MemAllocator::finish
- oopDesc::set_mark(mem, markWord::prototype());
- oopDesc::release_set_klass(mem, k);
- }
- {
- // This is copied from ObjArrayAllocator::initialize
- arrayOopDesc::set_length(mem, length);
+void ArchiveHeapWriter::root_segment_at_put(objArrayOop segment, int index, oop root) {
+ // Do not use arrayOop->obj_at_put(i, o) as arrayOop is outside the real heap!
+ if (UseCompressedOops) {
+ *segment->obj_at_addr<narrowOop>(index) = CompressedOops::encode(root);
+ } else {
+ *segment->obj_at_addr<oop>(index) = root;
}
+}
- objArrayOop arrayOop = objArrayOop(cast_to_oop(mem));
- for (int i = 0; i < length; i++) {
- // Do not use arrayOop->obj_at_put(i, o) as arrayOop is outside of the real heap!
- oop o = roots->at(i);
- if (UseCompressedOops) {
- * arrayOop->obj_at_addr<narrowOop>(i) = CompressedOops::encode(o);
- } else {
- * arrayOop->obj_at_addr<oop>(i) = o;
+void ArchiveHeapWriter::copy_roots_to_buffer(GrowableArrayCHeap<oop, mtClassShared>* roots) {
+ // Depending on the number of classes we are archiving, a single roots array may be
+ // larger than MIN_GC_REGION_ALIGNMENT. Roots are allocated first in the buffer, which
+ // allows us to chop the large array into a series of "segments". The current layout
+ // starts with zero or more segments that exactly fit MIN_GC_REGION_ALIGNMENT, and ends
+ // with a single segment that may be smaller than MIN_GC_REGION_ALIGNMENT.
+ // This is simple and efficient. We do not need filler objects anywhere between the segments,
+ // or immediately after the last segment. This allows starting the object dump immediately
+ // after the roots.
+
+ assert((_buffer_used % MIN_GC_REGION_ALIGNMENT) == 0,
+ "Pre-condition: Roots start at aligned boundary: " SIZE_FORMAT, _buffer_used);
+
+ int max_elem_count = ((MIN_GC_REGION_ALIGNMENT - arrayOopDesc::header_size_in_bytes()) / heapOopSize);
+ assert(objArrayOopDesc::object_size(max_elem_count)*HeapWordSize == MIN_GC_REGION_ALIGNMENT,
+ "Should match exactly");
+
+ HeapRootSegments segments(_buffer_used,
+ roots->length(),
+ MIN_GC_REGION_ALIGNMENT,
+ max_elem_count);
+
+ int root_index = 0;
+ for (size_t seg_idx = 0; seg_idx < segments.count(); seg_idx++) {
+ int size_elems = segments.size_in_elems(seg_idx);
+ size_t size_bytes = segments.size_in_bytes(seg_idx);
+
+ size_t oop_offset = _buffer_used;
+ _buffer_used = oop_offset + size_bytes;
+ ensure_buffer_space(_buffer_used);
+
+ assert((oop_offset % MIN_GC_REGION_ALIGNMENT) == 0,
+ "Roots segment " SIZE_FORMAT " start is not aligned: " SIZE_FORMAT,
+ segments.count(), oop_offset);
+
+ objArrayOop seg_oop = allocate_root_segment(oop_offset, size_elems);
+ for (int i = 0; i < size_elems; i++) {
+ root_segment_at_put(seg_oop, i, roots->at(root_index++));
}
+
+ log_info(cds, heap)("archived obj root segment [%d] = " SIZE_FORMAT " bytes, obj = " PTR_FORMAT,
+ size_elems, size_bytes, p2i(seg_oop));
}
- log_info(cds, heap)("archived obj roots[%d] = " SIZE_FORMAT " bytes, klass = %p, obj = %p", length, byte_size, k, mem);
- _heap_roots_offset = _buffer_used;
- _buffer_used = new_used;
+ assert(root_index == roots->length(), "Post-condition: All roots are handled");
+
+ _heap_root_segments = segments;
}
+// The goal is to sort the objects in increasing order of:
+// - objects that have only oop pointers
+// - objects that have both native and oop pointers
+// - objects that have only native pointers
+// - objects that have no pointers
static int oop_sorting_rank(oop o) {
bool has_oop_ptr, has_native_ptr;
HeapShared::get_pointer_info(o, has_oop_ptr, has_native_ptr);
- if (!has_oop_ptr) {
+ if (has_oop_ptr) {
if (!has_native_ptr) {
return 0;
} else {
@@ -248,11 +274,6 @@ static int oop_sorting_rank(oop o) {
}
}
-// The goal is to sort the objects in increasing order of:
-// - objects that have no pointers
-// - objects that have only native pointers
-// - objects that have both native and oop pointers
-// - objects that have only oop pointers
int ArchiveHeapWriter::compare_objs_by_oop_fields(HeapObjOrder* a, HeapObjOrder* b) {
int rank_a = a->_rank;
int rank_b = b->_rank;
@@ -282,6 +303,10 @@ void ArchiveHeapWriter::sort_source_objs() {
}
void ArchiveHeapWriter::copy_source_objs_to_buffer(GrowableArrayCHeap<oop, mtClassShared>* roots) {
+ // There could be multiple root segments, which we want to be aligned by region.
+ // Putting them ahead of objects makes sure we waste no space.
+ copy_roots_to_buffer(roots);
+
sort_source_objs();
for (int i = 0; i < _source_objs_order->length(); i++) {
int src_obj_index = _source_objs_order->at(i)._index;
@@ -295,8 +320,6 @@ void ArchiveHeapWriter::copy_source_objs_to_buffer(GrowableArrayCHeap<oop, mtClassShared>* roots) {
_buffer_offset_to_source_obj_table->put_when_absent(buffer_offset, src_obj);
_buffer_offset_to_source_obj_table->maybe_grow();
}
- copy_roots_to_buffer(roots);
-
log_info(cds)("Size of heap region = " SIZE_FORMAT " bytes, %d objects, %d roots, %d native ptrs",
_buffer_used, _source_objs->length() + 1, roots->length(), _num_native_ptrs);
}
@@ -430,32 +453,36 @@ size_t ArchiveHeapWriter::copy_one_source_obj_to_buffer(oop src_obj) {
void ArchiveHeapWriter::set_requested_address(ArchiveHeapInfo* info) {
assert(!info->is_used(), "only set once");
- assert(UseG1GC, "must be");
- address heap_end = (address)G1CollectedHeap::heap()->reserved().end();
- log_info(cds, heap)("Heap end = %p", heap_end);
size_t heap_region_byte_size = _buffer_used;
assert(heap_region_byte_size > 0, "must archived at least one object!");
-
if (UseCompressedOops) {
- _requested_bottom = align_down(heap_end - heap_region_byte_size, G1HeapRegion::GrainBytes);
+ if (UseG1GC) {
+ address heap_end = (address)G1CollectedHeap::heap()->reserved().end();
+ log_info(cds, heap)("Heap end = %p", heap_end);
+ _requested_bottom = align_down(heap_end - heap_region_byte_size, G1HeapRegion::GrainBytes);
+ _requested_bottom = align_down(_requested_bottom, MIN_GC_REGION_ALIGNMENT);
+ assert(is_aligned(_requested_bottom, G1HeapRegion::GrainBytes), "sanity");
+ } else {
+ _requested_bottom = align_up(CompressedOops::begin(), MIN_GC_REGION_ALIGNMENT);
+ }
} else {
// We always write the objects as if the heap started at this address. This
// makes the contents of the archive heap deterministic.
//
// Note that at runtime, the heap address is selected by the OS, so the archive
// heap will not be mapped at 0x10000000, and the contents need to be patched.
- _requested_bottom = (address)NOCOOPS_REQUESTED_BASE;
+ _requested_bottom = align_up((address)NOCOOPS_REQUESTED_BASE, MIN_GC_REGION_ALIGNMENT);
}
- assert(is_aligned(_requested_bottom, G1HeapRegion::GrainBytes), "sanity");
+ assert(is_aligned(_requested_bottom, MIN_GC_REGION_ALIGNMENT), "sanity");
_requested_top = _requested_bottom + _buffer_used;
info->set_buffer_region(MemRegion(offset_to_buffered_address(0),
offset_to_buffered_address(_buffer_used)));
- info->set_heap_roots_offset(_heap_roots_offset);
+ info->set_heap_root_segments(_heap_root_segments);
}
// Oop relocation
@@ -543,12 +570,6 @@ void ArchiveHeapWriter::update_header_for_requested_obj(oop requested_obj, oop s
}
}
-// Relocate an element in the buffered copy of HeapShared::roots()
-template <typename T> void ArchiveHeapWriter::relocate_root_at(oop requested_roots, int index, CHeapBitMap* oopmap) {
- size_t offset = (size_t)((objArrayOop)requested_roots)->obj_at_offset<T>(index);
- relocate_field_in_buffer<T>((T*)(buffered_heap_roots_addr() + offset), oopmap);
-}
-
class ArchiveHeapWriter::EmbeddedOopRelocator: public BasicOopIterateClosure {
oop _src_obj;
address _buffered_obj;
@@ -600,14 +621,24 @@ void ArchiveHeapWriter::relocate_embedded_oops(GrowableArrayCHeap<oop, mtClassShared>* roots, ArchiveHeapInfo* heap_info) {
- int length = roots != nullptr ? roots->length() : 0;
- for (int i = 0; i < length; i++) {
+ for (size_t seg_idx = 0; seg_idx < _heap_root_segments.count(); seg_idx++) {
+ size_t seg_offset = _heap_root_segments.segment_offset(seg_idx);
+
+ objArrayOop requested_obj = (objArrayOop)requested_obj_from_buffer_offset(seg_offset);
+ update_header_for_requested_obj(requested_obj, nullptr, Universe::objectArrayKlass());
+ address buffered_obj = offset_to_buffered_address(seg_offset);
+ int length = _heap_root_segments.size_in_elems(seg_idx);
+
if (UseCompressedOops) {
- relocate_root_at<narrowOop>(requested_roots, i, heap_info->oopmap());
+ for (int i = 0; i < length; i++) {
+ narrowOop* addr = (narrowOop*)(buffered_obj + objArrayOopDesc::obj_at_offset<narrowOop>(i));
+ relocate_field_in_buffer<narrowOop>(addr, heap_info->oopmap());
+ }
} else {
- relocate_root_at<oop>(requested_roots, i, heap_info->oopmap());
+ for (int i = 0; i < length; i++) {
+ oop* addr = (oop*)(buffered_obj + objArrayOopDesc::obj_at_offset<oop>(i));
+ relocate_field_in_buffer<oop>(addr, heap_info->oopmap());
+ }
}
}
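On the writer side, the key invariant is that a full segment occupies a MIN_GC_REGION_ALIGNMENT-sized region exactly, so consecutive segments need no filler between them. A sketch of that capacity computation follows, with assumed header and oop sizes; the real values come from arrayOopDesc::header_size_in_bytes() and heapOopSize in the VM.

```cpp
#include <cassert>
#include <cstdio>

int main() {
  // Assumptions for illustration: 256K regions, a 16-byte objArray header,
  // 4-byte (compressed) oops.
  const int region_bytes = 256 * 1024;
  const int header_bytes = 16;
  const int oop_bytes    = 4;

  // Largest element count whose objArray still fits the region...
  int max_elem_count = (region_bytes - header_bytes) / oop_bytes;
  // ...and with these sizes it fills the region exactly, so the next segment
  // can start at the next region boundary with no filler object in between.
  assert(header_bytes + max_elem_count * oop_bytes == region_bytes);

  std::printf("a full root segment holds %d roots\n", max_elem_count);
  return 0;
}
```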
diff --git a/src/hotspot/share/cds/archiveHeapWriter.hpp b/src/hotspot/share/cds/archiveHeapWriter.hpp
index 352aeb9a08f7c..29ea50ba5fe86 100644
--- a/src/hotspot/share/cds/archiveHeapWriter.hpp
+++ b/src/hotspot/share/cds/archiveHeapWriter.hpp
@@ -41,8 +41,7 @@ class ArchiveHeapInfo {
MemRegion _buffer_region; // Contains the archived objects to be written into the CDS archive.
CHeapBitMap _oopmap;
CHeapBitMap _ptrmap;
- size_t _heap_roots_offset; // Offset of the HeapShared::roots() object, from the bottom
- // of the archived heap objects, in bytes.
+ HeapRootSegments _heap_root_segments;
public:
ArchiveHeapInfo() : _buffer_region(), _oopmap(128, mtClassShared), _ptrmap(128, mtClassShared) {}
@@ -57,8 +56,8 @@ class ArchiveHeapInfo {
CHeapBitMap* oopmap() { return &_oopmap; }
CHeapBitMap* ptrmap() { return &_ptrmap; }
- void set_heap_roots_offset(size_t n) { _heap_roots_offset = n; }
- size_t heap_roots_offset() const { return _heap_roots_offset; }
+ void set_heap_root_segments(HeapRootSegments segments) { _heap_root_segments = segments; };
+ HeapRootSegments heap_root_segments() { return _heap_root_segments; }
};
#if INCLUDE_CDS_JAVA_HEAP
@@ -112,11 +111,10 @@ class ArchiveHeapWriter : AllStatic {
public:
static const intptr_t NOCOOPS_REQUESTED_BASE = 0x10000000;
- // The minimum region size of all collectors that are supported by CDS in
- // ArchiveHeapLoader::can_map() mode. Currently only G1 is supported. G1's region size
- // depends on -Xmx, but can never be smaller than 1 * M.
- // (TODO: Perhaps change to 256K to be compatible with Shenandoah)
- static constexpr int MIN_GC_REGION_ALIGNMENT = 1 * M;
+ // The minimum region size of all collectors that are supported by CDS.
+ // G1 heap region size can never be smaller than 1M.
+ // Shenandoah heap region size can never be smaller than 256K.
+ static constexpr int MIN_GC_REGION_ALIGNMENT = 256 * K;
private:
class EmbeddedOopRelocator;
@@ -130,9 +128,8 @@ class ArchiveHeapWriter : AllStatic {
// The number of bytes that have written into _buffer (may be smaller than _buffer->length()).
static size_t _buffer_used;
- // The bottom of the copy of Heap::roots() inside this->_buffer.
- static size_t _heap_roots_offset;
- static size_t _heap_roots_word_size;
+ // The heap root segments information.
+ static HeapRootSegments _heap_root_segments;
// The address range of the requested location of the archived heap objects.
static address _requested_bottom;
@@ -193,6 +190,8 @@ class ArchiveHeapWriter : AllStatic {
return buffered_addr - buffer_bottom();
}
+ static void root_segment_at_put(objArrayOop segment, int index, oop root);
+ static objArrayOop allocate_root_segment(size_t offset, int element_count);
static void copy_roots_to_buffer(GrowableArrayCHeap<oop, mtClassShared>* roots);
static void copy_source_objs_to_buffer(GrowableArrayCHeap<oop, mtClassShared>* roots);
static size_t copy_one_source_obj_to_buffer(oop src_obj);
@@ -219,7 +218,6 @@ class ArchiveHeapWriter : AllStatic {
template <typename T> static T* requested_addr_to_buffered_addr(T* p);
template <typename T> static void relocate_field_in_buffer(T* field_addr_in_buffer, CHeapBitMap* oopmap);
template <typename T> static void mark_oop_pointer(T* buffered_addr, CHeapBitMap* oopmap);
- template <typename T> static void relocate_root_at(oop requested_roots, int index, CHeapBitMap* oopmap);
static void update_header_for_requested_obj(oop requested_obj, oop src_obj, Klass* src_klass);
@@ -234,13 +232,6 @@ class ArchiveHeapWriter : AllStatic {
static bool is_string_too_large_to_archive(oop string);
static void write(GrowableArrayCHeap<oop, mtClassShared>*, ArchiveHeapInfo* heap_info);
static address requested_address(); // requested address of the lowest achived heap object
- static oop heap_roots_requested_address(); // requested address of HeapShared::roots()
- static address buffered_heap_roots_addr() {
- return offset_to_buffered_address(_heap_roots_offset);
- }
- static size_t heap_roots_word_size() {
- return _heap_roots_word_size;
- }
static size_t get_filler_size_at(address buffered_addr);
static void mark_native_pointer(oop src_obj, int offset);
diff --git a/src/hotspot/share/cds/archiveUtils.cpp b/src/hotspot/share/cds/archiveUtils.cpp
index 76cfa441fa7f8..4622a27cbec72 100644
--- a/src/hotspot/share/cds/archiveUtils.cpp
+++ b/src/hotspot/share/cds/archiveUtils.cpp
@@ -369,3 +369,24 @@ void ArchiveUtils::log_to_classlist(BootstrapInfo* bootstrap_specifier, TRAPS) {
}
}
}
+
+size_t HeapRootSegments::size_in_bytes(size_t seg_idx) {
+ assert(seg_idx < _count, "In range");
+ return objArrayOopDesc::object_size(size_in_elems(seg_idx)) * HeapWordSize;
+}
+
+int HeapRootSegments::size_in_elems(size_t seg_idx) {
+ assert(seg_idx < _count, "In range");
+ if (seg_idx != _count - 1) {
+ return _max_size_in_elems;
+ } else {
+ // Last slice, leftover
+ return _roots_count % _max_size_in_elems;
+ }
+}
+
+size_t HeapRootSegments::segment_offset(size_t seg_idx) {
+ assert(seg_idx < _count, "In range");
+ return _base_offset + seg_idx * _max_size_in_bytes;
+}
+
diff --git a/src/hotspot/share/cds/archiveUtils.hpp b/src/hotspot/share/cds/archiveUtils.hpp
index 32cef97886f06..5a78bc26ee627 100644
--- a/src/hotspot/share/cds/archiveUtils.hpp
+++ b/src/hotspot/share/cds/archiveUtils.hpp
@@ -250,4 +250,44 @@ class ArchiveUtils {
static void log_to_classlist(BootstrapInfo* bootstrap_specifier, TRAPS) NOT_CDS_RETURN;
};
+class HeapRootSegments {
+private:
+ size_t _base_offset;
+ size_t _count;
+ int _roots_count;
+ int _max_size_in_bytes;
+ int _max_size_in_elems;
+
+public:
+ size_t base_offset() { return _base_offset; }
+ size_t count() { return _count; }
+ int roots_count() { return _roots_count; }
+ int max_size_in_bytes() { return _max_size_in_bytes; }
+ int max_size_in_elems() { return _max_size_in_elems; }
+
+ size_t size_in_bytes(size_t seg_idx);
+ int size_in_elems(size_t seg_idx);
+ size_t segment_offset(size_t seg_idx);
+
+ // Trivial copy assignments are allowed to copy the entire object representation.
+ // We also inline this class into the archive header. Therefore, it is important to make
+ // sure any gaps in the object representation are initialized to zeroes. This is why
+ // the constructors memset before doing field assignments.
+ HeapRootSegments() {
+ memset(this, 0, sizeof(*this));
+ }
+ HeapRootSegments(size_t base_offset, int roots_count, int max_size_in_bytes, int max_size_in_elems) {
+ memset(this, 0, sizeof(*this));
+ _base_offset = base_offset;
+ _count = (roots_count + max_size_in_elems - 1) / max_size_in_elems;
+ _roots_count = roots_count;
+ _max_size_in_bytes = max_size_in_bytes;
+ _max_size_in_elems = max_size_in_elems;
+ }
+
+ // This class is trivially copyable and assignable.
+ HeapRootSegments(const HeapRootSegments&) = default;
+ HeapRootSegments& operator=(const HeapRootSegments&) = default;
+};
+
#endif // SHARE_CDS_ARCHIVEUTILS_HPP
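The segment count computed in the HeapRootSegments constructor above is a ceiling division, and size_in_elems() gives every segment except the last the full capacity. The following is a plain restatement of that arithmetic outside HotSpot; like the code above, it assumes the total root count is not an exact multiple of the per-segment capacity.

```cpp
#include <cstdio>

// Illustrative re-statement of the HeapRootSegments sizing rules.
static size_t segment_count(int roots_count, int max_size_in_elems) {
  return (size_t)((roots_count + max_size_in_elems - 1) / max_size_in_elems);  // ceiling division
}

static int size_in_elems(size_t seg_idx, size_t count, int roots_count, int max_size_in_elems) {
  // Every segment but the last is full; the last one holds the leftover.
  return (seg_idx != count - 1) ? max_size_in_elems : roots_count % max_size_in_elems;
}

int main() {
  const int roots_count = 150000, max_size_in_elems = 65532;  // assumed numbers
  size_t count = segment_count(roots_count, max_size_in_elems);
  for (size_t i = 0; i < count; i++) {
    std::printf("segment %zu holds %d roots\n",
                i, size_in_elems(i, count, roots_count, max_size_in_elems));
  }
  return 0;
}
```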
diff --git a/src/hotspot/share/cds/cdsConfig.cpp b/src/hotspot/share/cds/cdsConfig.cpp
index a0a562eca21a0..5915424c4fe87 100644
--- a/src/hotspot/share/cds/cdsConfig.cpp
+++ b/src/hotspot/share/cds/cdsConfig.cpp
@@ -236,7 +236,7 @@ void CDSConfig::init_shared_archive_paths() {
}
void CDSConfig::check_internal_module_property(const char* key, const char* value) {
- if (Arguments::is_internal_module_property(key)) {
+ if (Arguments::is_internal_module_property(key) && !Arguments::is_module_path_property(key)) {
stop_using_optimized_module_handling();
log_info(cds)("optimized module handling: disabled due to incompatible property: %s=%s", key, value);
}
diff --git a/src/hotspot/share/cds/classListParser.cpp b/src/hotspot/share/cds/classListParser.cpp
index f8d24295a12e5..694a179d7ee6c 100644
--- a/src/hotspot/share/cds/classListParser.cpp
+++ b/src/hotspot/share/cds/classListParser.cpp
@@ -508,7 +508,9 @@ InstanceKlass* ClassListParser::load_class_from_source(Symbol* class_name, TRAPS
THROW_NULL(vmSymbols::java_lang_ClassNotFoundException());
}
- InstanceKlass* k = UnregisteredClasses::load_class(class_name, _source, CHECK_NULL);
+ ResourceMark rm;
+ char * source_path = os::strdup_check_oom(ClassLoader::uri_to_path(_source));
+ InstanceKlass* k = UnregisteredClasses::load_class(class_name, source_path, CHECK_NULL);
if (k->local_interfaces()->length() != _interfaces->length()) {
print_specified_interfaces();
print_actual_interfaces(k);
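ClassListParser now turns the recorded "source:" URI back into a plain filesystem path before handing it to the unregistered-class loader. The sketch below only mimics the kind of conversion involved (protocol skipping plus percent-decoding); the real logic lives in ClassLoader::uri_to_path(), which appears further down in this patch.

```cpp
#include <cctype>
#include <cstdlib>
#include <iostream>
#include <string>

// Rough illustration of turning "file:///tmp/my%20app/foo.jar" into "/tmp/my app/foo.jar".
static std::string uri_to_path_sketch(std::string uri) {
  const std::string proto = "file:";
  if (uri.compare(0, proto.size(), proto) == 0) {
    uri.erase(0, proto.size());
  }
  if (uri.compare(0, 2, "//") == 0) {   // drop the empty "authority" part
    uri.erase(0, 2);
  }
  std::string path;
  for (size_t i = 0; i < uri.size(); ++i) {
    if (uri[i] == '%' && i + 2 < uri.size() &&
        std::isxdigit((unsigned char)uri[i + 1]) && std::isxdigit((unsigned char)uri[i + 2])) {
      path += (char)std::strtol(uri.substr(i + 1, 2).c_str(), nullptr, 16);
      i += 2;                            // consume the two hex digits
    } else {
      path += uri[i];
    }
  }
  return path;
}

int main() {
  std::cout << uri_to_path_sketch("file:///tmp/my%20app/foo.jar") << "\n";
  return 0;
}
```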
diff --git a/src/hotspot/share/cds/classListWriter.cpp b/src/hotspot/share/cds/classListWriter.cpp
index 78cd092445b70..1b9f589f1c5e5 100644
--- a/src/hotspot/share/cds/classListWriter.cpp
+++ b/src/hotspot/share/cds/classListWriter.cpp
@@ -174,6 +174,8 @@ void ClassListWriter::write_to_stream(const InstanceKlass* k, outputStream* stre
}
}
+ // NB: the string following "source: " is not really a proper file name, but rather
+ // a truncated URI referring to a file. It must be decoded after reading.
#ifdef _WINDOWS
// "file:/C:/dir/foo.jar" -> "C:/dir/foo.jar"
stream->print(" source: %s", cfs->source() + 6);
diff --git a/src/hotspot/share/cds/filemap.cpp b/src/hotspot/share/cds/filemap.cpp
index 35c43157b1ab3..715fce5f3fc86 100644
--- a/src/hotspot/share/cds/filemap.cpp
+++ b/src/hotspot/share/cds/filemap.cpp
@@ -289,7 +289,11 @@ void FileMapHeader::print(outputStream* st) {
st->print_cr("- has_non_jar_in_classpath: %d", _has_non_jar_in_classpath);
st->print_cr("- requested_base_address: " INTPTR_FORMAT, p2i(_requested_base_address));
st->print_cr("- mapped_base_address: " INTPTR_FORMAT, p2i(_mapped_base_address));
- st->print_cr("- heap_roots_offset: " SIZE_FORMAT, _heap_roots_offset);
+ st->print_cr("- heap_root_segments.roots_count: %d" , _heap_root_segments.roots_count());
+ st->print_cr("- heap_root_segments.base_offset: " SIZE_FORMAT_X, _heap_root_segments.base_offset());
+ st->print_cr("- heap_root_segments.count: " SIZE_FORMAT, _heap_root_segments.count());
+ st->print_cr("- heap_root_segments.max_size_elems: %d", _heap_root_segments.max_size_in_elems());
+ st->print_cr("- heap_root_segments.max_size_bytes: %d", _heap_root_segments.max_size_in_bytes());
st->print_cr("- _heap_oopmap_start_pos: " SIZE_FORMAT, _heap_oopmap_start_pos);
st->print_cr("- _heap_ptrmap_start_pos: " SIZE_FORMAT, _heap_ptrmap_start_pos);
st->print_cr("- _rw_ptrmap_start_pos: " SIZE_FORMAT, _rw_ptrmap_start_pos);
@@ -577,7 +581,7 @@ int FileMapInfo::get_module_shared_path_index(Symbol* location) {
// skip_uri_protocol was also called during dump time -- see ClassLoaderExt::process_module_table()
ResourceMark rm;
- const char* file = ClassLoader::skip_uri_protocol(location->as_C_string());
+ const char* file = ClassLoader::uri_to_path(location->as_C_string());
for (int i = ClassLoaderExt::app_module_paths_start_index(); i < get_number_of_shared_paths(); i++) {
SharedClassPathEntry* ent = shared_path(i);
if (!ent->is_non_existent()) {
@@ -777,12 +781,12 @@ bool FileMapInfo::check_paths(int shared_path_start_idx, int num_paths, Growable
assert(strlen(rp_array->at(i)) > (size_t)runtime_prefix_len, "sanity");
const char* runtime_path = rp_array->at(i) + runtime_prefix_len;
if (!os::same_files(dumptime_path, runtime_path)) {
- return true;
+ return false;
}
i++;
j++;
}
- return false;
+ return true;
}
bool FileMapInfo::validate_boot_class_paths() {
@@ -806,7 +810,7 @@ bool FileMapInfo::validate_boot_class_paths() {
char* rp = skip_first_path_entry(runtime_boot_path);
assert(shared_path(0)->is_modules_image(), "first shared_path must be the modules image");
int dp_len = header()->app_class_paths_start_index() - 1; // ignore the first path to the module image
- bool mismatch = false;
+ bool match = true;
bool relaxed_check = !header()->has_platform_or_app_classes();
if (dp_len == 0 && rp == nullptr) {
@@ -819,7 +823,7 @@ bool FileMapInfo::validate_boot_class_paths() {
if (check_paths_existence(rp)) {
// If a path exists in the runtime boot paths, it is considered a mismatch
// since there's no boot path specified during dump time.
- mismatch = true;
+ match = false;
}
}
} else if (dp_len > 0 && rp != nullptr) {
@@ -836,16 +840,16 @@ bool FileMapInfo::validate_boot_class_paths() {
// check the full runtime boot path, must match with dump time
num = rp_len;
}
- mismatch = check_paths(1, num, rp_array, 0, 0);
+ match = check_paths(1, num, rp_array, 0, 0);
} else {
// create_path_array() ignores non-existing paths. Although the dump time and runtime boot classpath lengths
// are the same initially, after the call to create_path_array(), the runtime boot classpath length could become
// shorter. We consider boot classpath mismatch in this case.
- mismatch = true;
+ match = false;
}
}
- if (mismatch) {
+ if (!match) {
// The paths are different
return classpath_failure("[BOOT classpath mismatch, actual =", runtime_boot_path);
}
@@ -856,7 +860,7 @@ bool FileMapInfo::validate_app_class_paths(int shared_app_paths_len) {
const char *appcp = Arguments::get_appclasspath();
assert(appcp != nullptr, "null app classpath");
int rp_len = num_paths(appcp);
- bool mismatch = false;
+ bool match = false;
if (rp_len < shared_app_paths_len) {
return classpath_failure("Run time APP classpath is shorter than the one at dump time: ", appcp);
}
@@ -885,8 +889,8 @@ bool FileMapInfo::validate_app_class_paths(int shared_app_paths_len) {
// run 2: -cp x.jar:NE4:b.jar -> x.jar:b.jar -> mismatched
int j = header()->app_class_paths_start_index();
- mismatch = check_paths(j, shared_app_paths_len, rp_array, 0, 0);
- if (mismatch) {
+ match = check_paths(j, shared_app_paths_len, rp_array, 0, 0);
+ if (!match) {
// To facilitate app deployment, we allow the JAR files to be moved *together* to
// a different location, as long as they are still stored under the same directory
// structure. E.g., the following is OK.
@@ -897,10 +901,10 @@ bool FileMapInfo::validate_app_class_paths(int shared_app_paths_len) {
if (dumptime_prefix_len != 0 || runtime_prefix_len != 0) {
log_info(class, path)("LCP length for app classpath (dumptime: %u, runtime: %u)",
dumptime_prefix_len, runtime_prefix_len);
- mismatch = check_paths(j, shared_app_paths_len, rp_array,
+ match = check_paths(j, shared_app_paths_len, rp_array,
dumptime_prefix_len, runtime_prefix_len);
}
- if (mismatch) {
+ if (!match) {
return classpath_failure("[APP classpath mismatch, actual: -Djava.class.path=", appcp);
}
}
@@ -922,15 +926,35 @@ void FileMapInfo::log_paths(const char* msg, int start_idx, int end_idx) {
}
}
+void FileMapInfo::extract_module_paths(const char* runtime_path, GrowableArray<const char*>* module_paths) {
+ GrowableArray<const char*>* path_array = create_path_array(runtime_path);
+ int num_paths = path_array->length();
+ for (int i = 0; i < num_paths; i++) {
+ const char* name = path_array->at(i);
+ ClassLoaderExt::extract_jar_files_from_path(name, module_paths);
+ }
+ // module paths are stored in sorted order in the CDS archive.
+ module_paths->sort(ClassLoaderExt::compare_module_path_by_name);
+}
+
bool FileMapInfo::check_module_paths() {
- const char* rp = Arguments::get_property("jdk.module.path");
- int num_paths = CDSConfig::num_archives(rp);
- if (num_paths != header()->num_module_paths()) {
+ const char* runtime_path = Arguments::get_property("jdk.module.path");
+ int archived_num_module_paths = header()->num_module_paths();
+ if (runtime_path == nullptr && archived_num_module_paths == 0) {
+ return true;
+ }
+ if ((runtime_path == nullptr && archived_num_module_paths > 0) ||
+ (runtime_path != nullptr && archived_num_module_paths == 0)) {
return false;
}
ResourceMark rm;
- GrowableArray<const char*>* rp_array = create_path_array(rp);
- return check_paths(header()->app_module_paths_start_index(), num_paths, rp_array, 0, 0);
+ GrowableArray<const char*>* module_paths = new GrowableArray<const char*>(3);
+ extract_module_paths(runtime_path, module_paths);
+ int num_paths = module_paths->length();
+ if (num_paths != archived_num_module_paths) {
+ return false;
+ }
+ return check_paths(header()->app_module_paths_start_index(), num_paths, module_paths, 0, 0);
}
bool FileMapInfo::validate_shared_path_table() {
@@ -940,6 +964,16 @@ bool FileMapInfo::validate_shared_path_table() {
// Load the shared path table info from the archive header
_shared_path_table = header()->shared_path_table();
+
+ bool matched_module_paths = true;
+ if (CDSConfig::is_dumping_dynamic_archive() || header()->has_full_module_graph()) {
+ matched_module_paths = check_module_paths();
+ }
+ if (header()->has_full_module_graph() && !matched_module_paths) {
+ CDSConfig::stop_using_optimized_module_handling();
+ log_info(cds)("optimized module handling: disabled because of mismatched module paths");
+ }
+
if (CDSConfig::is_dumping_dynamic_archive()) {
// Only support dynamic dumping with the usage of the default CDS archive
// or a simple base archive.
@@ -955,7 +989,7 @@ bool FileMapInfo::validate_shared_path_table() {
"Dynamic archiving is disabled because base layer archive has appended boot classpath");
}
if (header()->num_module_paths() > 0) {
- if (!check_module_paths()) {
+ if (!matched_module_paths) {
CDSConfig::disable_dumping_dynamic_archive();
log_warning(cds)(
"Dynamic archiving is disabled because base layer archive has a different module path");
@@ -1577,39 +1611,38 @@ static size_t write_bitmap(const CHeapBitMap* map, char* output, size_t offset)
return offset + size_in_bytes;
}
-// The start of the archived heap has many primitive arrays (String
-// bodies) that are not marked by the oop/ptr maps. So we must have
-// lots of leading zeros.
-size_t FileMapInfo::remove_bitmap_leading_zeros(CHeapBitMap* map) {
- size_t old_zeros = map->find_first_set_bit(0);
+// The sorting code groups the objects with non-null oop/ptrs together.
+// Relevant bitmaps then have lots of leading and trailing zeros, which
+// we do not have to store.
+size_t FileMapInfo::remove_bitmap_zeros(CHeapBitMap* map) {
+ BitMap::idx_t first_set = map->find_first_set_bit(0);
+ BitMap::idx_t last_set = map->find_last_set_bit(0);
size_t old_size = map->size();
// Slice and resize bitmap
- map->truncate(old_zeros, map->size());
+ map->truncate(first_set, last_set + 1);
- DEBUG_ONLY(
- size_t new_zeros = map->find_first_set_bit(0);
- assert(new_zeros == 0, "Should have removed leading zeros");
- )
+ assert(map->at(0), "First bit should be set");
+ assert(map->at(map->size() - 1), "Last bit should be set");
assert(map->size() <= old_size, "sanity");
- return old_zeros;
+
+ return first_set;
}
char* FileMapInfo::write_bitmap_region(CHeapBitMap* rw_ptrmap, CHeapBitMap* ro_ptrmap, ArchiveHeapInfo* heap_info,
size_t &size_in_bytes) {
- size_t removed_rw_zeros = remove_bitmap_leading_zeros(rw_ptrmap);
- size_t removed_ro_zeros = remove_bitmap_leading_zeros(ro_ptrmap);
- header()->set_rw_ptrmap_start_pos(removed_rw_zeros);
- header()->set_ro_ptrmap_start_pos(removed_ro_zeros);
+ size_t removed_rw_leading_zeros = remove_bitmap_zeros(rw_ptrmap);
+ size_t removed_ro_leading_zeros = remove_bitmap_zeros(ro_ptrmap);
+ header()->set_rw_ptrmap_start_pos(removed_rw_leading_zeros);
+ header()->set_ro_ptrmap_start_pos(removed_ro_leading_zeros);
size_in_bytes = rw_ptrmap->size_in_bytes() + ro_ptrmap->size_in_bytes();
if (heap_info->is_used()) {
- // Remove leading zeros
- size_t removed_oop_zeros = remove_bitmap_leading_zeros(heap_info->oopmap());
- size_t removed_ptr_zeros = remove_bitmap_leading_zeros(heap_info->ptrmap());
-
- header()->set_heap_oopmap_start_pos(removed_oop_zeros);
- header()->set_heap_ptrmap_start_pos(removed_ptr_zeros);
+ // Remove leading and trailing zeros
+ size_t removed_oop_leading_zeros = remove_bitmap_zeros(heap_info->oopmap());
+ size_t removed_ptr_leading_zeros = remove_bitmap_zeros(heap_info->ptrmap());
+ header()->set_heap_oopmap_start_pos(removed_oop_leading_zeros);
+ header()->set_heap_ptrmap_start_pos(removed_ptr_leading_zeros);
size_in_bytes += heap_info->oopmap()->size_in_bytes();
size_in_bytes += heap_info->ptrmap()->size_in_bytes();
@@ -1647,7 +1680,7 @@ size_t FileMapInfo::write_heap_region(ArchiveHeapInfo* heap_info) {
char* buffer_start = heap_info->buffer_start();
size_t buffer_size = heap_info->buffer_byte_size();
write_region(MetaspaceShared::hp, buffer_start, buffer_size, false, false);
- header()->set_heap_roots_offset(heap_info->heap_roots_offset());
+ header()->set_heap_root_segments(heap_info->heap_root_segments());
return buffer_size;
}
@@ -1712,10 +1745,10 @@ void FileMapInfo::close() {
*/
static char* map_memory(int fd, const char* file_name, size_t file_offset,
char *addr, size_t bytes, bool read_only,
- bool allow_exec, MEMFLAGS flags = mtNone) {
+ bool allow_exec, MemTag mem_tag = mtNone) {
char* mem = os::map_memory(fd, file_name, file_offset, addr, bytes,
AlwaysPreTouch ? false : read_only,
- allow_exec, flags);
+ allow_exec, mem_tag);
if (mem != nullptr && AlwaysPreTouch) {
os::pretouch_memory(mem, mem + bytes);
}
@@ -2174,7 +2207,7 @@ bool FileMapInfo::map_heap_region_impl() {
_mapped_heap_memregion = MemRegion(start, word_size);
- // Map the archived heap data. No need to call MemTracker::record_virtual_memory_type()
+ // Map the archived heap data. No need to call MemTracker::record_virtual_memory_tag()
// for mapped region as it is part of the reserved java heap, which is already recorded.
char* addr = (char*)_mapped_heap_memregion.start();
char* base;
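remove_bitmap_zeros() above trims each bitmap to the window between its first and last set bit and reports how many leading bits were dropped, so the header can record the start position. A minimal sketch of the same trimming with std::vector<bool> instead of CHeapBitMap:

```cpp
#include <cstddef>
#include <iostream>
#include <vector>

// Trim a bitmap to [first_set, last_set] and return the number of dropped
// leading bits, which the caller records as the bitmap's start position.
static size_t trim(std::vector<bool>& map) {
  size_t first = 0;
  while (first < map.size() && !map[first]) first++;
  size_t last = map.size();
  while (last > first && !map[last - 1]) last--;
  map = std::vector<bool>(map.begin() + first, map.begin() + last);
  return first;
}

int main() {
  std::vector<bool> map = {0, 0, 0, 1, 0, 1, 1, 0, 0};
  size_t start_pos = trim(map);
  std::cout << "start_pos=" << start_pos << " new_size=" << map.size() << "\n";  // 3 and 4
  return 0;
}
```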
diff --git a/src/hotspot/share/cds/filemap.hpp b/src/hotspot/share/cds/filemap.hpp
index 7b10c16920b8f..6650f52440881 100644
--- a/src/hotspot/share/cds/filemap.hpp
+++ b/src/hotspot/share/cds/filemap.hpp
@@ -25,6 +25,7 @@
#ifndef SHARE_CDS_FILEMAP_HPP
#define SHARE_CDS_FILEMAP_HPP
+#include "cds/archiveUtils.hpp"
#include "cds/metaspaceShared.hpp"
#include "include/cds.h"
#include "logging/logLevel.hpp"
@@ -225,8 +226,7 @@ class FileMapHeader: private CDSFileMapHeaderBase {
bool _use_optimized_module_handling;// No module-relation VM options were specified, so we can skip
// some expensive operations.
bool _has_full_module_graph; // Does this CDS archive contain the full archived module graph?
- size_t _heap_roots_offset; // Offset of the HeapShared::roots() object, from the bottom
- // of the archived heap objects, in bytes.
+ HeapRootSegments _heap_root_segments; // Heap root segments info
size_t _heap_oopmap_start_pos; // The first bit in the oopmap corresponds to this position in the heap.
size_t _heap_ptrmap_start_pos; // The first bit in the ptrmap corresponds to this position in the heap.
size_t _rw_ptrmap_start_pos; // The first bit in the ptrmap corresponds to this position in the rw region
@@ -270,7 +270,8 @@ class FileMapHeader: private CDSFileMapHeaderBase {
bool has_non_jar_in_classpath() const { return _has_non_jar_in_classpath; }
bool compressed_oops() const { return _compressed_oops; }
bool compressed_class_pointers() const { return _compressed_class_ptrs; }
- size_t heap_roots_offset() const { return _heap_roots_offset; }
+ HeapRootSegments heap_root_segments() const { return _heap_root_segments; }
+ bool has_full_module_graph() const { return _has_full_module_graph; }
size_t heap_oopmap_start_pos() const { return _heap_oopmap_start_pos; }
size_t heap_ptrmap_start_pos() const { return _heap_ptrmap_start_pos; }
size_t rw_ptrmap_start_pos() const { return _rw_ptrmap_start_pos; }
@@ -285,7 +286,7 @@ class FileMapHeader: private CDSFileMapHeaderBase {
void set_cloned_vtables(char* p) { set_as_offset(p, &_cloned_vtables_offset); }
void set_serialized_data(char* p) { set_as_offset(p, &_serialized_data_offset); }
void set_mapped_base_address(char* p) { _mapped_base_address = p; }
- void set_heap_roots_offset(size_t n) { _heap_roots_offset = n; }
+ void set_heap_root_segments(HeapRootSegments segments) { _heap_root_segments = segments; }
void set_heap_oopmap_start_pos(size_t n) { _heap_oopmap_start_pos = n; }
void set_heap_ptrmap_start_pos(size_t n) { _heap_ptrmap_start_pos = n; }
void set_rw_ptrmap_start_pos(size_t n) { _rw_ptrmap_start_pos = n; }
@@ -385,7 +386,7 @@ class FileMapInfo : public CHeapObj<mtInternal> {
address narrow_oop_base() const { return header()->narrow_oop_base(); }
int narrow_oop_shift() const { return header()->narrow_oop_shift(); }
uintx max_heap_size() const { return header()->max_heap_size(); }
- size_t heap_roots_offset() const { return header()->heap_roots_offset(); }
+ HeapRootSegments heap_root_segments() const { return header()->heap_root_segments(); }
size_t core_region_alignment() const { return header()->core_region_alignment(); }
size_t heap_oopmap_start_pos() const { return header()->heap_oopmap_start_pos(); }
size_t heap_ptrmap_start_pos() const { return header()->heap_ptrmap_start_pos(); }
@@ -445,7 +446,7 @@ class FileMapInfo : public CHeapObj {
void write_header();
void write_region(int region, char* base, size_t size,
bool read_only, bool allow_exec);
- size_t remove_bitmap_leading_zeros(CHeapBitMap* map);
+ size_t remove_bitmap_zeros(CHeapBitMap* map);
char* write_bitmap_region(CHeapBitMap* rw_ptrmap, CHeapBitMap* ro_ptrmap, ArchiveHeapInfo* heap_info,
size_t &size_in_bytes);
size_t write_heap_region(ArchiveHeapInfo* heap_info);
@@ -554,6 +555,7 @@ class FileMapInfo : public CHeapObj {
GrowableArray<const char*>* rp_array,
unsigned int dumptime_prefix_len,
unsigned int runtime_prefix_len) NOT_CDS_RETURN_(false);
+ void extract_module_paths(const char* runtime_path, GrowableArray<const char*>* module_paths);
bool validate_boot_class_paths() NOT_CDS_RETURN_(false);
bool validate_app_class_paths(int shared_app_paths_len) NOT_CDS_RETURN_(false);
bool map_heap_region_impl() NOT_CDS_JAVA_HEAP_RETURN_(false);
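The new module-path validation expands every runtime --module-path entry into the jar files it contains, relies on both the archived and the runtime lists being sorted, and then compares them element by element. Below is a standalone sketch of that sort-then-compare idea; the jar expansion is faked with a fixed table, whereas the real code uses the ClassLoaderExt helpers named in the diff.

```cpp
#include <algorithm>
#include <iostream>
#include <string>
#include <vector>

// Pretend each module-path directory contains exactly these jars.
static std::vector<std::string> extract_jars(const std::vector<std::string>& module_path) {
  std::vector<std::string> jars;
  for (const std::string& dir : module_path) {
    jars.push_back(dir + "/m1.jar");
    jars.push_back(dir + "/m2.jar");
  }
  std::sort(jars.begin(), jars.end());   // archived module paths are stored in sorted order
  return jars;
}

int main() {
  std::vector<std::string> archived = extract_jars({"/opt/mods"});
  std::vector<std::string> runtime  = extract_jars({"/opt/mods"});
  bool match = archived.size() == runtime.size() &&
               std::equal(archived.begin(), archived.end(), runtime.begin());
  std::cout << (match ? "module paths match\n" : "module paths differ\n");
  return 0;
}
```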
diff --git a/src/hotspot/share/cds/heapShared.cpp b/src/hotspot/share/cds/heapShared.cpp
index abfc0f9d64be1..81aa7ac94dc21 100644
--- a/src/hotspot/share/cds/heapShared.cpp
+++ b/src/hotspot/share/cds/heapShared.cpp
@@ -33,6 +33,7 @@
#include "cds/heapShared.hpp"
#include "cds/metaspaceShared.hpp"
#include "classfile/classLoaderData.hpp"
+#include "classfile/classLoaderExt.hpp"
#include "classfile/javaClasses.inline.hpp"
#include "classfile/modules.hpp"
#include "classfile/stringTable.hpp"
@@ -55,6 +56,7 @@
#include "oops/oop.inline.hpp"
#include "oops/typeArrayOop.inline.hpp"
#include "prims/jvmtiExport.hpp"
+#include "runtime/arguments.hpp"
#include "runtime/fieldDescriptor.inline.hpp"
#include "runtime/init.hpp"
#include "runtime/javaCalls.hpp"
@@ -133,7 +135,8 @@ static ArchivableStaticFieldInfo fmg_archive_subgraph_entry_fields[] = {
KlassSubGraphInfo* HeapShared::_default_subgraph_info;
GrowableArrayCHeap<oop, mtClassShared>* HeapShared::_pending_roots = nullptr;
-OopHandle HeapShared::_roots;
+GrowableArrayCHeap<OopHandle, mtClassShared>* HeapShared::_root_segments;
+int HeapShared::_root_segment_max_size_elems;
OopHandle HeapShared::_scratch_basic_type_mirrors[T_VOID+1];
MetaspaceObjToOopHandleTable* HeapShared::_scratch_java_mirror_table = nullptr;
MetaspaceObjToOopHandleTable* HeapShared::_scratch_references_table = nullptr;
@@ -225,7 +228,7 @@ int HeapShared::append_root(oop obj) {
return _pending_roots->append(obj);
}
-objArrayOop HeapShared::roots() {
+objArrayOop HeapShared::root_segment(int segment_idx) {
if (CDSConfig::is_dumping_heap()) {
assert(Thread::current() == (Thread*)VMThread::vm_thread(), "should be in vm thread");
if (!HeapShared::can_write()) {
@@ -235,17 +238,35 @@ objArrayOop HeapShared::roots() {
assert(CDSConfig::is_using_archive(), "must be");
}
- objArrayOop roots = (objArrayOop)_roots.resolve();
- assert(roots != nullptr, "should have been initialized");
- return roots;
+ objArrayOop segment = (objArrayOop)_root_segments->at(segment_idx).resolve();
+ assert(segment != nullptr, "should have been initialized");
+ return segment;
+}
+
+void HeapShared::get_segment_indexes(int idx, int& seg_idx, int& int_idx) {
+ assert(_root_segment_max_size_elems > 0, "sanity");
+
+ // Try to avoid divisions for the common case.
+ if (idx < _root_segment_max_size_elems) {
+ seg_idx = 0;
+ int_idx = idx;
+ } else {
+ seg_idx = idx / _root_segment_max_size_elems;
+ int_idx = idx % _root_segment_max_size_elems;
+ }
+
+ assert(idx == seg_idx * _root_segment_max_size_elems + int_idx,
+ "sanity: %d index maps to %d segment and %d internal", idx, seg_idx, int_idx);
}
// Returns an objArray that contains all the roots of the archived objects
oop HeapShared::get_root(int index, bool clear) {
assert(index >= 0, "sanity");
assert(!CDSConfig::is_dumping_heap() && CDSConfig::is_using_archive(), "runtime only");
- assert(!_roots.is_empty(), "must have loaded shared heap");
- oop result = roots()->obj_at(index);
+ assert(!_root_segments->is_empty(), "must have loaded shared heap");
+ int seg_idx, int_idx;
+ get_segment_indexes(index, seg_idx, int_idx);
+ oop result = root_segment(seg_idx)->obj_at(int_idx);
if (clear) {
clear_root(index);
}
@@ -256,11 +277,13 @@ void HeapShared::clear_root(int index) {
assert(index >= 0, "sanity");
assert(CDSConfig::is_using_archive(), "must be");
if (ArchiveHeapLoader::is_in_use()) {
+ int seg_idx, int_idx;
+ get_segment_indexes(index, seg_idx, int_idx);
if (log_is_enabled(Debug, cds, heap)) {
- oop old = roots()->obj_at(index);
+ oop old = root_segment(seg_idx)->obj_at(int_idx);
log_debug(cds, heap)("Clearing root %d: was " PTR_FORMAT, index, p2i(old));
}
- roots()->obj_at_put(index, nullptr);
+ root_segment(seg_idx)->obj_at_put(int_idx, nullptr);
}
}
@@ -461,11 +484,13 @@ void HeapShared::archive_objects(ArchiveHeapInfo *heap_info) {
// Cache for recording where the archived objects are copied to
create_archived_object_cache();
- log_info(cds)("Heap range = [" PTR_FORMAT " - " PTR_FORMAT "]",
- UseCompressedOops ? p2i(CompressedOops::begin()) :
- p2i((address)G1CollectedHeap::heap()->reserved().start()),
- UseCompressedOops ? p2i(CompressedOops::end()) :
- p2i((address)G1CollectedHeap::heap()->reserved().end()));
+ if (UseCompressedOops || UseG1GC) {
+ log_info(cds)("Heap range = [" PTR_FORMAT " - " PTR_FORMAT "]",
+ UseCompressedOops ? p2i(CompressedOops::begin()) :
+ p2i((address)G1CollectedHeap::heap()->reserved().start()),
+ UseCompressedOops ? p2i(CompressedOops::end()) :
+ p2i((address)G1CollectedHeap::heap()->reserved().end()));
+ }
copy_objects();
CDSHeapVerifier::verify();
@@ -764,11 +789,17 @@ void HeapShared::write_subgraph_info_table() {
}
}
-void HeapShared::init_roots(oop roots_oop) {
- if (roots_oop != nullptr) {
- assert(ArchiveHeapLoader::is_in_use(), "must be");
- _roots = OopHandle(Universe::vm_global(), roots_oop);
+void HeapShared::add_root_segment(objArrayOop segment_oop) {
+ assert(segment_oop != nullptr, "must be");
+ assert(ArchiveHeapLoader::is_in_use(), "must be");
+ if (_root_segments == nullptr) {
+ _root_segments = new GrowableArrayCHeap<OopHandle, mtClassShared>(10);
}
+ _root_segments->push(OopHandle(Universe::vm_global(), segment_oop));
+}
+
+void HeapShared::init_root_segment_sizes(int max_size_elems) {
+ _root_segment_max_size_elems = max_size_elems;
}
void HeapShared::serialize_tables(SerializeClosure* soc) {
@@ -855,6 +886,17 @@ void HeapShared::initialize_from_archived_subgraph(JavaThread* current, Klass* k
return; // nothing to do
}
+ if (k->name()->equals("jdk/internal/module/ArchivedModuleGraph") &&
+ !CDSConfig::is_using_optimized_module_handling() &&
+ // archive was created with --module-path
+ ClassLoaderExt::num_module_paths() > 0) {
+ // ArchivedModuleGraph was created with a --module-path that's different than the runtime --module-path.
+ // Thus, it might contain references to modules that do not exist at runtime. We cannot use it.
+ log_info(cds, heap)("Skip initializing ArchivedModuleGraph subgraph: is_using_optimized_module_handling=%s num_module_paths=%d",
+ BOOL_TO_STR(CDSConfig::is_using_optimized_module_handling()), ClassLoaderExt::num_module_paths());
+ return;
+ }
+
ExceptionMark em(THREAD);
const ArchivedKlassSubGraphInfoRecord* record =
resolve_or_init_classes_for_subgraph_of(k, /*do_init=*/true, THREAD);
@@ -1103,6 +1145,13 @@ bool HeapShared::archive_reachable_objects_from(int level,
// these objects that are referenced (directly or indirectly) by static fields.
ResourceMark rm;
log_error(cds, heap)("Cannot archive object of class %s", orig_obj->klass()->external_name());
+ if (log_is_enabled(Trace, cds, heap)) {
+ WalkOopAndArchiveClosure* walker = WalkOopAndArchiveClosure::current();
+ if (walker != nullptr) {
+ LogStream ls(Log(cds, heap)::trace());
+ CDSHeapVerifier::trace_to_root(&ls, walker->referencing_obj());
+ }
+ }
MetaspaceShared::unrecoverable_writing_error();
}
@@ -1304,6 +1353,9 @@ void HeapShared::check_default_subgraph_classes() {
name == vmSymbols::java_lang_ArithmeticException() ||
name == vmSymbols::java_lang_NullPointerException() ||
name == vmSymbols::java_lang_InternalError() ||
+ name == vmSymbols::java_lang_ArrayIndexOutOfBoundsException() ||
+ name == vmSymbols::java_lang_ArrayStoreException() ||
+ name == vmSymbols::java_lang_ClassCastException() ||
name == vmSymbols::object_array_signature() ||
name == vmSymbols::byte_array_signature() ||
name == vmSymbols::char_array_signature(),
diff --git a/src/hotspot/share/cds/heapShared.hpp b/src/hotspot/share/cds/heapShared.hpp
index 0ba20f1e313b7..01d664945ee74 100644
--- a/src/hotspot/share/cds/heapShared.hpp
+++ b/src/hotspot/share/cds/heapShared.hpp
@@ -143,13 +143,13 @@ class HeapShared: AllStatic {
friend class VerifySharedOopClosure;
public:
- // Can this VM write a heap region into the CDS archive? Currently only G1+compressed{oops,cp}
+ // Can this VM write a heap region into the CDS archive? Currently only {G1|Parallel|Serial}+compressed_cp
static bool can_write() {
CDS_JAVA_HEAP_ONLY(
if (_disable_writing) {
return false;
}
- return (UseG1GC && UseCompressedClassPointers);
+ return (UseG1GC || UseParallelGC || UseSerialGC) && UseCompressedClassPointers;
)
NOT_CDS_JAVA_HEAP(return false;)
}
@@ -290,7 +290,8 @@ class HeapShared: AllStatic {
static KlassSubGraphInfo* _default_subgraph_info;
static GrowableArrayCHeap<oop, mtClassShared>* _pending_roots;
- static OopHandle _roots;
+ static GrowableArrayCHeap<OopHandle, mtClassShared>* _root_segments;
+ static int _root_segment_max_size_elems;
static OopHandle _scratch_basic_type_mirrors[T_VOID+1];
static MetaspaceObjToOopHandleTable* _scratch_java_mirror_table;
static MetaspaceObjToOopHandleTable* _scratch_references_table;
@@ -399,12 +400,14 @@ class HeapShared: AllStatic {
static GrowableArrayCHeap<oop, mtClassShared>* pending_roots() { return _pending_roots; }
// Dump-time and runtime
- static objArrayOop roots();
+ static objArrayOop root_segment(int segment_idx);
static oop get_root(int index, bool clear=false);
// Run-time only
static void clear_root(int index);
+ static void get_segment_indexes(int index, int& segment_index, int& internal_index);
+
static void setup_test_class(const char* test_class_name) PRODUCT_RETURN;
#endif // INCLUDE_CDS_JAVA_HEAP
@@ -422,7 +425,8 @@ class HeapShared: AllStatic {
static void init_for_dumping(TRAPS) NOT_CDS_JAVA_HEAP_RETURN;
static void write_subgraph_info_table() NOT_CDS_JAVA_HEAP_RETURN;
- static void init_roots(oop roots_oop) NOT_CDS_JAVA_HEAP_RETURN;
+ static void add_root_segment(objArrayOop segment_oop) NOT_CDS_JAVA_HEAP_RETURN;
+ static void init_root_segment_sizes(int max_size_elems) NOT_CDS_JAVA_HEAP_RETURN;
static void serialize_tables(SerializeClosure* soc) NOT_CDS_JAVA_HEAP_RETURN;
#ifndef PRODUCT
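At runtime a root is still addressed by a single flat index; get_segment_indexes() splits it into a segment number and a slot within that segment, skipping the division in the common single-segment case. A small sketch of that mapping with an assumed per-segment capacity:

```cpp
#include <cassert>
#include <cstdio>

// Sketch of the index decomposition done by HeapShared::get_segment_indexes().
static void segment_indexes(int idx, int max_elems, int& seg_idx, int& int_idx) {
  if (idx < max_elems) {          // common case, no division needed
    seg_idx = 0;
    int_idx = idx;
  } else {
    seg_idx = idx / max_elems;
    int_idx = idx % max_elems;
  }
  assert(idx == seg_idx * max_elems + int_idx);
}

int main() {
  int seg, slot;
  segment_indexes(70000, 65532, seg, slot);           // 65532 is an assumed capacity
  std::printf("root 70000 -> segment %d, slot %d\n", seg, slot);  // 1, 4468
  return 0;
}
```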
diff --git a/src/hotspot/share/cds/metaspaceShared.cpp b/src/hotspot/share/cds/metaspaceShared.cpp
index 4d978a7ad880f..6f646e162ecac 100644
--- a/src/hotspot/share/cds/metaspaceShared.cpp
+++ b/src/hotspot/share/cds/metaspaceShared.cpp
@@ -77,6 +77,7 @@
#include "runtime/globals.hpp"
#include "runtime/globals_extension.hpp"
#include "runtime/handles.inline.hpp"
+#include "runtime/javaCalls.hpp"
#include "runtime/os.inline.hpp"
#include "runtime/safepointVerifiers.hpp"
#include "runtime/sharedRuntime.hpp"
@@ -300,6 +301,7 @@ void MetaspaceShared::post_initialize(TRAPS) {
}
ClassLoaderExt::init_paths_start_index(info->app_class_paths_start_index());
ClassLoaderExt::init_app_module_paths_start_index(info->app_module_paths_start_index());
+ ClassLoaderExt::init_num_module_paths(info->header()->num_module_paths());
}
}
}
@@ -791,9 +793,22 @@ void MetaspaceShared::preload_and_dump_impl(StaticArchiveBuilder& builder, TRAPS
// Do this at the very end, when no Java code will be executed. Otherwise
// some new strings may be added to the intern table.
StringTable::allocate_shared_strings_array(CHECK);
+ } else {
+ log_info(cds)("Not dumping heap, reset CDSConfig::_is_using_optimized_module_handling");
+ CDSConfig::stop_using_optimized_module_handling();
}
#endif
+ // Dummy call to load classes used at CDS runtime
+ JavaValue result(T_OBJECT);
+ Handle path_string = java_lang_String::create_from_str("dummy.jar", CHECK);
+ JavaCalls::call_static(&result,
+ vmClasses::jdk_internal_loader_ClassLoaders_klass(),
+ vmSymbols::toFileURL_name(),
+ vmSymbols::toFileURL_signature(),
+ path_string,
+ CHECK);
+
VM_PopulateDumpSharedSpace op(builder);
VMThread::execute(&op);
@@ -1299,7 +1314,7 @@ char* MetaspaceShared::reserve_address_space_for_archives(FileMapInfo* static_ma
assert(base_address == nullptr ||
(address)archive_space_rs.base() == base_address, "Sanity");
// Register archive space with NMT.
- MemTracker::record_virtual_memory_type(archive_space_rs.base(), mtClassShared);
+ MemTracker::record_virtual_memory_tag(archive_space_rs.base(), mtClassShared);
return archive_space_rs.base();
}
return nullptr;
@@ -1361,8 +1376,8 @@ char* MetaspaceShared::reserve_address_space_for_archives(FileMapInfo* static_ma
return nullptr;
}
// NMT: fix up the space tags
- MemTracker::record_virtual_memory_type(archive_space_rs.base(), mtClassShared);
- MemTracker::record_virtual_memory_type(class_space_rs.base(), mtClass);
+ MemTracker::record_virtual_memory_tag(archive_space_rs.base(), mtClassShared);
+ MemTracker::record_virtual_memory_tag(class_space_rs.base(), mtClass);
} else {
if (use_archive_base_addr && base_address != nullptr) {
total_space_rs = ReservedSpace(total_range_size, base_address_alignment,
diff --git a/src/hotspot/share/ci/ciEnv.cpp b/src/hotspot/share/ci/ciEnv.cpp
index 3079d469ebe48..155ce032400e8 100644
--- a/src/hotspot/share/ci/ciEnv.cpp
+++ b/src/hotspot/share/ci/ciEnv.cpp
@@ -100,10 +100,6 @@ ciSymbol* ciEnv::_unloaded_cisymbol = nullptr;
ciInstanceKlass* ciEnv::_unloaded_ciinstance_klass = nullptr;
ciObjArrayKlass* ciEnv::_unloaded_ciobjarrayklass = nullptr;
-jobject ciEnv::_ArrayIndexOutOfBoundsException_handle = nullptr;
-jobject ciEnv::_ArrayStoreException_handle = nullptr;
-jobject ciEnv::_ClassCastException_handle = nullptr;
-
#ifndef PRODUCT
static bool firstEnv = true;
#endif /* PRODUCT */
@@ -158,10 +154,16 @@ ciEnv::ciEnv(CompileTask* task)
o = Universe::arithmetic_exception_instance();
assert(o != nullptr, "should have been initialized");
_ArithmeticException_instance = get_object(o)->as_instance();
+ o = Universe::array_index_out_of_bounds_exception_instance();
+ assert(o != nullptr, "should have been initialized");
+ _ArrayIndexOutOfBoundsException_instance = get_object(o)->as_instance();
+ o = Universe::array_store_exception_instance();
+ assert(o != nullptr, "should have been initialized");
+ _ArrayStoreException_instance = get_object(o)->as_instance();
+ o = Universe::class_cast_exception_instance();
+ assert(o != nullptr, "should have been initialized");
+ _ClassCastException_instance = get_object(o)->as_instance();
- _ArrayIndexOutOfBoundsException_instance = nullptr;
- _ArrayStoreException_instance = nullptr;
- _ClassCastException_instance = nullptr;
_the_null_string = nullptr;
_the_min_jint_string = nullptr;
@@ -363,29 +365,6 @@ void ciEnv::cache_dtrace_flags() {
_dtrace_alloc_probes = DTraceAllocProbes;
}
-// ------------------------------------------------------------------
-// helper for lazy exception creation
-ciInstance* ciEnv::get_or_create_exception(jobject& handle, Symbol* name) {
- VM_ENTRY_MARK;
- if (handle == nullptr) {
- // Cf. universe.cpp, creation of Universe::_null_ptr_exception_instance.
- InstanceKlass* ik = SystemDictionary::find_instance_klass(THREAD, name, Handle(), Handle());
- jobject objh = nullptr;
- if (ik != nullptr) {
- oop obj = ik->allocate_instance(THREAD);
- if (!HAS_PENDING_EXCEPTION)
- objh = JNIHandles::make_global(Handle(THREAD, obj));
- }
- if (HAS_PENDING_EXCEPTION) {
- CLEAR_PENDING_EXCEPTION;
- } else {
- handle = objh;
- }
- }
- oop obj = JNIHandles::resolve(handle);
- return obj == nullptr? nullptr: get_object(obj)->as_instance();
-}
-
ciInstanceKlass* ciEnv::get_box_klass_for_primitive_type(BasicType type) {
switch (type) {
case T_BOOLEAN: return Boolean_klass();
@@ -403,31 +382,6 @@ ciInstanceKlass* ciEnv::get_box_klass_for_primitive_type(BasicType type) {
}
}
-ciInstance* ciEnv::ArrayIndexOutOfBoundsException_instance() {
- if (_ArrayIndexOutOfBoundsException_instance == nullptr) {
- _ArrayIndexOutOfBoundsException_instance
- = get_or_create_exception(_ArrayIndexOutOfBoundsException_handle,
- vmSymbols::java_lang_ArrayIndexOutOfBoundsException());
- }
- return _ArrayIndexOutOfBoundsException_instance;
-}
-ciInstance* ciEnv::ArrayStoreException_instance() {
- if (_ArrayStoreException_instance == nullptr) {
- _ArrayStoreException_instance
- = get_or_create_exception(_ArrayStoreException_handle,
- vmSymbols::java_lang_ArrayStoreException());
- }
- return _ArrayStoreException_instance;
-}
-ciInstance* ciEnv::ClassCastException_instance() {
- if (_ClassCastException_instance == nullptr) {
- _ClassCastException_instance
- = get_or_create_exception(_ClassCastException_handle,
- vmSymbols::java_lang_ClassCastException());
- }
- return _ClassCastException_instance;
-}
-
ciInstance* ciEnv::the_null_string() {
if (_the_null_string == nullptr) {
VM_ENTRY_MARK;
@@ -1662,7 +1616,10 @@ void ciEnv::dump_replay_data_helper(outputStream* out) {
for (int i = 0; i < objects->length(); i++) {
objects->at(i)->dump_replay_data(out);
}
- dump_compile_data(out);
+
+ if (this->task() != nullptr) {
+ dump_compile_data(out);
+ }
out->flush();
}
diff --git a/src/hotspot/share/ci/ciEnv.hpp b/src/hotspot/share/ci/ciEnv.hpp
index 5ee9b420033ef..6c66633ee1749 100644
--- a/src/hotspot/share/ci/ciEnv.hpp
+++ b/src/hotspot/share/ci/ciEnv.hpp
@@ -94,10 +94,6 @@ class ciEnv : StackObj {
static ciInstanceKlass* _unloaded_ciinstance_klass;
static ciObjArrayKlass* _unloaded_ciobjarrayklass;
- static jobject _ArrayIndexOutOfBoundsException_handle;
- static jobject _ArrayStoreException_handle;
- static jobject _ClassCastException_handle;
-
ciInstance* _NullPointerException_instance;
ciInstance* _ArithmeticException_instance;
ciInstance* _ArrayIndexOutOfBoundsException_instance;
@@ -230,8 +226,6 @@ class ciEnv : StackObj {
ciMethod* get_method_from_handle(Method* method);
- ciInstance* get_or_create_exception(jobject& handle, Symbol* name);
-
// Get a ciMethod representing either an unfound method or
// a method with an unloaded holder. Ensures uniqueness of
// the result.
@@ -402,11 +396,18 @@ class ciEnv : StackObj {
assert(_ArithmeticException_instance != nullptr, "initialization problem");
return _ArithmeticException_instance;
}
-
- // Lazy constructors:
- ciInstance* ArrayIndexOutOfBoundsException_instance();
- ciInstance* ArrayStoreException_instance();
- ciInstance* ClassCastException_instance();
+ ciInstance* ArrayIndexOutOfBoundsException_instance() {
+ assert(_ArrayIndexOutOfBoundsException_instance != nullptr, "initialization problem");
+ return _ArrayIndexOutOfBoundsException_instance;
+ }
+ ciInstance* ArrayStoreException_instance() {
+ assert(_ArrayStoreException_instance != nullptr, "initialization problem");
+ return _ArrayStoreException_instance;
+ }
+ ciInstance* ClassCastException_instance() {
+ assert(_ClassCastException_instance != nullptr, "initialization problem");
+ return _ClassCastException_instance;
+ }
ciInstance* the_null_string();
ciInstance* the_min_jint_string();
diff --git a/src/hotspot/share/ci/ciKlass.hpp b/src/hotspot/share/ci/ciKlass.hpp
index 58a62b248a4e8..10d8395ed7fb9 100644
--- a/src/hotspot/share/ci/ciKlass.hpp
+++ b/src/hotspot/share/ci/ciKlass.hpp
@@ -107,6 +107,13 @@ class ciKlass : public ciType {
return false;
}
+ bool is_in_encoding_range() {
+ Klass* k = get_Klass();
+ bool is_in_encoding_range = CompressedKlassPointers::is_encodable(k);
+ assert(is_in_encoding_range || k->is_interface() || k->is_abstract(), "sanity");
+ return is_in_encoding_range;
+ }
+
// Attempt to get a klass using this ciKlass's loader.
ciKlass* find_klass(ciSymbol* klass_name);
// Note: To find a class from its name string, use ciSymbol::make,
diff --git a/src/hotspot/share/classfile/classFileParser.cpp b/src/hotspot/share/classfile/classFileParser.cpp
index 60fed287df594..c8e95149b7c1a 100644
--- a/src/hotspot/share/classfile/classFileParser.cpp
+++ b/src/hotspot/share/classfile/classFileParser.cpp
@@ -1150,30 +1150,40 @@ static void parse_annotations(const ConstantPool* const cp,
if (AnnotationCollector::_unknown == id) continue;
coll->set_annotation(id);
if (AnnotationCollector::_java_lang_Deprecated == id) {
- assert(count <= 2, "change this if more element-value pairs are added to the @Deprecated annotation");
- // @Deprecated can specify forRemoval=true
+ // @Deprecated can specify forRemoval=true, which we need
+ // to record for JFR to use. If the annotation is not well-formed
+ // then we may not be able to determine that.
const u1* offset = abase + member_off;
- for (int i = 0; i < count; ++i) {
+ // There are only 2 members in @Deprecated.
+ int n_members = MIN2(count, 2);
+ for (int i = 0; i < n_members; ++i) {
int member_index = Bytes::get_Java_u2((address)offset);
offset += 2;
member = check_symbol_at(cp, member_index);
- if (member == vmSymbols::since()) {
- assert(*((address)offset) == s_tag_val, "invariant");
+ if (member == vmSymbols::since() &&
+ (*((address)offset) == s_tag_val)) {
+ // Found `since` first so skip over it
offset += 3;
- continue;
}
- if (member == vmSymbols::for_removal()) {
- assert(*((address)offset) == b_tag_val, "invariant");
+ else if (member == vmSymbols::for_removal() &&
+ (*((address)offset) == b_tag_val)) {
const u2 boolean_value_index = Bytes::get_Java_u2((address)offset + 1);
- if (cp->int_at(boolean_value_index) == 1) {
+ // No guarantee the entry is valid so check it refers to an int in the CP.
+ if (cp->is_within_bounds(boolean_value_index) &&
+ cp->tag_at(boolean_value_index).is_int() &&
+ cp->int_at(boolean_value_index) == 1) {
// forRemoval == true
coll->set_annotation(AnnotationCollector::_java_lang_Deprecated_for_removal);
}
+ break; // no need to check further
+ }
+ else {
+ // This @Deprecated annotation is malformed so we don't try to
+ // determine whether forRemoval is set.
break;
}
-
}
- continue;
+ continue; // proceed to next annotation
}
if (AnnotationCollector::_jdk_internal_vm_annotation_Contended == id) {
@@ -1194,11 +1204,21 @@ static void parse_annotations(const ConstantPool* const cp,
&& s_tag_val == *(abase + tag_off)
&& member == vmSymbols::value_name()) {
group_index = Bytes::get_Java_u2((address)abase + s_con_off);
- if (cp->symbol_at(group_index)->utf8_length() == 0) {
- group_index = 0; // default contended group
+ // No guarantee the group_index is valid so check it refers to a
+ // symbol in the CP.
+ if (cp->is_within_bounds(group_index) &&
+ cp->tag_at(group_index).is_utf8()) {
+ // Seems valid, so check for empty string and reset
+ if (cp->symbol_at(group_index)->utf8_length() == 0) {
+ group_index = 0; // default contended group
+ }
+ } else {
+ // Not valid so use the default
+ group_index = 0;
}
}
coll->set_contended_group(group_index);
+ continue; // proceed to next annotation
}
}
}
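
A minimal standalone sketch of the validate-before-use pattern applied in the classFileParser.cpp hunk above: an index read from a class file is never trusted until its bounds and tag have been checked against the constant pool. ConstantPoolView and for_removal_is_true are hypothetical stand-ins for illustration, not the HotSpot ConstantPool API.

#include <cstdint>

// Hypothetical, simplified view of a constant pool; not HotSpot code.
struct ConstantPoolView {
  int            length;  // number of constant-pool slots
  const uint8_t* tags;    // tag byte per slot
  const int32_t* ints;    // value for CONSTANT_Integer slots

  bool is_within_bounds(int i) const { return i > 0 && i < length; }
  bool is_int(int i)           const { return tags[i] == 3; }  // CONSTANT_Integer
};

// Returns true only if the boolean element is well-formed and set to 1.
static bool for_removal_is_true(const ConstantPoolView& cp, int boolean_value_index) {
  // A malformed @Deprecated annotation may reference any slot, so validate first.
  return cp.is_within_bounds(boolean_value_index) &&
         cp.is_int(boolean_value_index) &&
         cp.ints[boolean_value_index] == 1;
}
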
diff --git a/src/hotspot/share/classfile/classLoader.cpp b/src/hotspot/share/classfile/classLoader.cpp
index e410824e3001c..9a68e2640443f 100644
--- a/src/hotspot/share/classfile/classLoader.cpp
+++ b/src/hotspot/share/classfile/classLoader.cpp
@@ -81,6 +81,9 @@
#include "utilities/ostream.hpp"
#include "utilities/utf8.hpp"
+#include <ctype.h>
+#include <stdlib.h>
+
// Entry point in java.dll for path canonicalization
typedef int (*canonicalize_fn_t)(const char *orig, char *out, int len);
@@ -579,6 +582,8 @@ void ClassLoader::setup_module_search_path(JavaThread* current, const char* path
new_entry = create_class_path_entry(current, path, &st,
false /*is_boot_append */, false /* from_class_path_attr */);
if (new_entry != nullptr) {
+ // ClassLoaderExt::process_module_table() filters out non-jar entries before calling this function.
+ assert(new_entry->is_jar_file(), "module path entry %s is not a jar file", new_entry->name());
add_to_module_path_entries(path, new_entry);
}
}
@@ -834,7 +839,8 @@ bool ClassLoader::add_to_app_classpath_entries(JavaThread* current,
ClassPathEntry* e = _app_classpath_entries;
if (check_for_duplicates) {
while (e != nullptr) {
- if (strcmp(e->name(), entry->name()) == 0) {
+ if (strcmp(e->name(), entry->name()) == 0 &&
+ e->from_class_path_attr() == entry->from_class_path_attr()) {
// entry already exists
return false;
}
@@ -1208,7 +1214,7 @@ InstanceKlass* ClassLoader::load_class(Symbol* name, PackageEntry* pkg_entry, bo
}
#if INCLUDE_CDS
-char* ClassLoader::skip_uri_protocol(char* source) {
+static const char* skip_uri_protocol(const char* source) {
if (strncmp(source, "file:", 5) == 0) {
// file: protocol path could start with file:/ or file:///
// locate the char after all the forward slashes
@@ -1227,6 +1233,47 @@ char* ClassLoader::skip_uri_protocol(char* source) {
return source;
}
+static char decode_percent_encoded(const char *str, size_t& index) {
+ if (str[index] == '%'
+ && isxdigit(str[index + 1])
+ && isxdigit(str[index + 2])) {
+ char hex[3];
+ hex[0] = str[index + 1];
+ hex[1] = str[index + 2];
+ hex[2] = '\0';
+ index += 2;
+ return (char) strtol(hex, NULL, 16);
+ }
+ return str[index];
+}
+
+char* ClassLoader::uri_to_path(const char* uri) {
+ const size_t len = strlen(uri) + 1;
+ char* path = NEW_RESOURCE_ARRAY(char, len);
+
+ uri = skip_uri_protocol(uri);
+
+ if (strncmp(uri, "//", 2) == 0) {
+ // Skip the empty "authority" part
+ uri += 2;
+ }
+
+#ifdef _WINDOWS
+ if (uri[0] == '/') {
+ // Absolute path name on Windows does not begin with a slash
+ uri += 1;
+ }
+#endif
+
+ size_t path_index = 0;
+ for (size_t i = 0; i < strlen(uri); ++i) {
+ char decoded = decode_percent_encoded(uri, i);
+ path[path_index++] = decoded;
+ }
+ path[path_index] = '\0';
+ return path;
+}
+
// Record the shared classpath index and loader type for classes loaded
// by the builtin loaders at dump time.
void ClassLoader::record_result(JavaThread* current, InstanceKlass* ik,
@@ -1260,7 +1307,7 @@ void ClassLoader::record_result(JavaThread* current, InstanceKlass* ik,
// Save the path from the file: protocol or the module name from the jrt: protocol
// if no protocol prefix is found, path is the same as stream->source(). This path
// must be valid since the class has been successfully parsed.
- char* path = skip_uri_protocol(src);
+ const char* path = ClassLoader::uri_to_path(src);
assert(path != nullptr, "sanity");
for (int i = 0; i < FileMapInfo::get_number_of_shared_paths(); i++) {
SharedClassPathEntry* ent = FileMapInfo::shared_path(i);
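
A minimal standalone sketch of the percent-decoding that uri_to_path() performs in the hunk above, written against the C++ standard library instead of HotSpot resource arrays; percent_decode is a hypothetical helper used only for illustration.

#include <cctype>
#include <cstdlib>
#include <string>

// Decode %XX escapes; unrecognized or truncated escapes are copied verbatim.
static std::string percent_decode(const std::string& uri) {
  std::string path;
  for (size_t i = 0; i < uri.size(); ++i) {
    if (uri[i] == '%' && i + 2 < uri.size() &&
        isxdigit(static_cast<unsigned char>(uri[i + 1])) &&
        isxdigit(static_cast<unsigned char>(uri[i + 2]))) {
      const char hex[3] = { uri[i + 1], uri[i + 2], '\0' };
      path.push_back(static_cast<char>(strtol(hex, nullptr, 16)));
      i += 2;
    } else {
      path.push_back(uri[i]);
    }
  }
  return path;
}

// Example: percent_decode("/tmp/my%20mods") yields "/tmp/my mods";
// uri_to_path() additionally strips the "file:" prefix and the empty authority first.
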
diff --git a/src/hotspot/share/classfile/classLoader.hpp b/src/hotspot/share/classfile/classLoader.hpp
index af625082ddabf..e44059b724769 100644
--- a/src/hotspot/share/classfile/classLoader.hpp
+++ b/src/hotspot/share/classfile/classLoader.hpp
@@ -382,7 +382,7 @@ class ClassLoader: AllStatic {
// entries during shared classpath setup time.
static int num_module_path_entries();
static void exit_with_path_failure(const char* error, const char* message);
- static char* skip_uri_protocol(char* source);
+ static char* uri_to_path(const char* uri);
static void record_result(JavaThread* current, InstanceKlass* ik,
const ClassFileStream* stream, bool redefined);
#endif
diff --git a/src/hotspot/share/classfile/classLoaderExt.cpp b/src/hotspot/share/classfile/classLoaderExt.cpp
index 78e29f990d7d1..16981669deb3a 100644
--- a/src/hotspot/share/classfile/classLoaderExt.cpp
+++ b/src/hotspot/share/classfile/classLoaderExt.cpp
@@ -55,6 +55,7 @@
jshort ClassLoaderExt::_app_class_paths_start_index = ClassLoaderExt::max_classpath_index;
jshort ClassLoaderExt::_app_module_paths_start_index = ClassLoaderExt::max_classpath_index;
jshort ClassLoaderExt::_max_used_path_index = 0;
+int ClassLoaderExt::_num_module_paths = 0;
bool ClassLoaderExt::_has_app_classes = false;
bool ClassLoaderExt::_has_platform_classes = false;
bool ClassLoaderExt::_has_non_jar_in_classpath = false;
@@ -89,23 +90,25 @@ void ClassLoaderExt::setup_app_search_path(JavaThread* current) {
os::free(app_class_path);
}
+int ClassLoaderExt::compare_module_path_by_name(const char** p1, const char** p2) {
+ return strcmp(*p1, *p2);
+}
+
void ClassLoaderExt::process_module_table(JavaThread* current, ModuleEntryTable* met) {
ResourceMark rm(current);
- GrowableArray<char*>* module_paths = new GrowableArray<char*>(5);
+ GrowableArray<const char*>* module_paths = new GrowableArray<const char*>(5);
   class ModulePathsGatherer : public ModuleClosure {
     JavaThread* _current;
-   GrowableArray<char*>* _module_paths;
+   GrowableArray<const char*>* _module_paths;
   public:
-   ModulePathsGatherer(JavaThread* current, GrowableArray<char*>* module_paths) :
+   ModulePathsGatherer(JavaThread* current, GrowableArray<const char*>* module_paths) :
_current(current), _module_paths(module_paths) {}
void do_module(ModuleEntry* m) {
- char* path = m->location()->as_C_string();
- if (strncmp(path, "file:", 5) == 0) {
- path = ClassLoader::skip_uri_protocol(path);
- char* path_copy = NEW_RESOURCE_ARRAY(char, strlen(path) + 1);
- strcpy(path_copy, path);
- _module_paths->append(path_copy);
+ char* uri = m->location()->as_C_string();
+ if (strncmp(uri, "file:", 5) == 0) {
+ char* path = ClassLoader::uri_to_path(uri);
+ extract_jar_files_from_path(path, _module_paths);
}
}
};
@@ -116,6 +119,10 @@ void ClassLoaderExt::process_module_table(JavaThread* current, ModuleEntryTable*
met->modules_do(&gatherer);
}
+ // Sort the module paths before storing into CDS archive for simpler
+ // checking at runtime.
+ module_paths->sort(compare_module_path_by_name);
+
for (int i = 0; i < module_paths->length(); i++) {
ClassLoader::setup_module_search_path(current, module_paths->at(i));
}
@@ -131,6 +138,38 @@ void ClassLoaderExt::setup_module_paths(JavaThread* current) {
process_module_table(current, met);
}
+bool ClassLoaderExt::has_jar_suffix(const char* filename) {
+ // In jdk.internal.module.ModulePath.readModule(), it checks for the ".jar" suffix.
+ // Performing the same check here.
+ const char* dot = strrchr(filename, '.');
+ if (dot != nullptr && strcmp(dot + 1, "jar") == 0) {
+ return true;
+ }
+ return false;
+}
+
+void ClassLoaderExt::extract_jar_files_from_path(const char* path, GrowableArray<const char*>* module_paths) {
+ DIR* dirp = os::opendir(path);
+ if (dirp == nullptr && errno == ENOTDIR && has_jar_suffix(path)) {
+ module_paths->append(path);
+ } else {
+ if (dirp != nullptr) {
+ struct dirent* dentry;
+ while ((dentry = os::readdir(dirp)) != nullptr) {
+ const char* file_name = dentry->d_name;
+ if (has_jar_suffix(file_name)) {
+ size_t full_name_len = strlen(path) + strlen(file_name) + strlen(os::file_separator()) + 1;
+ char* full_name = NEW_RESOURCE_ARRAY(char, full_name_len);
+ int n = os::snprintf(full_name, full_name_len, "%s%s%s", path, os::file_separator(), file_name);
+ assert((size_t)n == full_name_len - 1, "Unexpected number of characters in string");
+ module_paths->append(full_name);
+ }
+ }
+ os::closedir(dirp);
+ }
+ }
+}
+
char* ClassLoaderExt::read_manifest(JavaThread* current, ClassPathEntry* entry,
jint *manifest_size, bool clean_text) {
const char* name = "META-INF/MANIFEST.MF";
@@ -213,6 +252,15 @@ void ClassLoaderExt::process_jar_manifest(JavaThread* current, ClassPathEntry* e
char sep = os::file_separator()[0];
const char* dir_name = entry->name();
const char* dir_tail = strrchr(dir_name, sep);
+#ifdef _WINDOWS
+ // On Windows, we also support forward slash as the file separator when locating entries in the classpath entry.
+ const char* dir_tail2 = strrchr(dir_name, '/');
+ if (dir_tail == nullptr) {
+ dir_tail = dir_tail2;
+ } else if (dir_tail2 != nullptr && dir_tail2 > dir_tail) {
+ dir_tail = dir_tail2;
+ }
+#endif
int dir_len;
if (dir_tail == nullptr) {
dir_len = 0;
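
A minimal standalone sketch of the dump-time behaviour added in the classLoaderExt.cpp hunks above: keep only the *.jar entries found on a module path and sort them by name so the archived order is stable. gather_jars and its std::vector types are illustrative stand-ins for the GrowableArray-based HotSpot code.

#include <algorithm>
#include <cstring>
#include <string>
#include <vector>

// Same suffix test as ClassLoaderExt::has_jar_suffix() above.
static bool has_jar_suffix(const char* filename) {
  const char* dot = strrchr(filename, '.');
  return dot != nullptr && strcmp(dot + 1, "jar") == 0;
}

// Filter a directory listing down to jar files and sort by name.
static std::vector<std::string> gather_jars(const std::vector<std::string>& dir_entries) {
  std::vector<std::string> jars;
  for (const std::string& name : dir_entries) {
    if (has_jar_suffix(name.c_str())) {
      jars.push_back(name);
    }
  }
  std::sort(jars.begin(), jars.end());
  return jars;
}
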
diff --git a/src/hotspot/share/classfile/classLoaderExt.hpp b/src/hotspot/share/classfile/classLoaderExt.hpp
index b76ce3ff33a32..c3c0b00d55e43 100644
--- a/src/hotspot/share/classfile/classLoaderExt.hpp
+++ b/src/hotspot/share/classfile/classLoaderExt.hpp
@@ -53,12 +53,15 @@ class ClassLoaderExt: public ClassLoader { // AllStatic
static jshort _app_module_paths_start_index;
// the largest path index being used during CDS dump time
static jshort _max_used_path_index;
+ // number of module paths
+ static int _num_module_paths;
static bool _has_app_classes;
static bool _has_platform_classes;
static bool _has_non_jar_in_classpath;
static char* read_manifest(JavaThread* current, ClassPathEntry* entry, jint *manifest_size, bool clean_text);
+ static bool has_jar_suffix(const char* filename);
public:
static void process_jar_manifest(JavaThread* current, ClassPathEntry* entry);
@@ -68,6 +71,8 @@ class ClassLoaderExt: public ClassLoader { // AllStatic
static void setup_search_paths(JavaThread* current);
static void setup_module_paths(JavaThread* current);
+ static void extract_jar_files_from_path(const char* path, GrowableArray<const char*>* module_paths);
+ static int compare_module_path_by_name(const char** p1, const char** p2);
static char* read_manifest(JavaThread* current, ClassPathEntry* entry, jint *manifest_size) {
// Remove all the new-line continuations (which wrap long lines at 72 characters, see
@@ -87,6 +92,8 @@ class ClassLoaderExt: public ClassLoader { // AllStatic
static jshort max_used_path_index() { return _max_used_path_index; }
+ static int num_module_paths() { return _num_module_paths; }
+
static void set_max_used_path_index(jshort used_index) {
_max_used_path_index = used_index;
}
@@ -99,6 +106,10 @@ class ClassLoaderExt: public ClassLoader { // AllStatic
_app_module_paths_start_index = module_start;
}
+ static void init_num_module_paths(int num_module_paths) {
+ _num_module_paths = num_module_paths;
+ }
+
static bool is_boot_classpath(int classpath_index) {
return classpath_index < _app_class_paths_start_index;
}
diff --git a/src/hotspot/share/classfile/javaClasses.cpp b/src/hotspot/share/classfile/javaClasses.cpp
index b6ef682ae0965..0ad36cd21dbf3 100644
--- a/src/hotspot/share/classfile/javaClasses.cpp
+++ b/src/hotspot/share/classfile/javaClasses.cpp
@@ -3052,9 +3052,10 @@ void java_lang_ClassFrameInfo::serialize_offsets(SerializeClosure* f) {
static int get_flags(const methodHandle& m) {
int flags = (jushort)( m->access_flags().as_short() & JVM_RECOGNIZED_METHOD_MODIFIERS );
- if (m->is_initializer()) {
+ if (m->is_object_initializer()) {
flags |= java_lang_invoke_MemberName::MN_IS_CONSTRUCTOR;
} else {
+ // Note: Static initializers can be here. Record them as plain methods.
flags |= java_lang_invoke_MemberName::MN_IS_METHOD;
}
if (m->caller_sensitive()) {
diff --git a/src/hotspot/share/classfile/systemDictionary.cpp b/src/hotspot/share/classfile/systemDictionary.cpp
index b9a559cf9779f..7b307a0b8a37c 100644
--- a/src/hotspot/share/classfile/systemDictionary.cpp
+++ b/src/hotspot/share/classfile/systemDictionary.cpp
@@ -1069,7 +1069,7 @@ bool SystemDictionary::check_shared_class_super_type(InstanceKlass* klass, Insta
}
Klass *found = resolve_with_circularity_detection(klass->name(), super_type->name(),
- class_loader, protection_domain, is_superclass, CHECK_0);
+ class_loader, protection_domain, is_superclass, CHECK_false);
if (found == super_type) {
return true;
} else {
@@ -1088,16 +1088,21 @@ bool SystemDictionary::check_shared_class_super_types(InstanceKlass* ik, Handle
// If unexpected superclass or interfaces are found, we cannot
// load from the shared archive.
- if (ik->super() != nullptr &&
- !check_shared_class_super_type(ik, InstanceKlass::cast(ik->super()),
- class_loader, protection_domain, true, THREAD)) {
- return false;
+ if (ik->super() != nullptr) {
+ bool check_super = check_shared_class_super_type(ik, InstanceKlass::cast(ik->super()),
+ class_loader, protection_domain, true,
+ CHECK_false);
+ if (!check_super) {
+ return false;
+ }
}
  Array<InstanceKlass*>* interfaces = ik->local_interfaces();
int num_interfaces = interfaces->length();
for (int index = 0; index < num_interfaces; index++) {
- if (!check_shared_class_super_type(ik, interfaces->at(index), class_loader, protection_domain, false, THREAD)) {
+ bool check_interface = check_shared_class_super_type(ik, interfaces->at(index), class_loader, protection_domain, false,
+ CHECK_false);
+ if (!check_interface) {
return false;
}
}
@@ -1149,10 +1154,13 @@ InstanceKlass* SystemDictionary::load_shared_class(InstanceKlass* ik,
Symbol* class_name = ik->name();
if (!is_shared_class_visible(class_name, ik, pkg_entry, class_loader)) {
+ ik->set_shared_loading_failed();
return nullptr;
}
- if (!check_shared_class_super_types(ik, class_loader, protection_domain, THREAD)) {
+ bool check = check_shared_class_super_types(ik, class_loader, protection_domain, CHECK_NULL);
+ if (!check) {
+ ik->set_shared_loading_failed();
return nullptr;
}
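
A minimal standalone sketch of why the super-type checks above were hoisted out of the if-conditions: a CHECK_false-style macro appends an exception check and an early return right after the call, so the call must stand as its own statement and the boolean result is tested separately. All names below are hypothetical stand-ins, not the real HotSpot macros or functions.

// Stand-ins for a pending Java exception and a fallible check.
static bool g_exception_pending = false;
static bool exception_pending() { return g_exception_pending; }
static bool check_super_type(int which) { return which >= 0; }

static bool check_all_super_types(int count) {
  for (int i = 0; i < count; i++) {
    bool ok = check_super_type(i);  // in HotSpot: check(..., CHECK_false);
    if (exception_pending()) {      // roughly what CHECK_false expands to
      return false;
    }
    if (!ok) {
      return false;
    }
  }
  return true;
}
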
diff --git a/src/hotspot/share/classfile/systemDictionary.hpp b/src/hotspot/share/classfile/systemDictionary.hpp
index ee50aa38dd0cf..04980291716c7 100644
--- a/src/hotspot/share/classfile/systemDictionary.hpp
+++ b/src/hotspot/share/classfile/systemDictionary.hpp
@@ -293,13 +293,6 @@ class SystemDictionary : AllStatic {
const char* message);
static const char* find_nest_host_error(const constantPoolHandle& pool, int which);
-protected:
- static InstanceKlass* _well_known_klasses[];
-
-private:
- // table of box klasses (int_klass, etc.)
- static InstanceKlass* _box_klasses[T_VOID+1];
-
static OopHandle _java_system_loader;
static OopHandle _java_platform_loader;
diff --git a/src/hotspot/share/classfile/vmClasses.cpp b/src/hotspot/share/classfile/vmClasses.cpp
index 0b9b437c67b78..b62d699dfe20e 100644
--- a/src/hotspot/share/classfile/vmClasses.cpp
+++ b/src/hotspot/share/classfile/vmClasses.cpp
@@ -45,14 +45,6 @@ InstanceKlass* vmClasses::_klasses[static_cast<int>(vmClassID::LIMIT)]
= { nullptr /*, nullptr...*/ };
InstanceKlass* vmClasses::_box_klasses[T_VOID+1] = { nullptr /*, nullptr...*/ };
-
-// CDS: scan and relocate all classes referenced by _klasses[].
-void vmClasses::metaspace_pointers_do(MetaspaceClosure* it) {
- for (auto id : EnumRange<vmClassID>{}) {
- it->push(klass_addr_at(id));
- }
-}
-
bool vmClasses::is_loaded(InstanceKlass* klass) {
return klass != nullptr && klass->is_loaded();
}
@@ -205,8 +197,6 @@ void vmClasses::resolve_all(TRAPS) {
_box_klasses[T_SHORT] = vmClasses::Short_klass();
_box_klasses[T_INT] = vmClasses::Integer_klass();
_box_klasses[T_LONG] = vmClasses::Long_klass();
- //_box_klasses[T_OBJECT] = vmClasses::object_klass();
- //_box_klasses[T_ARRAY] = vmClasses::object_klass();
#ifdef ASSERT
if (CDSConfig::is_using_archive()) {
diff --git a/src/hotspot/share/classfile/vmClasses.hpp b/src/hotspot/share/classfile/vmClasses.hpp
index f2b8c5666eeb1..4fa078c50cd80 100644
--- a/src/hotspot/share/classfile/vmClasses.hpp
+++ b/src/hotspot/share/classfile/vmClasses.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2021, 2023, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2021, 2024, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -32,7 +32,6 @@
class ClassLoaderData;
class InstanceKlass;
-class MetaspaceClosure;
class vmClasses : AllStatic {
friend class VMStructs;
@@ -95,7 +94,6 @@ class vmClasses : AllStatic {
return &_klasses[as_int(id)];
}
- static void metaspace_pointers_do(MetaspaceClosure* it);
static void resolve_all(TRAPS);
static BasicType box_klass_type(Klass* k); // inverse of box_klass
diff --git a/src/hotspot/share/classfile/vmIntrinsics.cpp b/src/hotspot/share/classfile/vmIntrinsics.cpp
index b470eb9b8380d..5e352e42efbc1 100644
--- a/src/hotspot/share/classfile/vmIntrinsics.cpp
+++ b/src/hotspot/share/classfile/vmIntrinsics.cpp
@@ -90,6 +90,7 @@ bool vmIntrinsics::preserves_state(vmIntrinsics::ID id) {
case vmIntrinsics::_dsin:
case vmIntrinsics::_dcos:
case vmIntrinsics::_dtan:
+ case vmIntrinsics::_dtanh:
case vmIntrinsics::_dlog:
case vmIntrinsics::_dlog10:
case vmIntrinsics::_dexp:
@@ -141,6 +142,7 @@ bool vmIntrinsics::can_trap(vmIntrinsics::ID id) {
case vmIntrinsics::_dsin:
case vmIntrinsics::_dcos:
case vmIntrinsics::_dtan:
+ case vmIntrinsics::_dtanh:
case vmIntrinsics::_dlog:
case vmIntrinsics::_dlog10:
case vmIntrinsics::_dexp:
@@ -288,6 +290,7 @@ bool vmIntrinsics::disabled_by_jvm_flags(vmIntrinsics::ID id) {
case vmIntrinsics::_dsin:
case vmIntrinsics::_dcos:
case vmIntrinsics::_dtan:
+ case vmIntrinsics::_dtanh:
case vmIntrinsics::_dlog:
case vmIntrinsics::_dexp:
case vmIntrinsics::_dpow:
diff --git a/src/hotspot/share/classfile/vmIntrinsics.hpp b/src/hotspot/share/classfile/vmIntrinsics.hpp
index 4b772c171d5a6..b6ce21797a618 100644
--- a/src/hotspot/share/classfile/vmIntrinsics.hpp
+++ b/src/hotspot/share/classfile/vmIntrinsics.hpp
@@ -135,7 +135,7 @@ class methodHandle;
do_name(log_name,"log") do_name(log10_name,"log10") do_name(pow_name,"pow") \
do_name(exp_name,"exp") do_name(min_name,"min") do_name(max_name,"max") \
do_name(floor_name, "floor") do_name(ceil_name, "ceil") do_name(rint_name, "rint") \
- do_name(round_name, "round") \
+ do_name(round_name, "round") do_name(tanh_name,"tanh") \
\
do_name(addExact_name,"addExact") \
do_name(decrementExact_name,"decrementExact") \
@@ -161,6 +161,7 @@ class methodHandle;
do_intrinsic(_dcos, java_lang_Math, cos_name, double_double_signature, F_S) \
do_intrinsic(_dtan, java_lang_Math, tan_name, double_double_signature, F_S) \
do_intrinsic(_datan2, java_lang_Math, atan2_name, double2_double_signature, F_S) \
+ do_intrinsic(_dtanh, java_lang_Math, tanh_name, double_double_signature, F_S) \
do_intrinsic(_dsqrt, java_lang_Math, sqrt_name, double_double_signature, F_S) \
do_intrinsic(_dlog, java_lang_Math, log_name, double_double_signature, F_S) \
do_intrinsic(_dlog10, java_lang_Math, log10_name, double_double_signature, F_S) \
@@ -1007,6 +1008,15 @@ class methodHandle;
"Ljdk/internal/vm/vector/VectorSupport$Vector;") \
do_name(vector_shuffle_to_vector_name, "shuffleToVector") \
\
+ do_intrinsic(_VectorWrapShuffleIndexes, jdk_internal_vm_vector_VectorSupport, vector_wrap_shuffle_indexes_name, \
+ vector_wrap_shuffle_indexes_sig, F_S) \
+ do_signature(vector_wrap_shuffle_indexes_sig, "(Ljava/lang/Class;" \
+ "Ljava/lang/Class;" \
+ "Ljdk/internal/vm/vector/VectorSupport$VectorShuffle;" \
+ "ILjdk/internal/vm/vector/VectorSupport$WrapShuffleIndexesOperation;)" \
+ "Ljdk/internal/vm/vector/VectorSupport$VectorShuffle;") \
+ do_name(vector_wrap_shuffle_indexes_name, "wrapShuffleIndexes") \
+ \
do_intrinsic(_VectorLoadOp, jdk_internal_vm_vector_VectorSupport, vector_load_op_name, vector_load_op_sig, F_S) \
do_signature(vector_load_op_sig, "(Ljava/lang/Class;" \
"Ljava/lang/Class;" \
@@ -1128,6 +1138,18 @@ class methodHandle;
"Ljdk/internal/vm/vector/VectorSupport$Vector;") \
do_name(vector_rearrange_name, "rearrangeOp") \
\
+ do_intrinsic(_VectorSelectFrom, jdk_internal_vm_vector_VectorSupport, vector_select_from_name, vector_select_from_sig, F_S) \
+ do_signature(vector_select_from_sig, "(Ljava/lang/Class;" \
+ "Ljava/lang/Class;" \
+ "Ljava/lang/Class;" \
+ "I" \
+ "Ljdk/internal/vm/vector/VectorSupport$Vector;" \
+ "Ljdk/internal/vm/vector/VectorSupport$Vector;" \
+ "Ljdk/internal/vm/vector/VectorSupport$VectorMask;" \
+ "Ljdk/internal/vm/vector/VectorSupport$VectorSelectFromOp;)" \
+ "Ljdk/internal/vm/vector/VectorSupport$Vector;") \
+ do_name(vector_select_from_name, "selectFromOp") \
+ \
do_intrinsic(_VectorExtract, jdk_internal_vm_vector_VectorSupport, vector_extract_name, vector_extract_sig, F_S) \
do_signature(vector_extract_sig, "(Ljava/lang/Class;" \
"Ljava/lang/Class;" \
diff --git a/src/hotspot/share/code/codeBlob.cpp b/src/hotspot/share/code/codeBlob.cpp
index 81c4d001078cb..23f621ffec832 100644
--- a/src/hotspot/share/code/codeBlob.cpp
+++ b/src/hotspot/share/code/codeBlob.cpp
@@ -41,7 +41,7 @@
#include "runtime/handles.inline.hpp"
#include "runtime/interfaceSupport.inline.hpp"
#include "runtime/javaFrameAnchor.hpp"
-#include "runtime/jniHandles.hpp"
+#include "runtime/jniHandles.inline.hpp"
#include "runtime/mutexLocker.hpp"
#include "runtime/safepoint.hpp"
#include "runtime/sharedRuntime.hpp"
@@ -623,7 +623,7 @@ UpcallStub* UpcallStub::create(const char* name, CodeBuffer* cb, jobject receive
// Track memory usage statistic after releasing CodeCache_lock
MemoryService::track_code_cache_memory_usage();
- trace_new_stub(blob, "UpcallStub");
+ trace_new_stub(blob, "UpcallStub - ", name);
return blob;
}
@@ -772,6 +772,10 @@ void UpcallStub::verify() {
void UpcallStub::print_on(outputStream* st) const {
RuntimeBlob::print_on(st);
print_value_on(st);
+ st->print_cr("Frame data offset: %d", (int) _frame_data_offset);
+ oop recv = JNIHandles::resolve(_receiver);
+ st->print("Receiver MH=");
+ recv->print_on(st);
Disassembler::decode((RuntimeBlob*)this, st);
}
diff --git a/src/hotspot/share/code/codeCache.cpp b/src/hotspot/share/code/codeCache.cpp
index 11e070862fe2f..f0769e240ddbd 100644
--- a/src/hotspot/share/code/codeCache.cpp
+++ b/src/hotspot/share/code/codeCache.cpp
@@ -1767,9 +1767,13 @@ void CodeCache::print_codelist(outputStream* st) {
nmethod* nm = iter.method();
ResourceMark rm;
char* method_name = nm->method()->name_and_sig_as_C_string();
- st->print_cr("%d %d %d %s [" INTPTR_FORMAT ", " INTPTR_FORMAT " - " INTPTR_FORMAT "]",
+ const char* jvmci_name = nullptr;
+#if INCLUDE_JVMCI
+ jvmci_name = nm->jvmci_name();
+#endif
+ st->print_cr("%d %d %d %s%s%s [" INTPTR_FORMAT ", " INTPTR_FORMAT " - " INTPTR_FORMAT "]",
nm->compile_id(), nm->comp_level(), nm->get_state(),
- method_name,
+ method_name, jvmci_name ? " jvmci_name=" : "", jvmci_name ? jvmci_name : "",
(intptr_t)nm->header_begin(), (intptr_t)nm->code_begin(), (intptr_t)nm->code_end());
}
}
@@ -1811,12 +1815,20 @@ void CodeCache::write_perf_map(const char* filename, outputStream* st) {
while (iter.next()) {
CodeBlob *cb = iter.method();
ResourceMark rm;
- const char* method_name =
- cb->is_nmethod() ? cb->as_nmethod()->method()->external_name()
- : cb->name();
- fs.print_cr(INTPTR_FORMAT " " INTPTR_FORMAT " %s",
+ const char* method_name = nullptr;
+ const char* jvmci_name = nullptr;
+ if (cb->is_nmethod()) {
+ nmethod* nm = cb->as_nmethod();
+ method_name = nm->method()->external_name();
+#if INCLUDE_JVMCI
+ jvmci_name = nm->jvmci_name();
+#endif
+ } else {
+ method_name = cb->name();
+ }
+ fs.print_cr(INTPTR_FORMAT " " INTPTR_FORMAT " %s%s%s",
(intptr_t)cb->code_begin(), (intptr_t)cb->code_size(),
- method_name);
+ method_name, jvmci_name ? " jvmci_name=" : "", jvmci_name ? jvmci_name : "");
}
}
#endif // LINUX
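
A minimal standalone sketch of the formatting idiom used above for the optional JVMCI name: two conditional "%s" arguments keep the format string constant and emit nothing when the name is absent. print_entry is an illustrative helper, not HotSpot code.

#include <cstdio>

static void print_entry(const char* method_name, const char* jvmci_name) {
  printf("%s%s%s\n",
         method_name,
         jvmci_name != nullptr ? " jvmci_name=" : "",
         jvmci_name != nullptr ? jvmci_name : "");
}

// print_entry("Foo::bar", nullptr)   prints "Foo::bar"
// print_entry("Foo::bar", "MyStub")  prints "Foo::bar jvmci_name=MyStub"
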
diff --git a/src/hotspot/share/code/codeHeapState.cpp b/src/hotspot/share/code/codeHeapState.cpp
index 48c8410ac47fc..1ea3f258dd229 100644
--- a/src/hotspot/share/code/codeHeapState.cpp
+++ b/src/hotspot/share/code/codeHeapState.cpp
@@ -735,7 +735,16 @@ void CodeHeapState::aggregate(outputStream* out, CodeHeap* heap, size_t granular
} else {
blob_name = os::strdup(cb->name());
}
-
+#if INCLUDE_JVMCI
+ const char* jvmci_name = nm->jvmci_name();
+ if (jvmci_name != nullptr) {
+ size_t size = ::strlen(blob_name) + ::strlen(" jvmci_name=") + ::strlen(jvmci_name) + 1;
+ char* new_blob_name = (char*)os::malloc(size, mtInternal);
+ os::snprintf(new_blob_name, size, "%s jvmci_name=%s", blob_name, jvmci_name);
+ os::free((void*)blob_name);
+ blob_name = new_blob_name;
+ }
+#endif
nm_size = nm->total_size();
compile_id = nm->compile_id();
comp_lvl = (CompLevel)(nm->comp_level());
@@ -2184,6 +2193,12 @@ void CodeHeapState::print_names(outputStream* out, CodeHeap* heap) {
ast->print("%s.", classNameS);
ast->print("%s", methNameS);
ast->print("%s", methSigS);
+#if INCLUDE_JVMCI
+ const char* jvmci_name = nm->jvmci_name();
+ if (jvmci_name != nullptr) {
+ ast->print(" jvmci_name=%s", jvmci_name);
+ }
+#endif
} else {
ast->print("%s", blob_name);
}
diff --git a/src/hotspot/share/code/compiledIC.cpp b/src/hotspot/share/code/compiledIC.cpp
index 079c8199b1870..684aee509ee53 100644
--- a/src/hotspot/share/code/compiledIC.cpp
+++ b/src/hotspot/share/code/compiledIC.cpp
@@ -83,6 +83,7 @@ void CompiledICData::initialize(CallInfo* call_info, Klass* receiver_klass) {
_speculated_klass = (uintptr_t)receiver_klass;
}
if (call_info->call_kind() == CallInfo::itable_call) {
+ assert(call_info->resolved_method() != nullptr, "virtual or interface method must be found");
_itable_defc_klass = call_info->resolved_method()->method_holder();
_itable_refc_klass = call_info->resolved_klass();
}
@@ -238,6 +239,7 @@ void CompiledIC::set_to_megamorphic(CallInfo* call_info) {
return;
}
#ifdef ASSERT
+ assert(call_info->resolved_method() != nullptr, "virtual or interface method must be found");
int index = call_info->resolved_method()->itable_index();
assert(index == itable_index, "CallInfo pre-computes this");
InstanceKlass* k = call_info->resolved_method()->method_holder();
@@ -254,6 +256,7 @@ void CompiledIC::set_to_megamorphic(CallInfo* call_info) {
}
}
+ assert(call_info->selected_method() != nullptr, "virtual or interface method must be found");
log_trace(inlinecache)("IC@" INTPTR_FORMAT ": to megamorphic %s entry: " INTPTR_FORMAT,
p2i(_call->instruction_address()), call_info->selected_method()->print_value_string(), p2i(entry));
@@ -290,7 +293,7 @@ bool CompiledIC::is_monomorphic() const {
}
bool CompiledIC::is_megamorphic() const {
- return VtableStubs::entry_point(destination()) != nullptr;;
+ return VtableStubs::entry_point(destination()) != nullptr;
}
bool CompiledIC::is_speculated_klass(Klass* receiver_klass) {
diff --git a/src/hotspot/share/code/dependencyContext.cpp b/src/hotspot/share/code/dependencyContext.cpp
index d7ce8e92acf37..0e6b99d172dcb 100644
--- a/src/hotspot/share/code/dependencyContext.cpp
+++ b/src/hotspot/share/code/dependencyContext.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2015, 2023, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2015, 2024, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -227,6 +227,10 @@ void DependencyContext::remove_and_mark_for_deoptimization_all_dependents(Deopti
}
#ifndef PRODUCT
+bool DependencyContext::is_empty() {
+ return dependencies() == nullptr;
+}
+
void DependencyContext::print_dependent_nmethods(bool verbose) {
int idx = 0;
for (nmethodBucket* b = dependencies_not_unloading(); b != nullptr; b = b->next_not_unloading()) {
diff --git a/src/hotspot/share/code/dependencyContext.hpp b/src/hotspot/share/code/dependencyContext.hpp
index e8d2ac41d0d1d..13b845cb59dde 100644
--- a/src/hotspot/share/code/dependencyContext.hpp
+++ b/src/hotspot/share/code/dependencyContext.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2015, 2023, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2015, 2024, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -124,6 +124,7 @@ class DependencyContext : public StackObj {
#ifndef PRODUCT
void print_dependent_nmethods(bool verbose);
+ bool is_empty();
#endif //PRODUCT
bool is_dependent_nmethod(nmethod* nm);
};
diff --git a/src/hotspot/share/compiler/compilerDefinitions.cpp b/src/hotspot/share/compiler/compilerDefinitions.cpp
index ee0c73254f180..7b091d8ade50c 100644
--- a/src/hotspot/share/compiler/compilerDefinitions.cpp
+++ b/src/hotspot/share/compiler/compilerDefinitions.cpp
@@ -497,11 +497,6 @@ bool CompilerConfig::check_args_consistency(bool status) {
"Invalid NonNMethodCodeHeapSize=%dK. Must be at least %uK.\n", NonNMethodCodeHeapSize/K,
min_code_cache_size/K);
status = false;
- } else if (InlineCacheBufferSize > NonNMethodCodeHeapSize / 2) {
- jio_fprintf(defaultStream::error_stream(),
- "Invalid InlineCacheBufferSize=" SIZE_FORMAT "K. Must be less than or equal to " SIZE_FORMAT "K.\n",
- InlineCacheBufferSize/K, NonNMethodCodeHeapSize/2/K);
- status = false;
}
#ifdef _LP64
diff --git a/src/hotspot/share/compiler/oopMap.cpp b/src/hotspot/share/compiler/oopMap.cpp
index 09b9feee3db57..376057aa72e25 100644
--- a/src/hotspot/share/compiler/oopMap.cpp
+++ b/src/hotspot/share/compiler/oopMap.cpp
@@ -246,10 +246,13 @@ class OopMapSort {
};
void OopMapSort::sort() {
+#ifdef ASSERT
for (OopMapStream oms(_map); !oms.is_done(); oms.next()) {
OopMapValue omv = oms.current();
- assert(omv.type() == OopMapValue::oop_value || omv.type() == OopMapValue::narrowoop_value || omv.type() == OopMapValue::derived_oop_value || omv.type() == OopMapValue::callee_saved_value, "");
+ assert(omv.type() == OopMapValue::oop_value || omv.type() == OopMapValue::narrowoop_value ||
+ omv.type() == OopMapValue::derived_oop_value || omv.type() == OopMapValue::callee_saved_value, "");
}
+#endif
for (OopMapStream oms(_map); !oms.is_done(); oms.next()) {
if (oms.current().type() == OopMapValue::callee_saved_value) {
diff --git a/src/hotspot/share/compiler/oopMap.inline.hpp b/src/hotspot/share/compiler/oopMap.inline.hpp
index f2a3b3ba834df..05ef53f823142 100644
--- a/src/hotspot/share/compiler/oopMap.inline.hpp
+++ b/src/hotspot/share/compiler/oopMap.inline.hpp
@@ -66,12 +66,10 @@ void OopMapDo<OopFnT, DerivedOopFnT, SkipNullValue>::iterate_oops_do(const frame
continue;
#ifndef COMPILER2
- COMPILER1_PRESENT(ShouldNotReachHere();)
#if INCLUDE_JVMCI
- if (UseJVMCICompiler) {
- ShouldNotReachHere();
- }
+ if (!EnableJVMCI)
#endif
+ ShouldNotReachHere();
#endif // !COMPILER2
address loc = fr->oopmapreg_to_location(omv.reg(), reg_map);
diff --git a/src/hotspot/share/gc/g1/c1/g1BarrierSetC1.cpp b/src/hotspot/share/gc/g1/c1/g1BarrierSetC1.cpp
index f82b5cfcc556a..449ff2e4acf8b 100644
--- a/src/hotspot/share/gc/g1/c1/g1BarrierSetC1.cpp
+++ b/src/hotspot/share/gc/g1/c1/g1BarrierSetC1.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018, 2023, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2018, 2024, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -219,8 +219,8 @@ class C1G1PostBarrierCodeGenClosure : public StubAssemblerCodeGenClosure {
void G1BarrierSetC1::generate_c1_runtime_stubs(BufferBlob* buffer_blob) {
C1G1PreBarrierCodeGenClosure pre_code_gen_cl;
C1G1PostBarrierCodeGenClosure post_code_gen_cl;
- _pre_barrier_c1_runtime_code_blob = Runtime1::generate_blob(buffer_blob, -1, "g1_pre_barrier_slow",
+ _pre_barrier_c1_runtime_code_blob = Runtime1::generate_blob(buffer_blob, C1StubId::NO_STUBID, "g1_pre_barrier_slow",
false, &pre_code_gen_cl);
- _post_barrier_c1_runtime_code_blob = Runtime1::generate_blob(buffer_blob, -1, "g1_post_barrier_slow",
+ _post_barrier_c1_runtime_code_blob = Runtime1::generate_blob(buffer_blob, C1StubId::NO_STUBID, "g1_post_barrier_slow",
false, &post_code_gen_cl);
}
diff --git a/src/hotspot/share/gc/g1/c2/g1BarrierSetC2.cpp b/src/hotspot/share/gc/g1/c2/g1BarrierSetC2.cpp
index 13b993546cde4..8e17d1d2a7a4e 100644
--- a/src/hotspot/share/gc/g1/c2/g1BarrierSetC2.cpp
+++ b/src/hotspot/share/gc/g1/c2/g1BarrierSetC2.cpp
@@ -24,49 +24,32 @@
#include "precompiled.hpp"
#include "classfile/javaClasses.hpp"
+#include "code/vmreg.inline.hpp"
#include "gc/g1/c2/g1BarrierSetC2.hpp"
#include "gc/g1/g1BarrierSet.hpp"
+#include "gc/g1/g1BarrierSetAssembler.hpp"
#include "gc/g1/g1BarrierSetRuntime.hpp"
#include "gc/g1/g1CardTable.hpp"
#include "gc/g1/g1ThreadLocalData.hpp"
#include "gc/g1/g1HeapRegion.hpp"
#include "opto/arraycopynode.hpp"
+#include "opto/block.hpp"
#include "opto/compile.hpp"
#include "opto/escape.hpp"
#include "opto/graphKit.hpp"
#include "opto/idealKit.hpp"
+#include "opto/machnode.hpp"
#include "opto/macro.hpp"
+#include "opto/memnode.hpp"
+#include "opto/node.hpp"
+#include "opto/output.hpp"
+#include "opto/regalloc.hpp"
#include "opto/rootnode.hpp"
+#include "opto/runtime.hpp"
#include "opto/type.hpp"
+#include "utilities/growableArray.hpp"
#include "utilities/macros.hpp"
-const TypeFunc *G1BarrierSetC2::write_ref_field_pre_entry_Type() {
- const Type **fields = TypeTuple::fields(2);
- fields[TypeFunc::Parms+0] = TypeInstPtr::NOTNULL; // original field value
- fields[TypeFunc::Parms+1] = TypeRawPtr::NOTNULL; // thread
- const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms+2, fields);
-
- // create result type (range)
- fields = TypeTuple::fields(0);
- const TypeTuple *range = TypeTuple::make(TypeFunc::Parms+0, fields);
-
- return TypeFunc::make(domain, range);
-}
-
-const TypeFunc *G1BarrierSetC2::write_ref_field_post_entry_Type() {
- const Type **fields = TypeTuple::fields(2);
- fields[TypeFunc::Parms+0] = TypeRawPtr::NOTNULL; // Card addr
- fields[TypeFunc::Parms+1] = TypeRawPtr::NOTNULL; // thread
- const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms+2, fields);
-
- // create result type (range)
- fields = TypeTuple::fields(0);
- const TypeTuple *range = TypeTuple::make(TypeFunc::Parms, fields);
-
- return TypeFunc::make(domain, range);
-}
-
-#define __ ideal.
/*
* Determine if the G1 pre-barrier can be removed. The pre-barrier is
* required by SATB to make sure all objects live at the start of the
@@ -84,8 +67,6 @@ const TypeFunc *G1BarrierSetC2::write_ref_field_post_entry_Type() {
* The compiler needs to determine that the object in which a field is about
* to be written is newly allocated, and that no prior store to the same field
* has happened since the allocation.
- *
- * Returns true if the pre-barrier can be removed
*/
bool G1BarrierSetC2::g1_can_remove_pre_barrier(GraphKit* kit,
PhaseValues* phase,
@@ -97,34 +78,28 @@ bool G1BarrierSetC2::g1_can_remove_pre_barrier(GraphKit* kit,
AllocateNode* alloc = AllocateNode::Ideal_allocation(base);
if (offset == Type::OffsetBot) {
- return false; // cannot unalias unless there are precise offsets
+ return false; // Cannot unalias unless there are precise offsets.
}
-
if (alloc == nullptr) {
- return false; // No allocation found
+ return false; // No allocation found.
}
intptr_t size_in_bytes = type2aelembytes(bt);
-
- Node* mem = kit->memory(adr_idx); // start searching here...
+ Node* mem = kit->memory(adr_idx); // Start searching here.
for (int cnt = 0; cnt < 50; cnt++) {
-
if (mem->is_Store()) {
-
Node* st_adr = mem->in(MemNode::Address);
intptr_t st_offset = 0;
Node* st_base = AddPNode::Ideal_base_and_offset(st_adr, phase, st_offset);
if (st_base == nullptr) {
- break; // inscrutable pointer
+ break; // Inscrutable pointer.
}
-
- // Break we have found a store with same base and offset as ours so break
if (st_base == base && st_offset == offset) {
+ // We have found a store with same base and offset as ours.
break;
}
-
if (st_offset != offset && st_offset != Type::OffsetBot) {
const int MAX_STORE = BytesPerLong;
if (st_offset >= offset + size_in_bytes ||
@@ -136,20 +111,18 @@ bool G1BarrierSetC2::g1_can_remove_pre_barrier(GraphKit* kit,
// in the same sequence of RawMem effects. We sometimes initialize
// a whole 'tile' of array elements with a single jint or jlong.)
mem = mem->in(MemNode::Memory);
- continue; // advance through independent store memory
+ continue; // Advance through independent store memory.
}
}
-
if (st_base != base
&& MemNode::detect_ptr_independence(base, alloc, st_base,
AllocateNode::Ideal_allocation(st_base),
phase)) {
- // Success: The bases are provably independent.
+ // Success: the bases are provably independent.
mem = mem->in(MemNode::Memory);
- continue; // advance through independent store memory
+ continue; // Advance through independent store memory.
}
} else if (mem->is_Proj() && mem->in(0)->is_Initialize()) {
-
InitializeNode* st_init = mem->in(0)->as_Initialize();
AllocateNode* st_alloc = st_init->allocation();
@@ -157,7 +130,7 @@ bool G1BarrierSetC2::g1_can_remove_pre_barrier(GraphKit* kit,
// The alloc variable is guaranteed to not be null here from earlier check.
if (alloc == st_alloc) {
// Check that the initialization is storing null so that no previous store
- // has been moved up and directly write a reference
+ // has been moved up and directly write a reference.
Node* captured_store = st_init->find_captured_store(offset,
type2aelembytes(T_OBJECT),
phase);
@@ -166,164 +139,55 @@ bool G1BarrierSetC2::g1_can_remove_pre_barrier(GraphKit* kit,
}
}
}
-
// Unless there is an explicit 'continue', we must bail out here,
// because 'mem' is an inscrutable memory state (e.g., a call).
break;
}
-
return false;
}
-// G1 pre/post barriers
-void G1BarrierSetC2::pre_barrier(GraphKit* kit,
- bool do_load,
- Node* ctl,
- Node* obj,
- Node* adr,
- uint alias_idx,
- Node* val,
- const TypeOopPtr* val_type,
- Node* pre_val,
- BasicType bt) const {
- // Some sanity checks
- // Note: val is unused in this routine.
-
- if (do_load) {
- // We need to generate the load of the previous value
- assert(obj != nullptr, "must have a base");
- assert(adr != nullptr, "where are loading from?");
- assert(pre_val == nullptr, "loaded already?");
- assert(val_type != nullptr, "need a type");
-
- if (use_ReduceInitialCardMarks()
- && g1_can_remove_pre_barrier(kit, &kit->gvn(), adr, bt, alias_idx)) {
- return;
- }
-
- } else {
- // In this case both val_type and alias_idx are unused.
- assert(pre_val != nullptr, "must be loaded already");
- // Nothing to be done if pre_val is null.
- if (pre_val->bottom_type() == TypePtr::NULL_PTR) return;
- assert(pre_val->bottom_type()->basic_type() == T_OBJECT, "or we shouldn't be here");
- }
- assert(bt == T_OBJECT, "or we shouldn't be here");
-
- IdealKit ideal(kit, true);
-
- Node* tls = __ thread(); // ThreadLocalStorage
-
- Node* no_base = __ top();
- Node* zero = __ ConI(0);
- Node* zeroX = __ ConX(0);
-
- float likely = PROB_LIKELY(0.999);
- float unlikely = PROB_UNLIKELY(0.999);
-
- BasicType active_type = in_bytes(SATBMarkQueue::byte_width_of_active()) == 4 ? T_INT : T_BYTE;
- assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 4 || in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "flag width");
-
- // Offsets into the thread
- const int marking_offset = in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset());
- const int index_offset = in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset());
- const int buffer_offset = in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset());
-
- // Now the actual pointers into the thread
- Node* marking_adr = __ AddP(no_base, tls, __ ConX(marking_offset));
- Node* buffer_adr = __ AddP(no_base, tls, __ ConX(buffer_offset));
- Node* index_adr = __ AddP(no_base, tls, __ ConX(index_offset));
-
- // Now some of the values
- Node* marking = __ load(__ ctrl(), marking_adr, TypeInt::INT, active_type, Compile::AliasIdxRaw);
-
- // if (!marking)
- __ if_then(marking, BoolTest::ne, zero, unlikely); {
- BasicType index_bt = TypeX_X->basic_type();
- assert(sizeof(size_t) == type2aelembytes(index_bt), "Loading G1 SATBMarkQueue::_index with wrong size.");
- Node* index = __ load(__ ctrl(), index_adr, TypeX_X, index_bt, Compile::AliasIdxRaw);
-
- if (do_load) {
- // load original value
- pre_val = __ load(__ ctrl(), adr, val_type, bt, alias_idx, false, MemNode::unordered, LoadNode::Pinned);
- }
-
- // if (pre_val != nullptr)
- __ if_then(pre_val, BoolTest::ne, kit->null()); {
- Node* buffer = __ load(__ ctrl(), buffer_adr, TypeRawPtr::NOTNULL, T_ADDRESS, Compile::AliasIdxRaw);
-
- // is the queue for this thread full?
- __ if_then(index, BoolTest::ne, zeroX, likely); {
-
- // decrement the index
- Node* next_index = kit->gvn().transform(new SubXNode(index, __ ConX(sizeof(intptr_t))));
-
- // Now get the buffer location we will log the previous value into and store it
- Node *log_addr = __ AddP(no_base, buffer, next_index);
- __ store(__ ctrl(), log_addr, pre_val, T_OBJECT, Compile::AliasIdxRaw, MemNode::unordered);
- // update the index
- __ store(__ ctrl(), index_adr, next_index, index_bt, Compile::AliasIdxRaw, MemNode::unordered);
-
- } __ else_(); {
-
- // logging buffer is full, call the runtime
- const TypeFunc *tf = write_ref_field_pre_entry_Type();
- __ make_leaf_call(tf, CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), "write_ref_field_pre_entry", pre_val, tls);
- } __ end_if(); // (!index)
- } __ end_if(); // (pre_val != nullptr)
- } __ end_if(); // (!marking)
-
- // Final sync IdealKit and GraphKit.
- kit->final_sync(ideal);
-}
-
/*
- * G1 similar to any GC with a Young Generation requires a way to keep track of
- * references from Old Generation to Young Generation to make sure all live
+ * G1, similar to any GC with a Young Generation, requires a way to keep track
+ * of references from Old Generation to Young Generation to make sure all live
* objects are found. G1 also requires to keep track of object references
* between different regions to enable evacuation of old regions, which is done
- * as part of mixed collections. References are tracked in remembered sets and
- * is continuously updated as reference are written to with the help of the
- * post-barrier.
+ * as part of mixed collections. References are tracked in remembered sets,
+ * which are continuously updated as references are written to with the help of
+ * the post-barrier.
*
- * To reduce the number of updates to the remembered set the post-barrier
- * filters updates to fields in objects located in the Young Generation,
- * the same region as the reference, when the null is being written or
- * if the card is already marked as dirty by an earlier write.
+ * To reduce the number of updates to the remembered set, the post-barrier
+ * filters out updates to fields in objects located in the Young Generation, the
+ * same region as the reference, when null is being written, or if the card is
+ * already marked as dirty by an earlier write.
*
* Under certain circumstances it is possible to avoid generating the
- * post-barrier completely if it is possible during compile time to prove
- * the object is newly allocated and that no safepoint exists between the
- * allocation and the store.
- *
- * In the case of slow allocation the allocation code must handle the barrier
- * as part of the allocation in the case the allocated object is not located
- * in the nursery; this would happen for humongous objects.
+ * post-barrier completely, if it is possible during compile time to prove the
+ * object is newly allocated and that no safepoint exists between the allocation
+ * and the store. This can be seen as a compile-time version of the
+ * above-mentioned Young Generation filter.
*
- * Returns true if the post barrier can be removed
+ * In the case of a slow allocation, the allocation code must handle the barrier
+ * as part of the allocation if the allocated object is not located in the
+ * nursery; this would happen for humongous objects.
*/
bool G1BarrierSetC2::g1_can_remove_post_barrier(GraphKit* kit,
- PhaseValues* phase, Node* store,
+ PhaseValues* phase, Node* store_ctrl,
Node* adr) const {
intptr_t offset = 0;
Node* base = AddPNode::Ideal_base_and_offset(adr, phase, offset);
AllocateNode* alloc = AllocateNode::Ideal_allocation(base);
if (offset == Type::OffsetBot) {
- return false; // cannot unalias unless there are precise offsets
+ return false; // Cannot unalias unless there are precise offsets.
}
-
if (alloc == nullptr) {
- return false; // No allocation found
+ return false; // No allocation found.
}
- // Start search from Store node
- Node* mem = store->in(MemNode::Control);
+ Node* mem = store_ctrl; // Start search from Store node.
if (mem->is_Proj() && mem->in(0)->is_Initialize()) {
-
InitializeNode* st_init = mem->in(0)->as_Initialize();
AllocateNode* st_alloc = st_init->allocation();
-
// Make sure we are looking at the same allocation
if (alloc == st_alloc) {
return true;
@@ -333,725 +197,361 @@ bool G1BarrierSetC2::g1_can_remove_post_barrier(GraphKit* kit,
return false;
}
-//
-// Update the card table and add card address to the queue
-//
-void G1BarrierSetC2::g1_mark_card(GraphKit* kit,
- IdealKit& ideal,
- Node* card_adr,
- Node* oop_store,
- uint oop_alias_idx,
- Node* index,
- Node* index_adr,
- Node* buffer,
- const TypeFunc* tf) const {
- Node* zero = __ ConI(0);
- Node* zeroX = __ ConX(0);
- Node* no_base = __ top();
- BasicType card_bt = T_BYTE;
- // Smash zero into card. MUST BE ORDERED WRT TO STORE
- __ storeCM(__ ctrl(), card_adr, zero, oop_store, oop_alias_idx, card_bt, Compile::AliasIdxRaw);
-
- // Now do the queue work
- __ if_then(index, BoolTest::ne, zeroX); {
-
- Node* next_index = kit->gvn().transform(new SubXNode(index, __ ConX(sizeof(intptr_t))));
- Node* log_addr = __ AddP(no_base, buffer, next_index);
-
- // Order, see storeCM.
- __ store(__ ctrl(), log_addr, card_adr, T_ADDRESS, Compile::AliasIdxRaw, MemNode::unordered);
- __ store(__ ctrl(), index_adr, next_index, TypeX_X->basic_type(), Compile::AliasIdxRaw, MemNode::unordered);
-
- } __ else_(); {
- __ make_leaf_call(tf, CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), "write_ref_field_post_entry", card_adr, __ thread());
- } __ end_if();
-
+Node* G1BarrierSetC2::load_at_resolved(C2Access& access, const Type* val_type) const {
+ DecoratorSet decorators = access.decorators();
+ bool on_weak = (decorators & ON_WEAK_OOP_REF) != 0;
+ bool on_phantom = (decorators & ON_PHANTOM_OOP_REF) != 0;
+ bool no_keepalive = (decorators & AS_NO_KEEPALIVE) != 0;
+ // If we are reading the value of the referent field of a Reference object, we
+ // need to record the referent in an SATB log buffer using the pre-barrier
+ // mechanism. Also we need to add a memory barrier to prevent commoning reads
+ // from this field across safepoints, since GC can change its value.
+ bool need_read_barrier = ((on_weak || on_phantom) && !no_keepalive);
+ if (access.is_oop() && need_read_barrier) {
+ access.set_barrier_data(G1C2BarrierPre);
+ }
+ return CardTableBarrierSetC2::load_at_resolved(access, val_type);
}
-void G1BarrierSetC2::post_barrier(GraphKit* kit,
- Node* ctl,
- Node* oop_store,
- Node* obj,
- Node* adr,
- uint alias_idx,
- Node* val,
- BasicType bt,
- bool use_precise) const {
- // If we are writing a null then we need no post barrier
+void G1BarrierSetC2::eliminate_gc_barrier(PhaseMacroExpand* macro, Node* node) const {
+ eliminate_gc_barrier_data(node);
+}
- if (val != nullptr && val->is_Con() && val->bottom_type() == TypePtr::NULL_PTR) {
- // Must be null
- const Type* t = val->bottom_type();
- assert(t == Type::TOP || t == TypePtr::NULL_PTR, "must be null");
- // No post barrier if writing null
- return;
+void G1BarrierSetC2::eliminate_gc_barrier_data(Node* node) const {
+ if (node->is_LoadStore()) {
+ LoadStoreNode* loadstore = node->as_LoadStore();
+ loadstore->set_barrier_data(0);
+ } else if (node->is_Mem()) {
+ MemNode* mem = node->as_Mem();
+ mem->set_barrier_data(0);
}
+}
- if (use_ReduceInitialCardMarks() && obj == kit->just_allocated_object(kit->control())) {
- // We can skip marks on a freshly-allocated object in Eden.
- // Keep this code in sync with CardTableBarrierSet::on_slowpath_allocation_exit.
- // That routine informs GC to take appropriate compensating steps,
- // upon a slow-path allocation, so as to make this card-mark
- // elision safe.
+static void refine_barrier_by_new_val_type(const Node* n) {
+ if (n->Opcode() != Op_StoreP &&
+ n->Opcode() != Op_StoreN) {
return;
}
-
- if (use_ReduceInitialCardMarks()
- && g1_can_remove_post_barrier(kit, &kit->gvn(), oop_store, adr)) {
+ MemNode* store = n->as_Mem();
+ const Node* newval = n->in(MemNode::ValueIn);
+ assert(newval != nullptr, "");
+ const Type* newval_bottom = newval->bottom_type();
+ TypePtr::PTR newval_type = newval_bottom->make_ptr()->ptr();
+ uint8_t barrier_data = store->barrier_data();
+ if (!newval_bottom->isa_oopptr() &&
+ !newval_bottom->isa_narrowoop() &&
+ newval_type != TypePtr::Null) {
+ // newval is neither an OOP nor null, so there is no barrier to refine.
+ assert(barrier_data == 0, "non-OOP stores should have no barrier data");
return;
}
-
- if (!use_precise) {
- // All card marks for a (non-array) instance are in one place:
- adr = obj;
+ if (barrier_data == 0) {
+ // No barrier to refine.
+ return;
}
- // (Else it's an array (or unknown), and we want more precise card marks.)
- assert(adr != nullptr, "");
-
- IdealKit ideal(kit, true);
-
- Node* tls = __ thread(); // ThreadLocalStorage
-
- Node* no_base = __ top();
- float likely = PROB_LIKELY_MAG(3);
- float unlikely = PROB_UNLIKELY_MAG(3);
- Node* young_card = __ ConI((jint)G1CardTable::g1_young_card_val());
- Node* dirty_card = __ ConI((jint)G1CardTable::dirty_card_val());
- Node* zeroX = __ ConX(0);
-
- const TypeFunc *tf = write_ref_field_post_entry_Type();
-
- // Offsets into the thread
- const int index_offset = in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset());
- const int buffer_offset = in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset());
-
- // Pointers into the thread
-
- Node* buffer_adr = __ AddP(no_base, tls, __ ConX(buffer_offset));
- Node* index_adr = __ AddP(no_base, tls, __ ConX(index_offset));
-
- // Now some values
- // Use ctrl to avoid hoisting these values past a safepoint, which could
- // potentially reset these fields in the JavaThread.
- Node* index = __ load(__ ctrl(), index_adr, TypeX_X, TypeX_X->basic_type(), Compile::AliasIdxRaw);
- Node* buffer = __ load(__ ctrl(), buffer_adr, TypeRawPtr::NOTNULL, T_ADDRESS, Compile::AliasIdxRaw);
-
- // Convert the store obj pointer to an int prior to doing math on it
- // Must use ctrl to prevent "integerized oop" existing across safepoint
- Node* cast = __ CastPX(__ ctrl(), adr);
-
- // Divide pointer by card size
- Node* card_offset = __ URShiftX( cast, __ ConI(CardTable::card_shift()) );
-
- // Combine card table base and card offset
- Node* card_adr = __ AddP(no_base, byte_map_base_node(kit), card_offset );
-
- // If we know the value being stored does it cross regions?
-
- if (val != nullptr) {
- // Does the store cause us to cross regions?
-
- // Should be able to do an unsigned compare of region_size instead of
- // and extra shift. Do we have an unsigned compare??
- // Node* region_size = __ ConI(1 << G1HeapRegion::LogOfHRGrainBytes);
- Node* xor_res = __ URShiftX ( __ XorX( cast, __ CastPX(__ ctrl(), val)), __ ConI(checked_cast<jint>(G1HeapRegion::LogOfHRGrainBytes)));
-
- // if (xor_res == 0) same region so skip
- __ if_then(xor_res, BoolTest::ne, zeroX, likely); {
-
- // No barrier if we are storing a null.
- __ if_then(val, BoolTest::ne, kit->null(), likely); {
-
- // Ok must mark the card if not already dirty
-
- // load the original value of the card
- Node* card_val = __ load(__ ctrl(), card_adr, TypeInt::INT, T_BYTE, Compile::AliasIdxRaw);
-
- __ if_then(card_val, BoolTest::ne, young_card, unlikely); {
- kit->sync_kit(ideal);
- kit->insert_mem_bar(Op_MemBarVolatile, oop_store);
- __ sync_kit(kit);
-
- Node* card_val_reload = __ load(__ ctrl(), card_adr, TypeInt::INT, T_BYTE, Compile::AliasIdxRaw);
- __ if_then(card_val_reload, BoolTest::ne, dirty_card); {
- g1_mark_card(kit, ideal, card_adr, oop_store, alias_idx, index, index_adr, buffer, tf);
- } __ end_if();
- } __ end_if();
- } __ end_if();
- } __ end_if();
- } else {
- // The Object.clone() intrinsic uses this path if !ReduceInitialCardMarks.
- // We don't need a barrier here if the destination is a newly allocated object
- // in Eden. Otherwise, GC verification breaks because we assume that cards in Eden
- // are set to 'g1_young_gen' (see G1CardTable::verify_g1_young_region()).
- assert(!use_ReduceInitialCardMarks(), "can only happen with card marking");
- Node* card_val = __ load(__ ctrl(), card_adr, TypeInt::INT, T_BYTE, Compile::AliasIdxRaw);
- __ if_then(card_val, BoolTest::ne, young_card); {
- g1_mark_card(kit, ideal, card_adr, oop_store, alias_idx, index, index_adr, buffer, tf);
- } __ end_if();
+ if (newval_type == TypePtr::Null) {
+ // Simply elide post-barrier if writing null.
+ barrier_data &= ~G1C2BarrierPost;
+ barrier_data &= ~G1C2BarrierPostNotNull;
+ } else if (((barrier_data & G1C2BarrierPost) != 0) &&
+ newval_type == TypePtr::NotNull) {
+ // If the post-barrier has not been elided yet (e.g. due to newval being
+ // freshly allocated), mark it as not-null (simplifies barrier tests and
+ // compressed OOPs logic).
+ barrier_data |= G1C2BarrierPostNotNull;
}
-
- // Final sync IdealKit and GraphKit.
- kit->final_sync(ideal);
+ store->set_barrier_data(barrier_data);
+ return;
}
-// Helper that guards and inserts a pre-barrier.
-void G1BarrierSetC2::insert_pre_barrier(GraphKit* kit, Node* base_oop, Node* offset,
- Node* pre_val, bool need_mem_bar) const {
- // We could be accessing the referent field of a reference object. If so, when G1
- // is enabled, we need to log the value in the referent field in an SATB buffer.
- // This routine performs some compile time filters and generates suitable
- // runtime filters that guard the pre-barrier code.
- // Also add memory barrier for non volatile load from the referent field
- // to prevent commoning of loads across safepoint.
-
- // Some compile time checks.
-
- // If offset is a constant, is it java_lang_ref_Reference::_reference_offset?
- const TypeX* otype = offset->find_intptr_t_type();
- if (otype != nullptr && otype->is_con() &&
- otype->get_con() != java_lang_ref_Reference::referent_offset()) {
- // Constant offset but not the reference_offset so just return
- return;
- }
-
- // We only need to generate the runtime guards for instances.
- const TypeOopPtr* btype = base_oop->bottom_type()->isa_oopptr();
- if (btype != nullptr) {
- if (btype->isa_aryptr()) {
- // Array type so nothing to do
- return;
+// Refine (not really expand) G1 barriers by looking at the new value type
+// (whether it is necessarily null or necessarily non-null).
+bool G1BarrierSetC2::expand_barriers(Compile* C, PhaseIterGVN& igvn) const {
+ ResourceMark rm;
+ VectorSet visited;
+ Node_List worklist;
+ worklist.push(C->root());
+ while (worklist.size() > 0) {
+ Node* n = worklist.pop();
+ if (visited.test_set(n->_idx)) {
+ continue;
}
-
- const TypeInstPtr* itype = btype->isa_instptr();
- if (itype != nullptr) {
- // Can the klass of base_oop be statically determined to be
- // _not_ a sub-class of Reference and _not_ Object?
- ciKlass* klass = itype->instance_klass();
- if (klass->is_loaded() &&
- !klass->is_subtype_of(kit->env()->Reference_klass()) &&
- !kit->env()->Object_klass()->is_subtype_of(klass)) {
- return;
+ refine_barrier_by_new_val_type(n);
+ for (uint j = 0; j < n->req(); j++) {
+ Node* in = n->in(j);
+ if (in != nullptr) {
+ worklist.push(in);
}
}
}
+ return false;
+}
- // The compile time filters did not reject base_oop/offset so
- // we need to generate the following runtime filters
- //
- // if (offset == java_lang_ref_Reference::_reference_offset) {
- // if (instance_of(base, java.lang.ref.Reference)) {
- // pre_barrier(_, pre_val, ...);
+uint G1BarrierSetC2::estimated_barrier_size(const Node* node) const {
+ // These Ideal node counts are extracted from the pre-matching Ideal graph
+ // generated when compiling the following method with early barrier expansion:
+ // static void write(MyObject obj1, Object o) {
+ // obj1.o1 = o;
// }
- // }
-
- float likely = PROB_LIKELY( 0.999);
- float unlikely = PROB_UNLIKELY(0.999);
-
- IdealKit ideal(kit);
-
- Node* referent_off = __ ConX(java_lang_ref_Reference::referent_offset());
-
- __ if_then(offset, BoolTest::eq, referent_off, unlikely); {
- // Update graphKit memory and control from IdealKit.
- kit->sync_kit(ideal);
-
- Node* ref_klass_con = kit->makecon(TypeKlassPtr::make(kit->env()->Reference_klass()));
- Node* is_instof = kit->gen_instanceof(base_oop, ref_klass_con);
-
- // Update IdealKit memory and control from graphKit.
- __ sync_kit(kit);
-
- Node* one = __ ConI(1);
- // is_instof == 0 if base_oop == nullptr
- __ if_then(is_instof, BoolTest::eq, one, unlikely); {
-
- // Update graphKit from IdeakKit.
- kit->sync_kit(ideal);
-
- // Use the pre-barrier to record the value in the referent field
- pre_barrier(kit, false /* do_load */,
- __ ctrl(),
- nullptr /* obj */, nullptr /* adr */, max_juint /* alias_idx */, nullptr /* val */, nullptr /* val_type */,
- pre_val /* pre_val */,
- T_OBJECT);
- if (need_mem_bar) {
- // Add memory barrier to prevent commoning reads from this field
- // across safepoint since GC can change its value.
- kit->insert_mem_bar(Op_MemBarCPUOrder);
- }
- // Update IdealKit from graphKit.
- __ sync_kit(kit);
-
- } __ end_if(); // _ref_type != ref_none
- } __ end_if(); // offset == referent_offset
+ uint8_t barrier_data = MemNode::barrier_data(node);
+ uint nodes = 0;
+ if ((barrier_data & G1C2BarrierPre) != 0) {
+ nodes += 50;
+ }
+ if ((barrier_data & G1C2BarrierPost) != 0) {
+ nodes += 60;
+ }
+ return nodes;
+}
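
(Aside: a worked example of the estimate above.)

    // A reference store that keeps both barriers is costed at 50 + 60 = 110 extra
    // Ideal nodes; one that only keeps the post-barrier is costed at 60. C2
    // heuristics such as loop unrolling can weigh these numbers against the size
    // of the surrounding loop body.
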
- // Final sync IdealKit and GraphKit.
- kit->final_sync(ideal);
+bool G1BarrierSetC2::can_initialize_object(const StoreNode* store) const {
+ assert(store->Opcode() == Op_StoreP || store->Opcode() == Op_StoreN, "OOP store expected");
+ // It is OK to move the store across the object initialization boundary only
+ // if it does not have any barrier, or if it has barriers that can be safely
+ // elided (because of the compensation steps taken on the allocation slow path
+ // when ReduceInitialCardMarks is enabled).
+ return (MemNode::barrier_data(store) == 0) || use_ReduceInitialCardMarks();
}
-#undef __
+void G1BarrierSetC2::clone_at_expansion(PhaseMacroExpand* phase, ArrayCopyNode* ac) const {
+ if (ac->is_clone_inst() && !use_ReduceInitialCardMarks()) {
+ clone_in_runtime(phase, ac, G1BarrierSetRuntime::clone_addr(), "G1BarrierSetRuntime::clone");
+ return;
+ }
+ BarrierSetC2::clone_at_expansion(phase, ac);
+}
-Node* G1BarrierSetC2::load_at_resolved(C2Access& access, const Type* val_type) const {
+Node* G1BarrierSetC2::store_at_resolved(C2Access& access, C2AccessValue& val) const {
DecoratorSet decorators = access.decorators();
- Node* adr = access.addr().node();
- Node* obj = access.base();
-
- bool anonymous = (decorators & C2_UNSAFE_ACCESS) != 0;
- bool mismatched = (decorators & C2_MISMATCHED) != 0;
- bool unknown = (decorators & ON_UNKNOWN_OOP_REF) != 0;
+ bool anonymous = (decorators & ON_UNKNOWN_OOP_REF) != 0;
bool in_heap = (decorators & IN_HEAP) != 0;
- bool in_native = (decorators & IN_NATIVE) != 0;
- bool on_weak = (decorators & ON_WEAK_OOP_REF) != 0;
- bool on_phantom = (decorators & ON_PHANTOM_OOP_REF) != 0;
- bool is_unordered = (decorators & MO_UNORDERED) != 0;
- bool no_keepalive = (decorators & AS_NO_KEEPALIVE) != 0;
- bool is_mixed = !in_heap && !in_native;
- bool need_cpu_mem_bar = !is_unordered || mismatched || is_mixed;
-
- Node* top = Compile::current()->top();
- Node* offset = adr->is_AddP() ? adr->in(AddPNode::Offset) : top;
-
- // If we are reading the value of the referent field of a Reference
- // object (either by using Unsafe directly or through reflection)
- // then, if G1 is enabled, we need to record the referent in an
- // SATB log buffer using the pre-barrier mechanism.
- // Also we need to add memory barrier to prevent commoning reads
- // from this field across safepoint since GC can change its value.
- bool need_read_barrier = (((on_weak || on_phantom) && !no_keepalive) ||
- (in_heap && unknown && offset != top && obj != top));
+ bool tightly_coupled_alloc = (decorators & C2_TIGHTLY_COUPLED_ALLOC) != 0;
+ bool need_store_barrier = !(tightly_coupled_alloc && use_ReduceInitialCardMarks()) && (in_heap || anonymous);
+ if (access.is_oop() && need_store_barrier) {
+ access.set_barrier_data(get_store_barrier(access));
+ if (tightly_coupled_alloc) {
+ assert(!use_ReduceInitialCardMarks(),
+ "post-barriers are only needed for tightly-coupled initialization stores when ReduceInitialCardMarks is disabled");
+ access.set_barrier_data(access.barrier_data() ^ G1C2BarrierPre);
+ }
+ }
+ return BarrierSetC2::store_at_resolved(access, val);
+}
- if (!access.is_oop() || !need_read_barrier) {
- return CardTableBarrierSetC2::load_at_resolved(access, val_type);
+Node* G1BarrierSetC2::atomic_cmpxchg_val_at_resolved(C2AtomicParseAccess& access, Node* expected_val,
+ Node* new_val, const Type* value_type) const {
+ GraphKit* kit = access.kit();
+ if (!access.is_oop()) {
+ return BarrierSetC2::atomic_cmpxchg_val_at_resolved(access, expected_val, new_val, value_type);
}
+ access.set_barrier_data(G1C2BarrierPre | G1C2BarrierPost);
+ return BarrierSetC2::atomic_cmpxchg_val_at_resolved(access, expected_val, new_val, value_type);
+}
- assert(access.is_parse_access(), "entry not supported at optimization time");
+Node* G1BarrierSetC2::atomic_cmpxchg_bool_at_resolved(C2AtomicParseAccess& access, Node* expected_val,
+ Node* new_val, const Type* value_type) const {
+ GraphKit* kit = access.kit();
+ if (!access.is_oop()) {
+ return BarrierSetC2::atomic_cmpxchg_bool_at_resolved(access, expected_val, new_val, value_type);
+ }
+ access.set_barrier_data(G1C2BarrierPre | G1C2BarrierPost);
+ return BarrierSetC2::atomic_cmpxchg_bool_at_resolved(access, expected_val, new_val, value_type);
+}
- C2ParseAccess& parse_access = static_cast<C2ParseAccess&>(access);
- GraphKit* kit = parse_access.kit();
- Node* load;
+Node* G1BarrierSetC2::atomic_xchg_at_resolved(C2AtomicParseAccess& access, Node* new_val, const Type* value_type) const {
+ GraphKit* kit = access.kit();
+ if (!access.is_oop()) {
+ return BarrierSetC2::atomic_xchg_at_resolved(access, new_val, value_type);
+ }
+ access.set_barrier_data(G1C2BarrierPre | G1C2BarrierPost);
+ return BarrierSetC2::atomic_xchg_at_resolved(access, new_val, value_type);
+}
- Node* control = kit->control();
- const TypePtr* adr_type = access.addr().type();
- MemNode::MemOrd mo = access.mem_node_mo();
- bool requires_atomic_access = (decorators & MO_UNORDERED) == 0;
- bool unaligned = (decorators & C2_UNALIGNED) != 0;
- bool unsafe = (decorators & C2_UNSAFE_ACCESS) != 0;
- // Pinned control dependency is the strictest. So it's ok to substitute it for any other.
- load = kit->make_load(control, adr, val_type, access.type(), adr_type, mo,
- LoadNode::Pinned, requires_atomic_access, unaligned, mismatched, unsafe,
- access.barrier_data());
+class G1BarrierSetC2State : public BarrierSetC2State {
+private:
+ GrowableArray<G1BarrierStubC2*>* _stubs;
+public:
+ G1BarrierSetC2State(Arena* arena)
+ : BarrierSetC2State(arena),
+ _stubs(new (arena) GrowableArray<G1BarrierStubC2*>(arena, 8, 0, nullptr)) {}
- if (on_weak || on_phantom) {
- // Use the pre-barrier to record the value in the referent field
- pre_barrier(kit, false /* do_load */,
- kit->control(),
- nullptr /* obj */, nullptr /* adr */, max_juint /* alias_idx */, nullptr /* val */, nullptr /* val_type */,
- load /* pre_val */, T_OBJECT);
- // Add memory barrier to prevent commoning reads from this field
- // across safepoint since GC can change its value.
- kit->insert_mem_bar(Op_MemBarCPUOrder);
- } else if (unknown) {
- // We do not require a mem bar inside pre_barrier if need_mem_bar
- // is set: the barriers would be emitted by us.
- insert_pre_barrier(kit, obj, offset, load, !need_cpu_mem_bar);
+ GrowableArray<G1BarrierStubC2*>* stubs() {
+ return _stubs;
}
- return load;
-}
-
-bool G1BarrierSetC2::is_gc_barrier_node(Node* node) const {
- if (CardTableBarrierSetC2::is_gc_barrier_node(node)) {
- return true;
+ bool needs_liveness_data(const MachNode* mach) const {
+ return G1PreBarrierStubC2::needs_barrier(mach) ||
+ G1PostBarrierStubC2::needs_barrier(mach);
}
- if (node->Opcode() != Op_CallLeaf) {
- return false;
- }
- CallLeafNode *call = node->as_CallLeaf();
- if (call->_name == nullptr) {
+
+ bool needs_livein_data() const {
return false;
}
+};
- return strcmp(call->_name, "write_ref_field_pre_entry") == 0 || strcmp(call->_name, "write_ref_field_post_entry") == 0;
+static G1BarrierSetC2State* barrier_set_state() {
+ return reinterpret_cast<G1BarrierSetC2State*>(Compile::current()->barrier_set_state());
}
-bool G1BarrierSetC2::is_g1_pre_val_load(Node* n) {
- if (n->is_Load() && n->as_Load()->has_pinned_control_dependency()) {
- // Make sure the only users of it are: CmpP, StoreP, and a call to write_ref_field_pre_entry
+G1BarrierStubC2::G1BarrierStubC2(const MachNode* node) : BarrierStubC2(node) {}
- // Skip possible decode
- if (n->outcnt() == 1 && n->unique_out()->is_DecodeN()) {
- n = n->unique_out();
- }
+G1PreBarrierStubC2::G1PreBarrierStubC2(const MachNode* node) : G1BarrierStubC2(node) {}
- if (n->outcnt() == 3) {
- int found = 0;
- for (SimpleDUIterator iter(n); iter.has_next(); iter.next()) {
- Node* use = iter.get();
- if (use->is_Cmp() || use->is_Store()) {
- ++found;
- } else if (use->is_CallLeaf()) {
- CallLeafNode* call = use->as_CallLeaf();
- if (strcmp(call->_name, "write_ref_field_pre_entry") == 0) {
- ++found;
- }
- }
- }
- if (found == 3) {
- return true;
- }
- }
+bool G1PreBarrierStubC2::needs_barrier(const MachNode* node) {
+ return (node->barrier_data() & G1C2BarrierPre) != 0;
+}
+
+G1PreBarrierStubC2* G1PreBarrierStubC2::create(const MachNode* node) {
+ G1PreBarrierStubC2* const stub = new (Compile::current()->comp_arena()) G1PreBarrierStubC2(node);
+ if (!Compile::current()->output()->in_scratch_emit_size()) {
+ barrier_set_state()->stubs()->append(stub);
}
- return false;
+ return stub;
}
-bool G1BarrierSetC2::is_gc_pre_barrier_node(Node *node) const {
- return is_g1_pre_val_load(node);
+void G1PreBarrierStubC2::initialize_registers(Register obj, Register pre_val, Register thread, Register tmp1, Register tmp2) {
+ _obj = obj;
+ _pre_val = pre_val;
+ _thread = thread;
+ _tmp1 = tmp1;
+ _tmp2 = tmp2;
}
-void G1BarrierSetC2::eliminate_gc_barrier(PhaseMacroExpand* macro, Node* node) const {
- if (is_g1_pre_val_load(node)) {
- macro->replace_node(node, macro->zerocon(node->as_Load()->bottom_type()->basic_type()));
- } else {
- assert(node->Opcode() == Op_CastP2X, "ConvP2XNode required");
- assert(node->outcnt() <= 2, "expects 1 or 2 users: Xor and URShift nodes");
- // It could be only one user, URShift node, in Object.clone() intrinsic
- // but the new allocation is passed to arraycopy stub and it could not
- // be scalar replaced. So we don't check the case.
+Register G1PreBarrierStubC2::obj() const {
+ return _obj;
+}
- // An other case of only one user (Xor) is when the value check for null
- // in G1 post barrier is folded after CCP so the code which used URShift
- // is removed.
+Register G1PreBarrierStubC2::pre_val() const {
+ return _pre_val;
+}
- // Take Region node before eliminating post barrier since it also
- // eliminates CastP2X node when it has only one user.
- Node* this_region = node->in(0);
- assert(this_region != nullptr, "");
+Register G1PreBarrierStubC2::thread() const {
+ return _thread;
+}
- // Remove G1 post barrier.
+Register G1PreBarrierStubC2::tmp1() const {
+ return _tmp1;
+}
+
+Register G1PreBarrierStubC2::tmp2() const {
+ return _tmp2;
+}
- // Search for CastP2X->Xor->URShift->Cmp path which
- // checks if the store done to a different from the value's region.
- // And replace Cmp with #0 (false) to collapse G1 post barrier.
- Node* xorx = node->find_out_with(Op_XorX);
- if (xorx != nullptr) {
- Node* shift = xorx->unique_out();
- Node* cmpx = shift->unique_out();
- assert(cmpx->is_Cmp() && cmpx->unique_out()->is_Bool() &&
- cmpx->unique_out()->as_Bool()->_test._test == BoolTest::ne,
- "missing region check in G1 post barrier");
- macro->replace_node(cmpx, macro->makecon(TypeInt::CC_EQ));
+void G1PreBarrierStubC2::emit_code(MacroAssembler& masm) {
+ G1BarrierSetAssembler* bs = static_cast<G1BarrierSetAssembler*>(BarrierSet::barrier_set()->barrier_set_assembler());
+ bs->generate_c2_pre_barrier_stub(&masm, this);
+}
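
(Aside: the create/initialize_registers/emit_code trio above is driven from the platform back ends, which are outside this hunk. A hypothetical driver is sketched below; generate_pre_barrier_fast_path and the concrete register choices are assumptions for illustration, not APIs introduced by this patch.)

    // Sketch of how a back end could wire up a C2 pre-barrier stub (hypothetical):
    void emit_pre_barrier(MacroAssembler* masm, const MachNode* node,
                          Register obj, Register pre_val, Register thread, Register tmp) {
      if (!G1PreBarrierStubC2::needs_barrier(node)) {
        return;                                     // barrier elided at compile time
      }
      G1PreBarrierStubC2* stub = G1PreBarrierStubC2::create(node); // registered for emit_stubs()
      stub->initialize_registers(obj, pre_val, thread, tmp);
      // Inline fast path: test the SATB "marking active" flag and branch to the
      // out-of-line stub only while marking is in progress (assumed helper).
      generate_pre_barrier_fast_path(masm, thread, *stub->entry());
      masm->bind(*stub->continuation());            // the stub jumps back here
    }
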
- // Remove G1 pre barrier.
+G1PostBarrierStubC2::G1PostBarrierStubC2(const MachNode* node) : G1BarrierStubC2(node) {}
- // Search "if (marking != 0)" check and set it to "false".
- // There is no G1 pre barrier if previous stored value is null
- // (for example, after initialization).
- if (this_region->is_Region() && this_region->req() == 3) {
- int ind = 1;
- if (!this_region->in(ind)->is_IfFalse()) {
- ind = 2;
- }
- if (this_region->in(ind)->is_IfFalse() &&
- this_region->in(ind)->in(0)->Opcode() == Op_If) {
- Node* bol = this_region->in(ind)->in(0)->in(1);
- assert(bol->is_Bool(), "");
- cmpx = bol->in(1);
- if (bol->as_Bool()->_test._test == BoolTest::ne &&
- cmpx->is_Cmp() && cmpx->in(2) == macro->intcon(0) &&
- cmpx->in(1)->is_Load()) {
- Node* adr = cmpx->in(1)->as_Load()->in(MemNode::Address);
- const int marking_offset = in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset());
- if (adr->is_AddP() && adr->in(AddPNode::Base) == macro->top() &&
- adr->in(AddPNode::Address)->Opcode() == Op_ThreadLocal &&
- adr->in(AddPNode::Offset) == macro->MakeConX(marking_offset)) {
- macro->replace_node(cmpx, macro->makecon(TypeInt::CC_EQ));
- }
- }
- }
- }
- } else {
- assert(!use_ReduceInitialCardMarks(), "can only happen with card marking");
- // This is a G1 post barrier emitted by the Object.clone() intrinsic.
- // Search for the CastP2X->URShiftX->AddP->LoadB->Cmp path which checks if the card
- // is marked as young_gen and replace the Cmp with 0 (false) to collapse the barrier.
- Node* shift = node->find_out_with(Op_URShiftX);
- assert(shift != nullptr, "missing G1 post barrier");
- Node* addp = shift->unique_out();
- Node* load = addp->find_out_with(Op_LoadB);
- assert(load != nullptr, "missing G1 post barrier");
- Node* cmpx = load->unique_out();
- assert(cmpx->is_Cmp() && cmpx->unique_out()->is_Bool() &&
- cmpx->unique_out()->as_Bool()->_test._test == BoolTest::ne,
- "missing card value check in G1 post barrier");
- macro->replace_node(cmpx, macro->makecon(TypeInt::CC_EQ));
- // There is no G1 pre barrier in this case
- }
- // Now CastP2X can be removed since it is used only on dead path
- // which currently still alive until igvn optimize it.
- assert(node->outcnt() == 0 || node->unique_out()->Opcode() == Op_URShiftX, "");
- macro->replace_node(node, macro->top());
- }
+bool G1PostBarrierStubC2::needs_barrier(const MachNode* node) {
+ return (node->barrier_data() & G1C2BarrierPost) != 0;
}
-Node* G1BarrierSetC2::step_over_gc_barrier(Node* c) const {
- if (!use_ReduceInitialCardMarks() &&
- c != nullptr && c->is_Region() && c->req() == 3) {
- for (uint i = 1; i < c->req(); i++) {
- if (c->in(i) != nullptr && c->in(i)->is_Region() &&
- c->in(i)->req() == 3) {
- Node* r = c->in(i);
- for (uint j = 1; j < r->req(); j++) {
- if (r->in(j) != nullptr && r->in(j)->is_Proj() &&
- r->in(j)->in(0) != nullptr &&
- r->in(j)->in(0)->Opcode() == Op_CallLeaf &&
- r->in(j)->in(0)->as_Call()->entry_point() == CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry)) {
- Node* call = r->in(j)->in(0);
- c = c->in(i == 1 ? 2 : 1);
- if (c != nullptr && c->Opcode() != Op_Parm) {
- c = c->in(0);
- if (c != nullptr) {
- c = c->in(0);
- assert(call->in(0) == nullptr ||
- call->in(0)->in(0) == nullptr ||
- call->in(0)->in(0)->in(0) == nullptr ||
- call->in(0)->in(0)->in(0)->in(0) == nullptr ||
- call->in(0)->in(0)->in(0)->in(0)->in(0) == nullptr ||
- c == call->in(0)->in(0)->in(0)->in(0)->in(0), "bad barrier shape");
- return c;
- }
- }
- }
- }
- }
- }
+G1PostBarrierStubC2* G1PostBarrierStubC2::create(const MachNode* node) {
+ G1PostBarrierStubC2* const stub = new (Compile::current()->comp_arena()) G1PostBarrierStubC2(node);
+ if (!Compile::current()->output()->in_scratch_emit_size()) {
+ barrier_set_state()->stubs()->append(stub);
}
- return c;
+ return stub;
}
-#ifdef ASSERT
-bool G1BarrierSetC2::has_cas_in_use_chain(Node *n) const {
- Unique_Node_List visited;
- Node_List worklist;
- worklist.push(n);
- while (worklist.size() > 0) {
- Node* x = worklist.pop();
- if (visited.member(x)) {
- continue;
- } else {
- visited.push(x);
- }
+void G1PostBarrierStubC2::initialize_registers(Register thread, Register tmp1, Register tmp2, Register tmp3) {
+ _thread = thread;
+ _tmp1 = tmp1;
+ _tmp2 = tmp2;
+ _tmp3 = tmp3;
+}
- if (x->is_LoadStore()) {
- int op = x->Opcode();
- if (op == Op_CompareAndExchangeP || op == Op_CompareAndExchangeN ||
- op == Op_CompareAndSwapP || op == Op_CompareAndSwapN ||
- op == Op_WeakCompareAndSwapP || op == Op_WeakCompareAndSwapN) {
- return true;
- }
- }
- if (!x->is_CFG()) {
- for (SimpleDUIterator iter(x); iter.has_next(); iter.next()) {
- Node* use = iter.get();
- worklist.push(use);
- }
- }
- }
- return false;
+Register G1PostBarrierStubC2::thread() const {
+ return _thread;
}
-void G1BarrierSetC2::verify_pre_load(Node* marking_if, Unique_Node_List& loads /*output*/) const {
- assert(loads.size() == 0, "Loads list should be empty");
- Node* pre_val_if = marking_if->find_out_with(Op_IfTrue)->find_out_with(Op_If);
- if (pre_val_if != nullptr) {
- Unique_Node_List visited;
- Node_List worklist;
- Node* pre_val = pre_val_if->in(1)->in(1)->in(1);
+Register G1PostBarrierStubC2::tmp1() const {
+ return _tmp1;
+}
- worklist.push(pre_val);
- while (worklist.size() > 0) {
- Node* x = worklist.pop();
- if (visited.member(x)) {
- continue;
- } else {
- visited.push(x);
- }
+Register G1PostBarrierStubC2::tmp2() const {
+ return _tmp2;
+}
- if (has_cas_in_use_chain(x)) {
- loads.clear();
- return;
- }
+Register G1PostBarrierStubC2::tmp3() const {
+ return _tmp3;
+}
- if (x->is_Con()) {
- continue;
- }
- if (x->is_EncodeP() || x->is_DecodeN()) {
- worklist.push(x->in(1));
- continue;
- }
- if (x->is_Load() || x->is_LoadStore()) {
- assert(x->in(0) != nullptr, "Pre-val load has to have a control");
- loads.push(x);
- continue;
- }
- if (x->is_Phi()) {
- for (uint i = 1; i < x->req(); i++) {
- worklist.push(x->in(i));
- }
- continue;
- }
- assert(false, "Pre-val anomaly");
- }
- }
+void G1PostBarrierStubC2::emit_code(MacroAssembler& masm) {
+ G1BarrierSetAssembler* bs = static_cast<G1BarrierSetAssembler*>(BarrierSet::barrier_set()->barrier_set_assembler());
+ bs->generate_c2_post_barrier_stub(&masm, this);
}
-void G1BarrierSetC2::verify_no_safepoints(Compile* compile, Node* marking_check_if, const Unique_Node_List& loads) const {
- if (loads.size() == 0) {
- return;
- }
+void* G1BarrierSetC2::create_barrier_state(Arena* comp_arena) const {
+ return new (comp_arena) G1BarrierSetC2State(comp_arena);
+}
- if (loads.size() == 1) { // Handle the typical situation when there a single pre-value load
- // that is dominated by the marking_check_if, that's true when the
- // barrier itself does the pre-val load.
- Node *pre_val = loads.at(0);
- if (pre_val->in(0)->in(0) == marking_check_if) { // IfTrue->If
- return;
- }
+int G1BarrierSetC2::get_store_barrier(C2Access& access) const {
+ if (!access.is_parse_access()) {
+ // Only support for eliding barriers at parse time for now.
+ return G1C2BarrierPre | G1C2BarrierPost;
}
-
- // All other cases are when pre-value loads dominate the marking check.
- Unique_Node_List controls;
- for (uint i = 0; i < loads.size(); i++) {
- Node *c = loads.at(i)->in(0);
- controls.push(c);
+ GraphKit* kit = (static_cast<C2ParseAccess&>(access)).kit();
+ Node* ctl = kit->control();
+ Node* adr = access.addr().node();
+ uint adr_idx = kit->C->get_alias_index(access.addr().type());
+ assert(adr_idx != Compile::AliasIdxTop, "use other store_to_memory factory");
+
+ bool can_remove_pre_barrier = g1_can_remove_pre_barrier(kit, &kit->gvn(), adr, access.type(), adr_idx);
+
+ // We can skip marks on a freshly-allocated object in Eden. Keep this code in
+ // sync with CardTableBarrierSet::on_slowpath_allocation_exit. That routine
+ // informs GC to take appropriate compensating steps, upon a slow-path
+ // allocation, so as to make this card-mark elision safe.
+ // The post-barrier can also be removed if null is written. This case is
+ // handled by G1BarrierSetC2::expand_barriers, which runs at the end of C2's
+ // platform-independent optimizations to exploit stronger type information.
+ bool can_remove_post_barrier = use_ReduceInitialCardMarks() &&
+ ((access.base() == kit->just_allocated_object(ctl)) ||
+ g1_can_remove_post_barrier(kit, &kit->gvn(), ctl, adr));
+
+ int barriers = 0;
+ if (!can_remove_pre_barrier) {
+ barriers |= G1C2BarrierPre;
+ }
+ if (!can_remove_post_barrier) {
+ barriers |= G1C2BarrierPost;
}
- Unique_Node_List visited;
- Unique_Node_List safepoints;
- Node_List worklist;
- uint found = 0;
+ return barriers;
+}
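
(Aside: two concrete store shapes and the barrier sets they end up with under this logic, shown as Java inside comments in the same style as the estimated_barrier_size example above; the class and field names are illustrative only.)

    // static void init(Object o) {
    //   MyObject m = new MyObject();   // tightly-coupled allocation
    //   m.f = o;                       // with ReduceInitialCardMarks: no pre- or
    // }                                // post-barrier (compensated on the slow path)
    //
    // static void update(MyObject m, Object o) {
    //   m.f = o;                       // general case: G1C2BarrierPre | G1C2BarrierPost;
    // }                                // if o is statically null, expand_barriers later
    //                                  // drops the post-barrier (the SATB pre-barrier stays)
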
- worklist.push(marking_check_if);
- while (worklist.size() > 0 && found < controls.size()) {
- Node* x = worklist.pop();
- if (x == nullptr || x == compile->top()) continue;
- if (visited.member(x)) {
- continue;
- } else {
- visited.push(x);
- }
+void G1BarrierSetC2::late_barrier_analysis() const {
+ compute_liveness_at_stubs();
+}
- if (controls.member(x)) {
- found++;
- }
- if (x->is_Region()) {
- for (uint i = 1; i < x->req(); i++) {
- worklist.push(x->in(i));
- }
- } else {
- if (!x->is_SafePoint()) {
- worklist.push(x->in(0));
- } else {
- safepoints.push(x);
- }
+void G1BarrierSetC2::emit_stubs(CodeBuffer& cb) const {
+ MacroAssembler masm(&cb);
+ GrowableArray<G1BarrierStubC2*>* const stubs = barrier_set_state()->stubs();
+ for (int i = 0; i < stubs->length(); i++) {
+ // Make sure there is enough space in the code buffer
+ if (cb.insts()->maybe_expand_to_ensure_remaining(PhaseOutput::MAX_inst_size) && cb.blob() == nullptr) {
+ ciEnv::current()->record_failure("CodeCache is full");
+ return;
}
+ stubs->at(i)->emit_code(masm);
}
- assert(found == controls.size(), "Pre-barrier structure anomaly or possible safepoint");
+ masm.flush();
}
-void G1BarrierSetC2::verify_gc_barriers(Compile* compile, CompilePhase phase) const {
- if (phase != BarrierSetC2::BeforeCodeGen) {
- return;
+#ifndef PRODUCT
+void G1BarrierSetC2::dump_barrier_data(const MachNode* mach, outputStream* st) const {
+ if ((mach->barrier_data() & G1C2BarrierPre) != 0) {
+ st->print("pre ");
}
- // Verify G1 pre-barriers
- const int marking_offset = in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset());
-
- Unique_Node_List visited;
- Node_List worklist;
- // We're going to walk control flow backwards starting from the Root
- worklist.push(compile->root());
- while (worklist.size() > 0) {
- Node* x = worklist.pop();
- if (x == nullptr || x == compile->top()) continue;
- if (visited.member(x)) {
- continue;
- } else {
- visited.push(x);
- }
-
- if (x->is_Region()) {
- for (uint i = 1; i < x->req(); i++) {
- worklist.push(x->in(i));
- }
- } else {
- worklist.push(x->in(0));
- // We are looking for the pattern:
- // /->ThreadLocal
- // If->Bool->CmpI->LoadB->AddP->ConL(marking_offset)
- // \->ConI(0)
- // We want to verify that the If and the LoadB have the same control
- // See GraphKit::g1_write_barrier_pre()
- if (x->is_If()) {
- IfNode *iff = x->as_If();
- if (iff->in(1)->is_Bool() && iff->in(1)->in(1)->is_Cmp()) {
- CmpNode *cmp = iff->in(1)->in(1)->as_Cmp();
- if (cmp->Opcode() == Op_CmpI && cmp->in(2)->is_Con() && cmp->in(2)->bottom_type()->is_int()->get_con() == 0
- && cmp->in(1)->is_Load()) {
- LoadNode* load = cmp->in(1)->as_Load();
- if (load->Opcode() == Op_LoadB && load->in(2)->is_AddP() && load->in(2)->in(2)->Opcode() == Op_ThreadLocal
- && load->in(2)->in(3)->is_Con()
- && load->in(2)->in(3)->bottom_type()->is_intptr_t()->get_con() == marking_offset) {
-
- Node* if_ctrl = iff->in(0);
- Node* load_ctrl = load->in(0);
-
- if (if_ctrl != load_ctrl) {
- // Skip possible CProj->NeverBranch in infinite loops
- if ((if_ctrl->is_Proj() && if_ctrl->Opcode() == Op_CProj)
- && if_ctrl->in(0)->is_NeverBranch()) {
- if_ctrl = if_ctrl->in(0)->in(0);
- }
- }
- assert(load_ctrl != nullptr && if_ctrl == load_ctrl, "controls must match");
-
- Unique_Node_List loads;
- verify_pre_load(iff, loads);
- verify_no_safepoints(compile, iff, loads);
- }
- }
- }
- }
- }
+ if ((mach->barrier_data() & G1C2BarrierPost) != 0) {
+ st->print("post ");
}
-}
-#endif
-
-bool G1BarrierSetC2::escape_add_to_con_graph(ConnectionGraph* conn_graph, PhaseGVN* gvn, Unique_Node_List* delayed_worklist, Node* n, uint opcode) const {
- if (opcode == Op_StoreP) {
- Node* adr = n->in(MemNode::Address);
- const Type* adr_type = gvn->type(adr);
- // Pointer stores in G1 barriers looks like unsafe access.
- // Ignore such stores to be able scalar replace non-escaping
- // allocations.
- if (adr_type->isa_rawptr() && adr->is_AddP()) {
- Node* base = conn_graph->get_addp_base(adr);
- if (base->Opcode() == Op_LoadP &&
- base->in(MemNode::Address)->is_AddP()) {
- adr = base->in(MemNode::Address);
- Node* tls = conn_graph->get_addp_base(adr);
- if (tls->Opcode() == Op_ThreadLocal) {
- int offs = (int) gvn->find_intptr_t_con(adr->in(AddPNode::Offset), Type::OffsetBot);
- const int buf_offset = in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset());
- if (offs == buf_offset) {
- return true; // G1 pre barrier previous oop value store.
- }
- if (offs == in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset())) {
- return true; // G1 post barrier card address store.
- }
- }
- }
- }
+ if ((mach->barrier_data() & G1C2BarrierPostNotNull) != 0) {
+ st->print("notnull ");
}
- return false;
}
+#endif // !PRODUCT
diff --git a/src/hotspot/share/gc/g1/c2/g1BarrierSetC2.hpp b/src/hotspot/share/gc/g1/c2/g1BarrierSetC2.hpp
index c445a87d2e46d..dc333d8c33174 100644
--- a/src/hotspot/share/gc/g1/c2/g1BarrierSetC2.hpp
+++ b/src/hotspot/share/gc/g1/c2/g1BarrierSetC2.hpp
@@ -31,29 +31,62 @@ class PhaseTransform;
class Type;
class TypeFunc;
-class G1BarrierSetC2: public CardTableBarrierSetC2 {
+const int G1C2BarrierPre = 1;
+const int G1C2BarrierPost = 2;
+const int G1C2BarrierPostNotNull = 4;
+
+class G1BarrierStubC2 : public BarrierStubC2 {
+public:
+ G1BarrierStubC2(const MachNode* node);
+ virtual void emit_code(MacroAssembler& masm) = 0;
+};
+
+class G1PreBarrierStubC2 : public G1BarrierStubC2 {
+private:
+ Register _obj;
+ Register _pre_val;
+ Register _thread;
+ Register _tmp1;
+ Register _tmp2;
+
+protected:
+ G1PreBarrierStubC2(const MachNode* node);
+
+public:
+ static bool needs_barrier(const MachNode* node);
+ static G1PreBarrierStubC2* create(const MachNode* node);
+ void initialize_registers(Register obj, Register pre_val, Register thread, Register tmp1 = noreg, Register tmp2 = noreg);
+ Register obj() const;
+ Register pre_val() const;
+ Register thread() const;
+ Register tmp1() const;
+ Register tmp2() const;
+ virtual void emit_code(MacroAssembler& masm);
+};
+
+class G1PostBarrierStubC2 : public G1BarrierStubC2 {
+private:
+ Register _thread;
+ Register _tmp1;
+ Register _tmp2;
+ Register _tmp3;
+
protected:
- virtual void pre_barrier(GraphKit* kit,
- bool do_load,
- Node* ctl,
- Node* obj,
- Node* adr,
- uint adr_idx,
- Node* val,
- const TypeOopPtr* val_type,
- Node* pre_val,
- BasicType bt) const;
-
- virtual void post_barrier(GraphKit* kit,
- Node* ctl,
- Node* store,
- Node* obj,
- Node* adr,
- uint adr_idx,
- Node* val,
- BasicType bt,
- bool use_precise) const;
+ G1PostBarrierStubC2(const MachNode* node);
+public:
+ static bool needs_barrier(const MachNode* node);
+ static G1PostBarrierStubC2* create(const MachNode* node);
+ void initialize_registers(Register thread, Register tmp1 = noreg, Register tmp2 = noreg, Register tmp3 = noreg);
+ Register thread() const;
+ Register tmp1() const;
+ Register tmp2() const;
+ Register tmp3() const;
+ virtual void emit_code(MacroAssembler& masm);
+};
+
+class G1BarrierSetC2: public CardTableBarrierSetC2 {
+protected:
bool g1_can_remove_pre_barrier(GraphKit* kit,
PhaseValues* phase,
Node* adr,
@@ -64,44 +97,31 @@ class G1BarrierSetC2: public CardTableBarrierSetC2 {
PhaseValues* phase, Node* store,
Node* adr) const;
- void g1_mark_card(GraphKit* kit,
- IdealKit& ideal,
- Node* card_adr,
- Node* oop_store,
- uint oop_alias_idx,
- Node* index,
- Node* index_adr,
- Node* buffer,
- const TypeFunc* tf) const;
-
- // Helper for unsafe accesses, that may or may not be on the referent field.
- // Generates the guards that check whether the result of
- // Unsafe.getReference should be recorded in an SATB log buffer.
- void insert_pre_barrier(GraphKit* kit, Node* base_oop, Node* offset, Node* pre_val, bool need_mem_bar) const;
-
- static const TypeFunc* write_ref_field_pre_entry_Type();
- static const TypeFunc* write_ref_field_post_entry_Type();
+ int get_store_barrier(C2Access& access) const;
virtual Node* load_at_resolved(C2Access& access, const Type* val_type) const;
+ virtual Node* store_at_resolved(C2Access& access, C2AccessValue& val) const;
+ virtual Node* atomic_cmpxchg_val_at_resolved(C2AtomicParseAccess& access, Node* expected_val,
+ Node* new_val, const Type* value_type) const;
+ virtual Node* atomic_cmpxchg_bool_at_resolved(C2AtomicParseAccess& access, Node* expected_val,
+ Node* new_val, const Type* value_type) const;
+ virtual Node* atomic_xchg_at_resolved(C2AtomicParseAccess& access, Node* new_val, const Type* value_type) const;
-#ifdef ASSERT
- bool has_cas_in_use_chain(Node* x) const;
- void verify_pre_load(Node* marking_check_if, Unique_Node_List& loads /*output*/) const;
- void verify_no_safepoints(Compile* compile, Node* marking_load, const Unique_Node_List& loads) const;
-#endif
-
- static bool is_g1_pre_val_load(Node* n);
public:
- virtual bool is_gc_pre_barrier_node(Node* node) const;
- virtual bool is_gc_barrier_node(Node* node) const;
virtual void eliminate_gc_barrier(PhaseMacroExpand* macro, Node* node) const;
- virtual Node* step_over_gc_barrier(Node* c) const;
-
-#ifdef ASSERT
- virtual void verify_gc_barriers(Compile* compile, CompilePhase phase) const;
+ virtual void eliminate_gc_barrier_data(Node* node) const;
+ virtual bool expand_barriers(Compile* C, PhaseIterGVN& igvn) const;
+ virtual uint estimated_barrier_size(const Node* node) const;
+ virtual bool can_initialize_object(const StoreNode* store) const;
+ virtual void clone_at_expansion(PhaseMacroExpand* phase,
+ ArrayCopyNode* ac) const;
+ virtual void* create_barrier_state(Arena* comp_arena) const;
+ virtual void emit_stubs(CodeBuffer& cb) const;
+ virtual void late_barrier_analysis() const;
+
+#ifndef PRODUCT
+ virtual void dump_barrier_data(const MachNode* mach, outputStream* st) const;
#endif
-
- virtual bool escape_add_to_con_graph(ConnectionGraph* conn_graph, PhaseGVN* gvn, Unique_Node_List* delayed_worklist, Node* n, uint opcode) const;
};
#endif // SHARE_GC_G1_C2_G1BARRIERSETC2_HPP
diff --git a/src/hotspot/share/gc/g1/g1BarrierSetRuntime.cpp b/src/hotspot/share/gc/g1/g1BarrierSetRuntime.cpp
index a0fce437807f4..2e247f46c93d8 100644
--- a/src/hotspot/share/gc/g1/g1BarrierSetRuntime.cpp
+++ b/src/hotspot/share/gc/g1/g1BarrierSetRuntime.cpp
@@ -61,3 +61,11 @@ JRT_LEAF(void, G1BarrierSetRuntime::write_ref_field_post_entry(volatile G1CardTa
G1DirtyCardQueue& queue = G1ThreadLocalData::dirty_card_queue(thread);
G1BarrierSet::dirty_card_queue_set().enqueue(queue, card_addr);
JRT_END
+
+JRT_LEAF(void, G1BarrierSetRuntime::clone(oopDesc* src, oopDesc* dst, size_t size))
+ HeapAccess<>::clone(src, dst, size);
+JRT_END
+
+address G1BarrierSetRuntime::clone_addr() {
+ return reinterpret_cast<address>(clone);
+}
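
(Aside: clone_addr() feeds G1BarrierSetC2::clone_at_expansion above, which routes instance clones through this leaf call when ReduceInitialCardMarks is disabled. A rough picture of the resulting call, with the argument order taken from the JRT_LEAF signature:)

    // Expanded instance clone without ReduceInitialCardMarks (conceptual):
    //   G1BarrierSetRuntime::clone(src, dst, size);  // HeapAccess<>::clone performs the
    //                                                // copy with the required GC barriers
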
diff --git a/src/hotspot/share/gc/g1/g1BarrierSetRuntime.hpp b/src/hotspot/share/gc/g1/g1BarrierSetRuntime.hpp
index 366679f032ba9..f98e94096e72d 100644
--- a/src/hotspot/share/gc/g1/g1BarrierSetRuntime.hpp
+++ b/src/hotspot/share/gc/g1/g1BarrierSetRuntime.hpp
@@ -35,6 +35,8 @@ class oopDesc;
class JavaThread;
class G1BarrierSetRuntime: public AllStatic {
+private:
+ static void clone(oopDesc* src, oopDesc* dst, size_t size);
public:
using CardValue = G1CardTable::CardValue;
@@ -46,6 +48,8 @@ class G1BarrierSetRuntime: public AllStatic {
// C2 slow-path runtime calls.
static void write_ref_field_pre_entry(oopDesc* orig, JavaThread *thread);
static void write_ref_field_post_entry(volatile CardValue* card_addr, JavaThread* thread);
+
+ static address clone_addr();
};
#endif // SHARE_GC_G1_G1BARRIERSETRUNTIME_HPP
diff --git a/src/hotspot/share/gc/g1/g1BatchedTask.hpp b/src/hotspot/share/gc/g1/g1BatchedTask.hpp
index aa16f4ddfd48d..020fda634e4b8 100644
--- a/src/hotspot/share/gc/g1/g1BatchedTask.hpp
+++ b/src/hotspot/share/gc/g1/g1BatchedTask.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2021, 2022, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2021, 2024, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -29,7 +29,7 @@
#include "gc/shared/workerThread.hpp"
#include "memory/allocation.hpp"
-template <typename E, MEMFLAGS F>
+template <typename E, MemTag MT>
class GrowableArrayCHeap;
// G1AbstractSubTask represents a task to be performed either within a
diff --git a/src/hotspot/share/gc/g1/g1MonotonicArena.cpp b/src/hotspot/share/gc/g1/g1MonotonicArena.cpp
index 81748d277cff9..b2706d7a9463c 100644
--- a/src/hotspot/share/gc/g1/g1MonotonicArena.cpp
+++ b/src/hotspot/share/gc/g1/g1MonotonicArena.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2022, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2022, 2024, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -29,22 +29,22 @@
#include "runtime/vmOperations.hpp"
#include "utilities/globalCounter.inline.hpp"
-G1MonotonicArena::Segment::Segment(uint slot_size, uint num_slots, Segment* next, MEMFLAGS flag) :
+G1MonotonicArena::Segment::Segment(uint slot_size, uint num_slots, Segment* next, MemTag mem_tag) :
_slot_size(slot_size),
_num_slots(num_slots),
_next(next),
_next_allocate(0),
- _mem_flag(flag) {
+ _mem_tag(mem_tag) {
_bottom = ((char*) this) + header_size();
}
G1MonotonicArena::Segment* G1MonotonicArena::Segment::create_segment(uint slot_size,
uint num_slots,
Segment* next,
- MEMFLAGS mem_flag) {
+ MemTag mem_tag) {
size_t block_size = size_in_bytes(slot_size, num_slots);
- char* alloc_block = NEW_C_HEAP_ARRAY(char, block_size, mem_flag);
- return new (alloc_block) Segment(slot_size, num_slots, next, mem_flag);
+ char* alloc_block = NEW_C_HEAP_ARRAY(char, block_size, mem_tag);
+ return new (alloc_block) Segment(slot_size, num_slots, next, mem_tag);
}
void G1MonotonicArena::Segment::delete_segment(Segment* segment) {
@@ -54,7 +54,7 @@ void G1MonotonicArena::Segment::delete_segment(Segment* segment) {
GlobalCounter::write_synchronize();
}
segment->~Segment();
- FREE_C_HEAP_ARRAY(_mem_flag, segment);
+ FREE_C_HEAP_ARRAY(_mem_tag, segment);
}
void G1MonotonicArena::SegmentFreeList::bulk_add(Segment& first,
@@ -108,7 +108,7 @@ G1MonotonicArena::Segment* G1MonotonicArena::new_segment(Segment* const prev) {
uint prev_num_slots = (prev != nullptr) ? prev->num_slots() : 0;
uint num_slots = _alloc_options->next_num_slots(prev_num_slots);
- next = Segment::create_segment(slot_size(), num_slots, prev, _alloc_options->mem_flag());
+ next = Segment::create_segment(slot_size(), num_slots, prev, _alloc_options->mem_tag());
} else {
assert(slot_size() == next->slot_size() ,
"Mismatch %d != %d", slot_size(), next->slot_size());
diff --git a/src/hotspot/share/gc/g1/g1MonotonicArena.hpp b/src/hotspot/share/gc/g1/g1MonotonicArena.hpp
index bf46e4a33513a..b51f3e37db180 100644
--- a/src/hotspot/share/gc/g1/g1MonotonicArena.hpp
+++ b/src/hotspot/share/gc/g1/g1MonotonicArena.hpp
@@ -27,7 +27,7 @@
#define SHARE_GC_G1_G1MONOTONICARENA_HPP
#include "gc/shared/freeListAllocator.hpp"
-#include "nmt/memflags.hpp"
+#include "nmt/memTag.hpp"
#include "utilities/globalDefinitions.hpp"
#include "utilities/lockFreeStack.hpp"
@@ -120,7 +120,7 @@ class G1MonotonicArena::Segment {
// to _num_slots (can be larger because we atomically increment this value and
// check only afterwards if the allocation has been successful).
uint volatile _next_allocate;
- const MEMFLAGS _mem_flag;
+ const MemTag _mem_tag;
char* _bottom; // Actual data.
// Do not add class member variables beyond this point
@@ -136,7 +136,7 @@ class G1MonotonicArena::Segment {
NONCOPYABLE(Segment);
- Segment(uint slot_size, uint num_slots, Segment* next, MEMFLAGS flag);
+ Segment(uint slot_size, uint num_slots, Segment* next, MemTag mem_tag);
~Segment() = default;
public:
Segment* volatile* next_addr() { return &_next; }
@@ -173,7 +173,7 @@ class G1MonotonicArena::Segment {
return header_size() + payload_size(slot_size, num_slots);
}
- static Segment* create_segment(uint slot_size, uint num_slots, Segment* next, MEMFLAGS mem_flag);
+ static Segment* create_segment(uint slot_size, uint num_slots, Segment* next, MemTag mem_tag);
static void delete_segment(Segment* segment);
// Copies the contents of this segment into the destination.
@@ -222,7 +222,7 @@ class G1MonotonicArena::SegmentFreeList {
class G1MonotonicArena::AllocOptions {
protected:
- const MEMFLAGS _mem_flag;
+ const MemTag _mem_tag;
const uint _slot_size;
const uint _initial_num_slots;
// Defines a limit to the number of slots in the segment
@@ -230,8 +230,8 @@ class G1MonotonicArena::AllocOptions {
const uint _slot_alignment;
public:
- AllocOptions(MEMFLAGS mem_flag, uint slot_size, uint initial_num_slots, uint max_num_slots, uint alignment) :
- _mem_flag(mem_flag),
+ AllocOptions(MemTag mem_tag, uint slot_size, uint initial_num_slots, uint max_num_slots, uint alignment) :
+ _mem_tag(mem_tag),
_slot_size(align_up(slot_size, alignment)),
_initial_num_slots(initial_num_slots),
_max_num_slots(max_num_slots),
@@ -250,7 +250,7 @@ class G1MonotonicArena::AllocOptions {
uint slot_alignment() const { return _slot_alignment; }
- MEMFLAGS mem_flag() const {return _mem_flag; }
+ MemTag mem_tag() const {return _mem_tag; }
};
#endif //SHARE_GC_G1_MONOTONICARENA_HPP
diff --git a/src/hotspot/share/gc/g1/g1ParScanThreadState.hpp b/src/hotspot/share/gc/g1/g1ParScanThreadState.hpp
index 1cfd6fca08a6f..3f7fefd8a07a6 100644
--- a/src/hotspot/share/gc/g1/g1ParScanThreadState.hpp
+++ b/src/hotspot/share/gc/g1/g1ParScanThreadState.hpp
@@ -87,8 +87,6 @@ class G1ParScanThreadState : public CHeapObj<mtGC> {
// Indicates whether in the last generation (old) there is no more space
// available for allocation.
bool _old_gen_is_full;
- // Size (in elements) of a partial objArray task chunk.
- size_t _partial_objarray_chunk_size;
PartialArrayStateAllocator* _partial_array_state_allocator;
PartialArrayTaskStepper _partial_array_stepper;
StringDedup::Requests _string_dedup_requests;
diff --git a/src/hotspot/share/gc/g1/g1RegionToSpaceMapper.cpp b/src/hotspot/share/gc/g1/g1RegionToSpaceMapper.cpp
index 5f903960cce8c..4403b4c8dd981 100644
--- a/src/hotspot/share/gc/g1/g1RegionToSpaceMapper.cpp
+++ b/src/hotspot/share/gc/g1/g1RegionToSpaceMapper.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2001, 2023, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2024, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -40,15 +40,15 @@ G1RegionToSpaceMapper::G1RegionToSpaceMapper(ReservedSpace rs,
size_t page_size,
size_t region_granularity,
size_t commit_factor,
- MEMFLAGS type) :
+ MemTag mem_tag) :
_listener(nullptr),
_storage(rs, used_size, page_size),
_region_commit_map(rs.size() * commit_factor / region_granularity, mtGC),
- _memory_type(type) {
+ _memory_tag(mem_tag) {
guarantee(is_power_of_2(page_size), "must be");
guarantee(is_power_of_2(region_granularity), "must be");
- MemTracker::record_virtual_memory_type((address)rs.base(), type);
+ MemTracker::record_virtual_memory_tag((address)rs.base(), mem_tag);
}
// Used to manually signal a mapper to handle a set of regions as committed.
@@ -72,8 +72,8 @@ class G1RegionsLargerThanCommitSizeMapper : public G1RegionToSpaceMapper {
size_t page_size,
size_t alloc_granularity,
size_t commit_factor,
- MEMFLAGS type) :
- G1RegionToSpaceMapper(rs, actual_size, page_size, alloc_granularity, commit_factor, type),
+ MemTag mem_tag) :
+ G1RegionToSpaceMapper(rs, actual_size, page_size, alloc_granularity, commit_factor, mem_tag),
_pages_per_region(alloc_granularity / (page_size * commit_factor)) {
guarantee(alloc_granularity >= page_size, "allocation granularity smaller than commit granularity");
@@ -97,7 +97,7 @@ class G1RegionsLargerThanCommitSizeMapper : public G1RegionToSpaceMapper {
const size_t start_page = (size_t)start_idx * _pages_per_region;
const size_t size_in_pages = num_regions * _pages_per_region;
bool zero_filled = _storage.commit(start_page, size_in_pages);
- if (_memory_type == mtJavaHeap) {
+ if (_memory_tag == mtJavaHeap) {
for (uint region_index = start_idx; region_index < start_idx + num_regions; region_index++ ) {
void* address = _storage.page_start(region_index * _pages_per_region);
size_t size_in_bytes = _storage.page_size() * _pages_per_region;
@@ -150,7 +150,7 @@ class G1RegionsSmallerThanCommitSizeMapper : public G1RegionToSpaceMapper {
}
void numa_request_on_node(size_t page_idx) {
- if (_memory_type == mtJavaHeap) {
+ if (_memory_tag == mtJavaHeap) {
uint region = (uint)(page_idx * _regions_per_page);
void* address = _storage.page_start(page_idx);
size_t size_in_bytes = _storage.page_size();
@@ -164,8 +164,8 @@ class G1RegionsSmallerThanCommitSizeMapper : public G1RegionToSpaceMapper {
size_t page_size,
size_t alloc_granularity,
size_t commit_factor,
- MEMFLAGS type) :
- G1RegionToSpaceMapper(rs, actual_size, page_size, alloc_granularity, commit_factor, type),
+ MemTag mem_tag) :
+ G1RegionToSpaceMapper(rs, actual_size, page_size, alloc_granularity, commit_factor, mem_tag),
_regions_per_page((page_size * commit_factor) / alloc_granularity),
_lock(Mutex::service-3, "G1Mapper_lock") {
@@ -263,10 +263,10 @@ G1RegionToSpaceMapper* G1RegionToSpaceMapper::create_mapper(ReservedSpace rs,
size_t page_size,
size_t region_granularity,
size_t commit_factor,
- MEMFLAGS type) {
+ MemTag mem_tag) {
if (region_granularity >= (page_size * commit_factor)) {
- return new G1RegionsLargerThanCommitSizeMapper(rs, actual_size, page_size, region_granularity, commit_factor, type);
+ return new G1RegionsLargerThanCommitSizeMapper(rs, actual_size, page_size, region_granularity, commit_factor, mem_tag);
} else {
- return new G1RegionsSmallerThanCommitSizeMapper(rs, actual_size, page_size, region_granularity, commit_factor, type);
+ return new G1RegionsSmallerThanCommitSizeMapper(rs, actual_size, page_size, region_granularity, commit_factor, mem_tag);
}
}
diff --git a/src/hotspot/share/gc/g1/g1RegionToSpaceMapper.hpp b/src/hotspot/share/gc/g1/g1RegionToSpaceMapper.hpp
index 02498b394b39f..5ef0f8ec5ab51 100644
--- a/src/hotspot/share/gc/g1/g1RegionToSpaceMapper.hpp
+++ b/src/hotspot/share/gc/g1/g1RegionToSpaceMapper.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2014, 2023, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, 2024, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -52,9 +52,9 @@ class G1RegionToSpaceMapper : public CHeapObj<mtGC> {
// Mapping management
CHeapBitMap _region_commit_map;
- MEMFLAGS _memory_type;
+ MemTag _memory_tag;
- G1RegionToSpaceMapper(ReservedSpace rs, size_t used_size, size_t page_size, size_t region_granularity, size_t commit_factor, MEMFLAGS type);
+ G1RegionToSpaceMapper(ReservedSpace rs, size_t used_size, size_t page_size, size_t region_granularity, size_t commit_factor, MemTag mem_tag);
void fire_on_commit(uint start_idx, size_t num_regions, bool zero_filled);
public:
@@ -85,7 +85,7 @@ class G1RegionToSpaceMapper : public CHeapObj {
size_t page_size,
size_t region_granularity,
size_t byte_translation_factor,
- MEMFLAGS type);
+ MemTag mem_tag);
};
#endif // SHARE_GC_G1_G1REGIONTOSPACEMAPPER_HPP
diff --git a/src/hotspot/share/gc/parallel/objectStartArray.cpp b/src/hotspot/share/gc/parallel/objectStartArray.cpp
index b1fc956a54a21..ef9de7abfd771 100644
--- a/src/hotspot/share/gc/parallel/objectStartArray.cpp
+++ b/src/hotspot/share/gc/parallel/objectStartArray.cpp
@@ -51,7 +51,7 @@ void ObjectStartArray::initialize(MemRegion reserved_region) {
if (!backing_store.is_reserved()) {
vm_exit_during_initialization("Could not reserve space for ObjectStartArray");
}
- MemTracker::record_virtual_memory_type(backing_store.base(), mtGC);
+ MemTracker::record_virtual_memory_tag(backing_store.base(), mtGC);
// We do not commit any memory initially
_virtual_space.initialize(backing_store);
diff --git a/src/hotspot/share/gc/parallel/parMarkBitMap.cpp b/src/hotspot/share/gc/parallel/parMarkBitMap.cpp
index 658c3ef106fa0..46a178500e576 100644
--- a/src/hotspot/share/gc/parallel/parMarkBitMap.cpp
+++ b/src/hotspot/share/gc/parallel/parMarkBitMap.cpp
@@ -51,7 +51,7 @@ ParMarkBitMap::initialize(MemRegion covered_region)
os::trace_page_sizes("Mark Bitmap", raw_bytes, raw_bytes,
rs.base(), rs.size(), used_page_sz);
- MemTracker::record_virtual_memory_type((address)rs.base(), mtGC);
+ MemTracker::record_virtual_memory_tag((address)rs.base(), mtGC);
_virtual_space = new PSVirtualSpace(rs, page_sz);
if (_virtual_space != nullptr && _virtual_space->expand_by(_reserved_byte_size)) {
diff --git a/src/hotspot/share/gc/parallel/psParallelCompact.cpp b/src/hotspot/share/gc/parallel/psParallelCompact.cpp
index 4bff8f8a7d06a..1ab7b2af7ed74 100644
--- a/src/hotspot/share/gc/parallel/psParallelCompact.cpp
+++ b/src/hotspot/share/gc/parallel/psParallelCompact.cpp
@@ -235,7 +235,7 @@ ParallelCompactData::create_vspace(size_t count, size_t element_size)
os::trace_page_sizes("Parallel Compact Data", raw_bytes, raw_bytes, rs.base(),
rs.size(), page_sz);
- MemTracker::record_virtual_memory_type((address)rs.base(), mtGC);
+ MemTracker::record_virtual_memory_tag((address)rs.base(), mtGC);
PSVirtualSpace* vspace = new PSVirtualSpace(rs, page_sz);
if (vspace != nullptr) {
diff --git a/src/hotspot/share/gc/parallel/psScavenge.hpp b/src/hotspot/share/gc/parallel/psScavenge.hpp
index 99d0487760b15..55abdfd3cf38e 100644
--- a/src/hotspot/share/gc/parallel/psScavenge.hpp
+++ b/src/hotspot/share/gc/parallel/psScavenge.hpp
@@ -34,9 +34,7 @@
#include "oops/oop.hpp"
#include "utilities/stack.hpp"
-class ReferenceProcessor;
class ParallelScavengeHeap;
-class ParallelScavengeTracer;
class PSIsAliveClosure;
class STWGCTimer;
diff --git a/src/hotspot/share/gc/serial/serialBlockOffsetTable.cpp b/src/hotspot/share/gc/serial/serialBlockOffsetTable.cpp
index 59b7f130df30b..31f18652c63d6 100644
--- a/src/hotspot/share/gc/serial/serialBlockOffsetTable.cpp
+++ b/src/hotspot/share/gc/serial/serialBlockOffsetTable.cpp
@@ -42,7 +42,7 @@ SerialBlockOffsetTable::SerialBlockOffsetTable(MemRegion reserved,
vm_exit_during_initialization("Could not reserve enough space for heap offset array");
}
- MemTracker::record_virtual_memory_type((address)rs.base(), mtGC);
+ MemTracker::record_virtual_memory_tag((address)rs.base(), mtGC);
if (!_vs.initialize(rs, 0)) {
vm_exit_during_initialization("Could not reserve enough space for heap offset array");
diff --git a/src/hotspot/share/gc/shared/c2/barrierSetC2.cpp b/src/hotspot/share/gc/shared/c2/barrierSetC2.cpp
index 59e0245204441..643a7936b9b17 100644
--- a/src/hotspot/share/gc/shared/c2/barrierSetC2.cpp
+++ b/src/hotspot/share/gc/shared/c2/barrierSetC2.cpp
@@ -109,6 +109,10 @@ Label* BarrierStubC2::continuation() {
return &_continuation;
}
+uint8_t BarrierStubC2::barrier_data() const {
+ return _node->barrier_data();
+}
+
void BarrierStubC2::preserve(Register r) {
const VMReg vm_reg = r->as_VMReg();
assert(vm_reg->is_Register(), "r must be a general-purpose register");
diff --git a/src/hotspot/share/gc/shared/c2/barrierSetC2.hpp b/src/hotspot/share/gc/shared/c2/barrierSetC2.hpp
index c1485c069c83c..00fbf1f2c9f8b 100644
--- a/src/hotspot/share/gc/shared/c2/barrierSetC2.hpp
+++ b/src/hotspot/share/gc/shared/c2/barrierSetC2.hpp
@@ -254,6 +254,8 @@ class BarrierStubC2 : public ArenaObj {
Label* entry();
// Return point from the stub (typically end of barrier).
Label* continuation();
+ // High-level, GC-specific barrier flags.
+ uint8_t barrier_data() const;
// Preserve the value in reg across runtime calls in this barrier.
void preserve(Register reg);
@@ -340,6 +342,8 @@ class BarrierSetC2: public CHeapObj<mtGC> {
// Estimated size of the node barrier in number of C2 Ideal nodes.
// This is used to guide heuristics in C2, e.g. whether to unroll a loop.
virtual uint estimated_barrier_size(const Node* node) const { return 0; }
+ // Whether the given store can be used to initialize a newly allocated object.
+ virtual bool can_initialize_object(const StoreNode* store) const { return true; }
enum CompilePhase {
BeforeOptimize,
diff --git a/src/hotspot/share/gc/shared/c2/cardTableBarrierSetC2.cpp b/src/hotspot/share/gc/shared/c2/cardTableBarrierSetC2.cpp
index 87bb9f3cd5170..11b742156a831 100644
--- a/src/hotspot/share/gc/shared/c2/cardTableBarrierSetC2.cpp
+++ b/src/hotspot/share/gc/shared/c2/cardTableBarrierSetC2.cpp
@@ -125,39 +125,10 @@ void CardTableBarrierSetC2::post_barrier(GraphKit* kit,
kit->final_sync(ideal);
}
-void CardTableBarrierSetC2::clone(GraphKit* kit, Node* src, Node* dst, Node* size, bool is_array) const {
- BarrierSetC2::clone(kit, src, dst, size, is_array);
- const TypePtr* raw_adr_type = TypeRawPtr::BOTTOM;
-
- // If necessary, emit some card marks afterwards. (Non-arrays only.)
- bool card_mark = !is_array && !use_ReduceInitialCardMarks();
- if (card_mark) {
- assert(!is_array, "");
- // Put in store barrier for any and all oops we are sticking
- // into this object. (We could avoid this if we could prove
- // that the object type contains no oop fields at all.)
- Node* no_particular_value = nullptr;
- Node* no_particular_field = nullptr;
- int raw_adr_idx = Compile::AliasIdxRaw;
- post_barrier(kit, kit->control(),
- kit->memory(raw_adr_type),
- dst,
- no_particular_field,
- raw_adr_idx,
- no_particular_value,
- T_OBJECT,
- false);
- }
-}
-
bool CardTableBarrierSetC2::use_ReduceInitialCardMarks() const {
return ReduceInitialCardMarks;
}
-bool CardTableBarrierSetC2::is_gc_barrier_node(Node* node) const {
- return ModRefBarrierSetC2::is_gc_barrier_node(node) || node->Opcode() == Op_StoreCM;
-}
-
void CardTableBarrierSetC2::eliminate_gc_barrier(PhaseMacroExpand* macro, Node* node) const {
assert(node->Opcode() == Op_CastP2X, "ConvP2XNode required");
Node *shift = node->unique_out();
diff --git a/src/hotspot/share/gc/shared/c2/cardTableBarrierSetC2.hpp b/src/hotspot/share/gc/shared/c2/cardTableBarrierSetC2.hpp
index 9512f09ff8a6d..3bbf14892d3ef 100644
--- a/src/hotspot/share/gc/shared/c2/cardTableBarrierSetC2.hpp
+++ b/src/hotspot/share/gc/shared/c2/cardTableBarrierSetC2.hpp
@@ -42,8 +42,6 @@ class CardTableBarrierSetC2: public ModRefBarrierSetC2 {
Node* byte_map_base_node(GraphKit* kit) const;
public:
- virtual void clone(GraphKit* kit, Node* src, Node* dst, Node* size, bool is_array) const;
- virtual bool is_gc_barrier_node(Node* node) const;
virtual void eliminate_gc_barrier(PhaseMacroExpand* macro, Node* node) const;
virtual bool array_copy_requires_gc_barriers(bool tightly_coupled_alloc, BasicType type, bool is_clone, bool is_clone_instance, ArrayCopyPhase phase) const;
diff --git a/src/hotspot/share/gc/shared/cardTable.cpp b/src/hotspot/share/gc/shared/cardTable.cpp
index 95f7058f4e4ee..acd4bda6e1071 100644
--- a/src/hotspot/share/gc/shared/cardTable.cpp
+++ b/src/hotspot/share/gc/shared/cardTable.cpp
@@ -84,7 +84,7 @@ void CardTable::initialize(void* region0_start, void* region1_start) {
MAX2(_page_size, os::vm_allocation_granularity());
ReservedSpace heap_rs(_byte_map_size, rs_align, _page_size);
- MemTracker::record_virtual_memory_type((address)heap_rs.base(), mtGC);
+ MemTracker::record_virtual_memory_tag((address)heap_rs.base(), mtGC);
os::trace_page_sizes("Card Table", num_bytes, num_bytes,
heap_rs.base(), heap_rs.size(), _page_size);
diff --git a/src/hotspot/share/gc/shared/oopStorage.cpp b/src/hotspot/share/gc/shared/oopStorage.cpp
index 7117b86b26403..568888ac7d97e 100644
--- a/src/hotspot/share/gc/shared/oopStorage.cpp
+++ b/src/hotspot/share/gc/shared/oopStorage.cpp
@@ -127,10 +127,10 @@ OopStorage::ActiveArray::~ActiveArray() {
}
OopStorage::ActiveArray* OopStorage::ActiveArray::create(size_t size,
- MEMFLAGS memflags,
+ MemTag mem_tag,
AllocFailType alloc_fail) {
size_t size_in_bytes = blocks_offset() + sizeof(Block*) * size;
- void* mem = NEW_C_HEAP_ARRAY3(char, size_in_bytes, memflags, CURRENT_PC, alloc_fail);
+ void* mem = NEW_C_HEAP_ARRAY3(char, size_in_bytes, mem_tag, CURRENT_PC, alloc_fail);
if (mem == nullptr) return nullptr;
return new (mem) ActiveArray(size);
}
@@ -343,7 +343,7 @@ OopStorage::Block* OopStorage::Block::new_block(const OopStorage* owner) {
// _data must be first member: aligning block => aligning _data.
STATIC_ASSERT(_data_pos == 0);
size_t size_needed = allocation_size();
- void* memory = NEW_C_HEAP_ARRAY_RETURN_NULL(char, size_needed, owner->memflags());
+ void* memory = NEW_C_HEAP_ARRAY_RETURN_NULL(char, size_needed, owner->mem_tag());
if (memory == nullptr) {
return nullptr;
}
@@ -575,7 +575,7 @@ bool OopStorage::expand_active_array() {
log_debug(oopstorage, blocks)("%s: expand active array " SIZE_FORMAT,
name(), new_size);
ActiveArray* new_array = ActiveArray::create(new_size,
- memflags(),
+ mem_tag(),
AllocFailStrategy::RETURN_NULL);
if (new_array == nullptr) return false;
new_array->copy_from(old_array);
@@ -805,8 +805,8 @@ void OopStorage::release(const oop* const* ptrs, size_t size) {
}
}
-OopStorage* OopStorage::create(const char* name, MEMFLAGS memflags) {
- return new (memflags) OopStorage(name, memflags);
+OopStorage* OopStorage::create(const char* name, MemTag mem_tag) {
+ return new (mem_tag) OopStorage(name, mem_tag);
}
const size_t initial_active_array_size = 8;
@@ -819,9 +819,9 @@ static Mutex* make_oopstorage_mutex(const char* storage_name,
return new PaddedMutex(rank, name);
}
-OopStorage::OopStorage(const char* name, MEMFLAGS memflags) :
+OopStorage::OopStorage(const char* name, MemTag mem_tag) :
_name(os::strdup(name)),
- _active_array(ActiveArray::create(initial_active_array_size, memflags)),
+ _active_array(ActiveArray::create(initial_active_array_size, mem_tag)),
_allocation_list(),
_deferred_updates(nullptr),
_allocation_mutex(make_oopstorage_mutex(name, "alloc", Mutex::oopstorage)),
@@ -829,7 +829,7 @@ OopStorage::OopStorage(const char* name, MEMFLAGS memflags) :
_num_dead_callback(nullptr),
_allocation_count(0),
_concurrent_iteration_count(0),
- _memflags(memflags),
+ _mem_tag(mem_tag),
_needs_cleanup(false)
{
_active_array->increment_refcount();
@@ -1030,7 +1030,7 @@ size_t OopStorage::total_memory_usage() const {
return total_size;
}
-MEMFLAGS OopStorage::memflags() const { return _memflags; }
+MemTag OopStorage::mem_tag() const { return _mem_tag; }
// Parallel iteration support
@@ -1135,6 +1135,26 @@ void OopStorage::BasicParState::report_num_dead() const {
const char* OopStorage::name() const { return _name; }
+bool OopStorage::print_containing(const oop* addr, outputStream* st) {
+ if (addr != nullptr) {
+ Block* block = find_block_or_null(addr);
+ if (block != nullptr && block->print_containing(addr, st)) {
+ st->print(" in oop storage \"%s\"", name());
+ return true;
+ }
+ }
+ return false;
+}
+
+bool OopStorage::Block::print_containing(const oop* addr, outputStream* st) {
+ if (contains(addr)) {
+ st->print(PTR_FORMAT " is a pointer %u/%zu into block %zu",
+ p2i(addr), get_index(addr), ARRAY_SIZE(_data), _active_index);
+ return true;
+ }
+ return false;
+}
+
#ifndef PRODUCT
void OopStorage::print_on(outputStream* st) const {
diff --git a/src/hotspot/share/gc/shared/oopStorage.hpp b/src/hotspot/share/gc/shared/oopStorage.hpp
index dfc0f83fc1912..96cc5a23d6a91 100644
--- a/src/hotspot/share/gc/shared/oopStorage.hpp
+++ b/src/hotspot/share/gc/shared/oopStorage.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018, 2023, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2018, 2024, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -74,7 +74,7 @@ class outputStream;
class OopStorage : public CHeapObjBase {
public:
- static OopStorage* create(const char* name, MEMFLAGS memflags);
+ static OopStorage* create(const char* name, MemTag mem_tag);
~OopStorage();
// These count and usage accessors are racy unless at a safepoint.
@@ -89,8 +89,8 @@ class OopStorage : public CHeapObjBase {
// bookkeeping overhead, including this storage object.
size_t total_memory_usage() const;
- // The memory type for allocations.
- MEMFLAGS memflags() const;
+ // The memory tag for allocations.
+ MemTag mem_tag() const;
enum EntryStatus {
INVALID_ENTRY,
@@ -213,6 +213,7 @@ class OopStorage : public CHeapObjBase {
// Debugging and logging support.
const char* name() const;
void print_on(outputStream* st) const PRODUCT_RETURN;
+ bool print_containing(const oop* addr, outputStream* st);
// Provides access to storage internals, for unit testing.
// Declare, but not define, the public class OopStorage::TestAccess.
@@ -273,14 +274,14 @@ class OopStorage : public CHeapObjBase {
// mutable because this gets set even for const iteration.
mutable int _concurrent_iteration_count;
- // The memory type for allocations.
- MEMFLAGS _memflags;
+ // The memory tag for allocations.
+ MemTag _mem_tag;
// Flag indicating this storage object is a candidate for empty block deletion.
volatile bool _needs_cleanup;
// Clients construct via "create" factory function.
- OopStorage(const char* name, MEMFLAGS memflags);
+ OopStorage(const char* name, MemTag mem_tag);
NONCOPYABLE(OopStorage);
bool try_add_block();
diff --git a/src/hotspot/share/gc/shared/oopStorage.inline.hpp b/src/hotspot/share/gc/shared/oopStorage.inline.hpp
index e1e815acd094e..545da0be0a76e 100644
--- a/src/hotspot/share/gc/shared/oopStorage.inline.hpp
+++ b/src/hotspot/share/gc/shared/oopStorage.inline.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018, 2023, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2018, 2024, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -60,7 +60,7 @@ class OopStorage::ActiveArray {
public:
static ActiveArray* create(size_t size,
- MEMFLAGS memflags = mtGC,
+ MemTag mem_tag = mtGC,
AllocFailType alloc_fail = AllocFailStrategy::EXIT_OOM);
static void destroy(ActiveArray* ba);
@@ -196,6 +196,8 @@ class OopStorage::Block /* No base class, to avoid messing up alignment. */ {
template<typename F> bool iterate(F f);
template<typename F> bool iterate(F f) const;
+
+ bool print_containing(const oop* addr, outputStream* st);
}; // class Block
inline OopStorage::Block* OopStorage::AllocationList::head() {
diff --git a/src/hotspot/share/gc/shared/oopStorageSet.cpp b/src/hotspot/share/gc/shared/oopStorageSet.cpp
index e119e570759a4..e3a9fccbad3dc 100644
--- a/src/hotspot/share/gc/shared/oopStorageSet.cpp
+++ b/src/hotspot/share/gc/shared/oopStorageSet.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019, 2023, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2019, 2024, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -31,18 +31,18 @@
OopStorage* OopStorageSet::_storages[all_count] = {};
-OopStorage* OopStorageSet::create_strong(const char* name, MEMFLAGS memflags) {
+OopStorage* OopStorageSet::create_strong(const char* name, MemTag mem_tag) {
static uint registered_strong = 0;
assert(registered_strong < strong_count, "More registered strong storages than slots");
- OopStorage* storage = OopStorage::create(name, memflags);
+ OopStorage* storage = OopStorage::create(name, mem_tag);
_storages[strong_start + registered_strong++] = storage;
return storage;
}
-OopStorage* OopStorageSet::create_weak(const char* name, MEMFLAGS memflags) {
+OopStorage* OopStorageSet::create_weak(const char* name, MemTag mem_tag) {
static uint registered_weak = 0;
assert(registered_weak < weak_count, "More registered strong storages than slots");
- OopStorage* storage = OopStorage::create(name, memflags);
+ OopStorage* storage = OopStorage::create(name, mem_tag);
_storages[weak_start + registered_weak++] = storage;
return storage;
}
@@ -82,6 +82,25 @@ template OopStorage* OopStorageSet::get_storage(StrongId);
template OopStorage* OopStorageSet::get_storage(WeakId);
template OopStorage* OopStorageSet::get_storage(Id);
+bool OopStorageSet::print_containing(const void* addr, outputStream* st) {
+ if (addr != nullptr) {
+ const void* aligned_addr = align_down(addr, alignof(oop));
+ for (OopStorage* storage : Range<Id>()) {
+ // Check for null for extra safety: might get here while handling error
+ // before storage initialization.
+ if ((storage != nullptr) && storage->print_containing((oop*) aligned_addr, st)) {
+ if (aligned_addr != addr) {
+ st->print_cr(" (unaligned)");
+ } else {
+ st->cr();
+ }
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
#ifdef ASSERT
void OopStorageSet::verify_initialized(uint index) {
diff --git a/src/hotspot/share/gc/shared/oopStorageSet.hpp b/src/hotspot/share/gc/shared/oopStorageSet.hpp
index 26e0e9f5a7775..867172c41ad74 100644
--- a/src/hotspot/share/gc/shared/oopStorageSet.hpp
+++ b/src/hotspot/share/gc/shared/oopStorageSet.hpp
@@ -25,7 +25,8 @@
#ifndef SHARE_GC_SHARED_OOPSTORAGESET_HPP
#define SHARE_GC_SHARED_OOPSTORAGESET_HPP
-#include "nmt/memflags.hpp"
+#include "nmt/memTag.hpp"
+#include "oops/oop.hpp"
#include "utilities/debug.hpp"
#include "utilities/enumIterator.hpp"
#include "utilities/globalDefinitions.hpp"
@@ -79,8 +80,8 @@ class OopStorageSet : public AllStatic {
static OopStorage* storage(WeakId id) { return get_storage(id); }
static OopStorage* storage(Id id) { return get_storage(id); }
- static OopStorage* create_strong(const char* name, MEMFLAGS memflags);
- static OopStorage* create_weak(const char* name, MEMFLAGS memflags);
+ static OopStorage* create_strong(const char* name, MemTag mem_tag);
+ static OopStorage* create_weak(const char* name, MemTag mem_tag);
// Support iteration over the storage objects.
template<typename StorageId> class Range;
@@ -89,6 +90,8 @@ class OopStorageSet : public AllStatic {
template<typename Closure>
static void strong_oops_do(Closure* cl);
+ // Debugging: print location info, if in storage.
+ static bool print_containing(const void* addr, outputStream* st);
};
ENUMERATOR_VALUE_RANGE(OopStorageSet::StrongId,
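A hedged sketch of how the new print_containing() hook could be used when decoding an arbitrary address, for example from an error-reporting path; the helper function below is hypothetical and not part of this patch:

    // Hypothetical helper: attribute an address to an OopStorage slot if possible.
    static void describe_possible_oop_slot(const void* addr, outputStream* st) {
      // OopStorageSet::print_containing() aligns the address, searches every
      // registered storage, and prints block/index details on a hit.
      if (!OopStorageSet::print_containing(addr, st)) {
        st->print_cr(PTR_FORMAT " is not in any OopStorage", p2i(addr));
      }
    }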
diff --git a/src/hotspot/share/gc/shared/partialArrayState.cpp b/src/hotspot/share/gc/shared/partialArrayState.cpp
index fd23a32022208..48ef974ecfa53 100644
--- a/src/hotspot/share/gc/shared/partialArrayState.cpp
+++ b/src/hotspot/share/gc/shared/partialArrayState.cpp
@@ -26,7 +26,7 @@
#include "gc/shared/partialArrayState.hpp"
#include "memory/allocation.inline.hpp"
#include "memory/arena.hpp"
-#include "nmt/memflags.hpp"
+#include "nmt/memTag.hpp"
#include "oops/oopsHierarchy.hpp"
#include "runtime/atomic.hpp"
#include "runtime/orderAccess.hpp"
diff --git a/src/hotspot/share/gc/shared/stringdedup/stringDedupProcessor.cpp b/src/hotspot/share/gc/shared/stringdedup/stringDedupProcessor.cpp
index aab2f5d312399..ab85c293941df 100644
--- a/src/hotspot/share/gc/shared/stringdedup/stringDedupProcessor.cpp
+++ b/src/hotspot/share/gc/shared/stringdedup/stringDedupProcessor.cpp
@@ -35,7 +35,7 @@
#include "gc/shared/stringdedup/stringDedupTable.hpp"
#include "logging/log.hpp"
#include "memory/iterator.hpp"
-#include "nmt/memflags.hpp"
+#include "nmt/memTag.hpp"
#include "oops/access.inline.hpp"
#include "runtime/atomic.hpp"
#include "runtime/cpuTimeCounters.hpp"
diff --git a/src/hotspot/share/gc/shared/taskqueue.hpp b/src/hotspot/share/gc/shared/taskqueue.hpp
index f4a3731583bc9..efbc1882fbed8 100644
--- a/src/hotspot/share/gc/shared/taskqueue.hpp
+++ b/src/hotspot/share/gc/shared/taskqueue.hpp
@@ -116,8 +116,8 @@ void TaskQueueStats::reset() {
// TaskQueueSuper collects functionality common to all GenericTaskQueue instances.
-template <unsigned int N, MEMFLAGS F>
-class TaskQueueSuper: public CHeapObj<F> {
+template <unsigned int N, MemTag MT>
+class TaskQueueSuper: public CHeapObj<MT> {
protected:
// Internal type for indexing the queue; also used for the tag.
typedef NOT_LP64(uint16_t) LP64_ONLY(uint32_t) idx_t;
@@ -324,39 +324,39 @@ class TaskQueueSuper: public CHeapObj<F> {
// practice of parallel programming (PPoPP 2013), 69-80
//
-template <class E, MEMFLAGS F, unsigned int N = TASKQUEUE_SIZE>
-class GenericTaskQueue: public TaskQueueSuper<N, F> {
+template <class E, MemTag MT, unsigned int N = TASKQUEUE_SIZE>
+class GenericTaskQueue: public TaskQueueSuper<N, MT> {
protected:
- typedef typename TaskQueueSuper<N, F>::Age Age;
- typedef typename TaskQueueSuper<N, F>::idx_t idx_t;
+ typedef typename TaskQueueSuper<N, MT>::Age Age;
+ typedef typename TaskQueueSuper<N, MT>::idx_t idx_t;
- using TaskQueueSuper<N, F>::MOD_N_MASK;
+ using TaskQueueSuper<N, MT>::MOD_N_MASK;
- using TaskQueueSuper<N, F>::bottom_relaxed;
- using TaskQueueSuper<N, F>::bottom_acquire;
+ using TaskQueueSuper<N, MT>::bottom_relaxed;
+ using TaskQueueSuper<N, MT>::bottom_acquire;
- using TaskQueueSuper<N, F>::set_bottom_relaxed;
- using TaskQueueSuper<N, F>::release_set_bottom;
+ using TaskQueueSuper<N, MT>::set_bottom_relaxed;
+ using TaskQueueSuper<N, MT>::release_set_bottom;
- using TaskQueueSuper<N, F>::age_relaxed;
- using TaskQueueSuper<N, F>::set_age_relaxed;
- using TaskQueueSuper<N, F>::cmpxchg_age;
- using TaskQueueSuper<N, F>::age_top_relaxed;
+ using TaskQueueSuper<N, MT>::age_relaxed;
+ using TaskQueueSuper<N, MT>::set_age_relaxed;
+ using TaskQueueSuper<N, MT>::cmpxchg_age;
+ using TaskQueueSuper<N, MT>::age_top_relaxed;
- using TaskQueueSuper<N, F>::increment_index;
- using TaskQueueSuper<N, F>::decrement_index;
- using TaskQueueSuper<N, F>::dirty_size;
- using TaskQueueSuper<N, F>::clean_size;
- using TaskQueueSuper<N, F>::assert_not_underflow;
+ using TaskQueueSuper<N, MT>::increment_index;
+ using TaskQueueSuper<N, MT>::decrement_index;
+ using TaskQueueSuper<N, MT>::dirty_size;
+ using TaskQueueSuper<N, MT>::clean_size;
+ using TaskQueueSuper<N, MT>::assert_not_underflow;
public:
- typedef typename TaskQueueSuper<N, F>::PopResult PopResult;
+ typedef typename TaskQueueSuper<N, MT>::PopResult PopResult;
- using TaskQueueSuper<N, F>::max_elems;
- using TaskQueueSuper<N, F>::size;
+ using TaskQueueSuper<N, MT>::max_elems;
+ using TaskQueueSuper<N, MT>::size;
#if TASKQUEUE_STATS
- using TaskQueueSuper<N, F>::stats;
+ using TaskQueueSuper<N, MT>::stats;
#endif
private:
@@ -428,12 +428,12 @@ class GenericTaskQueue: public TaskQueueSuper<N, F> {
// Note that size() is not hidden--it returns the number of elements in the
// TaskQueue, and does not include the size of the overflow stack. This
// simplifies replacement of GenericTaskQueues with OverflowTaskQueues.
-template <class E, MEMFLAGS F, unsigned int N = TASKQUEUE_SIZE>
-class OverflowTaskQueue: public GenericTaskQueue<E, F, N>
+template <class E, MemTag MT, unsigned int N = TASKQUEUE_SIZE>
+class OverflowTaskQueue: public GenericTaskQueue<E, MT, N>
{
public:
- typedef Stack<E, F> overflow_t;
- typedef GenericTaskQueue<E, F, N> taskqueue_t;
+ typedef Stack<E, MT> overflow_t;
+ typedef GenericTaskQueue<E, MT, N> taskqueue_t;
TASKQUEUE_STATS_ONLY(using taskqueue_t::stats;)
@@ -467,11 +467,11 @@ class TaskQueueSetSuper {
virtual uint tasks() const = 0;
};
-template <MEMFLAGS F> class TaskQueueSetSuperImpl: public CHeapObj<F>, public TaskQueueSetSuper {
+template <MemTag MT> class TaskQueueSetSuperImpl: public CHeapObj<MT>, public TaskQueueSetSuper {
};
-template <class T, MEMFLAGS F>
-class GenericTaskQueueSet: public TaskQueueSetSuperImpl<F> {
+template <class T, MemTag MT>
+class GenericTaskQueueSet: public TaskQueueSetSuperImpl<MT> {
public:
typedef typename T::element_type E;
typedef typename T::PopResult PopResult;
@@ -518,29 +518,29 @@ class GenericTaskQueueSet: public TaskQueueSetSuperImpl<F> {
#endif // TASKQUEUE_STATS
};
-template <class T, MEMFLAGS F> void
-GenericTaskQueueSet<T, F>::register_queue(uint i, T* q) {
+template <class T, MemTag MT> void
+GenericTaskQueueSet<T, MT>::register_queue(uint i, T* q) {
assert(i < _n, "index out of range.");
_queues[i] = q;
}
-template <class T, MEMFLAGS F> T*
-GenericTaskQueueSet<T, F>::queue(uint i) {
+template <class T, MemTag MT> T*
+GenericTaskQueueSet<T, MT>::queue(uint i) {
assert(i < _n, "index out of range.");
return _queues[i];
}
#ifdef ASSERT
-template <class T, MEMFLAGS F>
-void GenericTaskQueueSet<T, F>::assert_empty() const {
+template <class T, MemTag MT>
+void GenericTaskQueueSet<T, MT>::assert_empty() const {
for (uint j = 0; j < _n; j++) {
_queues[j]->assert_empty();
}
}
#endif // ASSERT
-template <class T, MEMFLAGS F>
-uint GenericTaskQueueSet<T, F>::tasks() const {
+template <class T, MemTag MT>
+uint GenericTaskQueueSet<T, MT>::tasks() const {
uint n = 0;
for (uint j = 0; j < _n; j++) {
n += _queues[j]->size();
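For illustration only (these typedefs are not part of the patch), client code now supplies the tag as a MemTag template argument wherever queue types are declared, for example:

    // Illustrative typedefs, assuming ScannerTask from gc/shared/taskqueue.hpp:
    // the NMT tag (here mtGC) replaces the former MEMFLAGS template argument.
    typedef OverflowTaskQueue<ScannerTask, mtGC>                ExampleScannerTasksQueue;
    typedef GenericTaskQueueSet<ExampleScannerTasksQueue, mtGC> ExampleScannerTasksQueueSet;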
diff --git a/src/hotspot/share/gc/shared/taskqueue.inline.hpp b/src/hotspot/share/gc/shared/taskqueue.inline.hpp
index f937ce8a2e993..8e65cfd704fda 100644
--- a/src/hotspot/share/gc/shared/taskqueue.inline.hpp
+++ b/src/hotspot/share/gc/shared/taskqueue.inline.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2015, 2023, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2015, 2024, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -38,30 +38,30 @@
#include "utilities/ostream.hpp"
#include "utilities/stack.inline.hpp"
-template <class T, MEMFLAGS F>
-inline GenericTaskQueueSet<T, F>::GenericTaskQueueSet(uint n) : _n(n) {
+template <class T, MemTag MT>
+inline GenericTaskQueueSet<T, MT>::GenericTaskQueueSet(uint n) : _n(n) {
typedef T* GenericTaskQueuePtr;
- _queues = NEW_C_HEAP_ARRAY(GenericTaskQueuePtr, n, F);
+ _queues = NEW_C_HEAP_ARRAY(GenericTaskQueuePtr, n, MT);
for (uint i = 0; i < n; i++) {
_queues[i] = nullptr;
}
}
-template <class T, MEMFLAGS F>
-inline GenericTaskQueueSet<T, F>::~GenericTaskQueueSet() {
+template <class T, MemTag MT>