Skip to content

Commit

Permalink
Merge pull request #315 from rdolbeau/arm-sve-clean
Browse files Browse the repository at this point in the history
PR to merge ARM SVE branch
  • Loading branch information
matteo-frigo authored Feb 1, 2025
2 parents 187045e + ff3dfb0 commit 33dded1
Show file tree
Hide file tree
Showing 33 changed files with 902 additions and 5 deletions.
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ rdft/simd/common/*.c
rdft/simd/kcvi/*.c
rdft/simd/neon/*.c
rdft/simd/sse2/*.c
rdft/simd/sve*/*.c
rdft/simd/vsx/*.c
rdft/scalar/r2cb/*.c
rdft/scalar/r2cf/*.c
Expand All @@ -45,6 +46,7 @@ dft/simd/common/*.c
dft/simd/kcvi/*.c
dft/simd/neon/*.c
dft/simd/sse2/*.c
dft/simd/sve*/*.c
dft/simd/vsx/*.c

# other generated files
Expand All @@ -54,6 +56,8 @@ api/fftw3*.f*
*.cmake
mpi/f03-wrap.c
mpi/fftw3*-mpi.f*
simd-support/vtw.h
simd-support/generate_vtw

# other build products
tests/bench
Expand Down
17 changes: 16 additions & 1 deletion Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,21 @@ NEON_LIBS = dft/simd/neon/libdft_neon_codelets.la \
rdft/simd/neon/librdft_neon_codelets.la
endif

if HAVE_SVE
# Convenience libraries holding the ARM SVE codelets: one dft/rdft pair
# for each of the sve128, sve256, sve512, sve1024 and sve2048 directories.
# The variable is left undefined when SVE support is not enabled.
SVE_LIBS = dft/simd/sve128/libdft_sve128_codelets.la \
	rdft/simd/sve128/librdft_sve128_codelets.la \
	dft/simd/sve256/libdft_sve256_codelets.la \
	rdft/simd/sve256/librdft_sve256_codelets.la \
	dft/simd/sve512/libdft_sve512_codelets.la \
	rdft/simd/sve512/librdft_sve512_codelets.la \
	dft/simd/sve1024/libdft_sve1024_codelets.la \
	rdft/simd/sve1024/librdft_sve1024_codelets.la \
	dft/simd/sve2048/libdft_sve2048_codelets.la \
	rdft/simd/sve2048/librdft_sve2048_codelets.la
endif


if HAVE_GENERIC_SIMD128
GENERIC_SIMD128_LIBS = dft/simd/generic-simd128/libdft_generic_simd128_codelets.la \
rdft/simd/generic-simd128/librdft_generic_simd128_codelets.la
Expand Down Expand Up @@ -126,7 +141,7 @@ libfftw3@PREC_SUFFIX@_la_LIBADD = \
api/libapi.la \
$(SIMD_LIBS) $(SSE2_LIBS) $(AVX_LIBS) $(AVX_128_FMA_LIBS) \
$(AVX2_LIBS) $(ALTIVEC_LIBS) \
$(VSX_LIBS) $(NEON_LIBS) $(KCVI_LIBS) $(AVX512_LIBS) \
$(VSX_LIBS) $(NEON_LIBS) $(SVE_LIBS) $(KCVI_LIBS) $(AVX512_LIBS) \
$(GENERIC_SIMD128_LIBS) $(GENERIC_SIMD256_LIBS) \
$(COMBINED_THREADLIBS)

Expand Down
4 changes: 4 additions & 0 deletions api/version.c
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,10 @@ const char X(version)[] = PACKAGE "-" PACKAGE_VERSION
"-neon"
#endif

#if HAVE_SVE
"-sve"
#endif

#if defined(HAVE_GENERIC_SIMD128)
"-generic_simd128"
#endif
Expand Down
22 changes: 22 additions & 0 deletions configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -235,6 +235,11 @@ if test "$have_generic_simd256" = "yes"; then
fi
AM_CONDITIONAL(HAVE_GENERIC_SIMD256, test "$have_generic_simd256" = "yes")

dnl --enable-sve: opt in to the ARM SVE code paths (disabled unless requested).
dnl The result is kept in $have_sve; when it is "yes" the HAVE_SVE preprocessor
dnl symbol is defined, and the HAVE_SVE automake conditional mirrors the same test.
dnl The help text uses AS_HELP_STRING, the non-obsolete spelling already used by
dnl the other options in this file.
AC_ARG_ENABLE(sve, [AS_HELP_STRING([--enable-sve],[enable ARM SVE optimizations])], have_sve=$enableval, have_sve=no)
if test "$have_sve" = "yes"; then
        AC_DEFINE(HAVE_SVE,1,[Define to enable ARM SVE optimizations])
fi
AM_CONDITIONAL(HAVE_SVE, test "$have_sve" = "yes")

dnl FIXME:
dnl AC_ARG_ENABLE(mips-ps, [AS_HELP_STRING([--enable-mips-ps],[enable MIPS pair-single optimizations])], have_mips_ps=$enableval, have_mips_ps=no)
Expand Down Expand Up @@ -677,6 +682,13 @@ if test "$enable_openmp" = "yes"; then
AX_OPENMP([], [AC_MSG_ERROR([don't know how to enable OpenMP])])
fi

dnl When SVE support was requested, insist that the compiler can actually
dnl build an SVE program.  sve_ok records the outcome of the probe, and
dnl configure stops with an error as soon as the probe fails.
if test "$have_sve" = "yes"; then
   ACX_SVE([sve_ok=yes],
           [sve_ok=no
            AC_MSG_ERROR([Cannot build a SVE program, aborting])])
fi

AC_ARG_ENABLE(threads, [AS_HELP_STRING([--enable-threads],[compile FFTW SMP threads library])], enable_threads=$enableval, enable_threads=no)

if test "$enable_threads" = "yes"; then
Expand Down Expand Up @@ -766,6 +778,11 @@ AC_CONFIG_FILES([
dft/simd/altivec/Makefile
dft/simd/vsx/Makefile
dft/simd/neon/Makefile
dft/simd/sve128/Makefile
dft/simd/sve256/Makefile
dft/simd/sve512/Makefile
dft/simd/sve1024/Makefile
dft/simd/sve2048/Makefile
dft/simd/generic-simd128/Makefile
dft/simd/generic-simd256/Makefile
Expand All @@ -786,6 +803,11 @@ AC_CONFIG_FILES([
rdft/simd/altivec/Makefile
rdft/simd/vsx/Makefile
rdft/simd/neon/Makefile
rdft/simd/sve128/Makefile
rdft/simd/sve256/Makefile
rdft/simd/sve512/Makefile
rdft/simd/sve1024/Makefile
rdft/simd/sve2048/Makefile
rdft/simd/generic-simd128/Makefile
rdft/simd/generic-simd256/Makefile
Expand Down
5 changes: 5 additions & 0 deletions dft/codelet-dft.h
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,11 @@ extern const solvtab X(solvtab_dft_kcvi);
extern const solvtab X(solvtab_dft_altivec);
extern const solvtab X(solvtab_dft_vsx);
extern const solvtab X(solvtab_dft_neon);
extern const solvtab X(solvtab_dft_sve128);
extern const solvtab X(solvtab_dft_sve256);
extern const solvtab X(solvtab_dft_sve512);
extern const solvtab X(solvtab_dft_sve1024);
extern const solvtab X(solvtab_dft_sve2048);
extern const solvtab X(solvtab_dft_generic_simd128);
extern const solvtab X(solvtab_dft_generic_simd256);

Expand Down
12 changes: 12 additions & 0 deletions dft/conf.c
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,18 @@ void X(dft_conf_standard)(planner *p)
if (X(have_simd_neon)())
X(solvtab_exec)(X(solvtab_dft_neon), p);
#endif
#if HAVE_SVE
if (X(have_simd_sve)(128))
X(solvtab_exec)(X(solvtab_dft_sve128), p);
if (X(have_simd_sve)(256))
X(solvtab_exec)(X(solvtab_dft_sve256), p);
if (X(have_simd_sve)(512))
X(solvtab_exec)(X(solvtab_dft_sve512), p);
if (X(have_simd_sve)(1024))
X(solvtab_exec)(X(solvtab_dft_sve1024), p);
if (X(have_simd_sve)(2048))
X(solvtab_exec)(X(solvtab_dft_sve2048), p);
#endif
#if HAVE_GENERIC_SIMD128
X(solvtab_exec)(X(solvtab_dft_generic_simd128), p);
#endif
Expand Down
2 changes: 1 addition & 1 deletion dft/simd/Makefile.am
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
AM_CPPFLAGS = -I $(top_srcdir)
SUBDIRS = common sse2 avx avx-128-fma avx2 avx2-128 avx512 kcvi altivec vsx neon generic-simd128 generic-simd256
SUBDIRS = common sse2 avx avx-128-fma avx2 avx2-128 avx512 kcvi altivec vsx neon sve128 sve256 sve512 sve1024 sve2048 generic-simd128 generic-simd256
EXTRA_DIST = n1b.h n1f.h n2b.h n2f.h n2s.h q1b.h q1f.h t1b.h t1bu.h \
t1f.h t1fu.h t2b.h t2f.h t3b.h t3f.h ts.h codlist.mk simd.mk
13 changes: 13 additions & 0 deletions dft/simd/sve1024/Makefile.am
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Automake input for the dft sve1024 codelet library.
# Everything in this directory is compiled with $(SVE_CFLAGS).
AM_CFLAGS = $(SVE_CFLAGS)

# Not referenced again in this file; presumably consumed by the included
# simd.mk -- confirm there.
SIMD_HEADER = simd-support/simd-maskedsve1024.h

include $(top_srcdir)/dft/simd/codlist.mk
include $(top_srcdir)/dft/simd/simd.mk

if HAVE_SVE

# The library is only declared when SVE is enabled.  Its sources are the
# files named by EXTRA_DIST (not assigned in this file); listing them in
# BUILT_SOURCES asks automake to have them made before the other targets.
noinst_LTLIBRARIES = libdft_sve1024_codelets.la
BUILT_SOURCES = $(EXTRA_DIST)
libdft_sve1024_codelets_la_SOURCES = $(BUILT_SOURCES)

endif
13 changes: 13 additions & 0 deletions dft/simd/sve128/Makefile.am
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Automake input for the dft sve128 codelet library.
# Everything in this directory is compiled with $(SVE_CFLAGS).
AM_CFLAGS = $(SVE_CFLAGS)

# Not referenced again in this file; presumably consumed by the included
# simd.mk -- confirm there.
SIMD_HEADER = simd-support/simd-maskedsve128.h

include $(top_srcdir)/dft/simd/codlist.mk
include $(top_srcdir)/dft/simd/simd.mk

if HAVE_SVE

# The library is only declared when SVE is enabled.  Its sources are the
# files named by EXTRA_DIST (not assigned in this file); listing them in
# BUILT_SOURCES asks automake to have them made before the other targets.
noinst_LTLIBRARIES = libdft_sve128_codelets.la
BUILT_SOURCES = $(EXTRA_DIST)
libdft_sve128_codelets_la_SOURCES = $(BUILT_SOURCES)

endif
13 changes: 13 additions & 0 deletions dft/simd/sve2048/Makefile.am
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Automake input for the dft sve2048 codelet library.
# Everything in this directory is compiled with $(SVE_CFLAGS).
AM_CFLAGS = $(SVE_CFLAGS)

# Not referenced again in this file; presumably consumed by the included
# simd.mk -- confirm there.
SIMD_HEADER = simd-support/simd-maskedsve2048.h

include $(top_srcdir)/dft/simd/codlist.mk
include $(top_srcdir)/dft/simd/simd.mk

if HAVE_SVE

# The library is only declared when SVE is enabled.  Its sources are the
# files named by EXTRA_DIST (not assigned in this file); listing them in
# BUILT_SOURCES asks automake to have them made before the other targets.
noinst_LTLIBRARIES = libdft_sve2048_codelets.la
BUILT_SOURCES = $(EXTRA_DIST)
libdft_sve2048_codelets_la_SOURCES = $(BUILT_SOURCES)

endif
13 changes: 13 additions & 0 deletions dft/simd/sve256/Makefile.am
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Automake input for the dft sve256 codelet library.
# Everything in this directory is compiled with $(SVE_CFLAGS).
AM_CFLAGS = $(SVE_CFLAGS)

# Not referenced again in this file; presumably consumed by the included
# simd.mk -- confirm there.
SIMD_HEADER = simd-support/simd-maskedsve256.h

include $(top_srcdir)/dft/simd/codlist.mk
include $(top_srcdir)/dft/simd/simd.mk

if HAVE_SVE

# The library is only declared when SVE is enabled.  Its sources are the
# files named by EXTRA_DIST (not assigned in this file); listing them in
# BUILT_SOURCES asks automake to have them made before the other targets.
noinst_LTLIBRARIES = libdft_sve256_codelets.la
BUILT_SOURCES = $(EXTRA_DIST)
libdft_sve256_codelets_la_SOURCES = $(BUILT_SOURCES)

endif
13 changes: 13 additions & 0 deletions dft/simd/sve512/Makefile.am
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Automake input for the dft sve512 codelet library.
# Everything in this directory is compiled with $(SVE_CFLAGS).
AM_CFLAGS = $(SVE_CFLAGS)

# Not referenced again in this file; presumably consumed by the included
# simd.mk -- confirm there.
SIMD_HEADER = simd-support/simd-maskedsve512.h

include $(top_srcdir)/dft/simd/codlist.mk
include $(top_srcdir)/dft/simd/simd.mk

if HAVE_SVE

# The library is only declared when SVE is enabled.  Its sources are the
# files named by EXTRA_DIST (not assigned in this file); listing them in
# BUILT_SOURCES asks automake to have them made before the other targets.
noinst_LTLIBRARIES = libdft_sve512_codelets.la
BUILT_SOURCES = $(EXTRA_DIST)
libdft_sve512_codelets_la_SOURCES = $(BUILT_SOURCES)

endif
1 change: 1 addition & 0 deletions kernel/ifftw.h
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,7 @@ extern int X(have_simd_avx512)(void);
extern int X(have_simd_altivec)(void);
extern int X(have_simd_vsx)(void);
extern int X(have_simd_neon)(void);
extern int X(have_simd_sve)(int minwidth);

/* forward declarations */
typedef struct problem_s problem;
Expand Down
26 changes: 26 additions & 0 deletions m4/acx_sve.m4
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
dnl @synopsis ACX_SVE([ACTION-IF-FOUND[, ACTION-IF-NOT-FOUND]])
dnl @summary figure out whether a simple SVE program can be compiled
dnl @category InstalledPackages
dnl
dnl This macro tries to compile a simple SVE program that uses
dnl the ACLE SVE extensions.
dnl
dnl ACTION-IF-FOUND is a list of shell commands to run if an SVE
dnl program can be compiled, and ACTION-IF-NOT-FOUND is a list of commands
dnl to run if it cannot.
dnl
dnl The test program only includes <arm_sve.h>.  When __GNUC__ is defined,
dnl it is additionally made to fail unless __ARM_FEATURE_SVE is defined.
dnl The outcome is reported as a "checking ... yes/no" configure message
dnl before the selected action runs.
dnl
dnl @version 2024-04-15
dnl @license GPLWithACException
dnl @author Gilles Gouaillardet <[email protected]>

AC_DEFUN([ACX_SVE], [
AC_MSG_CHECKING([whether a SVE program can be compiled])
AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[#include <arm_sve.h>]],
[[#if defined(__GNUC__) && !defined(__ARM_FEATURE_SVE)
#error compiling without SVE support
#endif]])],[AC_MSG_RESULT([yes])
$1],
[AC_MSG_RESULT([no])
$2])
])dnl ACX_SVE
5 changes: 5 additions & 0 deletions rdft/codelet-rdft.h
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,11 @@ extern const solvtab X(solvtab_rdft_kcvi);
extern const solvtab X(solvtab_rdft_altivec);
extern const solvtab X(solvtab_rdft_vsx);
extern const solvtab X(solvtab_rdft_neon);
extern const solvtab X(solvtab_rdft_sve128);
extern const solvtab X(solvtab_rdft_sve256);
extern const solvtab X(solvtab_rdft_sve512);
extern const solvtab X(solvtab_rdft_sve1024);
extern const solvtab X(solvtab_rdft_sve2048);
extern const solvtab X(solvtab_rdft_generic_simd128);
extern const solvtab X(solvtab_rdft_generic_simd256);

Expand Down
12 changes: 12 additions & 0 deletions rdft/conf.c
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,18 @@ void X(rdft_conf_standard)(planner *p)
if (X(have_simd_neon)())
X(solvtab_exec)(X(solvtab_rdft_neon), p);
#endif
#if HAVE_SVE
if (X(have_simd_sve)(128))
X(solvtab_exec)(X(solvtab_rdft_sve128), p);
if (X(have_simd_sve)(256))
X(solvtab_exec)(X(solvtab_rdft_sve256), p);
if (X(have_simd_sve)(512))
X(solvtab_exec)(X(solvtab_rdft_sve512), p);
if (X(have_simd_sve)(1024))
X(solvtab_exec)(X(solvtab_rdft_sve1024), p);
if (X(have_simd_sve)(2048))
X(solvtab_exec)(X(solvtab_rdft_sve2048), p);
#endif
#if HAVE_GENERIC_SIMD128
X(solvtab_exec)(X(solvtab_rdft_generic_simd128), p);
#endif
Expand Down
2 changes: 1 addition & 1 deletion rdft/simd/Makefile.am
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@

AM_CPPFLAGS = -I $(top_srcdir)
SUBDIRS = common sse2 avx avx-128-fma avx2 avx2-128 avx512 kcvi altivec vsx neon generic-simd128 generic-simd256
SUBDIRS = common sse2 avx avx-128-fma avx2 avx2-128 avx512 kcvi altivec vsx neon sve128 sve256 sve512 sve1024 sve2048 generic-simd128 generic-simd256
EXTRA_DIST = hc2cbv.h hc2cfv.h codlist.mk simd.mk
13 changes: 13 additions & 0 deletions rdft/simd/sve1024/Makefile.am
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Automake input for the rdft sve1024 codelet library.
# Everything in this directory is compiled with $(SVE_CFLAGS).
AM_CFLAGS = $(SVE_CFLAGS)

# Not referenced again in this file; presumably consumed by the included
# simd.mk -- confirm there.
SIMD_HEADER = simd-support/simd-maskedsve1024.h

include $(top_srcdir)/rdft/simd/codlist.mk
include $(top_srcdir)/rdft/simd/simd.mk

if HAVE_SVE

# The library is only declared when SVE is enabled.  Its sources are the
# files named by EXTRA_DIST (not assigned in this file); listing them in
# BUILT_SOURCES asks automake to have them made before the other targets.
BUILT_SOURCES = $(EXTRA_DIST)
noinst_LTLIBRARIES = librdft_sve1024_codelets.la
librdft_sve1024_codelets_la_SOURCES = $(BUILT_SOURCES)

endif
13 changes: 13 additions & 0 deletions rdft/simd/sve128/Makefile.am
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Automake input for the rdft sve128 codelet library.
# Everything in this directory is compiled with $(SVE_CFLAGS).
AM_CFLAGS = $(SVE_CFLAGS)

# Not referenced again in this file; presumably consumed by the included
# simd.mk -- confirm there.
SIMD_HEADER = simd-support/simd-maskedsve128.h

include $(top_srcdir)/rdft/simd/codlist.mk
include $(top_srcdir)/rdft/simd/simd.mk

if HAVE_SVE

# The library is only declared when SVE is enabled.  Its sources are the
# files named by EXTRA_DIST (not assigned in this file); listing them in
# BUILT_SOURCES asks automake to have them made before the other targets.
BUILT_SOURCES = $(EXTRA_DIST)
noinst_LTLIBRARIES = librdft_sve128_codelets.la
librdft_sve128_codelets_la_SOURCES = $(BUILT_SOURCES)

endif
13 changes: 13 additions & 0 deletions rdft/simd/sve2048/Makefile.am
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Automake input for the rdft sve2048 codelet library.
# Everything in this directory is compiled with $(SVE_CFLAGS).
AM_CFLAGS = $(SVE_CFLAGS)

# Not referenced again in this file; presumably consumed by the included
# simd.mk -- confirm there.
SIMD_HEADER = simd-support/simd-maskedsve2048.h

include $(top_srcdir)/rdft/simd/codlist.mk
include $(top_srcdir)/rdft/simd/simd.mk

if HAVE_SVE

# The library is only declared when SVE is enabled.  Its sources are the
# files named by EXTRA_DIST (not assigned in this file); listing them in
# BUILT_SOURCES asks automake to have them made before the other targets.
BUILT_SOURCES = $(EXTRA_DIST)
noinst_LTLIBRARIES = librdft_sve2048_codelets.la
librdft_sve2048_codelets_la_SOURCES = $(BUILT_SOURCES)

endif
13 changes: 13 additions & 0 deletions rdft/simd/sve256/Makefile.am
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Automake input for the rdft sve256 codelet library.
# Everything in this directory is compiled with $(SVE_CFLAGS).
AM_CFLAGS = $(SVE_CFLAGS)

# Not referenced again in this file; presumably consumed by the included
# simd.mk -- confirm there.
SIMD_HEADER = simd-support/simd-maskedsve256.h

include $(top_srcdir)/rdft/simd/codlist.mk
include $(top_srcdir)/rdft/simd/simd.mk

if HAVE_SVE

# The library is only declared when SVE is enabled.  Its sources are the
# files named by EXTRA_DIST (not assigned in this file); listing them in
# BUILT_SOURCES asks automake to have them made before the other targets.
BUILT_SOURCES = $(EXTRA_DIST)
noinst_LTLIBRARIES = librdft_sve256_codelets.la
librdft_sve256_codelets_la_SOURCES = $(BUILT_SOURCES)

endif
13 changes: 13 additions & 0 deletions rdft/simd/sve512/Makefile.am
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Automake input for the rdft sve512 codelet library.
# Everything in this directory is compiled with $(SVE_CFLAGS).
AM_CFLAGS = $(SVE_CFLAGS)

# Not referenced again in this file; presumably consumed by the included
# simd.mk -- confirm there.
SIMD_HEADER = simd-support/simd-maskedsve512.h

include $(top_srcdir)/rdft/simd/codlist.mk
include $(top_srcdir)/rdft/simd/simd.mk

if HAVE_SVE

# The library is only declared when SVE is enabled.  Its sources are the
# files named by EXTRA_DIST (not assigned in this file); listing them in
# BUILT_SOURCES asks automake to have them made before the other targets.
BUILT_SOURCES = $(EXTRA_DIST)
noinst_LTLIBRARIES = librdft_sve512_codelets.la
librdft_sve512_codelets_la_SOURCES = $(BUILT_SOURCES)

endif
19 changes: 18 additions & 1 deletion simd-support/Makefile.am
Original file line number Diff line number Diff line change
@@ -1,6 +1,21 @@
AM_CPPFLAGS = -I $(top_srcdir)
noinst_LTLIBRARIES = libsimd_support.la

# Programs built in this directory but not installed.  The list starts
# empty and is extended only in maintainer mode.
noinst_PROGRAMS =

if MAINTAINER_MODE
# In maintainer mode, build the generate_vtw helper and use it as the
# prerequisite for regenerating vtw.h.
noinst_PROGRAMS += generate_vtw
generate_vtw_SOURCES = generate_vtw.c

# vtw.h is the captured standard output of generate_vtw.sh.
# The recipe line must begin with a TAB, otherwise make rejects the file
# with "missing separator".
vtw.h: generate_vtw
	$(top_srcdir)/simd-support/generate_vtw.sh > $@

# Make sure vtw.h has been regenerated before sve.c is considered.
sve.c: vtw.h
endif

# NOTE(review): the target below is spelled libsimd_support_la, which is not
# the library name libsimd_support.la declared in noinst_LTLIBRARIES, so it
# may not make the library wait for vtw.h -- confirm the intended target.
libsimd_support_la: vtw.h

libsimd_support_la_SOURCES = taint.c simd-common.h \
x86-cpuid.h amd64-cpuid.h \
simd-sse2.h sse2.c \
Expand All @@ -11,5 +26,7 @@ avx512.c simd-avx512.h \
kcvi.c simd-kcvi.h \
altivec.c simd-altivec.h vsx.c simd-vsx.h \
neon.c simd-neon.h \
simd-generic128.h simd-generic256.h
simd-generic128.h simd-generic256.h \
sve.c simd-maskedsve.h simd-maskedsve128.h simd-maskedsve256.h simd-maskedsve512.h simd-maskedsve1024.h simd-maskedsve2048.h vtw.h

EXTRA_DIST = generate_vtw.sh
Loading

0 comments on commit 33dded1

Please sign in to comment.