diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 6e68d803a..5047557f5 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -43,8 +43,11 @@ set (SFIZZ_HEADERS sfizz/effects/impl/ResonantArrayAVX.h sfizz/effects/impl/ResonantArraySSE.h sfizz/effects/impl/ResonantString.h + sfizz/effects/impl/ResonantStringInline.h sfizz/effects/impl/ResonantStringAVX.h + sfizz/effects/impl/ResonantStringAVXInline.h sfizz/effects/impl/ResonantStringSSE.h + sfizz/effects/impl/ResonantStringSSEInline.h sfizz/effects/Apan.h sfizz/effects/CommonLFO.h sfizz/effects/CommonLFO.hpp diff --git a/src/sfizz/effects/impl/ResonantArray.cpp b/src/sfizz/effects/impl/ResonantArray.cpp index 59204916c..f426bbc5d 100644 --- a/src/sfizz/effects/impl/ResonantArray.cpp +++ b/src/sfizz/effects/impl/ResonantArray.cpp @@ -6,6 +6,7 @@ #include "ResonantArray.h" #include "ResonantString.h" +#include "ResonantStringInline.h" #include "SIMDHelpers.h" namespace sfz { diff --git a/src/sfizz/effects/impl/ResonantArrayAVX.cpp b/src/sfizz/effects/impl/ResonantArrayAVX.cpp index 40ac0df35..49f208349 100644 --- a/src/sfizz/effects/impl/ResonantArrayAVX.cpp +++ b/src/sfizz/effects/impl/ResonantArrayAVX.cpp @@ -5,6 +5,7 @@ // If not, contact the sfizz maintainers at https://github.com/sfztools/sfizz #include "ResonantArrayAVX.h" +#include "ResonantStringAVXInline.h" #include "Config.h" #include @@ -82,7 +83,29 @@ void ResonantArrayAVX::process(const float *inPtr, float *outPtr, unsigned numFr __m256* outputs8 = reinterpret_cast<__m256*>(_workBuffer.data()); std::memset(outputs8, 0, numFrames * sizeof(__m256)); - for (unsigned p = 0; p < numStringPacks; ++p) { + unsigned p = 0; + for (; p + 7 < numStringPacks; p += 8) { /* eight independent packs advance together, all fed the same input frame */ + for (unsigned i = 0; i < numFrames; ++i) { + const __m256 in = _mm256_broadcast_ss(&inPtr[i]); + __m256 o1 = reinterpret_cast<ResonantStringAVX&>(stringPacks[p]).process(in); + __m256 o2 = reinterpret_cast<ResonantStringAVX&>(stringPacks[p + 1]).process(in); + __m256 o3 = reinterpret_cast<ResonantStringAVX&>(stringPacks[p + 2]).process(in); + 
__m256 o4 = reinterpret_cast<ResonantStringAVX&>(stringPacks[p + 3]).process(in); + __m256 o5 = reinterpret_cast<ResonantStringAVX&>(stringPacks[p + 4]).process(in); + __m256 o6 = reinterpret_cast<ResonantStringAVX&>(stringPacks[p + 5]).process(in); + __m256 o7 = reinterpret_cast<ResonantStringAVX&>(stringPacks[p + 6]).process(in); + __m256 o8 = reinterpret_cast<ResonantStringAVX&>(stringPacks[p + 7]).process(in); + __m256 output8 = outputs8[i]; + o1 = _mm256_add_ps(o1, o2); + o2 = _mm256_add_ps(o3, o4); + o3 = _mm256_add_ps(o5, o6); + o4 = _mm256_add_ps(o7, o8); + o1 = _mm256_add_ps(o1, o2); + o2 = _mm256_add_ps(o3, o4); + outputs8[i] = _mm256_add_ps(o1, _mm256_add_ps(output8, o2)); + } + } + for (; p < numStringPacks; ++p) { ResonantStringAVX& rs = reinterpret_cast(stringPacks[p]); for (unsigned i = 0; i < numFrames; ++i) outputs8[i] = _mm256_add_ps( diff --git a/src/sfizz/effects/impl/ResonantArraySSE.cpp b/src/sfizz/effects/impl/ResonantArraySSE.cpp index b72f989cf..80cd8123e 100644 --- a/src/sfizz/effects/impl/ResonantArraySSE.cpp +++ b/src/sfizz/effects/impl/ResonantArraySSE.cpp @@ -5,6 +5,7 @@ // If not, contact the sfizz maintainers at https://github.com/sfztools/sfizz #include "ResonantArraySSE.h" +#include "ResonantStringSSEInline.h" #include "Config.h" #include @@ -82,7 +83,29 @@ void ResonantArraySSE::process(const float *inPtr, float *outPtr, unsigned numFr __m128* outputs4 = reinterpret_cast<__m128*>(_workBuffer.data()); std::memset(outputs4, 0, numFrames * sizeof(__m128)); - for (unsigned p = 0; p < numStringPacks; ++p) { + unsigned p = 0; + for (; p + 7 < numStringPacks; p += 8) { /* eight independent packs advance together, all fed the same input frame */ + for (unsigned i = 0; i < numFrames; ++i) { + const __m128 in = _mm_load1_ps(&inPtr[i]); + __m128 o1 = stringPacks[p].process(in); + __m128 o2 = stringPacks[p + 1].process(in); + __m128 o3 = stringPacks[p + 2].process(in); + __m128 o4 = stringPacks[p + 3].process(in); + __m128 o5 = stringPacks[p + 4].process(in); + __m128 o6 = stringPacks[p + 5].process(in); + __m128 o7 = stringPacks[p + 6].process(in); 
+ __m128 o8 = stringPacks[p + 7].process(in); + __m128 output4 = outputs4[i]; + o1 = _mm_add_ps(o1, o2); + o2 = _mm_add_ps(o3, o4); + o3 = _mm_add_ps(o5, o6); + o4 = _mm_add_ps(o7, o8); + o1 = _mm_add_ps(o1, o2); + o2 = _mm_add_ps(o3, o4); + outputs4[i] = _mm_add_ps(o1, _mm_add_ps(output4, o2)); + } + } + for (; p < numStringPacks; ++p) { ResonantStringSSE& rs = stringPacks[p]; for (unsigned i = 0; i < numFrames; ++i) outputs4[i] = _mm_add_ps( diff --git a/src/sfizz/effects/impl/ResonantString.cpp b/src/sfizz/effects/impl/ResonantString.cpp index 7e7aaf402..48c17aa54 100644 --- a/src/sfizz/effects/impl/ResonantString.cpp +++ b/src/sfizz/effects/impl/ResonantString.cpp @@ -85,19 +85,5 @@ void ResonantString::setResonanceFrequency(float frequency, float bandwidth) fControl[17] = (fControl[10] + (4.0f - fControl[11])); } -float ResonantString::process(float input) -{ - fRec0[0] = (fControl[1] * ((fControl[4] * fRec1[1]) + (fControl[5] * fRec0[1]))); - float fTemp0 = input; - fRec2[0] = (fTemp0 - (fControl[15] * ((fControl[16] * fRec2[1]) + (fControl[17] * fRec2[2])))); - fRec1[0] = (((fControl[14] * fRec2[2]) + ((fControl[5] * fRec1[1]) + (fControl[13] * fRec2[0]))) - (fControl[4] * fRec0[1])); - float output = float((fControl[0] * fRec0[0])); - fRec0[1] = fRec0[0]; - fRec2[2] = fRec2[1]; - fRec2[1] = fRec2[0]; - fRec1[1] = fRec1[0]; - return output; -} - } // namespace sfz } // namespace fx diff --git a/src/sfizz/effects/impl/ResonantStringAVX.cpp b/src/sfizz/effects/impl/ResonantStringAVX.cpp index e3f0275ca..1098a2b92 100644 --- a/src/sfizz/effects/impl/ResonantStringAVX.cpp +++ b/src/sfizz/effects/impl/ResonantStringAVX.cpp @@ -110,20 +110,6 @@ void ResonantStringAVX::setResonanceFrequency(__m256 frequency, __m256 bandwidth fControl[17] = _mm256_add_ps(fControl[10], _mm256_sub_ps(_mm256_set1_ps(4.0f), fControl[11])); } -__m256 ResonantStringAVX::process(__m256 input) -{ - fRec0[0] = _mm256_mul_ps(fControl[1], _mm256_add_ps(_mm256_mul_ps(fControl[4], 
fRec1[1]), _mm256_mul_ps(fControl[5], fRec0[1]))); - __m256 fTemp0 = input; - fRec2[0] = _mm256_sub_ps(fTemp0, _mm256_mul_ps(fControl[15], _mm256_add_ps(_mm256_mul_ps(fControl[16], fRec2[1]), _mm256_mul_ps(fControl[17], fRec2[2])))); - fRec1[0] = _mm256_sub_ps(_mm256_add_ps(_mm256_mul_ps(fControl[14], fRec2[2]), _mm256_add_ps(_mm256_mul_ps(fControl[5], fRec1[1]), _mm256_mul_ps(fControl[13], fRec2[0]))),_mm256_mul_ps(fControl[4], fRec0[1])); - __m256 output = _mm256_mul_ps(fControl[0], fRec0[0]); - fRec0[1] = fRec0[0]; - fRec2[2] = fRec2[1]; - fRec2[1] = fRec2[0]; - fRec1[1] = fRec1[0]; - return output; -} - } // namespace sfz } // namespace fx #endif diff --git a/src/sfizz/effects/impl/ResonantStringAVXInline.h b/src/sfizz/effects/impl/ResonantStringAVXInline.h new file mode 100644 index 000000000..8942a1bd4 --- /dev/null +++ b/src/sfizz/effects/impl/ResonantStringAVXInline.h @@ -0,0 +1,28 @@ +// SPDX-License-Identifier: BSD-2-Clause + +// This code is part of the sfizz library and is licensed under a BSD 2-clause +// license. You should have received a LICENSE.md file along with the code. 
+// If not, contact the sfizz maintainers at https://github.com/sfztools/sfizz + +#pragma once +#include "ResonantStringAVX.h" + +namespace sfz { +namespace fx { + +inline __m256 ResonantStringAVX::process(__m256 input) +{ + fRec0[0] = _mm256_mul_ps(fControl[1], _mm256_add_ps(_mm256_mul_ps(fControl[4], fRec1[1]), _mm256_mul_ps(fControl[5], fRec0[1]))); + __m256 fTemp0 = input; + fRec2[0] = _mm256_sub_ps(fTemp0, _mm256_mul_ps(fControl[15], _mm256_add_ps(_mm256_mul_ps(fControl[16], fRec2[1]), _mm256_mul_ps(fControl[17], fRec2[2])))); + fRec1[0] = _mm256_sub_ps(_mm256_add_ps(_mm256_mul_ps(fControl[14], fRec2[2]), _mm256_add_ps(_mm256_mul_ps(fControl[5], fRec1[1]), _mm256_mul_ps(fControl[13], fRec2[0]))),_mm256_mul_ps(fControl[4], fRec0[1])); + __m256 output = _mm256_mul_ps(fControl[0], fRec0[0]); + fRec0[1] = fRec0[0]; + fRec2[2] = fRec2[1]; + fRec2[1] = fRec2[0]; + fRec1[1] = fRec1[0]; + return output; +} + +} // namespace fx +} // namespace sfz diff --git a/src/sfizz/effects/impl/ResonantStringInline.h b/src/sfizz/effects/impl/ResonantStringInline.h new file mode 100644 index 000000000..80780261a --- /dev/null +++ b/src/sfizz/effects/impl/ResonantStringInline.h @@ -0,0 +1,28 @@ +// SPDX-License-Identifier: BSD-2-Clause + +// This code is part of the sfizz library and is licensed under a BSD 2-clause +// license. You should have received a LICENSE.md file along with the code. 
+// If not, contact the sfizz maintainers at https://github.com/sfztools/sfizz + +#pragma once +#include "ResonantString.h" + +namespace sfz { +namespace fx { + +inline float ResonantString::process(float input) +{ + fRec0[0] = (fControl[1] * ((fControl[4] * fRec1[1]) + (fControl[5] * fRec0[1]))); + float fTemp0 = input; + fRec2[0] = (fTemp0 - (fControl[15] * ((fControl[16] * fRec2[1]) + (fControl[17] * fRec2[2])))); + fRec1[0] = (((fControl[14] * fRec2[2]) + ((fControl[5] * fRec1[1]) + (fControl[13] * fRec2[0]))) - (fControl[4] * fRec0[1])); + float output = float((fControl[0] * fRec0[0])); + fRec0[1] = fRec0[0]; + fRec2[2] = fRec2[1]; + fRec2[1] = fRec2[0]; + fRec1[1] = fRec1[0]; + return output; +} + +} // namespace fx +} // namespace sfz diff --git a/src/sfizz/effects/impl/ResonantStringSSE.cpp b/src/sfizz/effects/impl/ResonantStringSSE.cpp index 6e02d7303..1ea8b71c6 100644 --- a/src/sfizz/effects/impl/ResonantStringSSE.cpp +++ b/src/sfizz/effects/impl/ResonantStringSSE.cpp @@ -110,20 +110,6 @@ void ResonantStringSSE::setResonanceFrequency(__m128 frequency, __m128 bandwidth fControl[17] = _mm_add_ps(fControl[10], _mm_sub_ps(_mm_set1_ps(4.0f), fControl[11])); } -__m128 ResonantStringSSE::process(__m128 input) -{ - fRec0[0] = _mm_mul_ps(fControl[1], _mm_add_ps(_mm_mul_ps(fControl[4], fRec1[1]), _mm_mul_ps(fControl[5], fRec0[1]))); - __m128 fTemp0 = input; - fRec2[0] = _mm_sub_ps(fTemp0, _mm_mul_ps(fControl[15], _mm_add_ps(_mm_mul_ps(fControl[16], fRec2[1]), _mm_mul_ps(fControl[17], fRec2[2])))); - fRec1[0] = _mm_sub_ps(_mm_add_ps(_mm_mul_ps(fControl[14], fRec2[2]), _mm_add_ps(_mm_mul_ps(fControl[5], fRec1[1]), _mm_mul_ps(fControl[13], fRec2[0]))),_mm_mul_ps(fControl[4], fRec0[1])); - __m128 output = _mm_mul_ps(fControl[0], fRec0[0]); - fRec0[1] = fRec0[0]; - fRec2[2] = fRec2[1]; - fRec2[1] = fRec2[0]; - fRec1[1] = fRec1[0]; - return output; -} - } // namespace sfz } // namespace fx #endif diff --git a/src/sfizz/effects/impl/ResonantStringSSEInline.h 
b/src/sfizz/effects/impl/ResonantStringSSEInline.h new file mode 100644 index 000000000..9b69e7469 --- /dev/null +++ b/src/sfizz/effects/impl/ResonantStringSSEInline.h @@ -0,0 +1,28 @@ +// SPDX-License-Identifier: BSD-2-Clause + +// This code is part of the sfizz library and is licensed under a BSD 2-clause +// license. You should have received a LICENSE.md file along with the code. +// If not, contact the sfizz maintainers at https://github.com/sfztools/sfizz + +#pragma once +#include "ResonantStringSSE.h" + +namespace sfz { +namespace fx { + +inline __m128 ResonantStringSSE::process(__m128 input) +{ + fRec0[0] = _mm_mul_ps(fControl[1], _mm_add_ps(_mm_mul_ps(fControl[4], fRec1[1]), _mm_mul_ps(fControl[5], fRec0[1]))); + __m128 fTemp0 = input; + fRec2[0] = _mm_sub_ps(fTemp0, _mm_mul_ps(fControl[15], _mm_add_ps(_mm_mul_ps(fControl[16], fRec2[1]), _mm_mul_ps(fControl[17], fRec2[2])))); + fRec1[0] = _mm_sub_ps(_mm_add_ps(_mm_mul_ps(fControl[14], fRec2[2]), _mm_add_ps(_mm_mul_ps(fControl[5], fRec1[1]), _mm_mul_ps(fControl[13], fRec2[0]))),_mm_mul_ps(fControl[4], fRec0[1])); + __m128 output = _mm_mul_ps(fControl[0], fRec0[0]); + fRec0[1] = fRec0[0]; + fRec2[2] = fRec2[1]; + fRec2[1] = fRec2[0]; + fRec1[1] = fRec1[0]; + return output; +} + +} // namespace fx +} // namespace sfz