Skip to content

Commit

Permalink
revert: "Optimize 32 and wider avx2 block subtract (Kyle Siefring) - …
Browse files Browse the repository at this point in the history
…Tests can't be added"

This reverts commit f14607b.
  • Loading branch information
gianni-rosato committed Oct 13, 2024
1 parent 954f726 commit bb886b2
Showing 1 changed file with 10 additions and 11 deletions.
21 changes: 10 additions & 11 deletions Source/Lib/ASM_AVX2/convolve_avx2.c
Original file line number Diff line number Diff line change
Expand Up @@ -1499,17 +1499,16 @@ uint64_t svt_av1_wedge_sse_from_residuals_avx2(const int16_t *r1, const int16_t
}

// Writes the 32 differences src_ptr[i] - pred_ptr[i] (i = 0..31), widened to
// int16_t, into diff_ptr[0..31] using AVX2 intrinsics.
//
// NOTE(review): this listing comes from a commit view that reports
// "10 additions & 11 deletions" and carries no +/- markers. The body below
// holds two complete implementations back to back (both declare `s` and `p`),
// so it is not valid C as one function. Presumably the first 11 body lines are
// the removed unpack/maddubs variant and the last 10 are the restored
// widen-and-subtract variant -- confirm against the repository.
static INLINE void subtract32_avx2(int16_t *diff_ptr, const uint8_t *src_ptr, const uint8_t *pred_ptr) {
// ---- Variant 1: interleave + multiply-add ----
// Unaligned 32-byte loads of the two inputs.
__m256i s = _mm256_lddqu_si256((__m256i *)(src_ptr));
__m256i p = _mm256_lddqu_si256((__m256i *)(pred_ptr));
// Each 16-bit lane holds the bytes 0x01 (low) and 0xff (high), i.e. +1 and -1
// when read as signed 8-bit multipliers.
__m256i set_one_minusone = _mm256_set1_epi32((int)0xff01ff01);
// Interleave s and p bytes within each 128-bit lane: diff0 receives the (s, p)
// pairs for elements 0-7 and 16-23, diff1 those for elements 8-15 and 24-31.
__m256i diff0 = _mm256_unpacklo_epi8(s, p);
__m256i diff1 = _mm256_unpackhi_epi8(s, p);
// maddubs multiplies unsigned bytes of the first operand by signed bytes of
// the second and sums adjacent pairs: s * (+1) + p * (-1) = s - p per lane.
diff0 = _mm256_maddubs_epi16(diff0, set_one_minusone);
diff1 = _mm256_maddubs_epi16(diff1, set_one_minusone);
// Selector 0x20 joins the low 128-bit lanes of diff0 and diff1 (elements 0-15).
// _mm256_store_si256 is the aligned store form, so this variant requires
// diff_ptr to be 32-byte aligned.
_mm256_store_si256((__m256i *)(diff_ptr),
_mm256_permute2x128_si256(diff0, diff1, 0x20));
// Selector 0x31 joins the high 128-bit lanes (elements 16-31).
_mm256_store_si256((__m256i *)(diff_ptr + 16),
_mm256_permute2x128_si256(diff0, diff1, 0x31));
// ---- Variant 2: zero-extend + subtract ----
// Unaligned 32-byte loads of the two inputs.
__m256i s = _mm256_lddqu_si256((__m256i *)(src_ptr));
__m256i p = _mm256_lddqu_si256((__m256i *)(pred_ptr));
// Zero-extend each 16-byte half to sixteen 16-bit lanes: the cast takes the
// low 128 bits (elements 0-15), extracti128(..., 1) the high 128 bits (16-31).
__m256i s_0 = _mm256_cvtepu8_epi16(_mm256_castsi256_si128(s));
__m256i s_1 = _mm256_cvtepu8_epi16(_mm256_extracti128_si256(s, 1));
__m256i p_0 = _mm256_cvtepu8_epi16(_mm256_castsi256_si128(p));
__m256i p_1 = _mm256_cvtepu8_epi16(_mm256_extracti128_si256(p, 1));
// 16-bit lane-wise src - pred.
const __m256i d_0 = _mm256_sub_epi16(s_0, p_0);
const __m256i d_1 = _mm256_sub_epi16(s_1, p_1);
// Unaligned stores: unlike variant 1, no alignment is required of diff_ptr.
_mm256_storeu_si256((__m256i *)(diff_ptr), d_0);
_mm256_storeu_si256((__m256i *)(diff_ptr + 16), d_1);
}

static INLINE void aom_subtract_block_16xn_avx2(int rows, int16_t *diff_ptr, ptrdiff_t diff_stride,
Expand Down

0 comments on commit bb886b2

Please sign in to comment.