From a156c5ece7133383468d4cba33f067595d9da391 Mon Sep 17 00:00:00 2001 From: Jean-Marc Valin <jmvalin@jmvalin.ca> Date: Mon, 26 Aug 2013 18:54:39 -0400 Subject: [PATCH] Makes the SILK pitch search use celt_pitch_xcorr() Should gives us ARM/SSE optimizations for free. --- celt/pitch.c | 2 +- celt/pitch.h | 2 +- silk/fixed/pitch_analysis_core_FIX.c | 20 +++++++++++++------- silk/float/pitch_analysis_core_FLP.c | 13 ++++++++++--- 4 files changed, 25 insertions(+), 12 deletions(-) diff --git a/celt/pitch.c b/celt/pitch.c index 1b12b8398..0352b3025 100644 --- a/celt/pitch.c +++ b/celt/pitch.c @@ -250,7 +250,7 @@ opus_val32 #else void #endif -celt_pitch_xcorr(opus_val16 *_x, opus_val16 *_y, opus_val32 *xcorr, int len, int max_pitch) +celt_pitch_xcorr(const opus_val16 *_x, const opus_val16 *_y, opus_val32 *xcorr, int len, int max_pitch) { int i,j; #ifdef FIXED_POINT diff --git a/celt/pitch.h b/celt/pitch.h index 086a46ba1..caffd24bc 100644 --- a/celt/pitch.h +++ b/celt/pitch.h @@ -140,6 +140,6 @@ opus_val32 #else void #endif -celt_pitch_xcorr(opus_val16 *_x, opus_val16 *_y, opus_val32 *xcorr, int len, int max_pitch); +celt_pitch_xcorr(const opus_val16 *_x, const opus_val16 *_y, opus_val32 *xcorr, int len, int max_pitch); #endif diff --git a/silk/fixed/pitch_analysis_core_FIX.c b/silk/fixed/pitch_analysis_core_FIX.c index 12f2ce444..b6bc0bbfa 100644 --- a/silk/fixed/pitch_analysis_core_FIX.c +++ b/silk/fixed/pitch_analysis_core_FIX.c @@ -36,6 +36,7 @@ POSSIBILITY OF SUCH DAMAGE. #include "pitch_est_defines.h" #include "stack_alloc.h" #include "debug.h" +#include "pitch.h" #define SCRATCH_SIZE 22 #define SF_LENGTH_4KHZ ( PE_SUBFR_LENGTH_MS * 4 ) @@ -96,6 +97,7 @@ opus_int silk_pitch_analysis_core( /* O Voicing estimate: 0 const opus_int16 *input_frame_ptr; opus_int i, k, d, j; VARDECL( opus_int16, C ); + VARDECL( opus_int32, xcorr32 ); const opus_int16 *target_ptr, *basis_ptr; opus_int32 cross_corr, normalizer, energy, shift, energy_basis, energy_target; opus_int d_srch[ PE_D_SRCH_LENGTH ], Cmax, length_d_srch, length_d_comp; @@ -173,6 +175,7 @@ opus_int silk_pitch_analysis_core( /* O Voicing estimate: 0 * FIRST STAGE, operating in 4 khz ******************************************************************************/ ALLOC( C, nb_subfr * CSTRIDE_8KHZ, opus_int16 ); + ALLOC( xcorr32, MAX_LAG_4KHZ-MIN_LAG_4KHZ+1, opus_int32 ); silk_memset( C, 0, (nb_subfr >> 1) * CSTRIDE_4KHZ * sizeof( opus_int16 ) ); target_ptr = &frame_4kHz[ silk_LSHIFT( SF_LENGTH_4KHZ, 2 ) ]; for( k = 0; k < nb_subfr >> 1; k++ ) { @@ -186,8 +189,10 @@ opus_int silk_pitch_analysis_core( /* O Voicing estimate: 0 silk_assert( basis_ptr >= frame_4kHz ); silk_assert( basis_ptr + SF_LENGTH_8KHZ <= frame_4kHz + frame_length_4kHz ); + celt_pitch_xcorr( target_ptr, target_ptr - MAX_LAG_4KHZ, xcorr32, SF_LENGTH_8KHZ, MAX_LAG_4KHZ - MIN_LAG_4KHZ + 1 ); + /* Calculate first vector products before loop */ - cross_corr = silk_inner_prod_aligned( target_ptr, basis_ptr, SF_LENGTH_8KHZ ); + cross_corr = xcorr32[ MAX_LAG_4KHZ - MIN_LAG_4KHZ ]; normalizer = silk_inner_prod_aligned( target_ptr, target_ptr, SF_LENGTH_8KHZ ); normalizer = silk_ADD32( normalizer, silk_inner_prod_aligned( basis_ptr, basis_ptr, SF_LENGTH_8KHZ ) ); normalizer = silk_ADD32( normalizer, silk_SMULBB( SF_LENGTH_8KHZ, 4000 ) ); @@ -203,7 +208,7 @@ opus_int silk_pitch_analysis_core( /* O Voicing estimate: 0 silk_assert( basis_ptr >= frame_4kHz ); silk_assert( basis_ptr + SF_LENGTH_8KHZ <= frame_4kHz + frame_length_4kHz ); - cross_corr = silk_inner_prod_aligned( target_ptr, basis_ptr, SF_LENGTH_8KHZ ); + cross_corr = xcorr32[ MAX_LAG_4KHZ - d ]; /* Add contribution of new sample and remove contribution from oldest sample */ normalizer = silk_ADD32( normalizer, @@ -595,11 +600,11 @@ static void silk_P_Ana_calc_corr_st3( opus_int complexity /* I Complexity setting */ ) { - const opus_int16 *target_ptr, *basis_ptr; - opus_int32 cross_corr; + const opus_int16 *target_ptr; opus_int i, j, k, lag_counter, lag_low, lag_high; opus_int nb_cbk_search, delta, idx, cbk_size; VARDECL( opus_int32, scratch_mem ); + VARDECL( opus_int32, xcorr32 ); const opus_int8 *Lag_range_ptr, *Lag_CB_ptr; SAVE_STACK; @@ -619,6 +624,7 @@ static void silk_P_Ana_calc_corr_st3( cbk_size = PE_NB_CBKS_STAGE3_10MS; } ALLOC( scratch_mem, SCRATCH_SIZE, opus_int32 ); + ALLOC( xcorr32, SCRATCH_SIZE, opus_int32 ); target_ptr = &frame[ silk_LSHIFT( sf_length, 2 ) ]; /* Pointer to middle of frame */ for( k = 0; k < nb_subfr; k++ ) { @@ -627,11 +633,11 @@ static void silk_P_Ana_calc_corr_st3( /* Calculate the correlations for each subframe */ lag_low = matrix_ptr( Lag_range_ptr, k, 0, 2 ); lag_high = matrix_ptr( Lag_range_ptr, k, 1, 2 ); + silk_assert(lag_high-lag_low+1 <= SCRATCH_SIZE); + celt_pitch_xcorr( target_ptr, target_ptr - start_lag - lag_high, xcorr32, sf_length, lag_high - lag_low + 1 ); for( j = lag_low; j <= lag_high; j++ ) { - basis_ptr = target_ptr - ( start_lag + j ); - cross_corr = silk_inner_prod_aligned( target_ptr, basis_ptr, sf_length ); silk_assert( lag_counter < SCRATCH_SIZE ); - scratch_mem[ lag_counter ] = cross_corr; + scratch_mem[ lag_counter ] = xcorr32[ lag_high - j ]; lag_counter++; } diff --git a/silk/float/pitch_analysis_core_FLP.c b/silk/float/pitch_analysis_core_FLP.c index 75d329cf4..ee69a36fb 100644 --- a/silk/float/pitch_analysis_core_FLP.c +++ b/silk/float/pitch_analysis_core_FLP.c @@ -35,6 +35,7 @@ POSSIBILITY OF SUCH DAMAGE. #include "SigProc_FLP.h" #include "SigProc_FIX.h" #include "pitch_est_defines.h" +#include "pitch.h" #define SCRATCH_SIZE 22 @@ -84,6 +85,7 @@ opus_int silk_pitch_analysis_core_FLP( /* O Voicing estimate: 0 voiced, opus_int32 filt_state[ 6 ]; silk_float threshold, contour_bias; silk_float C[ PE_MAX_NB_SUBFR][ (PE_MAX_LAG >> 1) + 5 ]; + opus_val32 xcorr[ PE_MAX_LAG_MS * 4 - PE_MIN_LAG_MS * 4 + 1 ]; silk_float CC[ PE_NB_CBKS_STAGE2_EXT ]; const silk_float *target_ptr, *basis_ptr; double cross_corr, normalizer, energy, energy_tmp; @@ -174,8 +176,10 @@ opus_int silk_pitch_analysis_core_FLP( /* O Voicing estimate: 0 voiced, silk_assert( basis_ptr >= frame_4kHz ); silk_assert( basis_ptr + sf_length_8kHz <= frame_4kHz + frame_length_4kHz ); + celt_pitch_xcorr( target_ptr, target_ptr-max_lag_4kHz, xcorr, sf_length_8kHz, max_lag_4kHz - min_lag_4kHz + 1 ); + /* Calculate first vector products before loop */ - cross_corr = silk_inner_product_FLP( target_ptr, basis_ptr, sf_length_8kHz ); + cross_corr = xcorr[ max_lag_4kHz - min_lag_4kHz ]; normalizer = silk_energy_FLP( target_ptr, sf_length_8kHz ) + silk_energy_FLP( basis_ptr, sf_length_8kHz ) + sf_length_8kHz * 4000.0f; @@ -190,7 +194,7 @@ opus_int silk_pitch_analysis_core_FLP( /* O Voicing estimate: 0 voiced, silk_assert( basis_ptr >= frame_4kHz ); silk_assert( basis_ptr + sf_length_8kHz <= frame_4kHz + frame_length_4kHz ); - cross_corr = silk_inner_product_FLP(target_ptr, basis_ptr, sf_length_8kHz); + cross_corr = xcorr[ max_lag_4kHz - d ]; /* Add contribution of new sample and remove contribution from oldest sample */ normalizer += @@ -496,6 +500,7 @@ static void silk_P_Ana_calc_corr_st3( opus_int i, j, k, lag_counter, lag_low, lag_high; opus_int nb_cbk_search, delta, idx, cbk_size; silk_float scratch_mem[ SCRATCH_SIZE ]; + opus_val32 xcorr[ SCRATCH_SIZE ]; const opus_int8 *Lag_range_ptr, *Lag_CB_ptr; silk_assert( complexity >= SILK_PE_MIN_COMPLEX ); @@ -521,10 +526,12 @@ static void silk_P_Ana_calc_corr_st3( /* Calculate the correlations for each subframe */ lag_low = matrix_ptr( Lag_range_ptr, k, 0, 2 ); lag_high = matrix_ptr( Lag_range_ptr, k, 1, 2 ); + silk_assert(lag_high-lag_low+1 <= SCRATCH_SIZE); + celt_pitch_xcorr( target_ptr, target_ptr - start_lag - lag_high, xcorr, sf_length, lag_high - lag_low + 1 ); for( j = lag_low; j <= lag_high; j++ ) { basis_ptr = target_ptr - ( start_lag + j ); silk_assert( lag_counter < SCRATCH_SIZE ); - scratch_mem[ lag_counter ] = (silk_float)silk_inner_product_FLP( target_ptr, basis_ptr, sf_length ); + scratch_mem[ lag_counter ] = xcorr[ lag_high - j ]; lag_counter++; } -- GitLab