From de6eb96c71873fb23f0847424dca724b933ee399 Mon Sep 17 00:00:00 2001 From: "Matthias P. Braendli" Date: Fri, 10 Oct 2014 18:17:18 +0200 Subject: Correct non-SSE VAR GainMode --- src/GainControl.cpp | 112 ++++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 96 insertions(+), 16 deletions(-) (limited to 'src/GainControl.cpp') diff --git a/src/GainControl.cpp b/src/GainControl.cpp index 6304de3..a34dcf2 100644 --- a/src/GainControl.cpp +++ b/src/GainControl.cpp @@ -224,13 +224,26 @@ __m128 GainControl::computeGainVar(const __m128* in, size_t sizeIn) __m128 i128 = _mm_set1_ps(sample + 1); __m128 q128 = _mm_div_ps(delta128, i128); mean128.m = _mm_add_ps(mean128.m, q128); + + /* + tmp128.m = in[sample]; + printf("S %zu, %.2f+%.2fj\t", + sample, + tmp128.f[0], tmp128.f[1]); + printf(": %.2f+%.2fj\n", mean128.f[0], mean128.f[1]); + + printf("S %zu, %.2f+%.2fj\t", + sample, + tmp128.f[2], tmp128.f[3]); + printf(": %.2f+%.2fj\n", mean128.f[2], mean128.f[3]); + */ } // Merging average tmp128.m = _mm_shuffle_ps(mean128.m, mean128.m, _MM_SHUFFLE(1, 0, 3, 2)); mean128.m = _mm_add_ps(mean128.m, tmp128.m); mean128.m = _mm_mul_ps(mean128.m, _mm_set1_ps(0.5f)); - PDEBUG("********** Mean: %10f, %10f, %10f, %10f **********\n", + PDEBUG("********** Mean: %10f + %10fj %10f + %10fj **********\n", mean128.f[0], mean128.f[1], mean128.f[2], mean128.f[3]); //////////////////////////////////////////////////////////////////////// @@ -243,16 +256,32 @@ __m128 GainControl::computeGainVar(const __m128* in, size_t sizeIn) __m128 i128 = _mm_set1_ps(sample + 1); __m128 q128 = _mm_div_ps(delta128, i128); var128.m = _mm_add_ps(var128.m, q128); + + /* + __u128 udiff128; udiff128.m = diff128; + __u128 udelta128; udelta128.m = delta128; + for (int off=0; off<4; off+=2) { + printf("S %zu, %.2f+%.2fj\t", + sample, + udiff128.f[off], udiff128.f[1+off]); + printf(": %.2f+%.2fj\t", udelta128.f[off], udelta128.f[1+off]); + printf(": %.2f+%.2fj\n", var128.f[off], var128.f[1+off]); + } + */ + } - // Merging average + PDEBUG("********** Vars: %10f + %10fj, %10f + %10fj **********\n", + var128.f[0], var128.f[1], var128.f[2], var128.f[3]); + + // Merging standard deviations tmp128.m = _mm_shuffle_ps(var128.m, var128.m, _MM_SHUFFLE(1, 0, 3, 2)); var128.m = _mm_add_ps(var128.m, tmp128.m); var128.m = _mm_mul_ps(var128.m, _mm_set1_ps(0.5f)); var128.m = _mm_sqrt_ps(var128.m); - PDEBUG("********** Var: %10f, %10f, %10f, %10f **********\n", + PDEBUG("********** Var: %10f + %10fj, %10f + %10fj **********\n", var128.f[0], var128.f[1], var128.f[2], var128.f[3]); var128.m = _mm_mul_ps(var128.m, _mm_set1_ps(4.0f)); - PDEBUG("********** 4*Var: %10f, %10f, %10f, %10f **********\n", + PDEBUG("********** 4*Var: %10f + %10fj, %10f + %10fj **********\n", var128.f[0], var128.f[1], var128.f[2], var128.f[3]); //////////////////////////////////////////////////////////////////////////// @@ -333,7 +362,17 @@ float GainControl::computeGainVar(const complexf* in, size_t sizeIn) { float gain; complexf mean; - complexf var; + + /* The variance calculation is a bit strange, because we + * emulate the exact same functionality as the SSE code, + * which is the most used one. + * + * TODO: verify that this actually corresponds to the + * gain mode suggested in EN 300 798 Clause 5.3 Numerical Range. + */ + complexf var1; + complexf var2; + static const float factor = 0x7fff; mean = complexf(0.0f, 0.0f); @@ -343,25 +382,66 @@ float GainControl::computeGainVar(const complexf* in, size_t sizeIn) float i = sample + 1; complexf q = delta / i; mean = mean + q; + + /* + printf("F %zu, %.2f+%.2fj\t", + sample, + in[sample].real(), in[sample].imag()); + + printf(": %.2f+%.2fj\n", mean.real(), mean.imag()); + */ } - PDEBUG("********** Mean: %10f, %10f **********\n", mean.real(), mean.imag()); + PDEBUG("********** Mean: %10f + %10fj **********\n", mean.real(), mean.imag()); //////////////////////////////////////////////////////////////////////// // Computing standard deviation //////////////////////////////////////////////////////////////////////// - var = complexf(0.0f, 0.0f); + var1 = complexf(0.0f, 0.0f); + var2 = complexf(0.0f, 0.0f); for (size_t sample = 0; sample < sizeIn; ++sample) { complexf diff = in[sample] - mean; - complexf delta = complexf(diff.real() * diff.real(), - diff.imag() * diff.imag()) - var; - float i = sample + 1; - complexf q = delta / i; - var = var + q; + complexf delta; + complexf q; + + float i = (sample/2) + 1; + if (sample % 2 == 0) { + delta = complexf(diff.real() * diff.real(), + diff.imag() * diff.imag()) - var1; + q = delta / i; + + var1 += q; + } + else { + delta = complexf(diff.real() * diff.real(), + diff.imag() * diff.imag()) - var2; + q = delta / i; + + var2 += q; + } + + /* + printf("F %zu, %.2f+%.2fj\t", + sample, + diff.real(), diff.imag()); + + printf(": %.2f+%.2fj\t", delta.real(), delta.imag()); + printf(": %.2f+%.2fj\t", var1.real(), var1.imag()); + printf(": %.2f+%.2fj\n", var2.real(), var2.imag()); + */ } - PDEBUG("********** Var: %10f, %10f **********\n", var.real(), var.imag()); + + PDEBUG("********** Vars: %10f + %10fj, %10f + %10fj **********\n", + var1.real(), var1.imag(), + var2.real(), var2.imag()); + + // Merge standard deviations in the same way the SSE version does it + complexf tmpvar = (var1 + var2) * 0.5f; + complexf var(sqrt(tmpvar.real()), sqrt(tmpvar.imag())); + PDEBUG("********** Var: %10f + %10fj **********\n", var.real(), var.imag()); + var = var * 4.0f; - PDEBUG("********** 4*Var: %10f, %10f **********\n", var.real(), var.imag()); + PDEBUG("********** 4*Var: %10f + %10fj **********\n", var.real(), var.imag()); //////////////////////////////////////////////////////////////////////////// // Computing gain @@ -372,10 +452,10 @@ float GainControl::computeGainVar(const complexf* in, size_t sizeIn) } // Detect NULL if ((int)var.real() == 0) { - gain = factor / var.real(); + gain = 1.0f; } else { - gain = 1.0f; + gain = factor / var.real(); } return gain; -- cgit v1.2.3 From dd429ea2a7d98ba3a6b5806734db347a1bed1924 Mon Sep 17 00:00:00 2001 From: "Matthias P. Braendli" Date: Fri, 24 Oct 2014 17:10:52 +0200 Subject: Make KISS entirely optional The Resampler block can also use fftw, and the Makefile only does the kiss fft compilation when --enable-fftw is not given. --- src/GainControl.cpp | 10 ++++- src/Makefile.am | 9 ++++- src/OfdmGenerator.cpp | 4 +- src/Resampler.cpp | 102 +++++++++++++++++++++++++++++++++----------------- src/Resampler.h | 22 ++++++++--- src/kiss_fftsimd.h | 8 ---- 6 files changed, 103 insertions(+), 52 deletions(-) (limited to 'src/GainControl.cpp') diff --git a/src/GainControl.cpp b/src/GainControl.cpp index a34dcf2..03d8aa6 100644 --- a/src/GainControl.cpp +++ b/src/GainControl.cpp @@ -27,12 +27,20 @@ #include "GainControl.h" #include "PcDebug.h" -#include "kiss_fftsimd.h" #include #include #include +#ifdef __SSE__ +# include +union __u128 { + __m128 m; + float f[4]; +}; +#endif + + using namespace std; diff --git a/src/Makefile.am b/src/Makefile.am index b7095c8..3d1eefc 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -42,6 +42,7 @@ endif bin_PROGRAMS = odr-dabmod +if USE_KISS_FFT FFT_DIR=$(top_builddir)/lib/kiss_fft129 FFT_INC=-I$(FFT_DIR) -I$(FFT_DIR)/tools FFT_SRC=$(FFT_DIR)/kiss_fft.c $(FFT_DIR)/kiss_fft.h $(FFT_DIR)/tools/kiss_fftr.c $(FFT_DIR)/tools/kiss_fftr.h kiss_fftsimd.c kiss_fftsimd.h @@ -53,10 +54,13 @@ DabModulator.cpp: $(FFT_DIR) BUILT_SOURCES: $(FFT_DIR) -if USE_KISS_FFT FFT_LDADD= else FFT_LDADD=-lfftw3f -lm +FFT_DIR= +FFT_INC= +FFT_SRC= +FFT_FLG= endif $(FFT_DIR): @@ -116,8 +120,11 @@ nodist_odr_dabmod_SOURCES =$(FFT_SRC) dist_bin_SCRIPTS =crc-dwap.py +if USE_KISS_FFT EXTRA_DIST =kiss_fftsimd.c kiss_fftsimd.h clean-local: rm -rf $(FFT_DIR) +endif + diff --git a/src/OfdmGenerator.cpp b/src/OfdmGenerator.cpp index 90527d4..7044249 100644 --- a/src/OfdmGenerator.cpp +++ b/src/OfdmGenerator.cpp @@ -102,8 +102,8 @@ OfdmGenerator::OfdmGenerator(size_t nbSymbols, FFTW_BACKWARD, FFTW_MEASURE); if (sizeof(complexf) != sizeof(FFT_TYPE)) { - printf("sizeof(complexf) %d\n", sizeof(complexf)); - printf("sizeof(FFT_TYPE) %d\n", sizeof(FFT_TYPE)); + printf("sizeof(complexf) %zu\n", sizeof(complexf)); + printf("sizeof(FFT_TYPE) %zu\n", sizeof(FFT_TYPE)); throw std::runtime_error( "OfdmGenerator::process complexf size is not FFT_TYPE size!"); } diff --git a/src/Resampler.cpp b/src/Resampler.cpp index 334be99..cda4ff4 100644 --- a/src/Resampler.cpp +++ b/src/Resampler.cpp @@ -1,6 +1,11 @@ /* Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 2011 Her Majesty the Queen in Right of Canada (Communications Research Center Canada) + + Copyright (C) 2014 + Matthias P. Braendli, matthias.braendli@mpb.li + + http://opendigitalradio.org */ /* This file is part of ODR-DabMod. @@ -22,17 +27,16 @@ #include "Resampler.h" #include "PcDebug.h" - -#if HAVE_DECL__MM_MALLOC -# include -#else -# define memalign(a, b) malloc(b) -#endif +#include #include #include #include #include +#if USE_FFTW +# define FFT_REAL(x) x[0] +# define FFT_IMAG(x) x[1] +#endif unsigned gcd(unsigned a, unsigned b) { @@ -59,6 +63,9 @@ Resampler::Resampler(size_t inputRate, size_t outputRate, size_t resolution) : { PDEBUG("Resampler::Resampler(%zu, %zu) @ %p\n", inputRate, outputRate, this); +#if USE_FFTW + fprintf(stderr, "This software uses the FFTW library.\n\n"); +#else fprintf(stderr, "This software uses KISS FFT.\n\n"); fprintf(stderr, "Copyright (c) 2003-2004 Mark Borgerding\n" "\n" @@ -92,6 +99,7 @@ Resampler::Resampler(size_t inputRate, size_t outputRate, size_t resolution) : "OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY " "OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE " "POSSIBILITY OF SUCH DAMAGE.\n"); +#endif size_t divisor = gcd(inputRate, outputRate); L = outputRate / divisor; @@ -119,6 +127,22 @@ Resampler::Resampler(size_t inputRate, size_t outputRate, size_t resolution) : PDEBUG("Window[%zu] = %f\n", i, myWindow[i]); } +#if USE_FFTW + myFftIn = (FFT_TYPE*)fftwf_malloc(sizeof(FFT_TYPE) * myFftSizeIn); + myFront = (FFT_TYPE*)fftwf_malloc(sizeof(FFT_TYPE) * myFftSizeIn); + myFftPlan1 = fftwf_plan_dft_1d(myFftSizeIn, + myFftIn, myFront, + FFTW_FORWARD, FFTW_MEASURE); + + myBack = (FFT_TYPE*)fftwf_malloc(sizeof(FFT_TYPE) * myFftSizeOut); + myFftOut = (FFT_TYPE*)fftwf_malloc(sizeof(FFT_TYPE) * myFftSizeOut); + myFftPlan2 = fftwf_plan_dft_1d(myFftSizeOut, + myBack, myFftOut, + FFTW_BACKWARD, FFTW_MEASURE); + + myBufferIn = (complexf*)fftwf_malloc(sizeof(FFT_TYPE) * myFftSizeIn / 2); + myBufferOut = (complexf*)fftwf_malloc(sizeof(FFT_TYPE) * myFftSizeOut / 2); +#else myFftIn = (FFT_TYPE*)memalign(16, myFftSizeIn * sizeof(FFT_TYPE)); myFftOut = (FFT_TYPE*)memalign(16, myFftSizeOut * sizeof(FFT_TYPE)); myBufferIn = (complexf*)memalign(16, myFftSizeIn / 2 * sizeof(FFT_TYPE)); @@ -127,6 +151,7 @@ Resampler::Resampler(size_t inputRate, size_t outputRate, size_t resolution) : myBack = (FFT_TYPE*)memalign(16, myFftSizeOut * sizeof(FFT_TYPE)); myFftPlan1 = kiss_fft_alloc(myFftSizeIn, 0, NULL, NULL); myFftPlan2 = kiss_fft_alloc(myFftSizeOut, 1, NULL, NULL); +#endif memset(myBufferIn, 0, myFftSizeIn / 2 * sizeof(FFT_TYPE)); memset(myBufferOut, 0, myFftSizeOut / 2 * sizeof(FFT_TYPE)); @@ -137,34 +162,30 @@ Resampler::~Resampler() { PDEBUG("Resampler::~Resampler() @ %p\n", this); - if (myFftPlan1 != NULL) { - free(myFftPlan1); - } - if (myFftPlan2 != NULL) { - free(myFftPlan2); - } - if (myFftIn != NULL) { - free(myFftIn); - } - if (myFftOut != NULL) { - free(myFftOut); - } - if (myBufferIn != NULL) { - free(myBufferIn); - } - if (myBufferOut != NULL) { - free(myBufferOut); - } - if (myFront != NULL) { - free(myFront); - } - if (myBack != NULL) { - free(myBack); - } - if (myWindow != NULL) { - free(myWindow); - } +#if USE_FFTW + if (myFftPlan1 != NULL) { fftwf_free(myFftPlan1); } + if (myFftPlan2 != NULL) { fftwf_free(myFftPlan2); } + if (myFftIn != NULL) { fftwf_free(myFftIn); } + if (myFftOut != NULL) { fftwf_free(myFftOut); } + if (myBufferIn != NULL) { fftwf_free(myBufferIn); } + if (myBufferOut != NULL) { fftwf_free(myBufferOut); } + if (myFront != NULL) { fftwf_free(myFront); } + if (myBack != NULL) { fftwf_free(myBack); } + if (myWindow != NULL) { fftwf_free(myWindow); } + fftwf_destroy_plan(myFftPlan1); + fftwf_destroy_plan(myFftPlan2); +#else + if (myFftPlan1 != NULL) { free(myFftPlan1); } + if (myFftPlan2 != NULL) { free(myFftPlan2); } + if (myFftIn != NULL) { free(myFftIn); } + if (myFftOut != NULL) { free(myFftOut); } + if (myBufferIn != NULL) { free(myBufferIn); } + if (myBufferOut != NULL) { free(myBufferOut); } + if (myFront != NULL) { free(myFront); } + if (myBack != NULL) { free(myBack); } + if (myWindow != NULL) { free(myWindow); } kiss_fft_cleanup(); +#endif } @@ -179,7 +200,7 @@ int Resampler::process(Buffer* const dataIn, Buffer* dataOut) FFT_TYPE* out = reinterpret_cast(dataOut->getData()); size_t sizeIn = dataIn->getLength() / sizeof(complexf); -#ifdef USE_SIMD +#if defined(USE_SIMD) && !USE_FFTW size_t sizeOut = dataOut->getLength() / sizeof(complexf); typedef struct { @@ -263,7 +284,9 @@ int Resampler::process(Buffer* const dataIn, Buffer* dataOut) j += myFftSizeOut / 2; } } -#else +#endif + +#if USE_FFTW || (!defined(USE_SIMD)) for (size_t i = 0, j = 0; i < sizeIn; i += myFftSizeIn / 2, j += myFftSizeOut / 2) { memcpy(myFftIn, myBufferIn, myFftSizeIn / 2 * sizeof(FFT_TYPE)); memcpy(myFftIn + (myFftSizeIn / 2), in + i, myFftSizeIn / 2 * sizeof(FFT_TYPE)); @@ -273,7 +296,11 @@ int Resampler::process(Buffer* const dataIn, Buffer* dataOut) FFT_IMAG(myFftIn[k]) *= myWindow[k]; } +#if USE_FFTW + fftwf_execute(myFftPlan1); +#else kiss_fft(myFftPlan1, myFftIn, myFront); +#endif if (myFftSizeOut > myFftSizeIn) { memset(myBack, 0, myFftSizeOut * sizeof(FFT_TYPE)); @@ -304,7 +331,11 @@ int Resampler::process(Buffer* const dataIn, Buffer* dataOut) FFT_IMAG(myBack[k]) *= myFactor; } +#if USE_FFTW + fftwf_execute(myFftPlan2); +#else kiss_fft(myFftPlan2, myBack, myFftOut); +#endif for (size_t k = 0; k < myFftSizeOut / 2; ++k) { FFT_REAL(out[j + k]) = myBufferOut[k].real() + FFT_REAL(myFftOut[k]); @@ -316,3 +347,4 @@ int Resampler::process(Buffer* const dataIn, Buffer* dataOut) return 1; } + diff --git a/src/Resampler.h b/src/Resampler.h index a19b14e..392d0a6 100644 --- a/src/Resampler.h +++ b/src/Resampler.h @@ -1,6 +1,11 @@ /* Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 2011 Her Majesty the Queen in Right of Canada (Communications Research Center Canada) + + Copyright (C) 2014 + Matthias P. Braendli, matthias.braendli@mpb.li + + http://opendigitalradio.org */ /* This file is part of ODR-DabMod. @@ -28,12 +33,19 @@ #include "porting.h" #include "ModCodec.h" -#include "kiss_fftsimd.h" +#if USE_FFTW +# include +# include +# define FFT_TYPE fftwf_complex +# define FFT_PLAN fftwf_plan -#include -#include -#include +#else +# include "kiss_fftsimd.h" +# include +# include +# include +#endif #include typedef std::complex complexf; @@ -68,5 +80,5 @@ protected: float myFactor; }; - #endif // RESAMPLER_H + diff --git a/src/kiss_fftsimd.h b/src/kiss_fftsimd.h index 6d38dc7..90ee435 100644 --- a/src/kiss_fftsimd.h +++ b/src/kiss_fftsimd.h @@ -36,14 +36,6 @@ #define FFT_IMAG(a) (a).i -#ifdef __SSE__ -#include -union __u128 { - __m128 m; - float f[4]; -}; -#endif - #ifdef USE_SIMD -- cgit v1.2.3