diff options
| -rw-r--r-- | host/lib/convert/CMakeLists.txt | 17 | ||||
| -rw-r--r-- | host/lib/convert/convert_pack_sc12.cpp | 116 | ||||
| -rw-r--r-- | host/lib/convert/convert_pack_sc12.hpp | 123 | ||||
| -rw-r--r-- | host/lib/convert/convert_unpack_sc12.cpp | 99 | ||||
| -rw-r--r-- | host/lib/convert/convert_unpack_sc12.hpp | 112 | ||||
| -rw-r--r-- | host/lib/convert/ssse3_pack_sc12.cpp | 244 | ||||
| -rw-r--r-- | host/lib/convert/ssse3_unpack_sc12.cpp | 219 | 
7 files changed, 719 insertions, 211 deletions
diff --git a/host/lib/convert/CMakeLists.txt b/host/lib/convert/CMakeLists.txt index 10376ba9c..cfd3c7f34 100644 --- a/host/lib/convert/CMakeLists.txt +++ b/host/lib/convert/CMakeLists.txt @@ -26,6 +26,7 @@ MESSAGE(STATUS "")  ########################################################################  IF(CMAKE_COMPILER_IS_GNUCXX)      SET(EMMINTRIN_FLAGS -msse2) +    SET(TMMINTRIN_FLAGS -mssse3)  ELSEIF(MSVC)      SET(EMMINTRIN_FLAGS /arch:SSE2)  ENDIF() @@ -34,6 +35,10 @@ SET(CMAKE_REQUIRED_FLAGS ${EMMINTRIN_FLAGS})  CHECK_INCLUDE_FILE_CXX(emmintrin.h HAVE_EMMINTRIN_H)  SET(CMAKE_REQUIRED_FLAGS) +SET(CMAKE_REQUIRED_FLAGS ${TMMINTRIN_FLAGS}) +CHECK_INCLUDE_FILE_CXX(tmmintrin.h HAVE_TMMINTRIN_H) +SET(CMAKE_REQUIRED_FLAGS) +  IF(HAVE_EMMINTRIN_H)      SET(convert_with_sse2_sources          ${CMAKE_CURRENT_SOURCE_DIR}/sse2_sc16_to_sc16.cpp @@ -53,6 +58,18 @@ IF(HAVE_EMMINTRIN_H)      LIBUHD_APPEND_SOURCES(${convert_with_sse2_sources})  ENDIF(HAVE_EMMINTRIN_H) +IF(HAVE_TMMINTRIN_H) +    SET(convert_with_ssse3_sources +        ${CMAKE_CURRENT_SOURCE_DIR}/ssse3_pack_sc12.cpp +        ${CMAKE_CURRENT_SOURCE_DIR}/ssse3_unpack_sc12.cpp +    ) +    SET_SOURCE_FILES_PROPERTIES( +        ${convert_with_ssse3_sources} +        PROPERTIES COMPILE_FLAGS "${TMMINTRIN_FLAGS}" +    ) +    LIBUHD_APPEND_SOURCES(${convert_with_ssse3_sources}) +ENDIF(HAVE_TMMINTRIN_H) +  ########################################################################  # Check for NEON SIMD headers  ######################################################################## diff --git a/host/lib/convert/convert_pack_sc12.cpp b/host/lib/convert/convert_pack_sc12.cpp index 2e45e19f5..85194dcdd 100644 --- a/host/lib/convert/convert_pack_sc12.cpp +++ b/host/lib/convert/convert_pack_sc12.cpp @@ -1,5 +1,5 @@  // -// Copyright 2013 Ettus Research LLC +// Copyright 2017 Ettus Research LLC  //  // This program is free software: you can redistribute it and/or modify  // it under the terms of the GNU General Public License as published by @@ -15,122 +15,10 @@  // along with this program.  If not, see <http://www.gnu.org/licenses/>.  // -#include "convert_common.hpp" -#include <uhd/utils/byteswap.hpp> -#include <uhd/utils/log.hpp> -#include <boost/math/special_functions/round.hpp> -#include <vector> -#include <type_traits> +#include "convert_pack_sc12.hpp"  using namespace uhd::convert; -typedef uint32_t (*towire32_type)(uint32_t); - -/* C language specification requires this to be packed - * (i.e., line0, line1, line2 will be in adjacent memory locations). - * If this was not true, we'd need compiler flags here to specify - * alignment/packing. - */ -struct item32_sc12_3x -{ -    item32_t line0; -    item32_t line1; -    item32_t line2; -}; - -enum item32_sc12_3x_enable { -    CONVERT12_LINE0 = 0x01, -    CONVERT12_LINE1 = 0x02, -    CONVERT12_LINE2 = 0x04, -    CONVERT12_LINE_ALL = 0x07, -}; - -/* - * Packed 12-bit converter with selective line enable - * - * The converter operates on 4 complex inputs and selectively writes to one to - * three 32-bit lines. Line selection allows for partial writes of less than - * 4 complex samples, or a full 3 x 32-bit struct. Writes are always full 32-bit - * lines, so in the case of partial writes, the number of bytes written will - * exceed the the number of bytes filled by actual samples. - * - *  _ _ _ _ _ _ _ _ - * |_ _ _1_ _ _|_ _| 0 - * |_2_ _ _|_ _ _3_| - * |_ _|_ _ _4_ _ _| 2 - * 31              0 - */ -template <towire32_type towire> -inline void pack(item32_sc12_3x &output, int enable, const int32_t i[4], const int32_t q[4]) -{ -    if (enable & CONVERT12_LINE0) -        output.line0 = towire(i[0] << 20 | q[0] <<  8 | i[1] >> 4); -    if (enable & CONVERT12_LINE1) -        output.line1 = towire(i[1] << 28 | q[1] << 16 | i[2] << 4 | q[2] >> 8); -    if (enable & CONVERT12_LINE2) -        output.line2 = towire(q[2] << 24 | i[3] << 12 | q[3]); -} - -template <typename type, towire32_type towire> -void convert_star_4_to_sc12_item32_3 -( -    const std::complex<type> &in0, -    const std::complex<type> &in1, -    const std::complex<type> &in2, -    const std::complex<type> &in3, -    const int enable, -    item32_sc12_3x &output, -    const double scalar, -    typename std::enable_if<std::is_floating_point<type>::value>::type* = NULL -) -{ -    int32_t i[4] { -        int32_t(in0.real()*scalar) & 0xfff, -        int32_t(in1.real()*scalar) & 0xfff, -        int32_t(in2.real()*scalar) & 0xfff, -        int32_t(in3.real()*scalar) & 0xfff, -    }; - -    int32_t q[4] { -        int32_t(in0.imag()*scalar) & 0xfff, -        int32_t(in1.imag()*scalar) & 0xfff, -        int32_t(in2.imag()*scalar) & 0xfff, -        int32_t(in3.imag()*scalar) & 0xfff, -    }; - -    pack<towire>(output, enable, i, q); -} - -template <typename type, towire32_type towire> -void convert_star_4_to_sc12_item32_3 -( -    const std::complex<type> &in0, -    const std::complex<type> &in1, -    const std::complex<type> &in2, -    const std::complex<type> &in3, -    const int enable, -    item32_sc12_3x &output, -    const double, -    typename std::enable_if<std::is_same<type, short>::value>::type* = NULL -) -{ -    int32_t i[4] { -        int32_t(in0.real() >> 4) & 0xfff, -        int32_t(in1.real() >> 4) & 0xfff, -        int32_t(in2.real() >> 4) & 0xfff, -        int32_t(in3.real() >> 4) & 0xfff, -    }; - -    int32_t q[4] { -        int32_t(in0.imag() >> 4) & 0xfff, -        int32_t(in1.imag() >> 4) & 0xfff, -        int32_t(in2.imag() >> 4) & 0xfff, -        int32_t(in3.imag() >> 4) & 0xfff, -    }; - -    pack<towire>(output, enable, i, q); -} -  template <typename type, towire32_type towire>  struct convert_star_1_to_sc12_item32_1 : public converter  { diff --git a/host/lib/convert/convert_pack_sc12.hpp b/host/lib/convert/convert_pack_sc12.hpp new file mode 100644 index 000000000..754c47cd2 --- /dev/null +++ b/host/lib/convert/convert_pack_sc12.hpp @@ -0,0 +1,123 @@ +// +// Copyright 2017 Ettus Research LLC +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program.  If not, see <http://www.gnu.org/licenses/>. +// + +#include <type_traits> +#include <uhd/utils/byteswap.hpp> +#include "convert_common.hpp" + +using namespace uhd::convert; + +typedef uint32_t (*towire32_type)(uint32_t); + +/* C language specification requires this to be packed + * (i.e., line0, line1, line2 will be in adjacent memory locations). + * If this was not true, we'd need compiler flags here to specify + * alignment/packing. + */ +struct item32_sc12_3x +{ +    item32_t line0; +    item32_t line1; +    item32_t line2; +}; + +enum item32_sc12_3x_enable { +    CONVERT12_LINE0 = 0x01, +    CONVERT12_LINE1 = 0x02, +    CONVERT12_LINE2 = 0x04, +    CONVERT12_LINE_ALL = 0x07, +}; + +/* + * Packed 12-bit converter with selective line enable + * + * The converter operates on 4 complex inputs and selectively writes to one to + * three 32-bit lines. Line selection allows for partial writes of less than + * 4 complex samples, or a full 3 x 32-bit struct. Writes are always full 32-bit + * lines, so in the case of partial writes, the number of bytes written will + * exceed the the number of bytes filled by actual samples. + * + *  _ _ _ _ _ _ _ _ + * |_ _ _1_ _ _|_ _| 0 + * |_2_ _ _|_ _ _3_| + * |_ _|_ _ _4_ _ _| 2 + * 31              0 + */ +template <towire32_type towire> +void pack(item32_sc12_3x &output, int enable, const int32_t iq[8]) +{ +    if (enable & CONVERT12_LINE0) +        output.line0 = towire(iq[0] << 20 | iq[1] <<  8 | iq[2] >> 4); +    if (enable & CONVERT12_LINE1) +        output.line1 = towire(iq[2] << 28 | iq[3] << 16 | iq[4] << 4 | iq[5] >> 8); +    if (enable & CONVERT12_LINE2) +        output.line2 = towire(iq[5] << 24 | iq[6] << 12 | iq[7] << 0); +} + +template <typename type, towire32_type towire> +void convert_star_4_to_sc12_item32_3 +( +    const std::complex<type> &in0, +    const std::complex<type> &in1, +    const std::complex<type> &in2, +    const std::complex<type> &in3, +    const int enable, +    item32_sc12_3x &output, +    const double scalar, +    typename std::enable_if<std::is_floating_point<type>::value>::type* = NULL +) +{ +    int32_t iq[8] { +        int32_t(in0.real()*scalar) & 0xfff, +        int32_t(in0.imag()*scalar) & 0xfff, +        int32_t(in1.real()*scalar) & 0xfff, +        int32_t(in1.imag()*scalar) & 0xfff, + +        int32_t(in2.real()*scalar) & 0xfff, +        int32_t(in2.imag()*scalar) & 0xfff, +        int32_t(in3.real()*scalar) & 0xfff, +        int32_t(in3.imag()*scalar) & 0xfff, +    }; +    pack<towire>(output, enable, iq); +} + +template <typename type, towire32_type towire> +void convert_star_4_to_sc12_item32_3 +( +    const std::complex<type> &in0, +    const std::complex<type> &in1, +    const std::complex<type> &in2, +    const std::complex<type> &in3, +    const int enable, +    item32_sc12_3x &output, +    const double, +    typename std::enable_if<std::is_same<type, short>::value>::type* = NULL +) +{ +    int32_t iq[8] { +        int32_t(in0.real() >> 4) & 0xfff, +        int32_t(in0.imag() >> 4) & 0xfff, +        int32_t(in1.real() >> 4) & 0xfff, +        int32_t(in1.imag() >> 4) & 0xfff, + +        int32_t(in2.real() >> 4) & 0xfff, +        int32_t(in2.imag() >> 4) & 0xfff, +        int32_t(in3.real() >> 4) & 0xfff, +        int32_t(in3.imag() >> 4) & 0xfff, +    }; +    pack<towire>(output, enable, iq); +} diff --git a/host/lib/convert/convert_unpack_sc12.cpp b/host/lib/convert/convert_unpack_sc12.cpp index 07f9cffa0..43c35ee3b 100644 --- a/host/lib/convert/convert_unpack_sc12.cpp +++ b/host/lib/convert/convert_unpack_sc12.cpp @@ -1,5 +1,5 @@  // -// Copyright 2013 Ettus Research LLC +// Copyright 2017 Ettus Research LLC  //  // This program is free software: you can redistribute it and/or modify  // it under the terms of the GNU General Public License as published by @@ -15,105 +15,10 @@  // along with this program.  If not, see <http://www.gnu.org/licenses/>.  // -#include "convert_common.hpp" -#include <uhd/utils/byteswap.hpp> -#include <uhd/utils/log.hpp> -#include <boost/math/special_functions/round.hpp> -#include <vector> -#include <type_traits> +#include "convert_unpack_sc12.hpp"  using namespace uhd::convert; -typedef uint32_t (*tohost32_type)(uint32_t); - -/* C language specification requires this to be packed - * (i.e., line0, line1, line2 will be in adjacent memory locations). - * If this was not true, we'd need compiler flags here to specify - * alignment/packing. - */ -struct item32_sc12_3x -{ -    item32_t line0; -    item32_t line1; -    item32_t line2; -}; - -/* - * convert_sc12_item32_3_to_star_4 takes in 3 lines with 32 bit each - * and converts them 4 samples of type 'std::complex<type>'. - * The structure of the 3 lines is as follows: - *  _ _ _ _ _ _ _ _ - * |_ _ _1_ _ _|_ _| - * |_2_ _ _|_ _ _3_| - * |_ _|_ _ _4_ _ _| - * - * The numbers mark the position of one complex sample. - */ -template <typename type, tohost32_type tohost> -void convert_sc12_item32_3_to_star_4 -( -    const item32_sc12_3x &input, -    std::complex<type> &out0, -    std::complex<type> &out1, -    std::complex<type> &out2, -    std::complex<type> &out3, -    const double scalar, -    typename std::enable_if<std::is_floating_point<type>::value>::type* = NULL -) -{ -    //step 0: extract the lines from the input buffer -    const item32_t line0 = tohost(input.line0); -    const item32_t line1 = tohost(input.line1); -    const item32_t line2 = tohost(input.line2); -    const uint64_t line01 = (uint64_t(line0) << 32) | line1; -    const uint64_t line12 = (uint64_t(line1) << 32) | line2; - -    //step 1: shift out and mask off the individual numbers -    const type i0 = type(int16_t((line0 >> 16) & 0xfff0)*scalar); -    const type q0 = type(int16_t((line0 >> 4) & 0xfff0)*scalar); - -    const type i1 = type(int16_t((line01 >> 24) & 0xfff0)*scalar); -    const type q1 = type(int16_t((line1 >> 12) & 0xfff0)*scalar); - -    const type i2 = type(int16_t((line1 >> 0) & 0xfff0)*scalar); -    const type q2 = type(int16_t((line12 >> 20) & 0xfff0)*scalar); - -    const type i3 = type(int16_t((line2 >> 8) & 0xfff0)*scalar); -    const type q3 = type(int16_t((line2 << 4) & 0xfff0)*scalar); - -    //step 2: load the outputs -    out0 = std::complex<type>(i0, q0); -    out1 = std::complex<type>(i1, q1); -    out2 = std::complex<type>(i2, q2); -    out3 = std::complex<type>(i3, q3); -} - -template <typename type, tohost32_type tohost> -void convert_sc12_item32_3_to_star_4 -( -    const item32_sc12_3x &input, -    std::complex<type> &out0, -    std::complex<type> &out1, -    std::complex<type> &out2, -    std::complex<type> &out3, -    const double, -    typename std::enable_if<std::is_integral<type>::value>::type* = NULL -) -{ -    //step 0: extract the lines from the input buffer -    const item32_t line0 = tohost(input.line0); -    const item32_t line1 = tohost(input.line1); -    const item32_t line2 = tohost(input.line2); -    const uint64_t line01 = (uint64_t(line0) << 32) | line1; -    const uint64_t line12 = (uint64_t(line1) << 32) | line2; - -    //step 1: extract and load the outputs -    out0 = std::complex<type>(line0  >> 16 & 0xfff0, line0  >>  4 & 0xfff0); -    out1 = std::complex<type>(line01 >> 24 & 0xfff0, line1  >> 12 & 0xfff0); -    out2 = std::complex<type>(line1  >>  0 & 0xfff0, line12 >> 20 & 0xfff0); -    out3 = std::complex<type>(line2  >>  8 & 0xfff0, line2  <<  4 & 0xfff0); -} -  template <typename type, tohost32_type tohost>  struct convert_sc12_item32_1_to_star_1 : public converter  { diff --git a/host/lib/convert/convert_unpack_sc12.hpp b/host/lib/convert/convert_unpack_sc12.hpp new file mode 100644 index 000000000..46e7d58fb --- /dev/null +++ b/host/lib/convert/convert_unpack_sc12.hpp @@ -0,0 +1,112 @@ +// +// Copyright 2017 Ettus Research LLC +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program.  If not, see <http://www.gnu.org/licenses/>. +// + +#include <type_traits> +#include <uhd/utils/byteswap.hpp> +#include "convert_common.hpp" + +using namespace uhd::convert; + +typedef uint32_t (*tohost32_type)(uint32_t); + +/* C language specification requires this to be packed + * (i.e., line0, line1, line2 will be in adjacent memory locations). + * If this was not true, we'd need compiler flags here to specify + * alignment/packing. + */ +struct item32_sc12_3x +{ +    item32_t line0; +    item32_t line1; +    item32_t line2; +}; + +/* + * convert_sc12_item32_3_to_star_4 takes in 3 lines with 32 bit each + * and converts them 4 samples of type 'std::complex<type>'. + * The structure of the 3 lines is as follows: + *  _ _ _ _ _ _ _ _ + * |_ _ _1_ _ _|_ _| + * |_2_ _ _|_ _ _3_| + * |_ _|_ _ _4_ _ _| + * + * The numbers mark the position of one complex sample. + */ +template <typename type, tohost32_type tohost> +void convert_sc12_item32_3_to_star_4 +( +    const item32_sc12_3x &input, +    std::complex<type> &out0, +    std::complex<type> &out1, +    std::complex<type> &out2, +    std::complex<type> &out3, +    const double scalar, +    typename std::enable_if<std::is_floating_point<type>::value>::type* = NULL +) +{ +    //step 0: extract the lines from the input buffer +    const item32_t line0 = tohost(input.line0); +    const item32_t line1 = tohost(input.line1); +    const item32_t line2 = tohost(input.line2); +    const uint64_t line01 = (uint64_t(line0) << 32) | line1; +    const uint64_t line12 = (uint64_t(line1) << 32) | line2; + +    //step 1: shift out and mask off the individual numbers +    const type i0 = type(int16_t((line0 >> 16) & 0xfff0)*scalar); +    const type q0 = type(int16_t((line0 >> 4) & 0xfff0)*scalar); + +    const type i1 = type(int16_t((line01 >> 24) & 0xfff0)*scalar); +    const type q1 = type(int16_t((line1 >> 12) & 0xfff0)*scalar); + +    const type i2 = type(int16_t((line1 >> 0) & 0xfff0)*scalar); +    const type q2 = type(int16_t((line12 >> 20) & 0xfff0)*scalar); + +    const type i3 = type(int16_t((line2 >> 8) & 0xfff0)*scalar); +    const type q3 = type(int16_t((line2 << 4) & 0xfff0)*scalar); + +    //step 2: load the outputs +    out0 = std::complex<type>(i0, q0); +    out1 = std::complex<type>(i1, q1); +    out2 = std::complex<type>(i2, q2); +    out3 = std::complex<type>(i3, q3); +} + +template <typename type, tohost32_type tohost> +void convert_sc12_item32_3_to_star_4 +( +    const item32_sc12_3x &input, +    std::complex<type> &out0, +    std::complex<type> &out1, +    std::complex<type> &out2, +    std::complex<type> &out3, +    const double, +    typename std::enable_if<std::is_integral<type>::value>::type* = NULL +) +{ +    //step 0: extract the lines from the input buffer +    const item32_t line0 = tohost(input.line0); +    const item32_t line1 = tohost(input.line1); +    const item32_t line2 = tohost(input.line2); +    const uint64_t line01 = (uint64_t(line0) << 32) | line1; +    const uint64_t line12 = (uint64_t(line1) << 32) | line2; + +    //step 1: extract and load the outputs +    out0 = std::complex<type>(line0  >> 16 & 0xfff0, line0  >>  4 & 0xfff0); +    out1 = std::complex<type>(line01 >> 24 & 0xfff0, line1  >> 12 & 0xfff0); +    out2 = std::complex<type>(line1  >>  0 & 0xfff0, line12 >> 20 & 0xfff0); +    out3 = std::complex<type>(line2  >>  8 & 0xfff0, line2  <<  4 & 0xfff0); +} diff --git a/host/lib/convert/ssse3_pack_sc12.cpp b/host/lib/convert/ssse3_pack_sc12.cpp new file mode 100644 index 000000000..42c429b67 --- /dev/null +++ b/host/lib/convert/ssse3_pack_sc12.cpp @@ -0,0 +1,244 @@ +// +// Copyright 2017 Ettus Research LLC +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program.  If not, see <http://www.gnu.org/licenses/>. +// + +#include <tmmintrin.h> +#include "convert_pack_sc12.hpp" + +/* + * Shuffle Orderings - Single 128-bit SSE register + * + *   16-bit interleaved I/Q + *  --------------------------------------- + * | Q3 | I3 | Q2 | I2 | Q1 | I1 | Q0 | I0 | Input + *  --------------------------------------- + * | 127                                 0 | + * + * + *   12-bit deinterleaved unpacked I/Q + *  --------------------------------------- + * | I3 | I2 | I1 | I0 | Q3 | Q2 | Q1 | Q0 | Shuffle-1 + *  --------------------------------------- + * | High bit aligned  |  4-bit >> offset  | + * + * + *   12-bit interleaved packed I/Q + *  --------------------------------------- + * |I0|Q0|I1|Q1|I2|Q2|I3|Q3|               | Shuffle-2 | Shuffle-3 + *  --------------------------------------- + * | 127                32 | 31  Empty   0 | + * + * + *     12-bit packed I/Q byteswapped + *      ----------------------- + *     |   I0   |   Q0   |  I1 | 0 + *     |-----------------------| + *     | I1 |  Q1  |  I2  | Q2 |             Output + *     |-----------------------| + *     | Q2  |   I3   |   Q3   | + *     |-----------------------| + *     |        Unused         | 3 + *      ----------------------- + *     31                     0 + */ +#define SC12_SHIFT_MASK      0xfff0fff0, 0xfff0fff0, 0x0fff0fff, 0x0fff0fff +#define SC12_PACK_SHUFFLE1   13,12,9,8,5,4,1,0,15,14,11,10,7,6,3,2 +#define SC12_PACK_SHUFFLE2   9,8,0,11,10,2,13,12,4,15,14,6,0,0,0,0 +#define SC12_PACK_SHUFFLE3   8,1,8,8,3,8,8,5,8,8,7,8,8,8,8,8 + +template <typename type> +inline void convert_star_4_to_sc12_item32_3 +( +    const std::complex<type> *in, +    item32_sc12_3x &output, +    const double scalar, +    typename std::enable_if<std::is_same<type, float>::value>::type* = NULL +) +{ +    __m128 m0, m1, m2; +    m0 = _mm_set1_ps(scalar); +    m1 = _mm_loadu_ps((const float *) &in[0]); +    m2 = _mm_loadu_ps((const float *) &in[2]); +    m1 = _mm_mul_ps(m1, m0); +    m2 = _mm_mul_ps(m2, m0); +    m0 = _mm_shuffle_ps(m1, m2, _MM_SHUFFLE(2, 0, 2, 0)); +    m1 = _mm_shuffle_ps(m1, m2, _MM_SHUFFLE(3, 1, 3, 1)); + +    __m128i m3, m4, m5, m6, m7; +    m3 = _mm_set_epi32(SC12_SHIFT_MASK); +    m4 = _mm_set_epi8(SC12_PACK_SHUFFLE2); +    m5 = _mm_set_epi8(SC12_PACK_SHUFFLE3); + +    m6 = _mm_cvtps_epi32(m0); +    m7 = _mm_cvtps_epi32(m1); +    m6 = _mm_slli_epi32(m6, 4); +    m6 = _mm_packs_epi32(m7, m6); +    m6 = _mm_and_si128(m6, m3); +    m7 = _mm_move_epi64(m6); + +    m6 = _mm_shuffle_epi8(m6, m4); +    m7 = _mm_shuffle_epi8(m7, m5); +    m6 = _mm_or_si128(m6, m7); + +    m6 = _mm_shuffle_epi32(m6, _MM_SHUFFLE(0, 1, 2, 3)); +    _mm_storeu_si128((__m128i*) &output, m6); +} + +template <typename type> +static void convert_star_4_to_sc12_item32_3 +( +    const std::complex<type> *in, +    item32_sc12_3x &output, +    const double, +    typename std::enable_if<std::is_same<type, short>::value>::type* = NULL +) +{ +    __m128i m0, m1, m2, m3, m4, m5; +    m0 = _mm_set_epi32(SC12_SHIFT_MASK); +    m1 = _mm_set_epi8(SC12_PACK_SHUFFLE1); +    m2 = _mm_set_epi8(SC12_PACK_SHUFFLE2); +    m3 = _mm_set_epi8(SC12_PACK_SHUFFLE3); + +    m4 = _mm_loadu_si128((__m128i*) in); +    m4 = _mm_shuffle_epi8(m4, m1); +    m5 = _mm_srli_epi16(m4, 4); +    m4 = _mm_shuffle_epi32(m4, _MM_SHUFFLE(0, 0, 3, 2)); +    m4 = _mm_unpacklo_epi64(m5, m4); + +    m4 = _mm_and_si128(m4, m0); +    m5 = _mm_move_epi64(m4); +    m4 = _mm_shuffle_epi8(m4, m2); +    m5 = _mm_shuffle_epi8(m5, m3); +    m3 = _mm_or_si128(m4, m5); + +    m3 = _mm_shuffle_epi32(m3, _MM_SHUFFLE(0, 1, 2, 3)); +    _mm_storeu_si128((__m128i*) &output, m3); +} + +template <typename type, towire32_type towire> +struct convert_star_1_to_sc12_item32_2 : public converter +{ +    convert_star_1_to_sc12_item32_2(void):_scalar(0.0) +    { +    } + +    void set_scalar(const double scalar) +    { +        _scalar = scalar; +    } + +    void operator()(const input_type &inputs, const output_type &outputs, const size_t nsamps) +    { +        const std::complex<type> *input = reinterpret_cast<const std::complex<type> *>(inputs[0]); + +        const size_t head_samps = size_t(outputs[0]) & 0x3; +        int enable; +        size_t rewind = 0; +        switch(head_samps) +        { +            case 0: break; +            case 1: rewind = 9; break; +            case 2: rewind = 6; break; +            case 3: rewind = 3; break; +        } +        item32_sc12_3x *output = reinterpret_cast<item32_sc12_3x *>(size_t(outputs[0]) - rewind); + +        //helper variables +        size_t i = 0, o = 0; + +        //handle the head case +        switch (head_samps) +        { +        case 0: +            break; //no head +        case 1: +            enable = CONVERT12_LINE2; +            convert_star_4_to_sc12_item32_3<type, towire>(0, 0, 0, input[0], enable, output[o++], _scalar); +            break; +        case 2: +            enable = CONVERT12_LINE2 | CONVERT12_LINE1; +            convert_star_4_to_sc12_item32_3<type, towire>(0, 0, input[0], input[1], enable, output[o++], _scalar); +            break; +        case 3: +            enable = CONVERT12_LINE2 | CONVERT12_LINE1 | CONVERT12_LINE0; +            convert_star_4_to_sc12_item32_3<type, towire>(0, input[0], input[1], input[2], enable, output[o++], _scalar); +            break; +        } +        i += head_samps; + +        // SSE packed write output is 16 bytes which overwrites the 12-bit +        // packed struct by 4 bytes. There is no concern if there are +        // subsequent samples to be converted (writes will simply happen +        // twice). So set the conversion loop to force a tail case on the +        // final 4 or fewer samples. +        while (i+4 < nsamps) +        { +            convert_star_4_to_sc12_item32_3<type>(&input[i], output[o], _scalar); +            o++; i += 4; +        } + +        //handle the tail case +        const size_t tail_samps = nsamps - i; +        switch (tail_samps) +        { +        case 0: +            break; //no tail +        case 1: +            enable = CONVERT12_LINE0; +            convert_star_4_to_sc12_item32_3<type, towire>(input[i+0], 0, 0, 0, enable, output[o], _scalar); +            break; +        case 2: +            enable = CONVERT12_LINE0 | CONVERT12_LINE1; +            convert_star_4_to_sc12_item32_3<type, towire>(input[i+0], input[i+1], 0, 0, enable, output[o], _scalar); +            break; +        case 3: +            enable = CONVERT12_LINE0 | CONVERT12_LINE1 | CONVERT12_LINE2; +            convert_star_4_to_sc12_item32_3<type, towire>(input[i+0], input[i+1], input[i+2], 0, enable, output[o], _scalar); +            break; +        case 4: +            enable = CONVERT12_LINE_ALL; +            convert_star_4_to_sc12_item32_3<type, towire>(input[i+0], input[i+1], input[i+2], input[i+3], enable, output[o], _scalar); +            break; +        } +    } + +    double _scalar; +}; + +static converter::sptr make_convert_fc32_1_to_sc12_item32_le_1(void) +{ +    return converter::sptr(new convert_star_1_to_sc12_item32_2<float, uhd::wtohx>()); +} + +static converter::sptr make_convert_sc16_1_to_sc12_item32_le_1(void) +{ +    return converter::sptr(new convert_star_1_to_sc12_item32_2<short, uhd::wtohx>()); +} + +UHD_STATIC_BLOCK(register_sse_pack_sc12) +{ +    uhd::convert::id_type id; +    id.num_inputs = 1; +    id.num_outputs = 1; + +    id.input_format = "fc32"; +    id.output_format = "sc12_item32_le"; +    uhd::convert::register_converter(id, &make_convert_fc32_1_to_sc12_item32_le_1, PRIORITY_SIMD); + +    id.input_format = "sc16"; +    id.output_format = "sc12_item32_le"; +    uhd::convert::register_converter(id, &make_convert_sc16_1_to_sc12_item32_le_1, PRIORITY_SIMD); +} diff --git a/host/lib/convert/ssse3_unpack_sc12.cpp b/host/lib/convert/ssse3_unpack_sc12.cpp new file mode 100644 index 000000000..245e64ebc --- /dev/null +++ b/host/lib/convert/ssse3_unpack_sc12.cpp @@ -0,0 +1,219 @@ +// +// Copyright 2017 Ettus Research LLC +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program.  If not, see <http://www.gnu.org/licenses/>. +// + +#include "convert_unpack_sc12.hpp" +#include <emmintrin.h> +#include <tmmintrin.h> + +using namespace uhd::convert; + +/* + * Shuffle Orderings - Single 128-bit SSE register + * + *     12-bit packed I/Q byteswapped + *      ----------------------- + *     |   I0   |   Q0   |  I1 | 0 + *     |-----------------------| + *     | I1 |  Q1  |  I2  | Q2 |             Input + *     |-----------------------| + *     | Q2  |   I3   |   Q3   | 2 + *      ----------------------- + *     31                     0 + * + * + *   12-bit interleaved packed I/Q + *  --------------------------------------- + * |I0|Q0|I1|Q1|I2|Q2|I3|Q3|               | Byteswap Removed + *  --------------------------------------- + * | 127                32 | 31  Empty   0 | + * + * + *           Packed   Unpacked + *  Sample    Index    Index   Offset + * ===================================== + *    I0      15,14     0,1      0 + *    Q0      14,13     8,9      4 + *    I1      12,11     2,3      0 + *    Q1      11,10    10,11     4           12-bit Indices + *    I2       9,8      4,5      0 + *    Q2       8,7     12,13     4 + *    I3       6,5      6,7      0 + *    Q3       5,4     14,15     4 + * + * + *   12-bit deinterleaved unpacked I/Q + *  --------------------------------------- + * | Q3 | Q2 | Q1 | Q0 | I3 | I2 | I1 | I0 | Shuffle-1 + *  --------------------------------------- + * |  4-bit >> offset  | High bit aligned  | + * + * + *   16-bit interleaved I/Q + *  --------------------------------------- + * | Q3 | I3 | Q2 | I2 | Q1 | I1 | Q0 | I0 | Output (Shuffle-2) + *  --------------------------------------- + * | 127                                 0 | + * + */ +#define SC12_SHIFT_MASK      0x0fff0fff, 0x0fff0fff, 0xfff0fff0, 0xfff0fff0 +#define SC12_PACK_SHUFFLE1   5,4,8,7,11,10,14,13,6,5,9,8,12,11,15,14 +#define SC12_PACK_SHUFFLE2   15,14,7,6,13,12,5,4,11,10,3,2,9,8,1,0 + +template <typename type, tohost32_type tohost> +inline void convert_sc12_item32_3_to_star_4 +( +    const item32_sc12_3x &input, +    std::complex<type> *out, +    double scalar, +    typename std::enable_if<std::is_same<type, float>::value>::type* = NULL +) +{ +    __m128i m0, m1, m2, m3, m4; +    m0 = _mm_set_epi32(SC12_SHIFT_MASK); +    m1 = _mm_set_epi8(SC12_PACK_SHUFFLE1); +    m2 = _mm_loadu_si128((__m128i*) &input); +    m2 = _mm_shuffle_epi32(m2, _MM_SHUFFLE(0, 1, 2, 3)); +    m3 = _mm_shuffle_epi8(m2, m1); +    m3 = _mm_and_si128(m3, m0); + +    m4 = _mm_setzero_si128(); +    m1 = _mm_unpacklo_epi16(m4, m3); +    m2 = _mm_unpackhi_epi16(m4, m3); +    m2 = _mm_slli_epi32(m2, 4); +    m3 = _mm_unpacklo_epi32(m1, m2); +    m4 = _mm_unpackhi_epi32(m1, m2); + +    __m128 m5, m6, m7; +    m5 = _mm_set_ps1(scalar/(1 << 16)); +    m6 = _mm_cvtepi32_ps(m3); +    m7 = _mm_cvtepi32_ps(m4); +    m6 = _mm_mul_ps(m6, m5); +    m7 = _mm_mul_ps(m7, m5); + +    _mm_storeu_ps(reinterpret_cast<float*>(&out[0]), m6); +    _mm_storeu_ps(reinterpret_cast<float*>(&out[2]), m7); +} + +template <typename type, tohost32_type tohost> +inline void convert_sc12_item32_3_to_star_4 +( +    const item32_sc12_3x &input, +    std::complex<type> *out, +    double, +    typename std::enable_if<std::is_same<type, short>::value>::type* = NULL +) +{ +    __m128i m0, m1, m2, m3; +    m0 = _mm_set_epi32(SC12_SHIFT_MASK); +    m1 = _mm_set_epi8(SC12_PACK_SHUFFLE1); +    m2 = _mm_set_epi8(SC12_PACK_SHUFFLE2); + +    m3 = _mm_loadu_si128((__m128i*) &input); +    m3 = _mm_shuffle_epi32(m3, _MM_SHUFFLE(0, 1, 2, 3)); +    m3 = _mm_shuffle_epi8(m3, m1); +    m3 = _mm_and_si128(m3, m0); + +    m0 = _mm_slli_epi16(m3, 4); +    m1 = _mm_shuffle_epi32(m3, _MM_SHUFFLE(1, 0, 0, 0)); +    m0 = _mm_unpackhi_epi64(m1, m0); +    m1 = _mm_shuffle_epi8(m0, m2); + +    _mm_storeu_si128((__m128i*) out, m1); +} + +template <typename type, tohost32_type tohost> +struct convert_sc12_item32_1_to_star_2 : public converter +{ +    convert_sc12_item32_1_to_star_2(void):_scalar(0.0) +    { +        //NOP +    } + +    void set_scalar(const double scalar) +    { +        const int unpack_growth = 16; +        _scalar = scalar/unpack_growth; +    } + +    void operator()(const input_type &inputs, const output_type &outputs, const size_t nsamps) +    { +        const size_t head_samps = size_t(inputs[0]) & 0x3; +        size_t rewind = 0; +        switch(head_samps) +        { +            case 0: break; +            case 1: rewind = 9; break; +            case 2: rewind = 6; break; +            case 3: rewind = 3; break; +        } + +        const item32_sc12_3x *input = reinterpret_cast<const item32_sc12_3x *>(size_t(inputs[0]) - rewind); +        std::complex<type> *output = reinterpret_cast<std::complex<type> *>(outputs[0]); +        std::complex<type> dummy; +        size_t i = 0, o = 0; +        switch (head_samps) +        { +        case 0: break; //no head +        case 1: convert_sc12_item32_3_to_star_4<type, tohost>(input[i++], dummy, dummy, dummy, output[0], _scalar); break; +        case 2: convert_sc12_item32_3_to_star_4<type, tohost>(input[i++], dummy, dummy, output[0], output[1], _scalar); break; +        case 3: convert_sc12_item32_3_to_star_4<type, tohost>(input[i++], dummy, output[0], output[1], output[2], _scalar); break; +        } +        o += head_samps; + +        //convert the body +        while (o+3 < nsamps) +        { +           convert_sc12_item32_3_to_star_4<type, tohost>(input[i], &output[o], _scalar); +            i += 1; o += 4; +        } + +        const size_t tail_samps = nsamps - o; +        switch (tail_samps) +        { +        case 0: break; //no tail +        case 1: convert_sc12_item32_3_to_star_4<type, tohost>(input[i], output[o+0], dummy, dummy, dummy, _scalar); break; +        case 2: convert_sc12_item32_3_to_star_4<type, tohost>(input[i], output[o+0], output[o+1], dummy, dummy, _scalar); break; +        case 3: convert_sc12_item32_3_to_star_4<type, tohost>(input[i], output[o+0], output[o+1], output[o+2], dummy, _scalar); break; +        } +    } + +    double _scalar; +}; + +static converter::sptr make_convert_sc12_item32_le_1_to_fc32_1(void) +{ +    return converter::sptr(new convert_sc12_item32_1_to_star_2<float, uhd::wtohx>()); +} + +static converter::sptr make_convert_sc12_item32_le_1_to_sc16_1(void) +{ +    return converter::sptr(new convert_sc12_item32_1_to_star_2<short, uhd::wtohx>()); +} + +UHD_STATIC_BLOCK(register_sse_unpack_sc12) +{ +    uhd::convert::id_type id; +    id.num_inputs = 1; +    id.num_outputs = 1; +    id.output_format = "fc32"; +    id.input_format = "sc12_item32_le"; +    uhd::convert::register_converter(id, &make_convert_sc12_item32_le_1_to_fc32_1, PRIORITY_SIMD); + +    id.output_format = "sc16"; +    id.input_format = "sc12_item32_le"; +    uhd::convert::register_converter(id, &make_convert_sc12_item32_le_1_to_sc16_1, PRIORITY_SIMD); +}  | 
