diff options
Diffstat (limited to 'host')
| -rw-r--r-- | host/lib/convert/convert_with_sse2.cpp | 204 | ||||
| -rw-r--r-- | host/utils/usrp1p_gpif_loopback.cpp | 111 | ||||
| -rw-r--r-- | host/utils/usrp1p_poketest.cpp | 134 | 
3 files changed, 126 insertions, 323 deletions
diff --git a/host/lib/convert/convert_with_sse2.cpp b/host/lib/convert/convert_with_sse2.cpp index 52beea24a..9772028dc 100644 --- a/host/lib/convert/convert_with_sse2.cpp +++ b/host/lib/convert/convert_with_sse2.cpp @@ -25,25 +25,37 @@ DECLARE_CONVERTER(convert_fc32_1_to_item32_1_nswap, PRIORITY_CUSTOM){      const fc32_t *input = reinterpret_cast<const fc32_t *>(inputs[0]);      item32_t *output = reinterpret_cast<item32_t *>(outputs[0]); -    __m128 scalar = _mm_set_ps1(float(scale_factor)); - -    //convert blocks of samples with intrinsics -    size_t i = 0; for (; i < (nsamps & ~0x3); i+=4){ -        //load from input -        __m128 tmplo = _mm_loadu_ps(reinterpret_cast<const float *>(input+i+0)); -        __m128 tmphi = _mm_loadu_ps(reinterpret_cast<const float *>(input+i+2)); - -        //convert and scale -        __m128i tmpilo = _mm_cvtps_epi32(_mm_mul_ps(tmplo, scalar)); -        __m128i tmpihi = _mm_cvtps_epi32(_mm_mul_ps(tmphi, scalar)); - -        //pack + swap 16-bit pairs -        __m128i tmpi = _mm_packs_epi32(tmpilo, tmpihi); -        tmpi = _mm_shufflelo_epi16(tmpi, _MM_SHUFFLE(2, 3, 0, 1)); -        tmpi = _mm_shufflehi_epi16(tmpi, _MM_SHUFFLE(2, 3, 0, 1)); - -        //store to output -        _mm_storeu_si128(reinterpret_cast<__m128i *>(output+i), tmpi); +    const __m128 scalar = _mm_set_ps1(float(scale_factor)); + +    #define convert_fc32_1_to_item32_1_nswap_guts(_al_)                 \ +    for (; i < (nsamps & ~0x3); i+=4){                                  \ +        /* load from input */                                           \ +        __m128 tmplo = _mm_load ## _al_ ## _ps(reinterpret_cast<const float *>(input+i+0)); \ +        __m128 tmphi = _mm_load ## _al_ ## _ps(reinterpret_cast<const float *>(input+i+2)); \ +                                                                        \ +        /* convert and scale */ \ +        __m128i tmpilo = _mm_cvtps_epi32(_mm_mul_ps(tmplo, scalar));    \ +        __m128i tmpihi = _mm_cvtps_epi32(_mm_mul_ps(tmphi, scalar));    \ +                                                                        \ +        /* pack + swap 16-bit pairs */                                  \ +        __m128i tmpi = _mm_packs_epi32(tmpilo, tmpihi);                 \ +        tmpi = _mm_shufflelo_epi16(tmpi, _MM_SHUFFLE(2, 3, 0, 1));      \ +        tmpi = _mm_shufflehi_epi16(tmpi, _MM_SHUFFLE(2, 3, 0, 1));      \ +                                                                        \ +        /* store to output */                                           \ +        _mm_storeu_si128(reinterpret_cast<__m128i *>(output+i), tmpi);  \ +    }                                                                   \ + +    size_t i = 0; + +    //dispatch according to alignment +    switch (size_t(input) & 0xf){ +    case 0x8: +        output[i] = fc32_to_item32(input[i], float(scale_factor)); i++; +    case 0x0: +        convert_fc32_1_to_item32_1_nswap_guts() +        break; +    default: convert_fc32_1_to_item32_1_nswap_guts(u)      }      //convert remainder @@ -56,24 +68,36 @@ DECLARE_CONVERTER(convert_fc32_1_to_item32_1_bswap, PRIORITY_CUSTOM){      const fc32_t *input = reinterpret_cast<const fc32_t *>(inputs[0]);      item32_t *output = reinterpret_cast<item32_t *>(outputs[0]); -    __m128 scalar = _mm_set_ps1(float(scale_factor)); - -    //convert blocks of samples with intrinsics -    size_t i = 0; for (; i < (nsamps & ~0x3); i+=4){ -        //load from input -        __m128 tmplo = _mm_loadu_ps(reinterpret_cast<const float *>(input+i+0)); -        __m128 tmphi = _mm_loadu_ps(reinterpret_cast<const float *>(input+i+2)); - -        //convert and scale -        __m128i tmpilo = _mm_cvtps_epi32(_mm_mul_ps(tmplo, scalar)); -        __m128i tmpihi = _mm_cvtps_epi32(_mm_mul_ps(tmphi, scalar)); - -        //pack + byteswap -> byteswap 16 bit words -        __m128i tmpi = _mm_packs_epi32(tmpilo, tmpihi); -        tmpi = _mm_or_si128(_mm_srli_epi16(tmpi, 8), _mm_slli_epi16(tmpi, 8)); - -        //store to output -        _mm_storeu_si128(reinterpret_cast<__m128i *>(output+i), tmpi); +    const __m128 scalar = _mm_set_ps1(float(scale_factor)); + +    #define convert_fc32_1_to_item32_1_bswap_guts(_al_)                 \ +    for (; i < (nsamps & ~0x3); i+=4){                                  \ +        /* load from input */                                           \ +        __m128 tmplo = _mm_load ## _al_ ## _ps(reinterpret_cast<const float *>(input+i+0)); \ +        __m128 tmphi = _mm_load ## _al_ ## _ps(reinterpret_cast<const float *>(input+i+2)); \ +                                                                        \ +        /* convert and scale */ \ +        __m128i tmpilo = _mm_cvtps_epi32(_mm_mul_ps(tmplo, scalar));    \ +        __m128i tmpihi = _mm_cvtps_epi32(_mm_mul_ps(tmphi, scalar));    \ +                                                                        \ +        /* pack + byteswap -> byteswap 16 bit words */                  \ +        __m128i tmpi = _mm_packs_epi32(tmpilo, tmpihi);                 \ +        tmpi = _mm_or_si128(_mm_srli_epi16(tmpi, 8), _mm_slli_epi16(tmpi, 8)); \ +                                                                        \ +        /* store to output */                                           \ +        _mm_storeu_si128(reinterpret_cast<__m128i *>(output+i), tmpi);  \ +    }                                                                   \ + +    size_t i = 0; + +    //dispatch according to alignment +    switch (size_t(input) & 0xf){ +    case 0x8: +        output[i] = uhd::byteswap(fc32_to_item32(input[i], float(scale_factor))); i++; +    case 0x0: +        convert_fc32_1_to_item32_1_bswap_guts() +        break; +    default: convert_fc32_1_to_item32_1_bswap_guts(u)      }      //convert remainder @@ -86,27 +110,39 @@ DECLARE_CONVERTER(convert_item32_1_to_fc32_1_nswap, PRIORITY_CUSTOM){      const item32_t *input = reinterpret_cast<const item32_t *>(inputs[0]);      fc32_t *output = reinterpret_cast<fc32_t *>(outputs[0]); -    __m128 scalar = _mm_set_ps1(float(scale_factor)/(1 << 16)); -    __m128i zeroi = _mm_setzero_si128(); - -    //convert blocks of samples with intrinsics -    size_t i = 0; for (; i < (nsamps & ~0x3); i+=4){ -        //load from input -        __m128i tmpi = _mm_loadu_si128(reinterpret_cast<const __m128i *>(input+i)); - -        //unpack + swap 16-bit pairs -        tmpi = _mm_shufflelo_epi16(tmpi, _MM_SHUFFLE(2, 3, 0, 1)); -        tmpi = _mm_shufflehi_epi16(tmpi, _MM_SHUFFLE(2, 3, 0, 1)); -        __m128i tmpilo = _mm_unpacklo_epi16(zeroi, tmpi); //value in upper 16 bits -        __m128i tmpihi = _mm_unpackhi_epi16(zeroi, tmpi); - -        //convert and scale -        __m128 tmplo = _mm_mul_ps(_mm_cvtepi32_ps(tmpilo), scalar); -        __m128 tmphi = _mm_mul_ps(_mm_cvtepi32_ps(tmpihi), scalar); - -        //store to output -        _mm_storeu_ps(reinterpret_cast<float *>(output+i+0), tmplo); -        _mm_storeu_ps(reinterpret_cast<float *>(output+i+2), tmphi); +    const __m128 scalar = _mm_set_ps1(float(scale_factor)/(1 << 16)); +    const __m128i zeroi = _mm_setzero_si128(); + +    #define convert_item32_1_to_fc32_1_nswap_guts(_al_)                 \ +    for (; i < (nsamps & ~0x3); i+=4){                                  \ +        /* load from input */                                           \ +        __m128i tmpi = _mm_loadu_si128(reinterpret_cast<const __m128i *>(input+i)); \ +                                                                        \ +        /* unpack + swap 16-bit pairs */                                \ +        tmpi = _mm_shufflelo_epi16(tmpi, _MM_SHUFFLE(2, 3, 0, 1));      \ +        tmpi = _mm_shufflehi_epi16(tmpi, _MM_SHUFFLE(2, 3, 0, 1));      \ +        __m128i tmpilo = _mm_unpacklo_epi16(zeroi, tmpi); /* value in upper 16 bits */ \ +        __m128i tmpihi = _mm_unpackhi_epi16(zeroi, tmpi);               \ +                                                                        \ +        /* convert and scale */                                         \ +        __m128 tmplo = _mm_mul_ps(_mm_cvtepi32_ps(tmpilo), scalar);     \ +        __m128 tmphi = _mm_mul_ps(_mm_cvtepi32_ps(tmpihi), scalar);     \ +                                                                        \ +        /* store to output */                                           \ +        _mm_store ## _al_ ## _ps(reinterpret_cast<float *>(output+i+0), tmplo); \ +        _mm_store ## _al_ ## _ps(reinterpret_cast<float *>(output+i+2), tmphi); \ +    }                                                                   \ + +    size_t i = 0; + +    //dispatch according to alignment +    switch (size_t(output) & 0xf){ +    case 0x8: +        output[i] = item32_to_fc32(input[i], float(scale_factor)); i++; +    case 0x0: +        convert_item32_1_to_fc32_1_nswap_guts() +        break; +    default: convert_item32_1_to_fc32_1_nswap_guts(u)      }      //convert remainder @@ -119,26 +155,38 @@ DECLARE_CONVERTER(convert_item32_1_to_fc32_1_bswap, PRIORITY_CUSTOM){      const item32_t *input = reinterpret_cast<const item32_t *>(inputs[0]);      fc32_t *output = reinterpret_cast<fc32_t *>(outputs[0]); -    __m128 scalar = _mm_set_ps1(float(scale_factor)/(1 << 16)); -    __m128i zeroi = _mm_setzero_si128(); - -    //convert blocks of samples with intrinsics -    size_t i = 0; for (; i < (nsamps & ~0x3); i+=4){ -        //load from input -        __m128i tmpi = _mm_loadu_si128(reinterpret_cast<const __m128i *>(input+i)); - -        //byteswap + unpack -> byteswap 16 bit words -        tmpi = _mm_or_si128(_mm_srli_epi16(tmpi, 8), _mm_slli_epi16(tmpi, 8)); -        __m128i tmpilo = _mm_unpacklo_epi16(zeroi, tmpi); //value in upper 16 bits -        __m128i tmpihi = _mm_unpackhi_epi16(zeroi, tmpi); - -        //convert and scale -        __m128 tmplo = _mm_mul_ps(_mm_cvtepi32_ps(tmpilo), scalar); -        __m128 tmphi = _mm_mul_ps(_mm_cvtepi32_ps(tmpihi), scalar); - -        //store to output -        _mm_storeu_ps(reinterpret_cast<float *>(output+i+0), tmplo); -        _mm_storeu_ps(reinterpret_cast<float *>(output+i+2), tmphi); +    const __m128 scalar = _mm_set_ps1(float(scale_factor)/(1 << 16)); +    const __m128i zeroi = _mm_setzero_si128(); + +    #define convert_item32_1_to_fc32_1_bswap_guts(_al_)                 \ +    for (; i < (nsamps & ~0x3); i+=4){                                  \ +        /* load from input */                                           \ +        __m128i tmpi = _mm_loadu_si128(reinterpret_cast<const __m128i *>(input+i)); \ +                                                                        \ +        /* byteswap + unpack -> byteswap 16 bit words */                \ +        tmpi = _mm_or_si128(_mm_srli_epi16(tmpi, 8), _mm_slli_epi16(tmpi, 8)); \ +        __m128i tmpilo = _mm_unpacklo_epi16(zeroi, tmpi); /* value in upper 16 bits */ \ +        __m128i tmpihi = _mm_unpackhi_epi16(zeroi, tmpi);               \ +                                                                        \ +        /* convert and scale */                                         \ +        __m128 tmplo = _mm_mul_ps(_mm_cvtepi32_ps(tmpilo), scalar);     \ +        __m128 tmphi = _mm_mul_ps(_mm_cvtepi32_ps(tmpihi), scalar);     \ +                                                                        \ +        /* store to output */                                           \ +        _mm_store ## _al_ ## _ps(reinterpret_cast<float *>(output+i+0), tmplo); \ +        _mm_store ## _al_ ## _ps(reinterpret_cast<float *>(output+i+2), tmphi); \ +    }                                                                   \ + +    size_t i = 0; + +    //dispatch according to alignment +    switch (size_t(output) & 0xf){ +    case 0x8: +        output[i] = item32_to_fc32(uhd::byteswap(input[i]), float(scale_factor)); i++; +    case 0x0: +        convert_item32_1_to_fc32_1_bswap_guts() +        break; +    default: convert_item32_1_to_fc32_1_bswap_guts(u)      }      //convert remainder diff --git a/host/utils/usrp1p_gpif_loopback.cpp b/host/utils/usrp1p_gpif_loopback.cpp deleted file mode 100644 index 3b9da4304..000000000 --- a/host/utils/usrp1p_gpif_loopback.cpp +++ /dev/null @@ -1,111 +0,0 @@ -//USB->GPIF->FPGA loopback test for USRP1P -//uses UHD libusb transport - -#include <uhd/device.hpp> -#include <uhd/transport/usb_zero_copy.hpp> -#include <uhd/transport/bounded_buffer.hpp> -#include <uhd/transport/usb_control.hpp> -#include <uhd/utils/assert.hpp> -#include <boost/shared_array.hpp> -#include <boost/foreach.hpp> -#include <boost/thread.hpp> -#include <boost/format.hpp> -#include <vector> -#include <iostream> -#include <iomanip> - -//so the goal is to open a USB device to endpoints (2,6), submit a buffer, receive a reply, and compare them. -//use usb_zero_copy::make() to get a usb_zero_copy object and then start submitting. -//need to get a usb dev handle to pass to make -//use static std::vector<usb_device_handle::sptr> get_device_list(boost::uint16_t vid, boost::uint16_t pid) to get a device handle -//then get_send_buffer, send, etc. -using namespace uhd; -using namespace uhd::transport; - -const boost::uint16_t data_xfer_size = 32; -const boost::uint16_t ctrl_xfer_size = 32; - -int main(int argc, char *argv[]) { -    std::cout << "USRP1+ GPIF loopback test" << std::endl; -    //step 1: get a handle on it -    std::vector<usb_device_handle::sptr> handles = usb_device_handle::get_device_list(0xfffe, 0x0003); -    if(handles.size() == 0) { -        std::cout << "No USRP1+ found." << std::endl; -        return ~0; -    } -     -    bool verbose = false; -    if(argc > 1) if(std::string(argv[1]) == "-v") verbose = true; -     -    usb_device_handle::sptr handle = handles.front(); - -    usb_zero_copy::sptr data_transport; -    usb_control::sptr ctrl_transport = usb_control::make(handle); //just in case - -    data_transport = usb_zero_copy::make( -                handle,        // identifier -                8,             // IN endpoint -                4,             // OUT endpoint -                uhd::device_addr_t("recv_frame_size=32, num_recv_frames=1, send_frame_size=32, num_send_frames=1") //args -    ); -     -    if(verbose) std::cout << "Made." << std::endl; -     -    //ok now we're made. time to get a buffer and start sending data. -     -    boost::uint8_t localbuf[data_xfer_size]; - -    managed_send_buffer::sptr sbuf; -    managed_recv_buffer::sptr rbuf; -    size_t xfercount = 0; -     -    srand(time(0)); -    while(1) { - -        if(verbose) std::cout << "Getting send buffer." << std::endl; -        sbuf = data_transport->get_send_buff(); -        if(sbuf == 0) { -            std::cout << "Failed to get a send buffer." << std::endl; -            return ~0; -        } -        for(int i = 0; i < data_xfer_size; i++) { -            boost::uint8_t x = rand(); -            sbuf->cast<boost::uint8_t *>()[i] = x; -            localbuf[i] = x; -        } -         -        if(verbose) std::cout << "Buffer loaded" << std::endl; - -        sbuf->commit(data_xfer_size); -        if(verbose) std::cout << "Committed." << std::endl; - -        rbuf = data_transport->get_recv_buff(0.3); //timeout -         -        if(rbuf == 0) { -            std::cout << "Failed to get receive buffer (timeout?)" << std::endl; -            return ~0; -        } -         -        if(verbose) std::cout << "# " << xfercount << std::endl; -     -        if(!memcmp(rbuf->cast<const boost::uint8_t *>(), localbuf, data_xfer_size)) { -            std::cout << "."; -        } else { -            if(verbose) { -                int i = 0; -                for(int j = 0; j < 32; j++) { -                    std::cout << boost::format("%02X ") % int(rbuf->cast<const boost::uint8_t *>()[i*32+j]); -                } -                std::cout << std::endl; -            }     -            else std::cout << "x"; - -        } -        sbuf.reset(); -        rbuf.reset(); -        xfercount++; -        //if(verbose) std::cout << "sptrs reset" << std::endl; -    } -     -    return 0; -} diff --git a/host/utils/usrp1p_poketest.cpp b/host/utils/usrp1p_poketest.cpp deleted file mode 100644 index ca7628e01..000000000 --- a/host/utils/usrp1p_poketest.cpp +++ /dev/null @@ -1,134 +0,0 @@ -//FPGA register poke test for USRP1P -//uses UHD libusb transport - -#include <uhd/device.hpp> -#include <uhd/transport/usb_zero_copy.hpp> -#include <uhd/transport/bounded_buffer.hpp> -#include <uhd/transport/usb_control.hpp> -#include <uhd/utils/assert.hpp> -#include <boost/shared_array.hpp> -#include <boost/foreach.hpp> -#include <boost/thread.hpp> -#include <boost/format.hpp> -#include <vector> -#include <iostream> -#include <iomanip> -#include "../lib/usrp/usrp1p/ctrl_packet.hpp" - -//so the goal is to open a USB device to endpoints (2,6), submit a buffer, receive a reply, and compare them. -//use usb_zero_copy::make() to get a usb_zero_copy object and then start submitting. -//need to get a usb dev handle to pass to make -//use static std::vector<usb_device_handle::sptr> get_device_list(boost::uint16_t vid, boost::uint16_t pid) to get a device handle -//then get_send_buffer, send, etc. -using namespace uhd; -using namespace uhd::transport; - -const boost::uint16_t ctrl_xfer_size = 32; - -int main(int argc, char *argv[]) { -    std::cout << "USRP1+ GPIF poke test" << std::endl; -    //step 1: get a handle on it -    std::vector<usb_device_handle::sptr> handles = usb_device_handle::get_device_list(0xfffe, 0x0003); -    if(handles.size() == 0) { -        std::cout << "No USRP1+ found." << std::endl; -        return ~0; -    } -     -    bool verbose = false; -    if(argc > 1) if(std::string(argv[1]) == "-v") verbose = true; -     -    usb_device_handle::sptr handle = handles.front(); - -    usb_zero_copy::sptr data_transport; -    usb_control::sptr ctrl_transport = usb_control::make(handle); //just in case - -    data_transport = usb_zero_copy::make( -                handle,        // identifier -                8,             // IN endpoint -                4,             // OUT endpoint -                uhd::device_addr_t("recv_frame_size=32, num_recv_frames=1, send_frame_size=32, num_send_frames=1") //args -    ); -     -    if(verbose) std::cout << "Made." << std::endl; -     -    //ok now we're made. time to get a buffer and start sending data. - -    managed_send_buffer::sptr sbuf; -    managed_recv_buffer::sptr rbuf; -    size_t xfercount = 0; -     -    static uint8_t sequence = 0; -    //uhd::usrp::ctrl_packet_out_t outpkt; -    //memset(outpkt.data, 0x00, sizeof(outpkt.data)); -//    outpkt.op = uhd::usrp::CTRL_PACKET_WRITE; -//    outpkt.callbacks = 0; -//    outpkt.seq = sequence++; -//    outpkt.len = 4; -//    outpkt.addr = 0x00000000; -//    outpkt.data[0] = 0xff; -//    outpkt.data[1] = 0xfe; -//    outpkt.data[2] = 0xfd; -//    outpkt.data[3] = 0xfc; - -    boost::uint16_t outpkt[16]; -    /* Packet format: -     * Command: 2 bits -     * Callbacks: 6 bits -     * Seq num: 8 bits -     * Length: 16 bits -     * Addr LSW: 16 bits -     * Addr MSW: 16 bits -     * Data: 24 bytes/12 words -     * Lengths are in lines -     *  -     * readback: -     * AA00 LEN(16) SEQ(16) ADDR(32) DATA(16bx12B) -     */ -    memset(outpkt, 0x00, sizeof(outpkt)); -    outpkt[0] = 0x8000; //read cmd + callbacks (0) + seq -    outpkt[1] = 0x0001; //len -    outpkt[2] = 0x0000; //addr LSW -    outpkt[3] = 0x0000; //addr MSW -    outpkt[4] = 0x0A0A; //data -    outpkt[5] = 0xFFFF; -     -     -    srand(time(0)); -//    while(1) { - -        if(verbose) std::cout << "Getting send buffer." << std::endl; -        sbuf = data_transport->get_send_buff(); -        if(sbuf == 0) { -            std::cout << "Failed to get a send buffer." << std::endl; -            return ~0; -        } -         -        for(int i = 0; i < ctrl_xfer_size; i++) { -            sbuf->cast<boost::uint8_t *>()[i] = ((boost::uint8_t *)&outpkt)[i]; -        } -         -        if(verbose) std::cout << "Buffer loaded" << std::endl; - -        sbuf->commit(ctrl_xfer_size); -        if(verbose) std::cout << "Committed." << std::endl; - -        rbuf = data_transport->get_recv_buff(0.3); //timeout -         -        if(rbuf == 0) { -            std::cout << "Failed to get receive buffer (timeout?)" << std::endl; -            return ~0; -        } -         -        for(int j = 0; j < 32; j++) { -            std::cout << boost::format("%02X ") % int(rbuf->cast<const boost::uint8_t *>()[j]); -        } -        std::cout << std::endl; - -        sbuf.reset(); -        rbuf.reset(); -        xfercount++; -        //if(verbose) std::cout << "sptrs reset" << std::endl; -//    } -     -    return 0; -}  | 
