Hi!
I was glad to see that the Linux kernel version was upgraded to 4.9 in L4T 31.0.2, and I was hoping to make use of the usbfs zerocopy feature that was merged in the 4.6 kernel:
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=f7d34b445abc00e979b7cf36b9580ac3d1a47cd8
But testing showed that it doesn’t actually work (data is not written to the passed buffer). I’ve prepared a simple test case that demonstrates this problem with any USB mass-storage device (you have to adjust VENDOR and PRODUCT to the USB ID of the flash drive you are using, and maybe some other settings too):
#include <libusb.h>
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
/* Test configuration. VENDOR/PRODUCT (and possibly the other values) must be
 * adjusted to match the USB mass-storage device under test. */
#define BLOCK_SIZE  512                          /* bytes per block */
#define READ_BLOCKS 4                            /* blocks requested per read */
#define BUF_SIZE    (READ_BLOCKS * BLOCK_SIZE)   /* bytes per data transfer */
#define VENDOR      0x1e3d                       /* idVendor to match */
#define PRODUCT     0x2096                       /* idProduct to match */
#define INTERFACE   0                            /* interface number to claim */
#define ENDPOINT    1                            /* bulk endpoint number (direction bit is OR-ed in by callers) */
#define LUN         0                            /* value placed in bCBWLUN */
/*
 * Command Block Wrapper (CBW) sent to the device on the bulk OUT endpoint
 * before each command. Exactly 31 bytes go over the wire.
 *
 * The struct is packed so that its in-memory layout and sizeof() are
 * guaranteed to match the 31-byte wire format instead of relying on the
 * members happening to be naturally aligned (unpacked, sizeof() is 32).
 *
 * NOTE(review): the 32-bit fields are stored in host byte order; the USB
 * Bulk-Only Transport format is little-endian, so this presumably assumes a
 * little-endian host - confirm before using on a big-endian target.
 */
struct command_block_wrapper {
    uint8_t  dCBWSignature[4];        /* 'U','S','B','C' */
    uint32_t dCBWTag;                 /* echoed back by the device in the CSW */
    uint32_t dCBWDataTransferLength;  /* bytes expected in the data stage */
    uint8_t  bmCBWFlags;              /* direction of the data stage */
    uint8_t  bCBWLUN;                 /* logical unit number */
    uint8_t  bCBWCBLength;            /* number of valid bytes in CBWCB */
    uint8_t  CBWCB[16];               /* the command block itself */
} __attribute__((packed));
/*
 * Command Status Wrapper (CSW) read back from the device on the bulk IN
 * endpoint after each command. Exactly 13 bytes come over the wire.
 *
 * Packed so that the layout and sizeof() are guaranteed to match the 13-byte
 * wire format (unpacked, sizeof() is 16 because of tail padding).
 *
 * NOTE(review): the 32-bit fields are read in host byte order; the wire
 * format is little-endian, so this presumably assumes a little-endian host.
 */
struct command_status_wrapper {
    uint8_t  dCSWSignature[4];  /* status signature bytes */
    uint32_t dCSWTag;           /* must equal the tag of the matching CBW */
    uint32_t dCSWDataResidue;   /* difference between requested and processed data */
    uint8_t  bCSWStatus;        /* zero on success */
} __attribute__((packed));
/* Tag of the most recently sent command: incremented before each CBW is sent
 * and compared against dCSWTag when the status is read back. */
uint32_t tag = 0;
/*
 * Send a READ(10) command wrapped in a CBW to the device.
 *
 * handle      - open libusb handle with the mass-storage interface claimed
 * data_length - number of bytes the following data stage will read; the block
 *               count placed in the command is data_length / BLOCK_SIZE
 *
 * The logical block address is left at zero, so the read always starts at the
 * first block. Increments the global `tag` and stamps the CBW with it so the
 * status can be matched later. Transfer failures are reported on stderr; the
 * function itself returns nothing.
 */
void send_mass_storage_command(libusb_device_handle *handle, int data_length) {
    enum { CBW_WIRE_SIZE = 31 };  /* bytes actually sent; independent of struct padding */
    struct command_block_wrapper cbw = {0};
    int block_count = data_length / BLOCK_SIZE;
    int size = 0;

    memcpy(cbw.dCBWSignature, "USBC", 4);
    cbw.dCBWTag = ++tag;
    cbw.dCBWDataTransferLength = data_length;
    cbw.bmCBWFlags = LIBUSB_ENDPOINT_IN;  /* data stage flows device -> host */
    cbw.bCBWLUN = LUN;
    cbw.bCBWCBLength = 10;
    cbw.CBWCB[0] = 0x28; // Read(10)
    /* Transfer length is a 16-bit big-endian block count in bytes 7..8;
     * writing only byte 8 would silently truncate counts above 255. */
    cbw.CBWCB[7] = (uint8_t)((block_count >> 8) & 0xff);
    cbw.CBWCB[8] = (uint8_t)(block_count & 0xff);

    int rc = libusb_bulk_transfer(handle, ENDPOINT, (unsigned char *)&cbw,
                                  CBW_WIRE_SIZE, &size, 1000);
    if (rc != 0 || size != CBW_WIRE_SIZE) {
        fprintf(stderr, "sending CBW failed: %s (%d of %d bytes sent)\n",
                libusb_error_name(rc), size, CBW_WIRE_SIZE);
    }
}
/*
 * Read the CSW that follows a command's data stage and validate it.
 *
 * handle - open libusb handle with the mass-storage interface claimed
 *
 * Prints "command failed!" to stdout when the transfer fails, the CSW is
 * short, its signature is not "USBS", its tag does not match the global
 * `tag` of the last command sent, or the device reports a non-zero status.
 * A libusb-level transfer error is additionally described on stderr.
 */
void get_mass_storage_status(libusb_device_handle *handle) {
    enum { CSW_WIRE_SIZE = 13 };  /* bytes actually received; independent of struct padding */
    struct command_status_wrapper csw = {0};
    int size = 0;

    int rc = libusb_bulk_transfer(handle, ENDPOINT | LIBUSB_ENDPOINT_IN,
                                  (unsigned char *)&csw, CSW_WIRE_SIZE, &size, 1000);
    if (rc != 0) {
        fprintf(stderr, "reading CSW failed: %s\n", libusb_error_name(rc));
    }

    if (rc != 0 ||
        size != CSW_WIRE_SIZE ||
        memcmp(csw.dCSWSignature, "USBS", 4) != 0 ||
        csw.dCSWTag != tag ||
        csw.bCSWStatus) {
        printf("command failed!\n");
    }
}
/* Issue one read command, receive BUF_SIZE bytes into buf, then read the
 * command status. Data-stage failures are reported on stderr. */
static void read_blocks_into(libusb_device_handle *handle, const char *label,
                             unsigned char *buf) {
    int size = 0;

    send_mass_storage_command(handle, BUF_SIZE);
    int rc = libusb_bulk_transfer(handle, ENDPOINT | LIBUSB_ENDPOINT_IN,
                                  buf, BUF_SIZE, &size, 5000);
    if (rc != 0 || size != BUF_SIZE) {
        fprintf(stderr, "data transfer into %s failed: %s (%d of %d bytes)\n",
                label, libusb_error_name(rc), size, BUF_SIZE);
    }
    get_mass_storage_status(handle);
}

/* Print the offset of the first non-zero byte of buf; prints nothing when the
 * whole buffer is zero. */
static void report_first_nonzero(const char *label, const unsigned char *buf) {
    for (int j = 0; j < BUF_SIZE; j++) {
        if (buf[j]) {
            printf("index of first non-null byte in %s: 0x%04x\n", label, j);
            break;
        }
    }
}

/*
 * Test driver: finds the first device matching VENDOR/PRODUCT that can be
 * opened and claimed, reads the same blocks once into a malloc() buffer and
 * once into a libusb_dev_mem_alloc() buffer, and prints where the first
 * non-zero byte landed in each.
 *
 * Returns 1 if libusb cannot be initialised or a buffer cannot be allocated,
 * 0 otherwise (including when no matching device is found).
 */
int main(void) {
    libusb_context *ctx = NULL;
    if (libusb_init(&ctx)) return 1;

    int exit_code = 0;
    libusb_device **list = NULL;
    libusb_device_handle *handle = NULL;
    struct libusb_device_descriptor desc = {0};
    ssize_t cnt = libusb_get_device_list(ctx, &list);

    for (ssize_t i = 0; i < cnt; i++) {
        if (libusb_get_device_descriptor(list[i], &desc)) continue;
        if (!(desc.idVendor == VENDOR && desc.idProduct == PRODUCT)) continue;
        if (libusb_open(list[i], &handle)) continue;

        /* Best effort: let libusb detach/re-attach the kernel storage driver. */
        libusb_set_auto_detach_kernel_driver(handle, 1);
        if (libusb_claim_interface(handle, INTERFACE)) {
            libusb_close(handle);
            continue;
        }

        unsigned char *buf_malloc = malloc(BUF_SIZE);
        unsigned char *buf_libusb = libusb_dev_mem_alloc(handle, BUF_SIZE);

        if (buf_malloc == NULL || buf_libusb == NULL) {
            /* libusb_dev_mem_alloc() returns NULL when device memory is not
             * available; writing to it unchecked would crash. */
            fprintf(stderr, "buffer allocation failed (malloc: %s, libusb_dev_mem_alloc: %s)\n",
                    buf_malloc ? "ok" : "NULL", buf_libusb ? "ok" : "NULL");
            exit_code = 1;
        } else {
            memset(buf_malloc, 0, BUF_SIZE);
            memset(buf_libusb, 0, BUF_SIZE);

            read_blocks_into(handle, "buf_malloc", buf_malloc);
            read_blocks_into(handle, "buf_libusb", buf_libusb);

            report_first_nonzero("buf_malloc", buf_malloc);
            report_first_nonzero("buf_libusb", buf_libusb);
        }

        if (buf_libusb != NULL) {
            libusb_dev_mem_free(handle, buf_libusb, BUF_SIZE);
        }
        free(buf_malloc);
        libusb_release_interface(handle, INTERFACE);
        libusb_close(handle);
        break;  /* only the first usable matching device is tested */
    }

    if (cnt >= 0) {
        libusb_free_device_list(list, 1);
    }
    libusb_exit(ctx);
    return exit_code;
}
Compile and run like this:
sudo apt install libusb-1.0-0-dev
gcc test.c `pkg-config --libs --cflags libusb-1.0` -o test
sudo ./test
Expected results look something like this (tested on an ordinary PC):
index of first non-null byte in buf_malloc: 0x01b8
index of first non-null byte in buf_libusb: 0x01b8
Actual result on AGX Xavier (note that buf_libusb is all zeroes):
index of first non-null byte in buf_malloc: 0x01b8
For the usbfs zerocopy feature to work, the kernel has to be configured with the CONFIG_DMA_CMA option enabled and CONFIG_CMA_SIZE_MBYTES set to some value (or the cma= kernel command-line parameter has to be used). The kernel in L4T has these settings set up correctly.
Having this feature working would considerably lower the CPU usage of USB transfers by getting rid of an unnecessary memcpy. On the other hand, the current state breaks valid code: data is not received by the application, but no error is reported.