usbfs zerocopy not working

Hi!

I was glad to see that the Linux kernel version was upgraded to 4.9 in L4T 31.0.2 and was hoping to make use of the usbfs zerocopy feature that was merged in the 4.6 kernel:
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=f7d34b445abc00e979b7cf36b9580ac3d1a47cd8

But testing showed that it doesn’t actually work (data is not written to the passed buffer). I’ve prepared a simple test case that demonstrates this problem with any USB mass-storage device (you have to adjust VENDOR and PRODUCT to the USB ID of the flash drive you are using, and maybe some other settings too):

#include <libusb.h>
#include <stdlib.h>
#include <string.h>
#include <stdio.h>

/* USB ID of the flash drive under test; adjust these two to match your device. */
#define VENDOR 0x1e3d
#define PRODUCT 0x2096

/* Transfer geometry and device addressing used by the test. */
enum {
        BLOCK_SIZE = 512,                       /* divisor used to turn a byte count into a block count */
        READ_BLOCKS = 4,                        /* number of blocks requested per read */
        BUF_SIZE = READ_BLOCKS * BLOCK_SIZE,    /* size in bytes of each receive buffer */
        INTERFACE = 0,                          /* interface number that gets claimed */
        ENDPOINT = 1,                           /* endpoint number, used for both the OUT and the IN direction */
        LUN = 0                                 /* logical unit number placed in every command */
};

/*
 * Command block wrapper sent to the device by send_mass_storage_command().
 * Only the first 31 bytes of the struct are transmitted, so the member
 * order and sizes below are the on-the-wire layout and must not change.
 */
struct command_block_wrapper {
        uint8_t dCBWSignature[4];        // filled with 'U', 'S', 'B', 'C'
        uint32_t dCBWTag;                // copy of the global tag after it was incremented
        uint32_t dCBWDataTransferLength; // number of bytes requested from the device
        uint8_t bmCBWFlags;              // set to LIBUSB_ENDPOINT_IN (data flows device -> host)
        uint8_t bCBWLUN;                 // set to LUN
        uint8_t bCBWCBLength;            // set to 10, the number of used bytes in CBWCB
        uint8_t CBWCB[16];               // command bytes: [0] = opcode, [8] = block count
};

/*
 * Command status wrapper read back by get_mass_storage_status(), which
 * requests 13 bytes from the device into this struct.
 */
struct command_status_wrapper {
        uint8_t dCSWSignature[4];  // not inspected by this program
        uint32_t dCSWTag;          // compared against the global tag
        uint32_t dCSWDataResidue;  // not inspected by this program
        uint8_t bCSWStatus;        // any non-zero value is reported as a failed command
};

/*
 * Tag of the most recently sent command: incremented by
 * send_mass_storage_command() and compared with the tag in the status
 * wrapper by get_mass_storage_status().
 */
uint32_t tag = 0;

/*
 * Sends a command block wrapper carrying a Read(10) request to the device.
 *
 * handle:      open libusb device handle used for the bulk OUT transfer
 * data_length: number of bytes the device is asked to return; the block
 *              count in the command is data_length / BLOCK_SIZE
 *
 * The global tag is incremented and stored in the wrapper so that
 * get_mass_storage_status() can match the status to this command. The
 * logical block address bytes are left at zero. A failed or short transfer
 * is reported on stderr; the function itself returns nothing.
 *
 * NOTE(review): the 32-bit wrapper fields are stored in host byte order,
 * which presumably is only correct on little-endian hosts -- confirm.
 */
void send_mass_storage_command(libusb_device_handle *handle, int data_length) {
        // The struct carries trailing padding, so sizeof cannot be used for the wire length.
        const int cbw_length = 31;
        struct command_block_wrapper cbw = {};
        int blocks = data_length / BLOCK_SIZE;
        int size = 0;
        cbw.dCBWSignature[0] = 'U';
        cbw.dCBWSignature[1] = 'S';
        cbw.dCBWSignature[2] = 'B';
        cbw.dCBWSignature[3] = 'C';
        cbw.dCBWTag = ++tag;
        cbw.dCBWDataTransferLength = data_length;
        cbw.bmCBWFlags = LIBUSB_ENDPOINT_IN;
        cbw.bCBWLUN = LUN;
        cbw.bCBWCBLength = 10;
        cbw.CBWCB[0] = 0x28; // Read(10)
        // Read(10) carries the block count as a 16-bit big-endian value in bytes 7..8.
        cbw.CBWCB[7] = (blocks >> 8) & 0xff;
        cbw.CBWCB[8] = blocks & 0xff;
        int rc = libusb_bulk_transfer(handle, ENDPOINT, (unsigned char*)&cbw, cbw_length, &size, 1000);
        if(rc != 0 || size != cbw_length)
                fprintf(stderr, "sending command failed: %s (%d of %d bytes sent)\n",
                        libusb_error_name(rc), size, cbw_length);
}

/*
 * Reads the 13-byte command status wrapper from the bulk IN endpoint and
 * prints "command failed!" unless all 13 bytes arrived, the tag matches the
 * global tag and the status byte is zero.
 *
 * handle: open libusb device handle used for the bulk IN transfer
 */
void get_mass_storage_status(libusb_device_handle *handle) {
        struct command_status_wrapper status = {};
        int received = 0;
        libusb_bulk_transfer(handle, ENDPOINT|LIBUSB_ENDPOINT_IN, (unsigned char*)&status, 13, &received, 1000);
        int succeeded = received == 13 && status.dCSWTag == tag && status.bCSWStatus == 0;
        if(!succeeded)
                printf("command failed!\n");
}

/*
 * Requests BUF_SIZE bytes with a Read(10) command, receives the data phase
 * into buf and then fetches the command status. name labels buf in the
 * error message printed when the data transfer fails or comes up short.
 */
static void read_test_blocks(libusb_device_handle *handle, unsigned char *buf, const char *name) {
        int size = 0;
        send_mass_storage_command(handle, BUF_SIZE);
        int rc = libusb_bulk_transfer(handle, ENDPOINT|LIBUSB_ENDPOINT_IN, buf, BUF_SIZE, &size, 5000);
        if(rc != 0 || size != BUF_SIZE)
                fprintf(stderr, "reading into %s failed: %s (%d of %d bytes received)\n",
                        name, libusb_error_name(rc), size, BUF_SIZE);
        get_mass_storage_status(handle);
}

/* Prints the index of the first non-zero byte of buf; prints nothing if all BUF_SIZE bytes are zero. */
static void print_first_non_null(const unsigned char *buf, const char *name) {
        for(int j = 0; j < BUF_SIZE; j++) {
                if(buf[j]) {
                        printf("index of first non-null byte in %s: 0x%04x\n", name, j);
                        return;
                }
        }
}

/*
 * Finds the first device matching VENDOR:PRODUCT whose INTERFACE can be
 * claimed, reads the same blocks once into a malloc() buffer and once into a
 * libusb_dev_mem_alloc() buffer, and prints where the first non-zero byte
 * appears in each. A missing line for buf_libusb means that buffer stayed
 * all zeroes.
 *
 * Returns 1 if libusb cannot be initialised, 0 otherwise.
 */
int main() {
        libusb_context *ctx = NULL;
        if(libusb_init(&ctx)) return 1;
        libusb_device **list = NULL;
        libusb_device_handle *handle = NULL;
        struct libusb_device_descriptor desc = {};
        int found = 0;
        ssize_t cnt = libusb_get_device_list(ctx, &list);
        for(ssize_t i = 0; i < cnt; i++) {
                if(libusb_get_device_descriptor(list[i], &desc)) continue;
                if(!(desc.idVendor == VENDOR && desc.idProduct == PRODUCT)) continue;
                if(libusb_open(list[i], &handle)) continue;
                libusb_set_auto_detach_kernel_driver(handle, 1);
                if(libusb_claim_interface(handle, INTERFACE)) { libusb_close(handle); continue; }
                found = 1;
                unsigned char* buf_malloc = malloc(BUF_SIZE);
                // libusb_dev_mem_alloc() returns NULL when device memory cannot be provided.
                unsigned char* buf_libusb = libusb_dev_mem_alloc(handle, BUF_SIZE);
                if(buf_malloc && buf_libusb) {
                        memset(buf_malloc, 0, BUF_SIZE);
                        memset(buf_libusb, 0, BUF_SIZE);
                        read_test_blocks(handle, buf_malloc, "buf_malloc");
                        read_test_blocks(handle, buf_libusb, "buf_libusb");
                        print_first_non_null(buf_malloc, "buf_malloc");
                        print_first_non_null(buf_libusb, "buf_libusb");
                } else {
                        fprintf(stderr, "allocating test buffers failed (malloc: %s, libusb_dev_mem_alloc: %s)\n",
                                buf_malloc ? "ok" : "failed", buf_libusb ? "ok" : "failed");
                }
                if(buf_libusb)
                        libusb_dev_mem_free(handle, buf_libusb, BUF_SIZE);
                free(buf_malloc);
                libusb_release_interface(handle, INTERFACE);
                libusb_close(handle);
                break;
        }
        if(!found)
                fprintf(stderr, "device %04x:%04x not found or could not be opened\n",
                        (unsigned)VENDOR, (unsigned)PRODUCT);
        if(cnt >= 0)
                libusb_free_device_list(list, 1);
        libusb_exit(ctx);
        return 0;
}

Compile and run like this:

sudo apt install libusb-1.0-0-dev
gcc test.c `pkg-config --libs --cflags libusb-1.0` -o test
sudo ./test

Expected results look something like that (tested on ordinary PC):

index of first non-null byte in buf_malloc: 0x01b8
index of first non-null byte in buf_libusb: 0x01b8

Actual result on AGX Xavier (note that buf_libusb is all zeroes):

index of first non-null byte in buf_malloc: 0x01b8

For the usbfs zerocopy feature to work, the kernel has to be configured with the CONFIG_DMA_CMA option enabled and CONFIG_CMA_SIZE_MBYTES set to some value (or the cma= kernel command-line parameter has to be used). The kernel in L4T has these settings set up correctly.

Having this feature working would allow considerably lowering the CPU usage of USB transfers by getting rid of an unnecessary memcpy. On the other hand, the current state breaks valid code: data is not received by the application, but no error is reported.

Hi parafin,
Does it work on r28.2.1 TX1(or TX2)?
Does “any USB mass-storage device” mean plugging any USB 2.0 pendrive into the Type-A port on the devkit?

Hi DaneLLL,

L4T R28.2.1 has kernel 4.4 which is too old to have support for usbfs zerocopy. So this functionality is currently unavailable on TX1 and TX2. I know one other ARM platform with new enough kernel - Odroid-XU4, usbfs zerocopy fails there too (even makes system crash). So I’m not really sure if usbfs zerocopy ever worked on ARM architecture. Though I don’t see any reason why it can’t. There was another patch for USB zerocopy before current version was merged to the Linux kernel and it did work on ARM fine.

Yes, any pendrive will do (shouldn’t matter if it’s USB2 or USB3, but I’ve tested with USB2 drive). You should look at lsusb output and adjust VENDOR and PRODUCT accordingly in the code.

Hi parafin,
I ran the app with a SanDisk pendrive but hit an error.

nvidia@jetson-0422418042113:~$ sudo ./test
command failed!
command failed!
nvidia@jetson-0422418042113:~$ dmesg
[  673.138074] tegra-xusb 3610000.xhci: exiting ELPG
[  673.143910] tegra-xusb 3610000.xhci: Firmware timestamp: 2018-03-29 14:24:42 UTC, Version: 60.05 release
[  673.145892] tegra-xusb 3610000.xhci: exiting ELPG done
[  673.516232] usb 2-4: new SuperSpeed USB device number 3 using tegra-xusb
[  673.537013] usb 2-4: New USB device found, idVendor=0781, idProduct=5580
[  673.537021] usb 2-4: New USB device strings: Mfr=1, Product=2, SerialNumber=3
[  673.537025] usb 2-4: Product: Extreme
[  673.537028] usb 2-4: Manufacturer: SanDisk
[  673.537031] usb 2-4: SerialNumber: AA010104130944495598
[  673.542669] usb-storage 2-4:1.0: USB Mass Storage device detected
[  673.544924] scsi host2: usb-storage 2-4:1.0
[  674.553806] scsi 2:0:0:0: Direct-Access     SanDisk  Extreme          0001 PQ: 0 ANSI: 6
[  674.559528] sd 2:0:0:0: [sda] 62533296 512-byte logical blocks: (32.0 GB/29.8 GiB)
[  674.561718] sd 2:0:0:0: [sda] Write Protect is off
[  674.561963] sd 2:0:0:0: [sda] Mode Sense: 53 00 00 08
[  674.564189] sd 2:0:0:0: [sda] Write cache: disabled, read cache: enabled, doesn't support DPO or FUA
[  674.578693]  sda: sda1
[  674.587916] sd 2:0:0:0: [sda] Attached SCSI removable disk
[  675.050231] FAT-fs (sda1): Volume was not properly unmounted. Some data may be corrupt. Please run fsck.
[  683.469194] usb-storage 2-4:1.0: USB Mass Storage device detected
[  683.469908] scsi host2: usb-storage 2-4:1.0
[  684.628243] usb 2-4: reset SuperSpeed USB device number 3 using tegra-xusb
[  684.899615] scsi 2:0:0:0: Direct-Access     SanDisk  Extreme          0001 PQ: 0 ANSI: 6
[  684.902888] sd 2:0:0:0: [sda] 62533296 512-byte logical blocks: (32.0 GB/29.8 GiB)
[  684.905868] sd 2:0:0:0: [sda] Write Protect is off
[  684.906111] sd 2:0:0:0: [sda] Mode Sense: 53 00 00 08
[  684.908324] sd 2:0:0:0: [sda] Write cache: disabled, read cache: enabled, doesn't support DPO or FUA
[  684.922337]  sda: sda1
[  684.932912] sd 2:0:0:0: [sda] Attached SCSI removable disk
[  685.410687] FAT-fs (sda1): Volume was not properly unmounted. Some data may be corrupt. Please run fsck.

I have modified

#define VENDOR 0x0781
#define PRODUCT 0x5580

Any idea?

Hi DaneLLL,

can you please show the output of this command:

sudo lsusb -vd 0781:5580

In the meantime I will try to modify the test-case to be more robust.

Hi parafin,

Please check below information:

nvidia@jetson-0422418042113:~$ sudo lsusb -vd 0781:5580
[sudo] password for nvidia:

Bus 002 Device 002: ID 0781:5580 SanDisk Corp. SDCZ80 Flash Drive
Device Descriptor:
  bLength                18
  bDescriptorType         1
  bcdUSB               3.00
  bDeviceClass            0 (Defined at Interface level)
  bDeviceSubClass         0
  bDeviceProtocol         0
  bMaxPacketSize0         9
  idVendor           0x0781 SanDisk Corp.
  idProduct          0x5580 SDCZ80 Flash Drive
  bcdDevice            0.10
  iManufacturer           1 SanDisk
  iProduct                2 Extreme
  iSerial                 3 AA010104130944495598
  bNumConfigurations      1
  Configuration Descriptor:
    bLength                 9
    bDescriptorType         2
    wTotalLength           44
    bNumInterfaces          1
    bConfigurationValue     1
    iConfiguration          0
    bmAttributes         0x80
      (Bus Powered)
    MaxPower              100mA
    Interface Descriptor:
      bLength                 9
      bDescriptorType         4
      bInterfaceNumber        0
      bAlternateSetting       0
      bNumEndpoints           2
      bInterfaceClass         8 Mass Storage
      bInterfaceSubClass      6 SCSI
      bInterfaceProtocol     80 Bulk-Only
      iInterface              0
      Endpoint Descriptor:
        bLength                 7
        bDescriptorType         5
        bEndpointAddress     0x81  EP 1 IN
        bmAttributes            2
          Transfer Type            Bulk
          Synch Type               None
          Usage Type               Data
        wMaxPacketSize     0x0400  1x 1024 bytes
        bInterval               0
        bMaxBurst              15
      Endpoint Descriptor:
        bLength                 7
        bDescriptorType         5
        bEndpointAddress     0x02  EP 2 OUT
        bmAttributes            2
          Transfer Type            Bulk
          Synch Type               None
          Usage Type               Data
        wMaxPacketSize     0x0400  1x 1024 bytes
        bInterval               0
        bMaxBurst              15
Binary Object Store Descriptor:
  bLength                 5
  bDescriptorType        15
  wTotalLength           22
  bNumDeviceCaps          2
  USB 2.0 Extension Device Capability:
    bLength                 7
    bDescriptorType        16
    bDevCapabilityType      2
    bmAttributes   0x00000002
      Link Power Management (LPM) Supported
  SuperSpeed USB Device Capability:
    bLength                10
    bDescriptorType        16
    bDevCapabilityType      3
    bmAttributes         0x00
    wSpeedsSupported   0x000e
      Device can operate at Full Speed (12Mbps)
      Device can operate at High Speed (480Mbps)
      Device can operate at SuperSpeed (5Gbps)
    bFunctionalitySupport   1
      Lowest fully-functional device speed is Full Speed (12Mbps)
    bU1DevExitLat           7 micro seconds
    bU2DevExitLat         101 micro seconds
Device Status:     0x000c
  (Bus Powered)
  U1 Enabled
  U2 Enabled

Please try this updated code:

#include <libusb.h>
#include <stdlib.h>
#include <string.h>
#include <stdio.h>

/* USB ID of the flash drive under test; adjust these two to match your device. */
#define VENDOR 0x0781
#define PRODUCT 0x5580

/* Transfer geometry and device addressing used by the test. */
enum {
	BLOCK_SIZE = 512,                    /* divisor used to turn a byte count into a block count */
	READ_BLOCKS = 4,                     /* number of blocks requested per read */
	BUF_SIZE = READ_BLOCKS * BLOCK_SIZE, /* size in bytes of each receive buffer */
	INTERFACE = 0,                       /* interface number that gets claimed */
	ENDPOINT_OUT = 0x2,                  /* bulk endpoint address commands are written to */
	ENDPOINT_IN = 0x81                   /* bulk endpoint address data and status are read from */
};

/* Logical unit number placed in every command; main() overwrites it with the device's Get Max LUN answer. */
uint8_t LUN = 0;

/*
 * Command block wrapper sent to the device by send_mass_storage_command().
 * Only the first 31 bytes of the struct are transmitted, so the member
 * order and sizes below are the on-the-wire layout and must not change.
 */
struct command_block_wrapper {
	uint8_t dCBWSignature[4];        // filled with 'U', 'S', 'B', 'C'
	uint32_t dCBWTag;                // copy of the global tag after it was incremented
	uint32_t dCBWDataTransferLength; // number of bytes requested from the device
	uint8_t bmCBWFlags;              // set to LIBUSB_ENDPOINT_IN (data flows device -> host)
	uint8_t bCBWLUN;                 // set to LUN
	uint8_t bCBWCBLength;            // set to 10, the number of used bytes in CBWCB
	uint8_t CBWCB[16];               // command bytes: [0] = opcode, [8] = block count
};

/*
 * Command status wrapper read back by get_mass_storage_status(), which
 * requests 13 bytes from the device into this struct.
 */
struct command_status_wrapper {
	uint8_t dCSWSignature[4];  // not inspected by this program
	uint32_t dCSWTag;          // compared against the global tag
	uint32_t dCSWDataResidue;  // not inspected by this program
	uint8_t bCSWStatus;        // any non-zero value is reported as a failed command
};

/*
 * Tag of the most recently sent command: incremented by
 * send_mass_storage_command() and compared with the tag in the status
 * wrapper by get_mass_storage_status().
 */
uint32_t tag = 0;

/*
 * Sends a command block wrapper carrying a Read(10) request to the device.
 *
 * handle:      open libusb device handle used for the bulk OUT transfer
 * data_length: number of bytes the device is asked to return; the block
 *              count in the command is data_length / BLOCK_SIZE
 *
 * The global tag is incremented and stored in the wrapper so that
 * get_mass_storage_status() can match the status to this command. The
 * logical block address bytes are left at zero. A failed or short transfer
 * is reported on stderr; the function itself returns nothing.
 *
 * NOTE(review): the 32-bit wrapper fields are stored in host byte order,
 * which presumably is only correct on little-endian hosts -- confirm.
 */
void send_mass_storage_command(libusb_device_handle *handle, int data_length) {
	// The struct carries trailing padding, so sizeof cannot be used for the wire length.
	const int cbw_length = 31;
	struct command_block_wrapper cbw = {};
	int blocks = data_length / BLOCK_SIZE;
	int size = 0;
	cbw.dCBWSignature[0] = 'U';
	cbw.dCBWSignature[1] = 'S';
	cbw.dCBWSignature[2] = 'B';
	cbw.dCBWSignature[3] = 'C';
	cbw.dCBWTag = ++tag;
	cbw.dCBWDataTransferLength = data_length;
	cbw.bmCBWFlags = LIBUSB_ENDPOINT_IN;
	cbw.bCBWLUN = LUN;
	cbw.bCBWCBLength = 10;
	cbw.CBWCB[0] = 0x28; // Read(10)
	// Read(10) carries the block count as a 16-bit big-endian value in bytes 7..8.
	cbw.CBWCB[7] = (blocks >> 8) & 0xff;
	cbw.CBWCB[8] = blocks & 0xff;
	int rc = libusb_bulk_transfer(handle, ENDPOINT_OUT, (unsigned char*)&cbw, cbw_length, &size, 1000);
	if(rc != 0 || size != cbw_length)
		fprintf(stderr, "sending command failed: %s (%d of %d bytes sent)\n",
			libusb_error_name(rc), size, cbw_length);
}

/*
 * Reads the 13-byte command status wrapper from ENDPOINT_IN and prints
 * "command failed!" unless all 13 bytes arrived, the tag matches the global
 * tag and the status byte is zero.
 *
 * handle: open libusb device handle used for the bulk IN transfer
 */
void get_mass_storage_status(libusb_device_handle *handle) {
	struct command_status_wrapper status = {};
	int received = 0;
	libusb_bulk_transfer(handle, ENDPOINT_IN, (unsigned char*)&status, 13, &received, 1000);
	int succeeded = received == 13 && status.dCSWTag == tag && status.bCSWStatus == 0;
	if(!succeeded)
		printf("command failed!\n");
}

int main() {
	libusb_context *ctx = NULL;
	if(libusb_init(&ctx)) return 1;
	libusb_device **list = NULL;
	libusb_device_handle *handle = NULL;
	struct libusb_device_descriptor desc = {};
	ssize_t cnt = libusb_get_device_list(ctx, &list);
	for(ssize_t i = 0; i < cnt; i++) {
		if(libusb_get_device_descriptor(list[i], &desc)) continue;
		if(!(desc.idVendor == VENDOR && desc.idProduct == PRODUCT)) continue;
		if(libusb_open(list[i], &handle)) continue;
		libusb_set_auto_detach_kernel_driver(handle, 1);
		if(libusb_claim_interface(handle, INTERFACE)) { libusb_close(handle); continue; }
#define BOMS_GET_MAX_LUN 0xFE
		if(libusb_control_transfer(handle, LIBUSB_ENDPOINT_IN|LIBUSB_REQUEST_TYPE_CLASS|LIBUSB_RECIPIENT_INTERFACE,
				BOMS_GET_MAX_LUN, 0, 0, &LUN, sizeof(LUN), 1000) < 0) {
			printf("getting LUN failed!");
			libusb_close(handle);
			continue;
		}
		printf("LUN = %d\n", LUN);
		unsigned char* buf_malloc = malloc(BUF_SIZE);
		unsigned char* buf_libusb = libusb_dev_mem_alloc(handle, BUF_SIZE);
		memset(buf_malloc, 0, BUF_SIZE);
		memset(buf_libusb, 0, BUF_SIZE);
		int size = 0;
		send_mass_storage_command(handle, BUF_SIZE);
		libusb_bulk_transfer(handle, ENDPOINT_IN, buf_malloc, BUF_SIZE, &size, 5000);
		get_mass_storage_status(handle);
		send_mass_storage_command(handle, BUF_SIZE);
		libusb_bulk_transfer(handle, ENDPOINT_IN, buf_libusb, BUF_SIZE, &size, 5000);
		get_mass_storage_status(handle);
		for(int j = 0; j < BUF_SIZE; j++) {
			if(buf_malloc[j]) {
				printf("index of first non-null byte in buf_malloc: 0x%04x\n", j);
				break;
			}
		}
		for(int j = 0; j < BUF_SIZE; j++) {
			if(buf_libusb[j]) {
				printf("index of first non-null byte in buf_libusb: 0x%04x\n", j);
				break;
			}
		}
		libusb_dev_mem_free(handle, buf_libusb, BUF_SIZE);
		free(buf_malloc);
		libusb_release_interface(handle, INTERFACE);
		libusb_close(handle);
		break;
	}
	if(cnt >= 0)
		libusb_free_device_list(list, 1);
	libusb_exit(ctx);
	return 0;
}

Hi parafin,
I get below output:

nvidia@jetson-0422418042113:~$ sudo ./test
LUN = 0
index of first non-null byte in buf_malloc: 0x0000

This is a new feature. We will evaluate and update.

Hi parafin,
Please share this patch for our reference.

Hi DaneLLL,

here it is for kernel 4.2: https://github.com/basler/linux-usb-zerocopy/commit/4106b5b45d075d2d2d06c7ba6fa59e7999fdfd5d.patch

Hi,
Please try it on r32.1.

Hi,
I’ve tried my test-case on TX2 with L4T R32.1 - it passes. That’s great news! Now I have to figure out how to enable this feature safely in my code, since it didn’t work in R31.1. Probably I can check kernel version… Can you point me to a commit in kernel sources fixing this issue? Maybe it will help me to come up with something better.

Nevermind, I think I found it: https://nv-tegra.nvidia.com/gitweb/?p=linux-4.9.git;a=commitdiff;h=9a17f9507d42244fa0531209b3289f8022f3658b

Was this zerocopy function fixed/enabled in the current L4T 4.9(.140) kernel version? Thanks.

Hi,

Yes. R32.1 is K4.9