cudaMemcpy with offset

The following code returns success when offset=0 and “InvalidValue” when offset > 0.

cudaError_t result =

                cudaMemcpy(m_search_d+offset,point,ptMem,cudaMemcpyHostToDevice);

I’ve tried a test case where point, ptMem and m_search_d are the same for both calls, and only offset is changed.

I couldn’t find anything in the manual about writing at an offset. Is this something that must be done with a cuda array (can I not use linear memory)? Is there no way to copy memory from the host to the device at an offset of the pointer returned from cudaMalloc?

Thanks for any help.

Have you allocated enough memory on the GPU? Paste the line where you do your cudaMalloc()

unsigned int searchMem = m_dim*numSearch*sizeof(TNum);

...

cudaMalloc( (void**)&m_search_d, searchMem);

...

unsigned int ptMem = m_dim*sizeof(TNum);

As a workaround, I’ve allocated a large block of host memory, filled it with all the data, and sent it to the GPU all at once. This works as expected, and when I draw the data back out from the GPU and put it in another block of host memory, it matches the original.

Here’s the full source for a very basic minimal example. I think it’s clear this is intended behavior and I must have just missed it in the manual. Any suggestions would be appreciated though (especially if it doesn’t require me to learn how to do texture fetches).

#include <cuda.h>

#include <cuda_runtime.h>

#include <iostream>

#include <map>

// Minimal reproduction of a cudaMemcpy call failing when the device pointer
// is offset. Allocates one 10-int device buffer, then:
//   Copy 1: uploads inData1_h in a single call and reads it back.
//   Copy 2: uploads inData2_h in two 5-int halves and reads it back.
// The status of every cudaMalloc/cudaMemcpy call is printed via errorStr.
// argc/argv are unused.
int main(int argc, char* argv[])

{

    using std::cout;

    using std::endl;

    using std::map;

// Lookup table from CUDA status code to a printable message. Only the five
// codes below have entries; operator[] on any other code inserts and yields
// a null const char*.
map<cudaError_t,const char*> errorStr;

    errorStr[cudaSuccess]                       = "success";

    errorStr[cudaErrorMemoryAllocation]         = "failed to allocate";

    errorStr[cudaErrorInvalidValue]             = "invalid value";

    errorStr[cudaErrorInvalidDevicePointer]     = "invalid device pointer";

    errorStr[cudaErrorInvalidMemcpyDirection]   = "invalid memcpy direction";

// Host-side source (inData*) and destination (outData*) buffers, 10 ints each.
// None of them is delete[]'d before main returns.
int* inData1_h = new int[10];

    int* inData2_h = new int[10];

    int* outData1_h = new int[10];

    int* outData2_h = new int[10];

    // Device buffer; filled in by cudaMalloc below and never cudaFree'd.
    int* data_d = 0;

    cudaError_t result;

// inData1_h = 0..9, inData2_h = 0,10,..,90; output buffers start zeroed.
for(int i=0; i < 10; i++)

    {

        inData1_h[i] = i;

        inData2_h[i] = 10*i;

        outData1_h[i] = 0;

        outData2_h[i] = 0;

    }

// Return value of cudaSetDevice is not checked.
cudaSetDevice(0);

result = cudaMalloc( (void**)&data_d, 10*sizeof(int) );

    cout << "Malloc: " << errorStr[result] << endl;

// Copy 1: full 10-int round trip with no pointer offset.
cout << "Copy 1\n";

    result = cudaMemcpy(data_d,inData1_h,10*sizeof(int),cudaMemcpyHostToDevice);

    cout << "   in:     " << errorStr[result] << endl;

    result = cudaMemcpy(outData1_h,data_d,10*sizeof(int),cudaMemcpyDeviceToHost);

    cout << "   out:    " << errorStr[result] << endl;

cout << "   result: ";

    for(int i=0; i < 10; i++)

        cout << outData1_h[i] << " ";

    cout << endl;

// Copy 2: upload inData2_h as two halves. First half goes to the start of
// the device buffer.
cout << "Copy 2\n";

    result = cudaMemcpy(data_d,inData2_h,5*sizeof(int),cudaMemcpyHostToDevice);

    cout << "   in 1:   " << errorStr[result] << endl;

    // NOTE(review): data_d and inData2_h are int*, so pointer arithmetic is
    // already scaled by sizeof(int). Adding 5*sizeof(int) therefore advances
    // by 5*sizeof(int) ELEMENTS, which is past the end of both 10-int
    // buffers, instead of the intended 5 elements. The offset for the second
    // half should be written as data_d+5 / inData2_h+5. The third argument
    // is a byte count, so 5*sizeof(int) is correct there.
    result = cudaMemcpy(data_d+5*sizeof(int),inData2_h+5*sizeof(int),5*sizeof(int),cudaMemcpyHostToDevice);

    cout << "   in 2:   " << errorStr[result] << endl;

    // Reads back all 10 ints; elements 5..9 still hold Copy 1's data if the
    // second-half upload above did not write to them.
    result = cudaMemcpy(outData2_h,data_d,10*sizeof(int),cudaMemcpyDeviceToHost);

    cout << "   out:    " << errorStr[result] << endl;

cout << "   result: ";

    for(int i=0; i < 10; i++)

        cout << outData2_h[i] << " ";

    cout << endl;

// NOTE(review): returns 1 unconditionally, i.e. a nonzero exit status even
// when every call succeeded.
return 1;

}

output:

Malloc: success

Copy 1

   in:     success

   out:    success

   result: 0 1 2 3 4 5 6 7 8 9 

Copy 2

   in 1:   success

   in 2:   invalid value

   out:    success

   result: 0 10 20 30 40 5 6 7 8 9

Change:

result = cudaMemcpy(data_d+5*sizeof(int),inData2_h+5*sizeof(int),5*sizeof(int), cudaMemcpyHostToDevice);

to:

result = cudaMemcpy(data_d+5,inData2_h+5,5*sizeof(int),cudaMemcpyHostToDevice);

$ ./a.out
Malloc: success
Copy 1
in: success
out: success
result: 0 1 2 3 4 5 6 7 8 9
Copy 2
in 1: success
in 2: success
out: success
result: 0 10 20 30 40 50 60 70 80 90

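Your offsets to the pointers in the memcpy call are wrong. Pointer arithmetic is scaled by the size of the pointed-to type, so pointer offsets are counted in elements (here, ints), not bytes: data_d+5 already points 5*sizeof(int) bytes past data_d. Only the size argument of cudaMemcpy is given in bytes.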

Well… that was a dumb mistake. It seems that is the cause of the problem after all. Thanks!