Hello,
I have not found any documentation stating whether the standard memcpy() can or cannot be used in device code.
So I tried to use it this way:
[codebox]#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <cuda.h>
// Decodes the first 16 bytes of `a` in place by inverting every bit
// (XOR with 0xFF).
//
// The bytes are staged through the local buffer `b` using the standard
// memcpy(), which is the call this example is meant to exercise in device
// code. There is no thread indexing: every thread that runs the kernel works
// on the same 16 bytes, so it is written for a single-thread launch.
//
// a: device pointer to at least 16 readable and writable bytes.
__global__ void decodeOnDevice(char *a) {
    char b[MAX_SIZE];
    int i;

    // Stage the encoded bytes in the local buffer.
    memcpy(b, a, 16);

    // Flip all bits of each byte; the cast makes the int -> char narrowing
    // of the promoted XOR result explicit.
    for (i = 0; i < 16; i++)
        b[i] = (char)(b[i] ^ 0xFF);

    // Write the decoded bytes back over the input.
    memcpy(a, b, 16);
}
// Reports a failed CUDA runtime call on stderr.
// Returns 0 when err is cudaSuccess and 1 otherwise.
static int cudaFailed(cudaError_t err, const char *what) {
    if (err == cudaSuccess)
        return 0;
    fprintf(stderr, "%s failed: %s\n", what, cudaGetErrorString(err));
    return 1;
}

// Host driver: copies an XOR-encoded message to the device, runs
// decodeOnDevice on it with a single thread, copies the result back and
// prints it.
//
// Returns 0 on success and 1 if MAX_SIZE is too small, or if any allocation,
// copy or the kernel launch fails.
int main(void) {
    // "Hello CUDA World" with every byte XOR-ed with 0xFF
    // (16 data bytes plus the literal's terminating NUL).
    static const char encoded[] =
        "\xB7\x9A\x93\x93\x90\xDF\xBC\xAA\xBB\xBE\xDF\xA8\x90\x8D\x93\x9B";
    char *temp_host = NULL;    // pointer to host memory
    char *temp_device = NULL;  // pointer to device memory
    char commands[2][MAX_SIZE];
    int i = 0;                 // index of the command to decode
    int status = 1;            // exit code; cleared only after the full round trip

    // The buffers must hold the encoded bytes and a terminating NUL.
    if (sizeof(encoded) > (size_t)MAX_SIZE) {
        fprintf(stderr, "MAX_SIZE is too small for the encoded message\n");
        goto cleanup;
    }

    // allocate arrays on host
    temp_host = (char *)malloc(MAX_SIZE);
    if (temp_host == NULL) {
        fprintf(stderr, "malloc failed\n");
        goto cleanup;
    }

    // allocate arrays on device
    if (cudaFailed(cudaMalloc((void **) &temp_device, MAX_SIZE), "cudaMalloc"))
        goto cleanup;

    // initialize host data
    memset(commands[0], 0, MAX_SIZE);
    // these are the encoded commands; the length is taken from the literal
    // itself rather than from the strlen() of some other string
    memcpy(commands[0], encoded, sizeof(encoded) - 1);

    memset(temp_host, 0, MAX_SIZE);
    memcpy(temp_host, commands[i], strlen(commands[i]));

    // send data from host to device
    if (cudaFailed(cudaMemcpy(temp_device, temp_host, MAX_SIZE,
                              cudaMemcpyHostToDevice),
                   "cudaMemcpy (host to device)"))
        goto cleanup;

    // data copied on device, invoking kernel
    decodeOnDevice <<< 1, 1 >>> (temp_device);
    // a launch returns no status, so launch errors are fetched explicitly
    if (cudaFailed(cudaGetLastError(), "decodeOnDevice launch"))
        goto cleanup;

    // retrieve data from device; errors raised while the kernel ran are
    // reported by this blocking copy
    if (cudaFailed(cudaMemcpy(temp_host, temp_device, MAX_SIZE,
                              cudaMemcpyDeviceToHost),
                   "cudaMemcpy (device to host)"))
        goto cleanup;

    // print the decoded text; it is passed as an argument, never as the
    // format string, and is forced to be NUL-terminated first
    temp_host[MAX_SIZE - 1] = '\0';
    printf("%s", temp_host);
    status = 0;

cleanup:
    // both calls accept a null pointer, so this is safe on every exit path
    cudaFree(temp_device);
    free(temp_host);
    return status;
}[/codebox]
I get the following error:
tmpxft_00000c48_00000000-8_mal.cudafe2.gpu
Signal: caught in PU_adjust_addr_flags phase.
(0): Error: Signal caught in phase PU_adjust_addr_flags -- processing aborted
nvopencc ERROR: D:\Visual\CUDA\bin/../open64/lib//be.exe returned non-zero status 3
If I write my own _memcpy function to emulate it instead, it works:
[codebox]// Forward declaration of the hand-written byte copy defined after the kernel.
__device__ void * _memcpy (char *to, char *from, size_t len);

// Decodes the first 16 bytes of `a` in place by inverting every bit
// (XOR with 0xFF), staging the bytes through the local buffer `b`.
//
// Same kernel as the memcpy() version, except that both copies go through
// the user-written _memcpy() instead of the standard memcpy(). There is no
// thread indexing: every thread that runs the kernel works on the same
// 16 bytes.
//
// a: device pointer to at least 16 readable and writable bytes.
__global__ void decodeOnDevice(char *a) {
    char b[MAX_SIZE];
    int i;

    // Stage the encoded bytes in the local buffer.
    _memcpy(b, a, 16);

    // Flip all bits of each byte; the cast makes the int -> char narrowing
    // of the promoted XOR result explicit.
    for (i = 0; i < 16; i++)
        b[i] = (char)(b[i] ^ 0xFF);

    // Write the decoded bytes back over the input.
    _memcpy(a, b, 16);
}
// Byte-by-byte replacement for memcpy() usable from device code.
//
// Copies `len` bytes from `from` to `to` in ascending address order and
// returns `to`. When `len` is 0 nothing is copied.
//
// to:   destination, at least `len` writable bytes.
// from: source, at least `len` readable bytes.
// len:  number of bytes to copy.
__device__ void * _memcpy (char *to, char *from, size_t len)
{
    size_t i;

    for (i = 0; i < len; i++)
        to[i] = from[i];

    // The function is declared to return void *, so it must return a value;
    // hand back the destination as the standard memcpy() does.
    return to;
}
[/codebox]
Is this a compiler bug? Or is memcpy() simply not usable in device code?
CUDA 2.1, WinXP 32 bit, SDK 2.1