Hello.
I'm trying to write a simple loop program in CUDA, shown below.
From what I have read, my first guess was that this problem is caused by a segmentation fault,
but this program doesn't seem to contain such a bug…
Please point out anything that looks wrong, including bugs…
Thank you.
/////////////////////////////////////////////////////////////
#include<stdio.h>
#include<cutil.h>
#include<cutil_inline.h>
// Launch / data-layout configuration.
//   LOOP             : number of passes each thread makes over the buffers
//   BLOCKNUM         : blocks in the (1-D) grid
//   THREAD_PER_BLOCK : threads in each (1-D) block
//   STRIDESIZE       : bytes processed by the whole grid in one pass
//                      (every thread handles 16 bytes = 4 unsigned ints)
//   FILESIZE         : total bytes processed over all LOOP passes
// The expansions are fully parenthesized so they stay correct when used
// next to operators such as '/', '%' or unary minus.
#define LOOP (20)
#define BLOCKNUM (60)
#define THREAD_PER_BLOCK (256)
#define STRIDESIZE (THREAD_PER_BLOCK*16*BLOCKNUM)
#define FILESIZE (THREAD_PER_BLOCK*16*BLOCKNUM*LOOP)
/*
 * Adds 1 to every 32-bit word of `in` and writes the result to `out`.
 *
 * Each thread owns four consecutive words starting at index 4*tid, where
 * tid = blockIdx.x * blockDim.x + threadIdx.x. After handling its four
 * words the thread advances both pointers by `stride` words and repeats,
 * `loopnum` times in total.
 *
 * Parameters:
 *   in      - device pointer to the input words
 *   out     - device pointer to the output words
 *   loopnum - number of passes
 *   stride  - distance, in unsigned ints, between consecutive passes
 *
 * Preconditions (NOT checked here, the kernel has no bounds guard):
 *   - 1-D grid and 1-D blocks;
 *   - both buffers hold at least
 *       (loopnum - 1) * stride + 4 * gridDim.x * blockDim.x
 *     unsigned ints, otherwise the kernel reads/writes out of bounds.
 */
__global__ void kernel(
    unsigned int *in,
    unsigned int *out,
    unsigned int loopnum,
    unsigned int stride)
{
    const unsigned int tid = blockIdx.x * blockDim.x + threadIdx.x;
    // Widen before multiplying so the word offset cannot wrap on large grids.
    const size_t base = (size_t)tid * 4;
    unsigned int io[4];

    // Unsigned counter: matches the type of loopnum (no signed/unsigned compare).
    for (unsigned int pass = 0; pass < loopnum; pass++) {
        // Load all four words first, then store, as in the original ordering.
        for (int k = 0; k < 4; k++) {
            io[k] = in[base + k] + 1;
        }
        for (int k = 0; k < 4; k++) {
            out[base + k] = io[k];
        }

        // Move on to the slice handled in the next pass.
        in  += stride;
        out += stride;
    }
}
// ****************************
// MAIN
// ****************************
/*
 * Host driver: fills a pinned input buffer with a known pattern, uploads it,
 * runs `kernel` (which adds 1 to every word), downloads the result and
 * verifies it on the CPU.
 *
 * Returns 0 when every output word equals input + 1, 1 otherwise (bad
 * configuration, kernel failure, or mismatching data).
 */
int main(int argc, char **argv){
    // Sizes in 32-bit words / bytes. The macros are wrapped in parentheses
    // here so the divisions are right even if a macro body is unparenthesized.
    const size_t       totalWords   = (size_t)(FILESIZE) / 4;
    const size_t       totalBytes   = sizeof(unsigned int) * totalWords;
    const unsigned int strideWords  = (STRIDESIZE) / 4;
    const size_t       wordsPerPass = (size_t)(BLOCKNUM) * (THREAD_PER_BLOCK) * 4;

    // The kernel has no bounds check, so the launch geometry must tile the
    // buffers exactly; refuse to run with an inconsistent configuration.
    if (wordsPerPass != strideWords ||
        (size_t)strideWords * (LOOP) > totalWords) {
        fprintf(stderr, "inconsistent configuration: grid covers %lu words per pass, "
                        "stride is %u words, buffer holds %lu words\n",
                (unsigned long)wordsPerPass, strideWords, (unsigned long)totalWords);
        return 1;
    }

    unsigned int *pt;       // host input  (pinned)
    unsigned int *ct_buf2;  // host output (pinned)
    CUT_DEVICE_INIT(argc, argv);

    // malloc
    CUDA_SAFE_CALL( cudaMallocHost((void **)&ct_buf2, totalBytes) );
    CUDA_SAFE_CALL( cudaMallocHost((void **)&pt, totalBytes) );

    // Give the input a deterministic pattern instead of uploading
    // uninitialized memory, so the result can be verified below.
    for (size_t i = 0; i < totalWords; i++) {
        pt[i] = (unsigned int)i;
    }

    unsigned int *d_pt, *d_ct;
    CUDA_SAFE_CALL( cudaMalloc( (void **)&d_pt, totalBytes) );
    CUDA_SAFE_CALL( cudaMalloc( (void **)&d_ct, totalBytes) );

    // Upload the WHOLE input buffer (the size is in bytes of unsigned int,
    // not unsigned char, otherwise only a quarter of the data is copied).
    CUDA_SAFE_CALL( cudaMemcpy( d_pt, pt, totalBytes, cudaMemcpyHostToDevice) );
    CUDA_SAFE_CALL( cudaMemset( d_ct, 0, totalBytes) );

    dim3 grid(BLOCKNUM, 1, 1), block(THREAD_PER_BLOCK, 1, 1);
    kernel<<<grid, block>>>(d_pt, d_ct, LOOP, strideWords);

    // A launch returns no status: configuration errors show up through
    // cudaGetLastError(), faults during execution at the next synchronization.
    int status = 0;
    cudaError_t err = cudaGetLastError();
    if (err == cudaSuccess) {
        err = cudaThreadSynchronize();
    }

    if (err != cudaSuccess) {
        fprintf(stderr, "kernel failed: %s\n", cudaGetErrorString(err));
        status = 1;
    } else {
        CUDA_SAFE_CALL( cudaMemcpy( ct_buf2, d_ct, totalBytes, cudaMemcpyDeviceToHost) );

        // CPU reference check: every output word must be input + 1.
        size_t mismatches = 0;
        for (size_t i = 0; i < totalWords; i++) {
            const unsigned int expected = pt[i] + 1;
            if (ct_buf2[i] != expected) {
                if (mismatches == 0) {
                    fprintf(stderr, "first mismatch at word %lu: got %u, expected %u\n",
                            (unsigned long)i, ct_buf2[i], expected);
                }
                mismatches++;
            }
        }
        if (mismatches != 0) {
            fprintf(stderr, "%lu of %lu words are wrong\n",
                    (unsigned long)mismatches, (unsigned long)totalWords);
            status = 1;
        } else {
            printf("OK: %lu words processed\n", (unsigned long)totalWords);
        }
    }

    CUDA_SAFE_CALL( cudaFreeHost(ct_buf2) );
    CUDA_SAFE_CALL( cudaFreeHost(pt) );
    CUDA_SAFE_CALL( cudaFree( d_pt ) );
    CUDA_SAFE_CALL( cudaFree( d_ct ) );
    return status;
}