Hi, I’m building a standard deviation program step by step and I’m having trouble getting a particular step to work.
The code compiles and runs, I know that the data is being read correctly but the calculation never seems to take place.
[codebox]///////////////////////////////////////////////////////////////////////////
// datapointdiff.cu
// Programmed by Tom Wilkinson
// Finds the difference between data points
// and the mean then squares them
//////////////////////////////////////////////////////////////////////////
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <cutil.h>
#define BLOCK_SIZE 16
// Kernel
global void
meandiffsquare(float* data, float* results, float* average, float* answer, int num)
{
int i= blockDim.x*blockIdx.x + threadIdx.x;
// calculate the difference from the mean and then square it
if(i<num)
{
answer[i] = (data[i] - average[0]);
results[i] = answer[i] * answer[i];
}
}
// Main
int main (int argc, char** argv)
{
int numElements, i;
float* mean;
float* RR;
float* RRmds;
float* ansmtrx;
float* d_ansmtrx;
float* d_RR;
float* d_RRmds;
mean = (float*)malloc(sizeof(float));
cutGetCmdLineArgumenti( argc, (const char**) argv, "n", &numElements);
mean[0] = 5.13f;
printf("%d %f\n\n\n", numElements, mean[0]);
// allocate host memory
unsigned int memsize = sizeof(float) * numElements;
ansmtrx = (float*)malloc(memsize);
RR = (float*)malloc(memsize);
RRmds = (float*)malloc(memsize);
//obtain data from file
FILE *file;
if ( (file = fopen( "RR.dat", "r" )) == NULL ) printf("Can't open file!\n");
for (i=0;i<numElements;i++)
{
fscanf( file, "%f", &RR[i] );
}
fclose(file);
// create empty results matrix
for (i = 0; i < numElements; i++)
{
ansmtrx[i] = 0;
}
for (i = 0; i < numElements; i++)
{
RRmds[i] = 0;
}
// allocate device memory
cudaMalloc((void **)&d_ansmtrx, numElements*sizeof(ansmtrx[0]));
cudaMalloc((void **)&d_RR, numElements*sizeof(RR[0]));
cudaMalloc((void **)&d_RRmds, numElements*sizeof(RRmds[0]));
// copy data to device
cudaMemcpy(d_ansmtrx, ansmtrx, numElements * sizeof(ansmtrx[0]), cudaMemcpyHostToDevice);
cudaMemcpy(d_RR, RR, numElements * sizeof(RR[0]), cudaMemcpyHostToDevice);
cudaMemcpy(d_RRmds, RRmds, numElements * sizeof(RRmds[0]), cudaMemcpyHostToDevice);
// set kernel configuration
int N=numElements;
// set kernel config
dim3 threads( BLOCK_SIZE);
dim3 grid( N/ threads.x);
//run kernel
meandiffsquare<<< grid , threads >>>(d_RR, d_RRmds, mean, d_ansmtrx, numElements);
cudaThreadSynchronize();
// copy data back to host
cudaMemcpy(RRmds, d_RRmds, numElements * sizeof(RRmds[0]), cudaMemcpyDeviceToHost);
//print results
for (i=0; i<numElements; i++)
{
printf("%f ", RRmds[i]);
}
printf("\n");
//print results to file
FILE *file2;
if ( (file2 = fopen( "RRmds.dat", "w" )) == NULL ) printf("Can't open file!\n");
for (i=0; i<numElements; i++)
{
fprintf(file2, "%f ", RRmds[i]);
}
fclose(file2);
return EXIT_SUCCESS;
}[/codebox]
Any help is much appreciated, thanks in advance.
Tom W