No idea why this isn't working I'm trying to run a simple CUDA program I wrote. It's not

I wrote this simple CUDA code with the intent of increasing N to a substantial number (as well as making it run through the calculations many more times) and comparing its speed to another non-CUDA version.

However, when I try running it after it’s compiled, my two output lines show up as the initial read in values from the text file, without any changes from the GPU calculations.

I would appreciate some help in finding what’s going wrong. Also, I know there’s probably some better ways to do some of the stuff (specifically the calculations to find the accelerations). If you can suggest something, that would be appreciated since I’m still a bit

Thanks in advance.

The code:

#include <iostream>

#include <ctime>

#include <stdlib.h>

#include <time.h>

#include <math.h>

#include <fstream>

#define N 2

#define blocksize 1

using namespace std;

class GObj

{	

	public:

	float x,y,vx,vy,ax,ay,m;

	GObj();

	GObj(float,float);

	GObj(float,float,float);

	__device__ void GCalc(GObj);

	__device__ void moveCalc();

	void setV(float,float);

};

__global__ void grav (GObj * a, int w)//calculate forces of gravity between each object and all higher slot # objects 

{

	int i = blockIdx.x * blockDim.x + threadIdx.x; 

	for(int j = (i + 1); j < w; j++)

		a[i].GCalc(*(a+j));

}

__global__ void gmov(GObj * a, int w)//move each GObj

{

	int i = blockIdx.x * blockDim.x + threadIdx.x; 

	if (i < w)

	{

		a[i].moveCalc();

	}

}

GObj::GObj()//constructors setting location and mass.

{

	x = 0;	y = 0;	vx = 0; vy = 0; ax = 0; ay = 0; m = 5.97e24;

}

GObj::GObj(float a, float b)

{

	x = a;

	y = b;

	vx = 0; vy = 0; ax = 0; ay = 0; m = 5.97e24;

}

GObj::GObj(float a, float b, float c)

{

	x = a;

	y = b;

	m = c;

	vx = 0; vy = 0; ax = 0; ay = 0;

}

__device__ void GObj::GCalc(GObj g)//calculate forces of gravity between 2 objects.

{

	float G = 6.674e-11;

	float r = sqrt(1.0 * (pow(g.x - x,2) + pow(g.y - y,2)));

	float theta = atan2((g.y-y),(g.x-x));

	float a = (G * g.m) / (r*r);

	float ga = (G * m) / (r*r);

	ax += a * cos(theta);

	ay += a * sin(theta);

	g.ax += ga * cos(theta + 3.14159);

	g.ay += ga * sin(theta + 3.14159);

}

__device__ void GObj::moveCalc()//movement in 1s, assumes no changes in forces.

{

	x += vx + (ax / 2);

	y += vy + (ay / 2);

	vx += ax;

	vy += ay;

}

void GObj::setV(float a, float b)//set velocity of the objects.

{

	vx = a; vy = b;

}

int main()

{

	GObj *a;

	GObj *b;

	string line;

	ifstream infile ("grav.dat");//inport co-ordinates from grav.dat

	size_t size = N * sizeof(GObj);//set size to allocate for the objects.

	a = (GObj*)malloc(size);//allocate memory on CPU for a.

	dim3 dimBlock(blocksize);//make block dimensions.

	dim3 dimGrid(ceil(N/(float)blocksize));

	if (infile.is_open())

    {

		int tx, ty;

		for (int i = 0;i < N && infile.good();i++)//read in coordinates until the file is done or all N slots of the array are full

		{

		  infile >> tx;

		  infile >> ty;

		  (*(a+i)) = GObj(tx,ty);

		}

		infile.close();

    }

    else 

		return 1;//end the program

	cudaMalloc((void**)&b, size);//allocate GPU memory for b.

	cudaMemcpy(b, a, size, cudaMemcpyHostToDevice);//copy a to the GPU.

	grav<<<dimBlock,dimGrid>>>(b,N);//run gravity calcs on b.

	gmov<<<dimBlock,dimGrid>>>(b,N);//move b members accordingly.

	cudaMemcpy(a, b, size, cudaMemcpyDeviceToHost);//copy b to the CPU.

	for (int i = 0; i < N; i++)

		cout << "a[" << i << "]: (" << a[i].x << "," << a[i].y << ") vel: (" << a[i].vx << "," <<a[i].vy << ") acc: (" << a[i].ax << ","<< a[i].ay <<") \n" ; //print out new coordinates.

	cout << "Press Enter ";

	cin.ignore( numeric_limits<streamsize>::max(), '\n' );

	return 0;

}

Currently grav.dat only has the following lines:

0 0

100000 0

If you have any questions that you want me to answer, just tell me and I’ll answer them to the best of my ability.

Check return codes from all CUDA function calls, this will probably give a good hint on what’s going wrong.
Just to rule out any installation problems: Do the SDK examples run on your installation?

Regarding possible improvements, the SDK N-body example comes with a whitepaper that explains optimization of this problem in detail.

Did you set the device you are using?

cudaError_t cudaStatus = cudaSetDevice(0);

if (cudaStatus != cudaSuccess) {

	fprintf(stderr, "cudaSetDevice failed!  Do you have a CUDA-capable GPU installed?");

}

Also, check to see if your kernel launched correctly.

//kernel call

cudaError_t status = cudaGetLastError();

if (cudaStatus != cudaSuccess) {

	fprintf(stderr, "Kernel did not launch");

}