Problem with memcpy and vbo registering

Hi,

My original topic was linked to the title but I have managed to solve one half of my problem and rather than creating a new topic I have edited this post accordingly.

I am still having some trouble with cudaMemcpy. I have set up my code so that I have wrappers for cuda functions and error checks in my “crowdSim.cu” file:

[codebox]//crowdSim.cu

#include “crowdSim.cuh”

#include “crowd_kernels.h”

void checkCUDAError(const char *msg)

{

cudaError_t err = cudaGetLastError();

if( cudaSuccess != err) 

{

    fprintf(stderr, "Cuda error: %s: %s.\n", msg, cudaGetErrorString( err) );

    exit(EXIT_FAILURE);

}                         

}

void initDevice(void)

{

cudaSetDevice( cutGetMaxGflopsDeviceId() );

checkCUDAError("initDevice");

}

void allocDeviceMem(void **mem, int size)

{

cudaMalloc(mem, size);

checkCUDAError("malloc");

}

void memCpy(void *host, void *device, int size)

{

cudaMemcpy(device, host, size, cudaMemcpyHostToDevice);

checkCUDAError("memcpy");

}

void freeDevice(void *mem)

{

cudaFree(mem); 

checkCUDAError("free");

}

void regBuffer(uint vbo)

{

cudaGLRegisterBufferObject(vbo);

checkCUDAError("regVBO");

}

void unRegBuffer(uint vbo)

{

cudaGLUnregisterBufferObject(vbo);

checkCUDAError("unregVBO");

}

void *mapVBO(void *ptr, uint vbo)

{

cudaGLMapBufferObject(&ptr, vbo);

checkCUDAError("mapVBO");

return ptr;

}

void unMapVBO(uint vbo)

{

cudaGLUnmapBufferObject(vbo);

checkCUDAError("unmapVBO");

}[/codebox]

Here is the code from “crowdSim.cpp” that I use to allocate memory on the device and copy from host to device:

[codebox]void allocMem(void)

{

allocDeviceMem((void**)&dVel, sizeof(float2) * numChars);



memCpy(dVel, hVel, sizeof(float2) * numChars);

//cudaMemcpy(dVel, hVel, sizeof(float2) * numChars, cudaMemcpyHostToDevice);

//checkCUDAError("memcpy");

}[/codebox]

Currently I am able to allocate memory on the device without trouble. The problem occurs when I wish to use my “memCpy” method. If I do this the “checkCUDAError” call prints an error string saying : “Cuda Error: memcpy : invalid argument”. If I comment it out and just call “cudaMemcpy” directly from “crowdSim.cpp” it passes the “checkCUDAError” test i.e. it returns success.

I am probably missing something very obvious here but I have been round and round trying to figure out why it might say this. any help would be greatly appreciated.

I have included a full copy (minus unnecessary bits for which I have comments) of my “crowdSim.cpp” file in case that helps:

[codebox]#include <stdlib.h>

#include <stdio.h>

#include <GL/glew.h>

#include <GL/glut.h>

#include

#include

#include “crowd.h”

#include “crowdSim.cuh”

#define TIMESTEP 0.09f

#define WINDIMX 640

#define WINDIMY 480

#define FIGHTRAD 1.0f/64.0f

#define CELLSIZE FIGHTRAD * 2.0f

#define GRIDDIMX WINDIMX/CELLSIZE

#define GRIDDIMY WINDIMY/CELLSIZE

#define NUMCELLS GRIDDIMX*GRIDDIMY

#define NUMCOLOURS 8

using namespace std;

vector <Crowd *> crowds;

int numCrowds;

float3* crowdColours;

float3 colours[NUMCOLOURS];

float4* hPos;

float4* dPos;

float2* hVel;

float2* dVel;

uint posVBO;

uint colourVBO;

uint2* dHash[2];

int prevChars, numChars;

void setColours(float4 *pos)

{

//colours for colourVBO

}

void cleanup()

{

for(unsigned int i = 0; i < crowds.size(); i++){

	crowds[i]->~Crowd();

}

unRegBuffer(posVBO);

unRegBuffer(colourVBO);

free(hPos); free(hVel);

freeDevice(dHash[0]); freeDevice(dHash[1]); freeDevice(dVel); 

glBindBuffer(GL_ARRAY_BUFFER, 0);

glDeleteBuffers(1, &posVBO);

glDeleteBuffers(1, &colourVBO);

}

void allocMem(void)

{

allocDeviceMem((void**)&dVel, sizeof(float2) * numChars);



memCpy(dVel, hVel, sizeof(float2) * numChars);

//cudaMemcpy(dVel, hVel, sizeof(float2) * numChars, cudaMemcpyHostToDevice);

//checkCUDAError("memcpy");

printf("Memory allocated\n");

}

uint setBuffer(uint vbo, uint size, void *data)

{

glGenBuffers(1, &vbo);

glBindBuffer(GL_ARRAY_BUFFER, vbo);

glBufferData(GL_ARRAY_BUFFER, size, data, GL_DYNAMIC_DRAW);

glBindBuffer(GL_ARRAY_BUFFER, 0);

regBuffer(vbo);

printf("Buffer set\n");

return vbo;

}

void addCrowd(int size, int currSize, int dim, float density, float2 startPos, float4 *pos, float2 *vel, int tag)

{

//sets crowd position in hPos and velocities in hVel

}

void initCrowds()

{

//pos, size and density arrays are defined here

for (int i = 0; i < numCrowds; i++){

	

	//printf("size of crowd 1?");  Add user input method here

	numChars += size[i];

	hPos = (float4*)realloc(hPos, sizeof(float4) * numChars);

	hVel = (float2*)realloc(hVel, sizeof(float2) * numChars);

	Crowd *temp = new Crowd(size[i], i+1, 0.5f, density[i], 0.5f);

	crowds.push_back(temp);

	addCrowd(size[i], prevChars, crowds[i]->getDim(), density[i], pos[i], hPos, hVel, i+1);

	prevChars += size[i];

}

crowdColours = (float3*)malloc(sizeof(float3) * numChars);

printf("crowds created\n");

}

void updateSim()

{

dPos = (float4 *) mapVBO(dPos, posVBO);

hashChars(dHash[0], dPos, numChars);

unMapVBO(posVBO);

}

void display(void)

{

updateSim();

glMatrixMode (GL_PROJECTION);

glLoadIdentity ();

glOrtho (0, WINDIMX, WINDIMY, 0, 0, 1);

glMatrixMode (GL_MODELVIEW);

glDisable(GL_DEPTH_TEST);

glClearColor(0, 0.5, 0, 0);

glClear(GL_COLOR_BUFFER_BIT);

glPointSize(1);

glEnable(GL_BLEND);

glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);

glEnableClientState(GL_VERTEX_ARRAY);

glDisable(GL_CULL_FACE);

glColor3f(colours[0].x, colours[0].y, colours[0].z);

glBindBuffer(GL_ARRAY_BUFFER, posVBO);

glVertexPointer(2, GL_FLOAT, sizeof(float4), NULL);

glBindBuffer(GL_ARRAY_BUFFER, colourVBO);

glColorPointer(3, GL_FLOAT, 0, 0);

glEnableClientState(GL_COLOR_ARRAY);

glDrawArrays(GL_POINTS, 0, numChars);

glBindBuffer(GL_ARRAY_BUFFER, 0);

glDisableClientState(GL_VERTEX_ARRAY);

glDisableClientState(GL_COLOR_ARRAY);

glDisableClientState(GL_TEXTURE_COORD_ARRAY);

glDisable(GL_TEXTURE_2D);

glutSwapBuffers();

glutPostRedisplay();

}

void keyboard( unsigned char key, int x, int y)

{

switch( key) {

	case 27:

		exit(0);

	default: break;

}

}

int main(int argc, char** argv) {

initDevice();

glutInit(&argc, argv);

glutInitDisplayMode(GLUT_RGBA | GLUT_DOUBLE);

glutInitWindowSize(WINDIMX,WINDIMY);

glutCreateWindow("Crowd Sim");

glewInit();

initCrowds();

setColours(hPos);

allocMem();

posVBO = setBuffer(posVBO, sizeof(float4) * numChars, hPos);

colourVBO = setBuffer(colourVBO, sizeof(float3) * numChars, crowdColours);

printf("*Displaying....*\n");

glutDisplayFunc(display);

glutKeyboardFunc(keyboard);

atexit(cleanup);

glutMainLoop();

cudaThreadExit();

return 0;

}[/codebox]

I am not sure, but I think the problem is “setBuffer”. Try it out:

void setBuffer(GLuint* vbo, unsigned int size, void* data)

{

	// create buffer object

	glGenBuffers(1, vbo);

	glutReportErrors();

	glBindBuffer(GL_ARRAY_BUFFER, *vbo);

	glutReportErrors();

	// initialize buffer object

	glBufferData(GL_ARRAY_BUFFER, size, data,GL_DYNAMIC_DRAW);

	glutReportErrors();

	glBindBuffer(GL_ARRAY_BUFFER, 0);

	glutReportErrors();

	regBuffer(&vbo);

}

And for mapVBO:

void mapVBO(void *ptr, uint vbo)

{

		cudaGLMapBufferObject((void**)&ptr, vbo)

		checkCUDAError("mapVBO");

}

[quote name=‘Deus’ post=‘1096826’ date=‘Jul 31 2010, 04:42 PM’]

I am not sure, but I think the problem is “setBuffer”. Try it out:

[codebox]void *mapVBO(void *ptr, uint vbo)

{

cudaGLMapBufferObject(&ptr, vbo);

checkCUDAError("mapVBO");

return ptr;

}[/codebox]

so that it returned a pointer in my update function:

[codebox]void updateSim()

{

dPos = (float4 *)mapVBO(dPos, posVBO);

hashChars(dHash[0], dPos, numChars);

unMapVBO(posVBO);

}[/codebox]

I have subsequently edited my post as my problem now is with cudaMemcpy.