Hi,
My original topic was linked to the title but I have managed to solve one half of my problem and rather than creating a new topic I have edited this post accordingly.
I am still having some trouble with cudaMemcpy. I have set up my code so that I have wrappers for cuda functions and error checks in my “crowdSim.cu” file:
[codebox]//crowdSim.cu
#include “crowdSim.cuh”
#include “crowd_kernels.h”
void checkCUDAError(const char *msg)
{
cudaError_t err = cudaGetLastError();
if( cudaSuccess != err)
{
fprintf(stderr, "Cuda error: %s: %s.\n", msg, cudaGetErrorString( err) );
exit(EXIT_FAILURE);
}
}
void initDevice(void)
{
cudaSetDevice( cutGetMaxGflopsDeviceId() );
checkCUDAError("initDevice");
}
void allocDeviceMem(void **mem, int size)
{
cudaMalloc(mem, size);
checkCUDAError("malloc");
}
void memCpy(void *host, void *device, int size)
{
cudaMemcpy(device, host, size, cudaMemcpyHostToDevice);
checkCUDAError("memcpy");
}
void freeDevice(void *mem)
{
cudaFree(mem);
checkCUDAError("free");
}
void regBuffer(uint vbo)
{
cudaGLRegisterBufferObject(vbo);
checkCUDAError("regVBO");
}
void unRegBuffer(uint vbo)
{
cudaGLUnregisterBufferObject(vbo);
checkCUDAError("unregVBO");
}
void *mapVBO(void *ptr, uint vbo)
{
cudaGLMapBufferObject(&ptr, vbo);
checkCUDAError("mapVBO");
return ptr;
}
void unMapVBO(uint vbo)
{
cudaGLUnmapBufferObject(vbo);
checkCUDAError("unmapVBO");
}[/codebox]
Here is the code from “crowdSim.cpp” that I use to allocate memory on the device and copy from host to device:
[codebox]void allocMem(void)
{
allocDeviceMem((void**)&dVel, sizeof(float2) * numChars);
memCpy(dVel, hVel, sizeof(float2) * numChars);
//cudaMemcpy(dVel, hVel, sizeof(float2) * numChars, cudaMemcpyHostToDevice);
//checkCUDAError("memcpy");
}[/codebox]
Currently I am able to allocate memory on the device without trouble. The problem occurs when I wish to use my “memCpy” method. If I do this the “checkCUDAError” call prints an error string saying : “Cuda Error: memcpy : invalid argument”. If I comment it out and just call “cudaMemcpy” directly from “crowdSim.cpp” it passes the “checkCUDAError” test i.e. it returns success.
I am probably missing something very obvious here but I have been round and round trying to figure out why it might say this. any help would be greatly appreciated.
I have included a full copy (minus unnecessary bits for which I have comments) of my “crowdSim.cpp” file in case that helps:
[codebox]#include <stdlib.h>
#include <stdio.h>
#include <GL/glew.h>
#include <GL/glut.h>
#include “crowd.h”
#include “crowdSim.cuh”
#define TIMESTEP 0.09f
#define WINDIMX 640
#define WINDIMY 480
#define FIGHTRAD 1.0f/64.0f
#define CELLSIZE FIGHTRAD * 2.0f
#define GRIDDIMX WINDIMX/CELLSIZE
#define GRIDDIMY WINDIMY/CELLSIZE
#define NUMCELLS GRIDDIMX*GRIDDIMY
#define NUMCOLOURS 8
using namespace std;
vector <Crowd *> crowds;
int numCrowds;
float3* crowdColours;
float3 colours[NUMCOLOURS];
float4* hPos;
float4* dPos;
float2* hVel;
float2* dVel;
uint posVBO;
uint colourVBO;
uint2* dHash[2];
int prevChars, numChars;
void setColours(float4 *pos)
{
//colours for colourVBO
}
void cleanup()
{
for(unsigned int i = 0; i < crowds.size(); i++){
crowds[i]->~Crowd();
}
unRegBuffer(posVBO);
unRegBuffer(colourVBO);
free(hPos); free(hVel);
freeDevice(dHash[0]); freeDevice(dHash[1]); freeDevice(dVel);
glBindBuffer(GL_ARRAY_BUFFER, 0);
glDeleteBuffers(1, &posVBO);
glDeleteBuffers(1, &colourVBO);
}
void allocMem(void)
{
allocDeviceMem((void**)&dVel, sizeof(float2) * numChars);
memCpy(dVel, hVel, sizeof(float2) * numChars);
//cudaMemcpy(dVel, hVel, sizeof(float2) * numChars, cudaMemcpyHostToDevice);
//checkCUDAError("memcpy");
printf("Memory allocated\n");
}
uint setBuffer(uint vbo, uint size, void *data)
{
glGenBuffers(1, &vbo);
glBindBuffer(GL_ARRAY_BUFFER, vbo);
glBufferData(GL_ARRAY_BUFFER, size, data, GL_DYNAMIC_DRAW);
glBindBuffer(GL_ARRAY_BUFFER, 0);
regBuffer(vbo);
printf("Buffer set\n");
return vbo;
}
void addCrowd(int size, int currSize, int dim, float density, float2 startPos, float4 *pos, float2 *vel, int tag)
{
//sets crowd position in hPos and velocities in hVel
}
void initCrowds()
{
//pos, size and density arrays are defined here
for (int i = 0; i < numCrowds; i++){
//printf("size of crowd 1?"); Add user input method here
numChars += size[i];
hPos = (float4*)realloc(hPos, sizeof(float4) * numChars);
hVel = (float2*)realloc(hVel, sizeof(float2) * numChars);
Crowd *temp = new Crowd(size[i], i+1, 0.5f, density[i], 0.5f);
crowds.push_back(temp);
addCrowd(size[i], prevChars, crowds[i]->getDim(), density[i], pos[i], hPos, hVel, i+1);
prevChars += size[i];
}
crowdColours = (float3*)malloc(sizeof(float3) * numChars);
printf("crowds created\n");
}
void updateSim()
{
dPos = (float4 *) mapVBO(dPos, posVBO);
hashChars(dHash[0], dPos, numChars);
unMapVBO(posVBO);
}
void display(void)
{
updateSim();
glMatrixMode (GL_PROJECTION);
glLoadIdentity ();
glOrtho (0, WINDIMX, WINDIMY, 0, 0, 1);
glMatrixMode (GL_MODELVIEW);
glDisable(GL_DEPTH_TEST);
glClearColor(0, 0.5, 0, 0);
glClear(GL_COLOR_BUFFER_BIT);
glPointSize(1);
glEnable(GL_BLEND);
glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);
glEnableClientState(GL_VERTEX_ARRAY);
glDisable(GL_CULL_FACE);
glColor3f(colours[0].x, colours[0].y, colours[0].z);
glBindBuffer(GL_ARRAY_BUFFER, posVBO);
glVertexPointer(2, GL_FLOAT, sizeof(float4), NULL);
glBindBuffer(GL_ARRAY_BUFFER, colourVBO);
glColorPointer(3, GL_FLOAT, 0, 0);
glEnableClientState(GL_COLOR_ARRAY);
glDrawArrays(GL_POINTS, 0, numChars);
glBindBuffer(GL_ARRAY_BUFFER, 0);
glDisableClientState(GL_VERTEX_ARRAY);
glDisableClientState(GL_COLOR_ARRAY);
glDisableClientState(GL_TEXTURE_COORD_ARRAY);
glDisable(GL_TEXTURE_2D);
glutSwapBuffers();
glutPostRedisplay();
}
void keyboard( unsigned char key, int x, int y)
{
switch( key) {
case 27:
exit(0);
default: break;
}
}
int main(int argc, char** argv) {
initDevice();
glutInit(&argc, argv);
glutInitDisplayMode(GLUT_RGBA | GLUT_DOUBLE);
glutInitWindowSize(WINDIMX,WINDIMY);
glutCreateWindow("Crowd Sim");
glewInit();
initCrowds();
setColours(hPos);
allocMem();
posVBO = setBuffer(posVBO, sizeof(float4) * numChars, hPos);
colourVBO = setBuffer(colourVBO, sizeof(float3) * numChars, crowdColours);
printf("*Displaying....*\n");
glutDisplayFunc(display);
glutKeyboardFunc(keyboard);
atexit(cleanup);
glutMainLoop();
cudaThreadExit();
return 0;
}[/codebox]