**Edited on 2017.9.22(Fri) 11:30(JST)**

Currently, I’m using CUDA and OpenGL to simulating ocean.

I found that when the number of vertices is around **6,000** or **25,000**, the program works correctly. But if the number of vertices is around **100,000** or **400,000**, I get **unspecified launch failure** error.

This is the code I use to update vertices positions and normals with CUDA in a frame:

```
while (!glfwWindowShouldClose(window))
{
...
vec3 *d_vertices = NULL, *d_normals = NULL;
cudaGraphicsMapResources(1, &cudaVboResVertices, 0);
cudaGraphicsMapResources(1, &cudaVboResNormals, 0);
cudaGraphicsResourceGetMappedPointer(
(void**)&d_vertices, NULL, cudaVboResVertices
);
cudaGraphicsResourceGetMappedPointer(
(void**)&d_normals, NULL, cudaVboResNormals
);
//update vertices positions and normals
//faceNumber*3 is the number of vertices
launchGPUKernel(faceNumber*3, d_vertices, d_normals);
t += dt;
//the error first occurs at this line at the first iteration
cudaGraphicsUnmapResources(1, &cudaVboResNormals, 0);
cudaGraphicsUnmapResources(1, &cudaVboResVertices, 0);
...
}
```

The launchGPUKernel is like this:

```
void launchGPUKernel(int num_points, vec3 *d_vtxs, vec3 *d_nmls){
dim3 grid(num_points/512 + 1, 1);
dim3 block(16, 64, 1);
d_update<<<grid, block>>>(num_points, WAVE_NUM, d_vtxs, d_nmls, d_wave_paras, t);
}
```

The d_update:

```
__global__ void d_update(
int num_points, int wave_num,
vec3 *d_vtxs, vec3 *d_nmls, float *d_wv_prs,
float d_time
){
long block_number = blockIdx.x + blockIdx.y*gridDim.x;
block_number *= (blockDim.x*blockDim.y);//the number of threads before current block
long idx = threadIdx.x + threadIdx.y*blockDim.x;
idx += block_number;
float x, z, height;
x = d_vtxs[idx].x;
z = d_vtxs[idx].z;
height = 0;
for (size_t j = 0; j < wave_num; j++) {
float a, b, theta, omega, phi;
a = d_wv_prs[j*5+0];
b = d_wv_prs[j*5+1];
theta = d_wv_prs[j*5+2];
omega = d_wv_prs[j*5+3];
phi = d_wv_prs[j*5+4];
float temp = (cos(theta)*x + sin(theta)*z)*omega + d_time*phi;
height += a*cos(temp) + b*sin(temp);
}
d_vtxs[idx].y = height;
float Hx, Hz;
Hx = 0;
Hz = 0;
for (size_t j = 0; j < wave_num; j++) {
float a, b, theta, omega, phi;
a = d_wv_prs[j*5+0];
b = d_wv_prs[j*5+1];
theta = d_wv_prs[j*5+2];
omega = d_wv_prs[j*5+3];
phi = d_wv_prs[j*5+4];
float temp = (cos(theta)*x + sin(theta)*z)*omega + d_time*phi;
Hx += -sin(temp)*omega*cos(theta)*a + cos(temp)*omega*cos(theta)*b;
Hz += -sin(temp)*omega*sin(theta)*a + cos(temp)*omega*sin(theta)*b;
}
vec3 v3_temp = normalize( vec3(-Hx, 1, -Hz) );
d_nmls[idx].x = v3_temp.x;
d_nmls[idx].y = v3_temp.y;
d_nmls[idx].z = v3_temp.z;
}
```

**Environment**

OS X EI Captitan 10.11.6

CUDA 8.0

OpenGL 4.0

Sorry for the lack of information before.

I don’t know what is wrong.

Need your help, guys :)

Thank you very much!