Hi Guys,
This is my first post, i´m trying to do my first cuda program but then the problens begins…
I´ve seached in the forum about my problem and i found some things but i dont know how to do…if someone knows what i am doing wrong, and could tell me…
Thats the error:
[codebox]First-chance exception at 0x7c812afb in cppIntegration.exe: Microsoft C++ exception: cudaError_enum at memory location 0x0012fdec…
The thread ‘Win32 Thread’ (0x3cc) has exited with code -1 (0xffffffff).
The program ‘[3112] cppIntegration.exe: Native’ has exited with code -1 (0xffffffff).[/codebox]
And this is the code:
[codebox]
/* Example of integrating CUDA functions into an existing
-
application / framework.
-
Device code.
*/
#ifndef TD_KERNEL_H
#define TD_KERNEL_H
global void montar_carga_global (float** outMatriz, float** inMatriz, int num_col, int tam_out_mat){
const unsigned int i = threadIdx.x;
int pos = (i * num_col);
if(pos < tam_out_mat)
for(int j= 0;j < num_col; j++)
outMatriz[pos+j][0] = inMatriz[i][j];
}
#endif // #ifndef TD_KERNEL_H
[/codebox]
[codebox]
/*
-
Copyright 1993-2009 NVIDIA Corporation. All rights reserved.
-
NVIDIA Corporation and its licensors retain all intellectual property and
-
proprietary rights in and to this software and related documentation and
-
any modifications thereto. Any use, reproduction, disclosure, or distribution
-
of this software and related documentation without an express license
-
agreement from NVIDIA Corporation is strictly prohibited.
*/
/* Example of integrating CUDA functions into an existing
-
application / framework.
-
Host part of the device code.
-
Compiled with Cuda compiler.
*/
// includes, system
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>
// includes, project
#include <cutil_inline.h>
// includes, kernels
#include <TD_kernel.cu>
//
////////////////////////////////////////////////////////////////////////////////
// declaration, forward
extern “C” void
runTest(const int argc, const char** argv,
float** carga_global,unsigned int nLin_carga_global,unsigned int nCol_carga_global,
float** carga_nodal,unsigned int nLin_carga_nodal,unsigned int nCol_carga_nodal)
{
// use command-line specified CUDA device, otherwise use device with highest Gflops/s
if( cutCheckCmdLineFlag(argc, (const char**)argv, "device") )
cutilDeviceInit(argc, (char**)argv);
else
cudaSetDevice( cutGetMaxGflopsDeviceId() );
const unsigned int num_threads = 16;
const unsigned int mem_size_cargaG = sizeof(float) * nLin_carga_global * nCol_carga_global;
const unsigned int mem_size_cargaN = sizeof(float) * nLin_carga_nodal * nCol_carga_nodal;
// allocate device memory
float** d_cargaN;
cutilSafeCall(cudaMalloc((void**) &d_cargaN, mem_size_cargaN));
cutilSafeCall(cudaMemcpy(d_cargaN, carga_nodal, mem_size_cargaN,
cudaMemcpyHostToDevice) );
float** d_cargaG;
cutilSafeCall(cudaMalloc((void**) &d_cargaG, mem_size_cargaG));
cutilSafeCall(cudaMemcpy(d_cargaG, carga_global, mem_size_cargaG,
cudaMemcpyHostToDevice) );
// setup execution parameters
dim3 grid(1, 1, 1);
dim3 threads(num_threads, 1, 1);
cudaGetLastError();
// execute the kernel
montar_carga_global<<< grid, threads >>>(d_cargaG, d_cargaN, nCol_carga_nodal, (nLin_carga_nodal * nCol_carga_nodal));
cudaGetLastError();
// check if kernel execution generated and error
cutilCheckMsg("Kernel execution failed");
// copy results from device to host
cutilSafeCall(cudaMemcpy(carga_global, d_cargaG, mem_size_cargaG,
cudaMemcpyDeviceToHost));
printf(“Test PASSED”);
cutilSafeCall(cudaFree(d_cargaG));
cutilSafeCall(cudaFree(d_cargaN));
cudaThreadExit();
}
[/codebox]
Thx guys External Image