Hi, I am a beginner in CUDA programming and want to practice it on my notebook.
My video card is GT 420M, with i5 CPU.
I successfully installed Nsight into VS2010 and followed the instructions below to create a test project.
My test code is shown below:
#include <iostream>
#include <cuda_runtime.h>
#include <device_launch_parameters.h>
using namespace std;
// Element-wise vector addition: the thread with global index i stores
// a[i] + b[i] into c[i].
//
// Launch layout: 1D grid of 1D blocks; only the .x components of
// blockIdx/blockDim/threadIdx are used to form the index.
// Precondition: there is no bounds check, so every launched thread must map
// to a valid element of a, b and c (gridDim.x * blockDim.x elements each).
__global__ void VectorAddKernel(float* a,float *b,float *c){
    // Flat global index of this thread across the whole grid.
    const int gid = threadIdx.x + blockDim.x * blockIdx.x;
    const float lhs = a[gid];
    const float rhs = b[gid];
    c[gid] = lhs + rhs;
}
// Reports a failed CUDA runtime call on stderr.
// Returns true when `status` is cudaSuccess, false otherwise.
static bool CudaOk(cudaError_t status,const char* what){
    if (status==cudaSuccess){
        return true;
    }
    cerr<<what<<" failed: "<<cudaGetErrorString(status)<<endl;
    return false;
}

// Adds two 6-element float vectors on the GPU and prints the result.
//
// Returns 0 on success and 1 if any CUDA call failed (the failing call and
// the runtime's error string are written to stderr).
int main (){
    const int kCount=6;
    // cudaMalloc/cudaMemcpy take sizes in BYTES, not in elements.
    const size_t kBytes=kCount*sizeof(float);

    float a[kCount],b[kCount],c[kCount];
    for (int i=0;i<kCount;i++){
        a[i]=b[i]=static_cast<float>(i);
        c[i]=0.0f;
    }

    // Start as null so the cleanup below is safe even if an allocation fails.
    float *ad=0,*bd=0,*cd=0;

    // && short-circuits: the first failing call stops the chain and is reported.
    bool ok=CudaOk(cudaMalloc((void**)&ad,kBytes),"cudaMalloc(ad)")
        &&CudaOk(cudaMemcpy(ad,a,kBytes,cudaMemcpyHostToDevice),"cudaMemcpy(a -> ad)")
        &&CudaOk(cudaMalloc((void**)&bd,kBytes),"cudaMalloc(bd)")
        &&CudaOk(cudaMemcpy(bd,b,kBytes,cudaMemcpyHostToDevice),"cudaMemcpy(b -> bd)")
        &&CudaOk(cudaMalloc((void**)&cd,kBytes),"cudaMalloc(cd)");

    if (ok){
        // The kernel has no bounds guard, so blocks * threads must equal kCount
        // exactly (kCount must be a multiple of kThreadsPerBlock).
        const int kThreadsPerBlock=2;
        dim3 DimGrid(kCount/kThreadsPerBlock,1,1);
        dim3 DimBlock(kThreadsPerBlock,1,1);
        VectorAddKernel <<< DimGrid,DimBlock >>>(ad,bd,cd);

        // A launch returns no status itself; launch-configuration errors are
        // picked up with cudaGetLastError(). The copy back is checked as well.
        ok=CudaOk(cudaGetLastError(),"VectorAddKernel launch")
            &&CudaOk(cudaMemcpy(c,cd,kBytes,cudaMemcpyDeviceToHost),"cudaMemcpy(cd -> c)");
    }

    if (ok){
        for (int i=0;i<kCount;i++){
            cout<<c[i]<<" ";
        }
        cout<<endl;
    }

    // Keep the console window open until the user types something.
    int pauseToken=0;
    cin>>pauseToken;

    cudaFree(ad);cudaFree(bd);cudaFree(cd);
    return ok?0:1;
}
However, while debugging, it seems that cudaMalloc does not succeed.
My notebook has both a discrete video card and an integrated HD graphics card, and uses Optimus technology to switch between them.
I am wondering whether the code is not running on the NVIDIA card, or whether the GT 420M is not supported by Nsight.
Thanks for your help.