Hello there. I’ve got some problem with the sample from SDK. I changed one of them(vectorAdd sample). I work at win xp 32b sp3, visual studio 2008 and cuda 3.0.
When I only one file named vectorAdd.cu with code it works fine(does add +1 to each digit. After that I’ve got alphabet from A to Z):
// Device code
__global__ void VecAdd(float* A, int N)
{
int i = blockDim.x * blockIdx.x + threadIdx.x;
if (i < N)
A[i]=A[i]+i;
}
// Host code
int main(int argc, char** argv)
{
int len=34;
size_t size = len * sizeof(float);
// Allocate input vectors h_A and h_B in host memory
h_A = (float*)malloc(size);
if (h_A == 0) Cleanup();
// Initialize input vectors
RandomInit(h_A, len);
// Allocate vectors in device memory
cutilSafeCall( cudaMalloc((void**)&d_A, size) );
// Copy vectors from host memory to device memory
cutilSafeCall( cudaMemcpy(d_A, h_A, size, cudaMemcpyHostToDevice) );
// Invoke kernel
int threadsPerBlock = 256;
int blocksPerGrid = (len + threadsPerBlock - 1) / threadsPerBlock;
VecAdd<<<blocksPerGrid, threadsPerBlock>>>(d_A, len);
cutilCheckMsg("kernel launch failure");
#ifdef _DEBUG
cutilSafeCall( cudaThreadSynchronize() );
#endif
// Copy result from device memory to host memory
// h_A contains the result in host memory
cutilSafeCall( cudaMemcpy(h_A, d_A, size, cudaMemcpyDeviceToHost) );
for(int ii=0; ii < len; ++ii)
{
std::cout<<"Literka "<<(char)h_A[ii];
std::cout << std::endl;
}
Cleanup();
}
void Cleanup(void)
{
// Free device memory
if (d_A)
cudaFree(d_A);
// Free host memory
if (h_A)
free(h_A);
cutilSafeCall( cudaThreadExit() );
if (!noprompt) {
printf("\nPress ENTER to exit...\n");
fflush( stdout);
fflush( stderr);
getchar();
}
exit(0);
}
// Allocates an array with random float entries.
void RandomInit(float* data, int n)
{
for (int i = 0; i < n; ++i)
data[i] = 65;// rand() / (float)RAND_MAX;
}
// Parse program arguments
void ParseArguments(int argc, char** argv)
{
for (int i = 0; i < argc; ++i)
if (strcmp(argv[i], "--noprompt") == 0 ||
strcmp(argv[i], "-noprompt") == 0)
{
noprompt = true;
break;
}
}
I was trying to make from that cppIntegrator like from example(cppIntegrator in SDK folder). I don’t get it why it doesn’t work. I don’t get alphabet from A-Z, just A letter.
It looks like that:
kamerl.cu - my kernel
#ifndef _KARMEL_H_
#define _KARMEL_H_
// Device code
__global__ void VecAdd(float* A, int N)
{
int i = blockDim.x * blockIdx.x + threadIdx.x;
if (i < N)
A[i]=A[i]+i;
}
#endif
vectorAdd.cu - cppIntegrator(like in SDK example):
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>
// includes, project
#include <cutil_inline.h>
// includes, kernels
#include <karmel.cu>
float* d_A;
// Host code
extern "C" void runKernel( const int argc, const char** argv, float* h_A, int len )
{
if( cutCheckCmdLineFlag(argc, (const char**)argv, "device") )
cutilDeviceInit(argc, (char**)argv);
else
cudaSetDevice( cutGetMaxGflopsDeviceId() );
size_t size = len * sizeof(float);
// Allocate input vectors h_A and h_B in host memory
h_A = (float*)malloc(size);
printf("h_a %d", h_A);
printf("\n");
// Allocate vectors in device memory
cutilSafeCall( cudaMalloc((void**)&d_A, size) );
// Copy vectors from host memory to device memory
cutilSafeCall( cudaMemcpy(d_A, h_A, size, cudaMemcpyHostToDevice) );
// Invoke kernel
int threadsPerBlock = 256;
int blocksPerGrid = (len + threadsPerBlock - 1) / threadsPerBlock;
VecAdd<<<blocksPerGrid, threadsPerBlock>>>(d_A, len);
cutilCheckMsg("kernel launch failure");
#ifdef _DEBUG
cutilSafeCall( cudaThreadSynchronize() );
#endif
// Copy result from device memory to host memory
// h_A contains the result in host memory
cutilSafeCall( cudaMemcpy(h_A, d_A, size, cudaMemcpyDeviceToHost) );
cutilSafeCall(cudaFree(d_A));
free(h_A);
cudaThreadExit();
}
main.cpp
#include <stdio.h>
#include <iostream>
#include <cutil_inline.h>
using namespace std;
extern "C" void runKernel( const int argc, const char** argv , float* h_A, int len );
int main(int argc, char** argv)
{
int dlug=34;
float tab[]={ 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65,
65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65 };
runKernel(argc, (const char**)argv, tab, dlug);
for(int ii=0; ii < dlug; ++ii)
{
cout<<"Literka "<<(char)tab[ii]<<endl;
}
system("pause");
return 0;
}
== edit ==
Problem has been solved :)