Hi
I am trying to run double-precision code on my GTX 260 with CUDA 2.3 (the latest and greatest). However, I am running into some issues.
This is the simple code, which squares each element of an array with CUDA:
// example1.cpp : Defines the entry point for the console application.
//
#include <stdio.h>
#include <stdlib.h>
#include <cuda_runtime.h>
#include <cutil.h>
// Kernel that executes on the CUDA device.
// Squares the elements a[0] .. a[N-1] in place, one element per thread.
//   a : array of doubles that the kernel reads and overwrites
//   N : number of elements; threads whose global index is >= N do nothing,
//       so the launch may contain more threads than elements.
// Indexing uses only the x dimension of the grid and of the block.
__global__ void square_array(double *a, int N)
{
    const int gid = threadIdx.x + blockDim.x * blockIdx.x;

    // Surplus threads in the last block have no element to process.
    if (gid >= N)
        return;

    const double value = a[gid];
    a[gid] = value * value;
}
// Reports a failed CUDA runtime call on stderr, including the runtime's own
// description of the error.
//   status : value returned by the CUDA runtime call being checked
//   what   : short description of the step, used as the message prefix
// Returns 1 when status is cudaSuccess, 0 otherwise.
static int cuda_ok(cudaError_t status, const char *what)
{
    if (status == cudaSuccess)
        return 1;
    fprintf(stderr, "%s: %s\n", what, cudaGetErrorString(status));
    return 0;
}

// main routine that executes on the host.
// Fills a host array with 0 .. N-1, squares it on the device with
// square_array, copies the result back and prints one "index value" line per
// element. Every allocation and CUDA call is checked; the first failure skips
// the remaining steps (so no stale data is printed) but cleanup still runs.
// Returns EXIT_SUCCESS when every step succeeded, EXIT_FAILURE otherwise.
int main(void)
{
    const int N = 10;                        // Number of elements in arrays
    const size_t size = N * sizeof(double);  // Size of each array in bytes
    double *a_h = NULL;                      // Host array
    double *a_d = NULL;                      // Device array
    int exit_code = EXIT_FAILURE;

    // Single-pass block: "break" jumps straight to the shared cleanup below.
    do {
        a_h = (double *)malloc(size);        // Allocate array on host
        if (a_h == NULL) {
            fprintf(stderr, "malloc of %lu bytes failed\n", (unsigned long)size);
            break;
        }

        // Allocate array on device
        if (!cuda_ok(cudaMalloc((void **)&a_d, size), "cudaMalloc failed"))
            break;

        // Initialize host array and copy it to CUDA device
        for (int i = 0; i < N; i++)
            a_h[i] = (double)i;
        if (!cuda_ok(cudaMemcpy(a_d, a_h, size, cudaMemcpyHostToDevice),
                     "cudaMemcpy (host to device) failed"))
            break;

        // Do calculation on device: round the block count up so that a
        // partial final block covers the tail of the array.
        const int block_size = 4;
        const int n_blocks = (N + block_size - 1) / block_size;
        square_array <<< n_blocks, block_size >>> (a_d, N);

        // A launch reports nothing directly; bad launch configurations show
        // up through cudaGetLastError().
        if (!cuda_ok(cudaGetLastError(), "kernel launch failed"))
            break;

        // Errors raised while the kernel runs surface at the next
        // synchronizing call. NOTE: cudaThreadSynchronize() is the CUDA 2.3
        // name; later toolkits deprecate it in favor of
        // cudaDeviceSynchronize().
        if (!cuda_ok(cudaThreadSynchronize(), "kernel execution failed"))
            break;

        // Retrieve result from device and store it in host array
        if (!cuda_ok(cudaMemcpy(a_h, a_d, size, cudaMemcpyDeviceToHost),
                     "cudaMemcpy (device to host) failed"))
            break;

        // Print results
        for (int i = 0; i < N; i++)
            printf("%d %f\n", i, a_h[i]);

        exit_code = EXIT_SUCCESS;
    } while (0);

    // Cleanup: both pointers start as NULL, so this is safe on every path.
    free(a_h);
    if (a_d != NULL)
        cudaFree(a_d);

    getchar();  // Wait for a key press before the process exits
    return exit_code;
}
I installed the CUDA VS Wizard 2.0, so I simply create a new project in Visual Studio and select “1.3 (hardware) Arch” and “1.3 (hardware) code” for the GPU architecture. But when I build the code, I get:
1>nvcc fatal : Value of -arch option ('sm_13') must be a virtual code architecture
So then I change the GPU architecture to “1.3 (virtual) Arch” and “1.3 (virtual) code”, and the code compiles and the answer is right.
Questions:
- Why doesn't the hardware option work? I have a GTX 260; shouldn't it support it?
- Does that mean I am emulating and not actually running on the GPU? (I am a noob.)
Please help!