While I compile my cuda code and run, I came across with the following.
I cannot use printf("%d ", var_x)
function to print out variable lets say int var_x;
I have to use std::cout << var_x;
to get correct value. I can attach my sample code and output. Can someone explain why?
#include <stdio.h>
#include <time.h>
#include <cuda_runtime.h>
#include <cassert>
#include <cstdlib>
#include <functional>
#include <iostream>
#include <algorithm>
#include <vector>
using std::cout;
using std::generate;
using std::vector;
#define CUDA_CALL(x) do { if((x)!=cudaSuccess) { \
printf("Error at %s:%d\n",__FILE__,__LINE__);\
return EXIT_FAILURE;}} while(0)
#define CHECK(x) do { if((x)!=cudaSuccess) { \
printf("Error at %s:%d\n",__FILE__,__LINE__);\
return EXIT_FAILURE;}} while(0)
void checkArray(vector<float> &host, float &cpuRef, const int size){
float temp_i = 0.0;
std::cout << "Printing array....\n" ;
for(int i=0 ; i<size; i++){
std::cout << host[i] << '\t';
temp_i += host[i];
if((i+1)%5 == 0)
std::cout << '\n';
}
cpuRef = temp_i;
}
int main(void){
// set up device
int dev = 0;
cudaDeviceProp deviceProp;
CHECK(cudaGetDeviceProperties(&deviceProp, dev));
printf("Using Device %d: %s\n", dev, deviceProp.name);
CHECK(cudaSetDevice(dev));
int size = 20;
printf("Array Size: %d \n", size);
// initialize random number
srand ((int)time(0));
// initialize vector and generate random indices between 0 and 5.
vector<float> host_a(size);
vector<float> host_b(size);
generate(host_a.begin(), host_a.end(), []() { return rand() % 15; });
generate(host_b.begin(), host_b.end(), []() { return rand() % 15; });
float cpuRefa = 0.0f;
float cpuRefb = 0.0f;
checkArray(host_a, cpuRefa, size);
checkArray(host_b, cpuRefb, size);
// declare block and grid dimension.
std::cout << "done print array. Total (sum of all) a:" << cpuRefa << "\n";
std::cout << "done print array. Total (sum of all) b:" << cpuRefb << "\n";
printf("done print array. Total (sum of all) a: %d\n", cpuRefa);
printf("done print array. Total (sum of all) b: %d\n", cpuRefb);
}
The output is as followed. Notice that cpuRefa and cpuRefb printed by C++ std:cout
are giving the correct answers (135, 146) while C’s printf()
answer is incorrect (0, 0).
Array Size: 20
Printing array....
10 3 13 3 6
5 2 11 12 6
0 11 11 6 8
14 4 1 6 3
Printing array....
6 5 13 9 11
3 2 8 6 2
9 8 12 7 3
10 4 12 7 9
done print array. cpuRefa:135
done print array. cpuRefb:146
done print array. cpuRefa: 0
done print array. cpuRefb: 0