Hi, I am trying to get median of array. When i input in this array (3 2 5 8 9 4 1 7 6) it returns “0”.
Code using in terminal like : ./main input.txt 9
9 is number of entries.
Where am i doing wrong ? Could you help me?
CODE:
#include <iostream>
#include <fstream>
#include <cstdlib>
#define checkCudaErrors(err) __checkCudaErrors(err, __FILE__, __LINE__)
#define getLastCudaError(msg) __getLastCudaError(msg, __FILE__, __LINE__)
inline void __checkCudaErrors(cudaError err, const char *file, const int line) {
if(cudaSuccess != err) {
std::cout << file << "(" << line << ") : CUDA Runtime API error " << (int) err << ": " << cudaGetErrorString(err) << std::endl;
exit(3);
}
}
inline void __getLastCudaError(const char *errorMsg, const char *file, const int line) {
cudaError_t err = cudaGetLastError();
if(cudaSuccess != err) {
std::cout << file << "(" << line << ") : getLastCudaError() CUDA error : " << errorMsg << " : (" << (int) err << ") " << cudaGetErrorString(err) << std::endl;
exit(3);
}
}
__device__ inline void swapGpu(int &a, int &b) {
int dum = a;
a = b;
b = dum;
}
__global__ void gpuMedOdd(int *entries, int *med, int numEntries) {
extern __shared__ int sdata[];
int tid = threadIdx.x;
int i = blockIdx.x * (blockDim.x * 3) + threadIdx.x;
if(i + 2 * blockDim.x < numEntries) {
int list[3];
list[0] = entries[i], list[1] = entries[i + blockDim.x], list[2] = entries[i + 2 * blockDim.x];
if(list[1] < list[0])
swapGpu(list[1], list[0]);
if(list[2] < list[0])
swapGpu(list[2], list[0]);
if(list[2] < list[1])
swapGpu(list[2], list[1]);
sdata[tid] = list[1];
}
__syncthreads();
for(int s = blockDim.x / 3; s > 0; s /= 3) {
if(tid < s && tid + 2 * s < blockDim.x) {
int list[3];
list[0] = sdata[tid], list[1] = sdata[tid + s], list[2] = sdata[tid + 2 * s];
if(list[1] < list[0])
swapGpu(list[1], list[0]);
if(list[2] < list[0])
swapGpu(list[2], list[0]);
if(list[2] < list[1])
swapGpu(list[2], list[1]);
sdata[tid] = list[1];
}
__syncthreads();
}
*med = sdata[0];
}
int main(int argc, char *argv[]) {
if(argc != 3) {
std::cout << "ERROR: Incorrect number of input arguments" << std::endl;
std::cout << "Proper usage: " << argv[0] << " fileName numEntries" << std::endl;
exit(1);
}
std::ifstream inp(argv[1]);
if(!inp.is_open()) {
std::cout << "ERROR: File I/O error" << std::endl;
std::cout << "Could not find file " << argv[1] << std::endl;
exit(2);
}
int numEntries = atoi(argv[2]), i = 0;
int *entries = new int[numEntries];
while(inp >> entries[i] && i < numEntries)
i++;
if(i < numEntries) {
std::cout << "ERROR: File I/O error" << std::endl;
std::cout << "Command-line input suggested " << numEntries << " entries, but only found " << i << " entries" << std::endl;
exit(2);
}
if(inp >> i) {
std::cout << "ERROR: File I/O error" << std::endl;
std::cout << "Command-line input suggested " << numEntries << " entries, but file contains more entries" << std::endl;
exit(2);
}
int *d_entries;
checkCudaErrors(cudaMalloc(&d_entries, sizeof(int) * numEntries));
checkCudaErrors(cudaMemcpy(d_entries, entries, sizeof(int) * numEntries, cudaMemcpyHostToDevice));
if(numEntries % 2) {
std::cout << " if " << std::endl;
int med, *d_med;
checkCudaErrors(cudaMalloc(&d_med, sizeof(int)));
gpuMedOdd<<<9, numEntries / 9, numEntries / 9 * sizeof(int)>>>(d_entries, d_med, numEntries);
getLastCudaError("kernel launch failure");
checkCudaErrors(cudaMemcpy(&med, d_med, sizeof(int), cudaMemcpyDeviceToHost));
std::cout << "The median value is: " << med << std::endl;
}
else {
std::cout << " Else " << std::endl;
int *d_med, med;
cudaMalloc(&d_med, sizeof(int));
gpuMedOdd<<<9, numEntries / 9, numEntries / 9 * sizeof(int)>>>(d_entries, d_med, numEntries);
getLastCudaError("kernel launch failure");
checkCudaErrors(cudaMemcpy(&med, d_med, sizeof(int), cudaMemcpyDeviceToHost));
std::cout << "The median value is: " << med << std::endl;
}
exit(0);
}