HI:
I wrote a simple program for integer comparison between two arrays. But, i am not able to understand why it is not working ( External Image )
I have two array A[112], B[112] integers.
I want to compare all the elements in A to all elements in B (i.e. we have 112*112 comparisons). I want to filter and report only the index of A and B whose elements are same.
In my final output i am getting all index’s. Please help me. Here is my code:
#include “string.h”
#include “stdio.h”
#include “cutil.h”
#include “time.h”
global void compare_arrays_gpu( int *in1, int *in2, int compout, int seq1out, int seq2out, int seq11, int seq22)
{
int idx=blockIdx.xblockDim.x+threadIdx.x;
int idy=blockIdx.yblockDim.y+threadIdx.y;
int index=idx+idyseq22;
if(idx < seq11 && idy < seq22){
if(in1[idx]==in2[idy]){
compout[index]=1;
seq1out[index]=in1[idx];
seq2out[index]=in2[idy];
}
}
}
int main()
{
CUT_DEVICE_INIT();
/parameters/
int i;
int seq1_len=112;
int seq2_len=112;
time_t timer1;
timer1=time(NULL);
/* Allocate arrays a, b and c on host*/
int *seq1parts;
int *seq2parts;
int *compare;
int *seq1;
int *seq2;
seq1parts = (int*) malloc((seq1_len)sizeof(int));
seq2parts = (int) malloc((seq2_len)sizeof(int));
compare = (int) malloc((seq2_len)(seq1_len)sizeof(int));
seq1 = (int) malloc((seq2_len)(seq1_len)sizeof(int));
seq2 = (int) malloc((seq2_len)*(seq1_len)*sizeof(int));
int k;
for(k=0; k<seq1_len; k++){
seq1parts[k]=k;
}
for(k=0; k<seq2_len; k++){
seq2parts[k]=k;
}
/* pointers to device memory /
int seq1parts_d;
int seq2parts_d;
int compare_d;
int seq1_d;
int seq2_d;
/ Allocate arrays a_d, b_d and c_d on device/
cudaMalloc ((void **) &seq1parts_d, sizeof(int)(seq1_len));
cudaMalloc ((void **) &seq2parts_d, sizeof(int)(seq2_len));
cudaMalloc ((void **) &compare_d, sizeof(int)(seq2_len)(seq1_len));
cudaMalloc ((void **) &seq1_d, sizeof(int)(seq2_len)(seq1_len));
cudaMalloc ((void **) &seq2_d, sizeof(int)(seq2_len)(seq1_len));
/* Copy data from host memory to device memory /
cudaMemcpy(seq1parts_d, seq1parts, sizeof(int)(seq1_len), cudaMemcpyHostToDevice);
cudaMemcpy(seq2parts_d, seq2parts, sizeof(int)*(seq2_len), cudaMemcpyHostToDevice);
/* Compute the execution configuration /
//int nblocks=(((seq1_len)(seq2_len))+255)/256;
int blocksize=16;
dim3 dimBlock(blocksize, blocksize);
dim3 dimGrid((seq1_len)/dimBlock.x, (seq2_len)/dimBlock.y);
/* Add arrays a and b, store result in c */
compare_arrays_gpu<<<dimGrid,dimBlock>>>(seq1parts_d, seq2parts_d, compare_d,seq1_d,seq2_d, (seq1_len),(seq2_len));
CUT_CHECK_ERROR(“Kernel function filed”);
/* Copy data from deveice memory to host memory /
cudaMemcpy(compare, compare_d, sizeof(int)(seq1_len)(seq2_len), cudaMemcpyDeviceToHost);
cudaMemcpy(seq1, seq1_d, sizeof(int)(seq1_len)(seq2_len), cudaMemcpyDeviceToHost);
cudaMemcpy(seq2, seq2_d, sizeof(int)(seq1_len)*(seq2_len), cudaMemcpyDeviceToHost);
time_t timer2;
timer2=time(NULL);
printf(“%f\n”, difftime(timer2, timer1));
/* Print c /
for (i=0; i<(seq1_len)(seq2_len); i++)
printf(“%d %d %d \n”,seq1[i], seq2[i], compare[i]);
/* Free the memory */
free(seq1parts); free(seq2parts);free(compare);free(seq1);free(seq2);
CUDA_SAFE_CALL(cudaFree(seq1parts_d)); CUDA_SAFE_CALL(cudaFree(seq2parts_d));CUDA_SAFE_CALL(cudaFree(compare_d));CUDA_SAFE_CALL(cudaFree(seq1_d)); CUDA_SAFE_CALL(cudaFree(seq2_d));
}