Hi all;
I’m trying to copy string from CPU from a text file to a GPU kernel. The program works and it seems like it has a limitation where I only can copy a maximum of 300 strings (String_Total_300.txt) of variable length in total to the GPU kernel at one time. This is because when I’m trying to copy the string of a variable length from CPU from a text file (String_Total_313.txt), the program hang. The NVIDIA GPU model is GeForce GT 640M LE
Below here is my source code :
#include “cuda_runtime.h”
#include “device_launch_parameters.h”
#include
#include
#include <stdio.h>
using namespace std;
#define WORDLENGTH 300
global void insert(char **dev_pWordListArray, int *dev_pWordListLength, int *dev_pTotalNumWordList) {
printf("GPU : hello cuda !\n");
for(int i=0; i < *dev_pTotalNumWordList; i++) {
printf("%s\n", dev_pWordListArray[i]);
}
}
int main()
{
char** ppWord_list_ptr_array = new char* ;
int Word_list_Array;
int* pWordnum = &Word_list_Array;
*pWordnum = 0;
int g_Wordnumber;
char *pWordListArray [WORDLENGTH];
int pWordListLength[WORDLENGTH] ;
char **dev_pWordListArray;
int *dev_pWordListLength;
int *dev_pTotalWordListArray;
//static const char signature_list[] = {"String_Total_313.txt"};
static const char signature_list[] = {"String_Total_300.txt"};
printf("Loading signature list %s .....\n", signature_list);
ifstream stream;
stream.open(signature_list,ios::in);
if(!stream) {
printf("Error !, Fail to open file %s\n", signature_list);
return false;
}
char temp[WORDLENGTH] = {'\0'};
while(!stream.eof()) {
stream.getline(temp,WORDLENGTH);
(*pWordnum)++;
}
g_Wordnumber = *pWordnum;
stream.clear();
stream.seekg(0);
ppWord_list_ptr_array = new char*[*pWordnum];
for(int i=0; i <(*pWordnum); i++) {
ppWord_list_ptr_array[i] = new char[WORDLENGTH];
stream.getline(ppWord_list_ptr_array[i],WORDLENGTH);
}
printf("Signature list loaded ...\n");
for (int i=0; i<g_Wordnumber; i++) {
pWordListArray[i] = (char*)ppWord_list_ptr_array[i];
pWordListLength[i] = strlen(ppWord_list_ptr_array[i]);
}
stream.close();
const int dev_pWordListArray_sizeof = (WORDLENGTH)*sizeof(char);
const int dev_pWordListLength_sizeof = (WORDLENGTH)*sizeof(int);
const int dev_pTotalWordListArray_sizeof = 1*sizeof(int);
const int listWords = g_Wordnumber * sizeof(char*);
cudaHostAlloc(&dev_pWordListArray, listWords, cudaHostAllocMapped);
printf("Total word : %d\n", g_Wordnumber);
for (int i=0; i < g_Wordnumber; i++) {
// Allocate memory in device for each string element
cudaMalloc(&(dev_pWordListArray[i]),(pWordListLength[i] + 1)*sizeof(char));
// Copy List Element,from Host String to Device
cudaMemcpy(dev_pWordListArray[i], pWordListArray[i], (pWordListLength[i] + 1)*sizeof(char),cudaMemcpyHostToDevice);
}
cudaMalloc((void**)&(dev_pWordListLength), dev_pWordListLength_sizeof);
cudaMalloc((void**)&(dev_pTotalWordListArray), dev_pTotalWordListArray_sizeof);
cudaMemcpy(dev_pWordListLength, pWordListLength, dev_pWordListLength_sizeof, cudaMemcpyHostToDevice);
cudaMemcpy(dev_pTotalWordListArray, &g_Wordnumber, dev_pTotalWordListArray_sizeof, cudaMemcpyHostToDevice);
insert <<< 1, 1>>>( dev_pWordListArray, dev_pWordListLength, dev_pTotalWordListArray);
cudaFree(dev_pWordListArray);
cudaFree(dev_pWordListLength);
cudaFree(dev_pTotalWordListArray);
return 0;
}
Can anyone here provide some help to identify where is the problem is ?
String_Total_300.txt (5.39 KB)
String_Total_313.txt (5.57 KB)