Hi. I wrote two apps which should give the same result, but they don’t. Can you help me find the problem?
These apps do 1D interpolation on texture memory. The texture data is constant (every element is 1337), and the first app gives the correct result.
When I split the program into two source files, the interpolation returns 0. Sometimes it even segfaults.
First app (works):
file main.cu
[codebox]#include <iostream>
#include <cuda.h>
using namespace std;
// 1D texture reference over float elements; cudaReadModeElementType returns
// the stored values without conversion. Bound to device memory in main().
texture <float, 1, cudaReadModeElementType> tex_pointer;
/**
 * Samples the 1D texture reference tex_pointer at coordinate 0.5f and stores
 * the fetched value in *data.
 *
 * Every launched thread performs the same write to the same address.
 *
 * @param data  device pointer that receives the sampled value.
 */
__global__ void interpolate(float *data){
    *data = tex1D(tex_pointer, .5f);
}
/**
 * Host driver for the texture-interpolation test.
 *
 * Uploads n floats (all 1337) to device memory, binds them to the 1D texture
 * reference tex_pointer (linear filtering, wrap addressing, normalized
 * coordinates), launches interpolate and prints the value the kernel sampled.
 *
 * @return 0 always; CUDA errors are reported on stderr.
 */
int main(int argc, char **argv){
    int n = 2;
    float *data_h, *data_d, temp_h, *temp_d;
    cudaMalloc((void**) &data_d, sizeof(float)*n);
    cudaMalloc((void**) &temp_d, sizeof(float));

    // Host-side source data: every element holds the same value.
    data_h = new float[n];
    for(int i=0; i<n; i++){
        data_h[i] = 1337.f;
    }

    // Zero the device result slot before the kernel overwrites it.
    temp_h = 0.f;
    cudaMemcpy(temp_d, &temp_h, sizeof(float), cudaMemcpyHostToDevice);
    cudaMemcpy(data_d, data_h, sizeof(float)*n, cudaMemcpyHostToDevice);

    // NOTE(review): the CUDA Programming Guide says textures bound to linear
    // memory support neither filtering nor normalized coordinates (use
    // tex1Dfetch, or bind a cudaArray instead) -- confirm these settings
    // actually take effect on the target toolkit.
    tex_pointer.normalized = true;
    tex_pointer.filterMode = cudaFilterModeLinear;
    tex_pointer.addressMode[0] = cudaAddressModeWrap;

    // The element type must be given explicitly; it cannot be deduced.
    cudaChannelFormatDesc channelDesc;
    channelDesc = cudaCreateChannelDesc<float>();

    // The last argument is the size of the bound region in BYTES, not elements.
    cudaError_t err = cudaBindTexture(0, tex_pointer, data_d, channelDesc, sizeof(float)*n);
    if (err != cudaSuccess) {
        cerr << "cudaBindTexture failed: " << cudaGetErrorString(err) << endl;
    }

    // Default-constructed dim3 is (1,1,1): one block of one thread.
    dim3 dimGrid, dimBlock;
    interpolate <<<dimGrid, dimBlock>>> (temp_d);
    err = cudaGetLastError();   // launch-configuration errors surface here
    if (err != cudaSuccess) {
        cerr << "kernel launch failed: " << cudaGetErrorString(err) << endl;
    }

    // Blocking copy; also reports errors raised while the kernel executed.
    err = cudaMemcpy(&temp_h, temp_d, sizeof(float), cudaMemcpyDeviceToHost);
    if (err != cudaSuccess) {
        cerr << "cudaMemcpy (device to host) failed: " << cudaGetErrorString(err) << endl;
    }
    cout << temp_h << endl;

    cudaUnbindTexture(tex_pointer);
    delete[] data_h;            // allocated with new[], so release with delete[]
    cudaFree(temp_d);
    cudaFree(data_d);
    return 0;
}[/codebox]
compile with: nvcc -o test main.cu
The second app: (doesn’t work)
file main.cu
[codebox]#include <iostream>
#include "main.h"
/**
 * Host driver for the texture-interpolation test (multi-file variant).
 *
 * Uploads n floats (all 1337) to device memory, binds them to the 1D texture
 * reference tex_pointer declared in main.h, launches interpolate and prints
 * the value the kernel sampled.
 *
 * NOTE(review): the tex_pointer bound here is the one visible in this
 * translation unit. If the kernel in kernel.cu sees a separate copy of the
 * reference (it includes main.h as well), this bind does not reach it --
 * confirm, and if so move the bind/unbind next to the kernel.
 *
 * @return 0 always; CUDA errors are reported on stderr.
 */
int main(int argc, char **argv){
    int n = 2;
    float *data_h, *data_d, temp_h, *temp_d;
    cudaMalloc((void**) &data_d, sizeof(float)*n);
    cudaMalloc((void**) &temp_d, sizeof(float));

    // Host-side source data: every element holds the same value.
    data_h = new float[n];
    for(int i=0; i<n; i++){
        data_h[i] = 1337.f;
    }

    // Zero the device result slot before the kernel overwrites it.
    temp_h = 0.f;
    cudaMemcpy(temp_d, &temp_h, sizeof(float), cudaMemcpyHostToDevice);
    cudaMemcpy(data_d, data_h, sizeof(float)*n, cudaMemcpyHostToDevice);

    tex_pointer.normalized = true;
    tex_pointer.filterMode = cudaFilterModeLinear;
    tex_pointer.addressMode[0] = cudaAddressModeWrap;

    // The element type must be given explicitly; it cannot be deduced.
    cudaChannelFormatDesc channelDesc;
    channelDesc = cudaCreateChannelDesc<float>();

    // The last argument is the size of the bound region in BYTES, not elements.
    cudaError_t err = cudaBindTexture(0, tex_pointer, data_d, channelDesc, sizeof(float)*n);
    if (err != cudaSuccess) {
        cerr << "cudaBindTexture failed: " << cudaGetErrorString(err) << endl;
    }

    // Default-constructed dim3 is (1,1,1): one block of one thread.
    dim3 dimGrid, dimBlock;
    interpolate <<<dimGrid, dimBlock>>> (temp_d);
    err = cudaGetLastError();   // launch-configuration errors surface here
    if (err != cudaSuccess) {
        cerr << "kernel launch failed: " << cudaGetErrorString(err) << endl;
    }

    // Blocking copy; also reports errors raised while the kernel executed.
    err = cudaMemcpy(&temp_h, temp_d, sizeof(float), cudaMemcpyDeviceToHost);
    if (err != cudaSuccess) {
        cerr << "cudaMemcpy (device to host) failed: " << cudaGetErrorString(err) << endl;
    }
    cout << temp_h << endl;

    cudaUnbindTexture(tex_pointer);
    delete[] data_h;            // allocated with new[], so release with delete[]
    cudaFree(temp_d);
    cudaFree(data_d);
    return 0;
}
[/codebox]
file main.h
[codebox]
#include <cuda.h>
using namespace std;
// 1D texture reference over float elements, read without type conversion.
// NOTE(review): texture references are file-scope objects that are private to
// the translation unit that declares them, so every .cu file including this
// header gets its OWN tex_pointer. Binding the copy in main.cu does not bind
// the copy the kernel in kernel.cu samples, which would explain the 0 result.
// Keep the reference, the kernel and the bind/unbind calls in the same .cu
// file (e.g. expose a host wrapper from kernel.cu) -- confirm against the
// CUDA Programming Guide for the toolkit in use.
texture <float, 1, cudaReadModeElementType> tex_pointer;

// Kernel defined in kernel.cu; stores one sample of tex_pointer in *data.
__global__ void interpolate(float *data);
[/codebox]
file kernel.cu
[codebox]
#include "main.h"
/**
 * Samples the 1D texture reference tex_pointer at coordinate 0.5f and stores
 * the fetched value in *data.
 *
 * Every launched thread performs the same write to the same address.
 *
 * @param data  device pointer that receives the sampled value.
 */
__global__ void interpolate(float *data){
    *data = tex1D(tex_pointer, .5f);
}
[/codebox]
compile with:
nvcc -c kernel.cu
nvcc -c main.cu
nvcc -o test main.o kernel.o