Texture interpolation of 2D array of float4s Am trying to use linear interpolation of a 2D texture o


I am trying to create a code that will follow test particles through a constant magnetic field in a torus. The magnetic field is rotationally symmetric through the torus, and is read in from a text file containing 4225, or 65x65 grid, of points. I can read this file into host and then device memory, and copy it back to host memory fine, and I think that I have bound the array to a 2D texture, but I cannot get any results from the texture interpolation. I am new to CUDA, and C, and would very much appreciate any help that can be given.

The code I am using is:

[codebox]//include libraries

#include <stdio.h>

//define texture type?

texture<float4, 2, cudaReadModeElementType>tex_B;


global void transformKernal(float4 vel, float4 pos, float *d_out)


vel = tex2D(tex_B, 0, 0);

d_out = vel.x;


int main()


//outdata things

float *d_out;

//define array to hold mag field data

float4 arrayTwo[65][65];

//reads from BFile, gives 6 arrays, all npts long.

FILE* Bfile = fopen("Bfile.txt", "r");

double R0, Z0, Br0, Bz0, Bt0, psi0;

int npts = 0;

while(fscanf(Bfile, "%lf %lf %lf %lf %lf %lf", &R0, &Z0, &Br0, &Bz0, &Bt0, &psi0) == 6) npts++;

fprintf(stderr, "npts = %i\n", npts);

double* R_b = (double*)malloc(npts * sizeof(double));

double* Z_b = (double*)malloc(npts * sizeof(double));

double* Br_b = (double*)malloc(npts * sizeof(double));

double* Bz_b = (double*)malloc(npts * sizeof(double));

double* Bt_b = (double*)malloc(npts * sizeof(double));

double* psi_b = (double*)malloc(npts * sizeof(double));


for(int i = 0; i < npts; i++)


	fscanf(Bfile, "%lf %lf %lf %lf %lf %lf", &(R_b[i]), &(Z_b[i]), &(Br_b[i]), &(Bz_b[i]), &(Bt_b[i]), &(psi_b[i]));



int count = 0;

//test print statement

printf("Br_b[0] =%f \n", Br_b[0]);

//put mag field data into array

for (int i = 0; i< 65; i++)


	for (int j = 0; j< 65; j++)


		arrayTwo[i][j] = make_float4(Br_b[count], Bz_b[count], Bt_b[count], psi_b[count]);




//test print statements

printf("count = %i \n", count);

printf("arrayTwo[0][0].x %f \n", arrayTwo[0][0].x);


//define size of arrays?

int _N = 65;

int _M = 65;

//allocate array on the device for the mag field data

cudaArray* array_d;

cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<float4>();

cudaExtent extent;

extent.width = _N;

extent.height = _M;

extent.depth = 1;

cudaMalloc3DArray(&array_d, &channelDesc, extent);

//Copy mag field data to the device

cudaMemcpy2DToArray(array_d, 0, 0, arrayTwo, 65*sizeof(float4), 65*sizeof(float4), 65, cudaMemcpyHostToDevice);

//set texture referencing parameters

tex_B.addressMode[0] 	= cudaAddressModeClamp;

tex_B.addressMode[1] 	= cudaAddressModeClamp;

tex_B.filterMode 	= cudaFilterModeLinear;

tex_B.normalized 	= false;

//bind the texture to the array on the device

cudaBindTextureToArray(tex_B, array_d, channelDesc);

//Create array on the host, to test if the mag field data can be moved back and forth.

float4 arrayThree[65][65];

for (int i = 0; i< 65; i++)


	for (int j = 0; j< 65; j++)


		arrayThree[i][j] = make_float4(1.0, 1.0, 1.0, 1.0);



//test print statement

printf("arrayThreeTest: %f \n", arrayThree[0][0].x);

//copy mag field data from the device to the host

cudaMemcpy2DFromArray(arrayThree, 65*sizeof(float4), array_d, 0, 0, 65*sizeof(float4), 65, cudaMemcpyDeviceToHost);

//test print statement.

printf("arrayThree[0][0].x:%f \n",arrayThree[0][0].x);

//initial position float

float4 pos = make_float4(0.0, 0.0, 0.0, 0.0);

//velocity float

// float4 vel = make_float4(1.0, 1.0, 1.0, 1.0);

//call the kernel

transformKernal<<<1, 1>>>(vel, pos, d_out);

//create float on host to see if data has been interpolated

float *outdata = (float*) malloc(sizeof(float));

//copy float from device to host to see if texture has been interpolated

cudaMemcpy( outdata, d_out, (sizeof(float)), cudaMemcpyDeviceToHost);

//test print statement

printf("outdata = %f \n", outdata);

return 0;



where the input file, Bfile.txt, has 4225 rows such as:

[codebox]0.060000 -2.000000 -0.003695 0.044421 -6.820114 -0.000065[/codebox]

If I try to compile this code, using the -deviceemu flag, I get warnings that “d_out” is set but not used, and that “d_out” is used but not set.

  1. You never allocated storage on the device for d_out.

  2. within the kernel, you are assigning to a pointer instead of the value it points to: use “*d_out=” instead of “d_out=”


Thank you! Exactly what I needed to know.