Hello,
I am trying to create a code that will follow test particles through a constant magnetic field in a torus. The magnetic field is rotationally symmetric through the torus, and is read in from a text file containing 4225, or 65x65 grid, of points. I can read this file into host and then device memory, and copy it back to host memory fine, and I think that I have bound the array to a 2D texture, but I cannot get any results from the texture interpolation. I am new to CUDA, and C, and would very much appreciate any help that can be given.
The code I am using is:
[codebox]//include libraries
#include <stdio.h>
//define texture type?
texture<float4, 2, cudaReadModeElementType>tex_B;
//kernel
global void transformKernal(float4 vel, float4 pos, float *d_out)
{
vel = tex2D(tex_B, 0, 0);
d_out = vel.x;
}
int main()
{
//outdata things
float *d_out;
//define array to hold mag field data
float4 arrayTwo[65][65];
//reads from BFile, gives 6 arrays, all npts long.
FILE* Bfile = fopen("Bfile.txt", "r");
double R0, Z0, Br0, Bz0, Bt0, psi0;
int npts = 0;
while(fscanf(Bfile, "%lf %lf %lf %lf %lf %lf", &R0, &Z0, &Br0, &Bz0, &Bt0, &psi0) == 6) npts++;
fprintf(stderr, "npts = %i\n", npts);
double* R_b = (double*)malloc(npts * sizeof(double));
double* Z_b = (double*)malloc(npts * sizeof(double));
double* Br_b = (double*)malloc(npts * sizeof(double));
double* Bz_b = (double*)malloc(npts * sizeof(double));
double* Bt_b = (double*)malloc(npts * sizeof(double));
double* psi_b = (double*)malloc(npts * sizeof(double));
rewind(Bfile);
for(int i = 0; i < npts; i++)
{
fscanf(Bfile, "%lf %lf %lf %lf %lf %lf", &(R_b[i]), &(Z_b[i]), &(Br_b[i]), &(Bz_b[i]), &(Bt_b[i]), &(psi_b[i]));
}
fclose(Bfile);
int count = 0;
//test print statement
printf("Br_b[0] =%f \n", Br_b[0]);
//put mag field data into array
for (int i = 0; i< 65; i++)
{
for (int j = 0; j< 65; j++)
{
arrayTwo[i][j] = make_float4(Br_b[count], Bz_b[count], Bt_b[count], psi_b[count]);
count++;
}
}
//test print statements
printf("count = %i \n", count);
printf("arrayTwo[0][0].x %f \n", arrayTwo[0][0].x);
//define size of arrays?
int _N = 65;
int _M = 65;
//allocate array on the device for the mag field data
cudaArray* array_d;
cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<float4>();
cudaExtent extent;
extent.width = _N;
extent.height = _M;
extent.depth = 1;
cudaMalloc3DArray(&array_d, &channelDesc, extent);
//Copy mag field data to the device
cudaMemcpy2DToArray(array_d, 0, 0, arrayTwo, 65*sizeof(float4), 65*sizeof(float4), 65, cudaMemcpyHostToDevice);
//set texture referencing parameters
tex_B.addressMode[0] = cudaAddressModeClamp;
tex_B.addressMode[1] = cudaAddressModeClamp;
tex_B.filterMode = cudaFilterModeLinear;
tex_B.normalized = false;
//bind the texture to the array on the device
cudaBindTextureToArray(tex_B, array_d, channelDesc);
//Create array on the host, to test if the mag field data can be moved back and forth.
float4 arrayThree[65][65];
for (int i = 0; i< 65; i++)
{
for (int j = 0; j< 65; j++)
{
arrayThree[i][j] = make_float4(1.0, 1.0, 1.0, 1.0);
}
}
//test print statement
printf("arrayThreeTest: %f \n", arrayThree[0][0].x);
//copy mag field data from the device to the host
cudaMemcpy2DFromArray(arrayThree, 65*sizeof(float4), array_d, 0, 0, 65*sizeof(float4), 65, cudaMemcpyDeviceToHost);
//test print statement.
printf("arrayThree[0][0].x:%f \n",arrayThree[0][0].x);
//initial position float
float4 pos = make_float4(0.0, 0.0, 0.0, 0.0);
//velocity float
// float4 vel = make_float4(1.0, 1.0, 1.0, 1.0);
//call the kernel
transformKernal<<<1, 1>>>(vel, pos, d_out);
//create float on host to see if data has been interpolated
float *outdata = (float*) malloc(sizeof(float));
//copy float from device to host to see if texture has been interpolated
cudaMemcpy( outdata, d_out, (sizeof(float)), cudaMemcpyDeviceToHost);
//test print statement
printf("outdata = %f \n", outdata);
return 0;
}
[/codebox]
where the input file, Bfile.txt, has 4225 rows such as:
[codebox]0.060000 -2.000000 -0.003695 0.044421 -6.820114 -0.000065[/codebox]
If I try to compile this code, using the -deviceemu flag, I get warnings that “d_out” is set but not used, and that “d_out” is used but not set.