Hello,
Trying to convert a float array to an array of float2 type by transferring values of original float array to the real part of the float2 array (f2_array.x). The input float array is 2D.
The kernel I created to do this seems pretty straight forward, but there could be an error I’m not seeing. Is there a problem with the way the variable f2_array is declared?
Declaring the variable as “float2 *f2_array;” gives a segmentation fault when trying to output the values of the array.
Declaring as"float2 *f2_array;" gives a segmentation fault when the variable is declared.
Declaring as “static float2 f2_array;” outputs the following:
Cuda Array test:
21 21 21 21 21
26 21 16 21 12
21 21 40 23 14
16 40 26 24 13
21 23 24 21 18
Cuda Float2 Array test:
-256 -256 -256 -256 -256
0 0 0 0 0
0 0 0 0 0
0 0 0 0 0
0 0 0 0 0
These output values should be the same in both cases I believe.
#include <cufft.h>
#include <cuda_runtime.h>
#include <cuda.h>
#include <cuda_device_runtime_api.h>
#include <iostream>
#include <stdlib.h>
#include <stdio.h>
#include "header.h"
#define height 2048
#define width 2448
#define size 5013504
__global__ void datatransfer(float2 *f2, float *f)
{
int x = (blockIdx.x * blockDim.x) + threadIdx.x;
int y = (blockIdx.y * blockDim.y) + threadIdx.y;
if (x>0 && x<width && y>0 && y< height) {
f2[width*y+x].x = f[width*y+x];
f2[width*y+x].y = 0;
}
}
int fastft(float *array)
{
//Create Variables
float2 *f2_array;
//or static float2 f2_array;
dim3 threadsPerBlock(153,128);
dim3 numBlocks(16,16);
//Display input Array values
std::cout<<"Cuda Array test: " <<std::endl;
for (int i=0;i<5;i++) {
std::cout<<array[10*i] <<" "<< array[20*i] <<" "<< array[30*i] <<" "<< array[40*i] <<" "<< array[50*i] <<std::endl;
}
//Allocate memory on GPU
cudaMalloc((void **)&array,sizeof(float)*size);
cudaMalloc((void **) &f2_array, sizeof(float2)*size);
//Copy variables from host to device
cudaMemcpy(array,array,sizeof(float)*size,cudaMemcpyHostToDevice);
cudaMemcpy(f2_array,f2_array,sizeof(float2)*size,cudaMemcpyHostToDevice);
//Execute kernel that performs conversion from float --> float2
datatransfer<<<numBlocks,threadsPerBlock>>>(f2_array,array);
//Copy
cudaMemcpy(f2_array,f2_array,sizeof(float)*size,cudaMemcpyDeviceToHost);
//Display results of conversion
std::cout<<" "<<std::endl;
std::cout<<"Cuda Float2 Array test: " <<std::endl;
for (int i=0;i<5;i++) {
std::cout<<f2_array[10*i].x <<" "<< f2_array[20*i].x <<" "<< f2_array[30*i].x <<" "<< f2_array[40*i].x <<" "<< f2_array[50*i].x <<std::endl;
}
cudaFree(array);
cudaFree(f2_array);
return 0;
}