#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>
#include <malloc.h>
#include <string>
#include <cutil.h>
#include <cutil_math.h>
#include <cutil_inline.h>
#include <cufft.h>
#include <Windows.h>
using namespace std;
#define BLOCK 16
#define PI (3.14159265f)
#define detector_xresolution 1320
#define detector_yresolution 1536
#define number_of_projections 50
//////////////////////////////////////////////////////////////////////////
// 函数声明
void bpf();
__global__ void get_derivative_CUDA(float *derivative_data, float *proj_data_0);
void load_projection_and_calc_derivative_TD_SGN(float **derivative_data);
int main()
{
bpf();
return 0;
}
void bpf()
{
long start = GetTickCount();
size_t proj_mem_size = sizeof(float) * detector_xresolution * detector_yresolution;
//////////////////////////////////////////////////////////////////////////
// 分é…内å˜ç©ºé—´ï¼Œå˜æ”¾æŠ•影数æ®çš„导函数
float **derivative_data = (float **)malloc(sizeof(float *) * number_of_projections);
if (!derivative_data)
{
printf("Error malloc data derivative_data!\n");
exit(-1);
}
for(int i = 0; i < number_of_projections; i ++)
{
derivative_data[i] = (float *)malloc(proj_mem_size);
if (!derivative_data[i])
{
printf("Error malloc data derivative_data[%d]!\n", i);
exit(-1);
}
}
//////////////////////////////////////////////////////////////////////////
// è¯»å–æŠ•å½±æ•°æ®å¹¶æ±‚å¯¼åŠ æƒ
load_projection_and_calc_derivative_TD_SGN(derivative_data);
// 释放内å˜ç©ºé—´ï¼ŒæŠ•影数æ®çš„导数
for(int i = 0; i < number_of_projections; i ++)
{
free(derivative_data[i]);
}
free(derivative_data);
long stop = GetTickCount();
printf("time used: %f s\n", (stop - start) / 1000.0f);
}
void load_projection_and_calc_derivative_TD_SGN(float **derivative_data)
{
size_t proj_mem_size = sizeof(float) * detector_xresolution * detector_yresolution;
dim3 bDim_proj(BLOCK, BLOCK);
dim3 gDim_proj((detector_xresolution + BLOCK - 1) / BLOCK, (detector_yresolution + BLOCK - 1) / BLOCK);
FILE *fp;
char proj_name[100];
float *proj_data = (float *)malloc(proj_mem_size);
float *d_proj_data_0;
float *d_derivative_data;
CUDA_SAFE_CALL(cudaMalloc((void **)&d_proj_data_0, proj_mem_size));
CUDA_SAFE_CALL(cudaMalloc((void **)&d_derivative_data, proj_mem_size));
cudaThreadSynchronize();
for(int dr = 0; dr < number_of_projections; dr ++)
{
sprintf(proj_name, "../proj_data/CT%4.4i.proj", dr);
printf("%s\n", proj_name);
fp = fopen(proj_name, "rb");
if (!fp)
{
printf("error open file!\n");
exit(-1);
}
fread(proj_data, 1, proj_mem_size, fp);
fclose(fp);
CUDA_SAFE_CALL(cudaMemcpy(d_proj_data_0, proj_data, proj_mem_size, cudaMemcpyHostToDevice));
get_derivative_CUDA<<<gDim_proj, bDim_proj>>>(d_derivative_data, d_proj_data_0);
cudaThreadSynchronize();
CUDA_SAFE_CALL(cudaMemcpy(derivative_data[dr], d_derivative_data, proj_mem_size, cudaMemcpyDeviceToHost));
cudaThreadSynchronize();
}
for(int i = 0; i < 50; i ++)
{
sprintf(proj_name, "%4.4i", i);
fp = fopen(proj_name, "wb");
fwrite(derivative_data[i], 1, proj_mem_size, fp);
fclose(fp);
}
free(proj_data);
cudaFree(d_proj_data_0);
cudaFree(d_derivative_data);
}
__global__ void get_derivative_CUDA(float *derivative_data, float *proj_data_0)
{
int w = blockIdx.x * blockDim.x + threadIdx.x;
int h = blockIdx.y * blockDim.y + threadIdx.y;
if (h < detector_yresolution || w < detector_xresolution)
{
if (h == 0 || h == detector_yresolution - 1 || w == 0 || w == detector_xresolution - 1)
derivative_data[h * detector_xresolution + w] = 0.0f;
else
{
derivative_data[h * detector_xresolution + w] = proj_data_0[h * detector_xresolution + w + 1] - proj_data_0[h * detector_xresolution + w - 1];
}
}
}
this is my program, just to calculate the derivative of the data.