I have this code that runs perfectly fine on the CPU, however it does compute results on the GPU. It does compile for the GPU and it does not throw a segmentation fault it just doesn’t do its job.
The CPU testfile:
[codebox] /*
-
Dominik Gothe
-
overlapArea.cu
-
Compute the overlapArea of two polygons and smile : )
-
ASSUMPTIONS ==== ASSUMPTIONS ==== ASSUMPTIONS ====
-
Polygon 1 is to the right of Polygon 0
-
Both poygons have the same number of sides nv_g
-
Will calculate convex hull of overlap, assume concave is not possible
-
overlapping verticies have not been adressed
-
the point (0,0) is illegitimate
-
dug says they can do dot and cross real fast
*/
#include<stdio.h>
#include<math.h>
//#include <cutil_inline.h> // cutilSafeCall
#include // pow( float base, float exp);
using namespace std;
float invalid = 6546746146874; // This serves to illimina
float *xval_0; float *yval_0; // Polygon 0
float *xval_1; float *yval_1; // Polygon 1
float *xval_n; float *yval_n; // Newly Created Polygon
float *overlapArea; // Area of New Polygon
int nv_g; // Number of Verticies of Input Polygons
int nv_n = 0; // Number of verticies of New Polygon
float *angle; // device can’t allocate memory
void FindIntersections() { // ============================================================
============================
// stupid ass computer can't deal with infinity nor does he understant the line x=2; freakin dumbass nvidea piece o crap
float infinitio = 10e20;
for(int i = 0; i<nv_g; i++){
int j = (i+1)%nv_g;
for(int k = 0; k<nv_g; k++){
int l=(k+1)%nv_g;
float ris_0 = yval_0[j] - yval_0[i];
float run_0 = xval_0[j] - xval_0[i];
float sloap_0 = infinitio;
float inter_0 = 0;
float ris_1 = yval_1[k] - yval_1[l];
float run_1 = xval_1[k] - xval_1[l];
float sloap_1 = infinitio;
float inter_1 = 0;
if( run_0 != 0 ){sloap_0 = ris_0/run_0;}
if( run_1 != 0 ){sloap_1 = ris_1/run_1;}
inter_0 = yval_0[i] - xval_0[i]*sloap_0;
inter_1 = yval_1[k] - xval_1[k]*sloap_1;
if( ( sloap_0 - sloap_1 != 0 ) && sloap_0 < infinitio && sloap_1 < infinitio ){
xval_n[i+k*4] = ( inter_0 - inter_1 ) / ( sloap_1 - sloap_0 );
yval_n[i+k*4] = xval_n[i]*sloap_0+inter_0;
} else if( ( sloap_0 < infinitio ) && !( sloap_1 < infinitio ) ){
xval_n[i+k*4] = xval_1[k];
yval_n[i+k*4] = xval_1[l]*sloap_0+inter_0;
} else if( ( sloap_1 < infinitio ) && !( sloap_0 < infinitio ) ){
xval_n[i+k*4] = xval_0[i];
yval_n[i+k*4] = xval_0[j]*sloap_1+inter_1;
} // the parralell case?? mhh do nothing
if(
(//----------------------------------------------------------------------------
( //============================================================
===========
( ( (xval_n[i+k*4] >= xval_0[i]) && (yval_n[i+k*4] >= yval_0[i]) )
&& ( (xval_n[i+k*4] <= xval_0[j]) && (yval_n[i+k*4] <= yval_0[j]) )
)
||
( ( (xval_n[i+k*4] >= xval_0[i]) && (yval_n[i+k*4] <= yval_0[i]) )
&& ( (xval_n[i+k*4] <= xval_0[j]) && (yval_n[i+k*4] >= yval_0[j]) )
)
||
( ( (xval_n[i+k*4] <= xval_0[i]) && (yval_n[i+k*4] >= yval_0[i]) )
&& ( (xval_n[i+k*4] >= xval_0[j]) && (yval_n[i+k*4] <= yval_0[j]) )
)
||
( ( (xval_n[i+k*4] <= xval_0[i]) && (yval_n[i+k*4] <= yval_0[i]) )
&& ( (xval_n[i+k*4] >= xval_0[j]) && (yval_n[i+k*4] >= yval_0[j]) )
)
) //============================================================
===========
)//-----------------------------------------------------------------------------
&&
(//-----------------------------------------------------------------------------
( //============================================================
===========
(
( (xval_n[i+k*4] >= xval_1[k]) && (yval_n[i+k*4] >= yval_1[k]) )
&& ( (xval_n[i+k*4] <= xval_1[l]) && (yval_n[i+k*4] <= yval_1[l]) )
)
||
(
( (xval_n[i+k*4] >= xval_1[k]) && (yval_n[i+k*4] <= yval_1[k]) )
&& ( (xval_n[i+k*4] <= xval_1[l]) && (yval_n[i+k*4] >= yval_1[l]) )
)
||
(
( (xval_n[i+k*4] <= xval_1[k]) && (yval_n[i+k*4] >= yval_1[k]) )
&& ( (xval_n[i+k*4] >= xval_1[l]) && (yval_n[i+k*4] <= yval_1[l]) )
)
||
(
( (xval_n[i+k*4] <= xval_1[k]) && (yval_n[i+k*4] <= yval_1[k]) )
&& ( (xval_n[i+k*4] >= xval_1[l]) && (yval_n[i+k*4] >= yval_1[l]) )
)
) //============================================================
==========
)//----------------------------------------------------------------------------
) { nv_n++; }
else { xval_n[i+k*4] = invalid; yval_n[i+k*4] = invalid; }
}
}
} // ============================================================
==========================================
int main(){
nv_g = 4;
xval_0 = (float*) malloc( sizeof(float)*nv_g );
yval_0 = (float*) malloc( sizeof(float)*nv_g );
// Allocate memory for Polygon_1 on the _host;
xval_1 = (float*) malloc( sizeof(float)*nv_g );
yval_1 = (float*) malloc( sizeof(float)*nv_g );
// Create the polygon_0 on _host
xval_0[0] = -.5; yval_0[0] = -.5;
xval_0[1] = .5; yval_0[1] = -.5;
xval_0[2] = .5; yval_0[2] = .5;
xval_0[3] = -.5; yval_0[3] = .5;
// Create the polygon_1 on _host
xval_1[0] = 0; yval_1[0] = -0.707106781;
xval_1[1] = 0.707106781; yval_1[1] = 0;
xval_1[2] = 0; yval_1[2] = 0.707106781;
xval_1[3] = -0.707106781; yval_1[3] = 0;
xval_n = (float*) malloc( sizeof(float)*nv_g*nv_g );
yval_n = (float*) malloc( sizeof(float)*nv_g*nv_g );
FindIntersections();
for(int i = 0; i < 4; i++){
for(int j = 0; j < 4; j++){
cout<<"("<<i<<","<<j<<"): ";
cout.precision(2);
cout.setf(ios::fixed,ios::floatfield);
if(xval_n[i+j*4] != invalid || true){cout<<xval_n[i+j*4]<<" , "<<yval_n[i+j*4];}
cout<<endl;
}
}
}[/codebox]
The same on the GPU:
[codebox] /*
-
Dominik Gothe
-
overlapArea.cu
-
Compute the overlapArea of two polygons and smile : )
-
- This pipeline can be easily adjusted to recive pointers or values
-
Ofcourse a pointer to the storage area for the Overlap is necessary
-
- define invalid point, a point that can not possibly occur
-
ASSUMPTIONS ==== ASSUMPTIONS ==== ASSUMPTIONS ====
-
- The Overlap Area is Convex
-
- Both poygons have the same number of sides nv_g
-
- overlapping verticies have not been adressed ???
*/
#include <stdio.h>
#include <math.h>
#include <cutil_inline.h> // cutilSafeCall
#include // pow( float base, float exp);
using namespace std;
device float invalid = 6416415;
device float *xval_0; device float *yval_0; // Polygon 0
device float *xval_1; device float *yval_1; // Polygon 1
device float *xval_n; device float *yval_n; // Newly Created Polygon
device float *overlapArea; // Area of New Polygon
device int nv_g; // Number of Verticies of Input Polygons
device int nv_n = 0; // Number of verticies of New Polygon
device float *angle; // device can’t allocate memory
device void FindIntersections() { // ============================================================
============================
float infinitio = 10e20;
for(int i = 0; i<nv_g; i++){
int j = (i+1)%nv_g;
for(int k = 0; k<nv_g; k++){
int l=(k+1)%nv_g;
float ris_0 = yval_0[j] - yval_0[i];
float run_0 = xval_0[j] - xval_0[i];
float sloap_0 = infinitio;
float inter_0 = 0;
float ris_1 = yval_1[k] - yval_1[l];
float run_1 = xval_1[k] - xval_1[l];
float sloap_1 = infinitio;
float inter_1 = 0;
if( run_0 != 0 ){sloap_0 = ris_0/run_0;}
if( run_1 != 0 ){sloap_1 = ris_1/run_1;}
inter_0 = yval_0[i] - xval_0[i]*sloap_0;
inter_1 = yval_1[k] - xval_1[k]*sloap_1;
if( ( sloap_0 - sloap_1 != 0 ) && sloap_0 < infinitio && sloap_1 < infinitio ){
xval_n[i+k*4] = ( inter_0 - inter_1 ) / ( sloap_1 - sloap_0 );
yval_n[i+k*4] = xval_n[i]*sloap_0+inter_0;
} else if( ( sloap_0 < infinitio ) && !( sloap_1 < infinitio ) ){
xval_n[i+k*4] = xval_1[k];
yval_n[i+k*4] = xval_1[l]*sloap_0+inter_0;
} else if( ( sloap_1 < infinitio ) && !( sloap_0 < infinitio ) ){
xval_n[i+k*4] = xval_0[i];
yval_n[i+k*4] = xval_0[j]*sloap_1+inter_1;
} // the parralell case?? mhh do nothing
if(
(//----------------------------------------------------------------------------
( //============================================================
===========
( ( (xval_n[i+k*4] >= xval_0[i]) && (yval_n[i+k*4] >= yval_0[i]) )
&& ( (xval_n[i+k*4] <= xval_0[j]) && (yval_n[i+k*4] <= yval_0[j]) )
)
||
( ( (xval_n[i+k*4] >= xval_0[i]) && (yval_n[i+k*4] <= yval_0[i]) )
&& ( (xval_n[i+k*4] <= xval_0[j]) && (yval_n[i+k*4] >= yval_0[j]) )
)
||
( ( (xval_n[i+k*4] <= xval_0[i]) && (yval_n[i+k*4] >= yval_0[i]) )
&& ( (xval_n[i+k*4] >= xval_0[j]) && (yval_n[i+k*4] <= yval_0[j]) )
)
||
( ( (xval_n[i+k*4] <= xval_0[i]) && (yval_n[i+k*4] <= yval_0[i]) )
&& ( (xval_n[i+k*4] >= xval_0[j]) && (yval_n[i+k*4] >= yval_0[j]) )
)
) //============================================================
===========
)//-----------------------------------------------------------------------------
&&
(//-----------------------------------------------------------------------------
( //============================================================
===========
(
( (xval_n[i+k*4] >= xval_1[k]) && (yval_n[i+k*4] >= yval_1[k]) )
&& ( (xval_n[i+k*4] <= xval_1[l]) && (yval_n[i+k*4] <= yval_1[l]) )
)
||
(
( (xval_n[i+k*4] >= xval_1[k]) && (yval_n[i+k*4] <= yval_1[k]) )
&& ( (xval_n[i+k*4] <= xval_1[l]) && (yval_n[i+k*4] >= yval_1[l]) )
)
||
(
( (xval_n[i+k*4] <= xval_1[k]) && (yval_n[i+k*4] >= yval_1[k]) )
&& ( (xval_n[i+k*4] >= xval_1[l]) && (yval_n[i+k*4] <= yval_1[l]) )
)
||
(
( (xval_n[i+k*4] <= xval_1[k]) && (yval_n[i+k*4] <= yval_1[k]) )
&& ( (xval_n[i+k*4] >= xval_1[l]) && (yval_n[i+k*4] >= yval_1[l]) )
)
) //============================================================
==========
)//----------------------------------------------------------------------------
) { nv_n++; }
else { xval_n[i+k*4] = invalid; yval_n[i+k*4] = invalid; }
}
}
} // ============================================================
==========================================
global void ComputeOverlap( //float *xval_0_p, float *yval_0_p,
//float *xval_1_p, float *yval_1_p,
float *xval_n_p, float *yval_n_p,
int nv_g_p,
float *overlapArea_p,
float *angle_p ){ // =========================================================
//xval_0 = xval_0_p; yval_0 = yval_0_p;
//xval_1 = xval_1_p; yval_1 = yval_1_p;
xval_n = xval_n_p; yval_n = yval_n_p;
overlapArea = overlapArea_p;
nv_g = nv_g_p;
angle = angle_p;
FindIntersections();
// RemoveInvalids();
// Merge();
// Order();
// PolyArea();
} // ============================================================
==========================================
int main(){
// Create the Polygon on _host and copy to _device =============================
const int nv_g_h = 4; // number of verticies for each polygon (global)
// Allocate memory for Polygon_0 on the _host;
float *xval_0_h; cudaMallocHost ( (void**)&xval_0_h, sizeof(float)*nv_g_h );
float *yval_0_h; cudaMallocHost ( (void**)&yval_0_h, sizeof(float)*nv_g_h );
// Allocate memory for Polygon_1 on the _host;
float *xval_1_h; cudaMallocHost ( (void**)&xval_1_h, sizeof(float)*nv_g_h );
float *yval_1_h; cudaMallocHost ( (void**)&yval_1_h, sizeof(float)*nv_g_h );
// Allocate memory for Polygon_1 on the _host;
float *xval_n_h; cudaMallocHost ( (void**)&xval_n_h, sizeof(float)*nv_g_h*nv_g_h );
float *yval_n_h; cudaMallocHost ( (void**)&yval_n_h, sizeof(float)*nv_g_h*nv_g_h );
// Allocate memory for Polygon_0 on the _device;
float *xval_0_d; cudaMalloc ( (void**)&xval_0_d, sizeof(float)*nv_g_h );
float *yval_0_d; cudaMalloc ( (void**)&yval_0_d, sizeof(float)*nv_g_h );
// Allocate memory for Polygon_1 on the _device;
float *xval_1_d; cudaMalloc ( (void**)&xval_1_d, sizeof(float)*nv_g_h );
float *yval_1_d; cudaMalloc ( (void**)&yval_1_d, sizeof(float)*nv_g_h );
// Allocate memory for Polygon_1 on the _device;
float *xval_n_d; cudaMalloc ( (void**)&xval_n_d, sizeof(float)*nv_g_h*nv_g_h );
float *yval_n_d; cudaMalloc ( (void**)&yval_n_d, sizeof(float)*nv_g_h*nv_g_h );
// Allocate memory for the Overlap Area on the host
float *overlap_h; cudaMallocHost ( (void**)&overlap_h, sizeof(float) );
// Allocate memory for the Overlap Area on the device
float *overlap_d; cudaMalloc ( (void**)&overlap_d, sizeof(float) );
// Create the polygon_0 on _host
xval_0_h[0] = -.5; yval_0_h[0] = -.5;
xval_0_h[1] = .5; yval_0_h[1] = -.5;
xval_0_h[2] = .5; yval_0_h[2] = .5;
xval_0_h[3] = -.5; yval_0_h[3] = .5;
// Create the polygon_1 on _host
xval_1_h[0] = 0; yval_1_h[0] = -0.707106781;
xval_1_h[1] = 0.707106781; yval_1_h[1] = 0;
xval_1_h[2] = 0; yval_1_h[2] = 0.707106781;
xval_1_h[3] = -0.707106781; yval_1_h[3] = 0;
// Initilize the new polygon to zero
for(int i = 0; i < nv_g_h*nv_g_h; i++){
xval_n_h[i] = 0;
yval_n_h[i] = 0;
}
cutilSafeCall( cudaMemcpy( xval_n_d, xval_n_h, sizeof(float)*nv_g_h*nv_g_h, cudaMemcpyHostToDevice ) );
cutilSafeCall( cudaMemcpy( yval_n_d, yval_n_h, sizeof(float)*nv_g_h*nv_g_h, cudaMemcpyHostToDevice ) );
// Initilize the overlap area
overlap_h[0] = 0;
cutilSafeCall( cudaMemcpy( overlap_d, overlap_h, sizeof(float)*nv_g_h*nv_g_h, cudaMemcpyHostToDevice ) );
// Create the Angle array that is needed for the device
float *angle_d; cudaMalloc ( (void**)&angle_d, sizeof(float)*nv_g*nv_g );
ComputeOverlap<<<1,1>>>(
xval_0_d, yval_0_d,
xval_1_d, yval_1_d,
xval_n_d, yval_n_d,
nv_g_h,
overlap_d,
angle_d );
cutilSafeCall( cudaMemcpy( overlap_h, overlap_d, sizeof(float), cudaMemcpyDeviceToHost ) );
cout<<overlap_h[0]<<endl<<endl;
// BEGIN DEBUG
cutilSafeCall( cudaMemcpy( xval_n_h, xval_n_d, sizeof(float)*nv_g_h*nv_g_h, cudaMemcpyDeviceToHost ) );
cutilSafeCall( cudaMemcpy( yval_n_h, yval_n_d, sizeof(float)*nv_g_h*nv_g_h, cudaMemcpyDeviceToHost ) );
for(int i = 0; i < nv_g_h*nv_g_h; i++){
cout.precision(2);
cout.setf(ios::fixed,ios::floatfield);
cout<<xval_n_h[i]<<" , "<<yval_n_h[i]<<endl;
}
// END DEBUG
}
// END DEBUG
}[/codebox]
please help, coffee no longer does :wacko:
my suspects are either copying crap back and forth from the device or the the fact that I declared global variables in my kernal.
thanks
thanks - Dominik Gothe