Dear All,
I am stuck with a weird problem (it does not happen if I execute the same code on the CPU).
When executing the following code, I get the following error in GDB:
"in xGenome::CopyFromGlobal (this=0xffffffff01000000, __xThreadInfo={data = {x = 0, y = 0, z = 0, w = 0}}, __g_ucGenomes=0x200400000 "")
at kernel.cu:100"
(I marked line 100 in the code).
I am worried that the members of xGenome are somehow not initialised in memory (perhaps because of the nested unions)?
#include <stdio.h>
// Not referenced by any code visible in this listing; presumably a feature
// switch tested with #ifdef elsewhere — TODO confirm.
#define m_fit_SAFE_MEMORY_MAPPING
// Size in bytes of one genome's payload (xGenome::data.one_d) and the trip
// count of the byte-copy loop in xGenome::CopyFromGlobal.
#define mAlignedByteLengthGenome 8
// Divisor used to split the flat thread ID into bank and warp IDs; also the
// number of xGenome slots held by an xGenomeSet.
#define mWarpSize 32
// Row width used when flattening (threadIdx.x, threadIdx.y) into one thread ID.
#define m_fit_THREAD_DIM_X 32
// Multiplied with m_fit_THREAD_DIM_X in xThreadInfo::GlobId as the
// threads-per-block factor.
#define m_fit_THREAD_DIM_Y 1
// Row width used when flattening (blockIdx.x, blockIdx.y) into one block ID.
#define m_fit_BLOCK_DIM_X 1
/**
 * Bundle of per-thread indices used to address genome data.
 *
 * All member functions are device-only. Only the constructor and GlobId()
 * have definitions visible next to this declaration; the remaining accessors
 * are declared here and presumably return the matching field of `data`
 * (verify against their definitions).
 */
struct xThreadInfo {
    // Packed IDs, as filled in by the constructor:
    //   x = bank ID, y = warp ID, z = flat thread ID, w = flat block ID.
    ushort4 data;

    // Builds the ID bundle for the calling thread.
    __device__ xThreadInfo(unsigned short __usThreadIdX,
                           unsigned short __usThreadIdY,
                           unsigned short __usBlockIdX,
                           unsigned short __usBlockIdY);

    __device__ unsigned short WarpId();
    __device__ unsigned short BankId();
    __device__ unsigned short FlatThreadId();
    __device__ unsigned short FlatBlockId();

    // Offset of this thread's element, scaled by the element size in bytes.
    __device__ unsigned short GlobId(unsigned short __usTypeLength);

    __device__ void __DEBUG_CALL();
};
/**
 * Fills `data` with the calling thread's bank, warp, flat-thread and
 * flat-block IDs.
 *
 * NOTE(review): the four arguments are never read; every value is derived
 * from the built-in threadIdx / blockIdx instead. The flattening uses the
 * compile-time m_fit_* dimensions, not blockDim / gridDim.
 */
__device__ xThreadInfo::xThreadInfo(unsigned short __usThreadIdX,
                                    unsigned short __usThreadIdY,
                                    unsigned short __usBlockIdX,
                                    unsigned short __usBlockIdY) {
    // Narrow to 16 bits first: the bank and warp IDs are derived from the
    // value as it is stored in data.z, not from the wider intermediate.
    const unsigned short flatThread = static_cast<unsigned short>(
        threadIdx.y * m_fit_THREAD_DIM_X + threadIdx.x);
    const unsigned short lane = flatThread % mWarpSize;

    this->data.z = flatThread;                                       // flat thread ID
    this->data.x = lane;                                             // bank ID
    this->data.y = (flatThread - lane) / mWarpSize;                  // warp ID
    this->data.w = blockIdx.y * m_fit_BLOCK_DIM_X + blockIdx.x;      // flat block ID
}
/**
 * Offset of this thread's element inside a flat array of fixed-size elements.
 *
 * Computes (flatBlockId * threadsPerBlock + flatThreadId) * __usTypeLength,
 * where threadsPerBlock is m_fit_THREAD_DIM_X * m_fit_THREAD_DIM_Y.
 *
 * @param __usTypeLength size of one element (the caller in this file passes
 *                       sizeof(xGenome)).
 * @return the offset truncated to 16 bits.
 *
 * NOTE(review): because the return type is unsigned short, the offset wraps
 * at 65536; widening it would need a matching change to the declaration.
 */
__device__ unsigned short xThreadInfo::GlobId(unsigned short __usTypeLength) {
    // Work in unsigned int so that large block IDs or element sizes wrap
    // (well-defined) instead of overflowing a promoted signed int.
    const unsigned int threadsPerBlock = m_fit_THREAD_DIM_X * m_fit_THREAD_DIM_Y;
    const unsigned int globalThread = this->data.w * threadsPerBlock + this->data.z;
    return static_cast<unsigned short>(globalThread * __usTypeLength);
}
/**
 * One genome: a fixed-size buffer of mAlignedByteLengthGenome bytes.
 *
 * The payload is wrapped in a single-member union, so it is reached as
 * `data.one_d`.
 */
struct xGenome {
    union {
        unsigned char one_d[mAlignedByteLengthGenome];  // raw genome bytes
    } data;

    // Copies the calling thread's genome bytes out of __g_ucGenomeSet into
    // this object; the source offset comes from __xThreadInfo.GlobId().
    // (The stray `__device__` that followed this declaration had nothing to
    // qualify and has been dropped.)
    __device__ void CopyFromGlobal(xThreadInfo __xThreadInfo,
                                   unsigned char *__g_ucGenomeSet);
};
/**
 * A fixed set of mWarpSize genomes.
 *
 * The array is wrapped in a single-member union, so it is reached as
 * `data.multi_d`.
 */
struct xGenomeSet {
    union {
        xGenome multi_d[mWarpSize];  // one xGenome per slot
    } data;

    // Selects one slot of multi_d for the calling thread and forwards to
    // xGenome::CopyFromGlobal. Declared with a single __device__ qualifier
    // (it was previously repeated).
    __device__ void CopyFromGlobal(xThreadInfo __xThreadInfo,
                                   unsigned char *__g_ucGenomeSet);
};
/**
 * Loads the calling thread's genome from global memory into this set.
 *
 * @param __xThreadInfo  ID bundle of the calling thread.
 * @param __g_ucGenomes  base pointer of the genome bytes to read from.
 *
 * The destination slot is chosen by BankId() rather than WarpId(): with the
 * configured 32x1 thread block every thread has the same warp ID, so indexing
 * by warp made all threads write slot 0 concurrently and left the other slots
 * untouched.
 *
 * NOTE(review): assumes BankId() returns the flat thread ID modulo mWarpSize
 * (the value the constructor stores in data.x) — confirm against its
 * definition. Blocks with more than one warp would need a larger set, since
 * threads from different warps share a bank ID.
 */
__device__ void xGenomeSet::CopyFromGlobal(xThreadInfo __xThreadInfo,
                                           unsigned char *__g_ucGenomes) {
    xGenome &slot = this->data.multi_d[__xThreadInfo.BankId()];
    slot.CopyFromGlobal(__xThreadInfo, __g_ucGenomes);
}
/**
 * Copies mAlignedByteLengthGenome bytes from __g_ucGenomes into this genome.
 *
 * @param __xThreadInfo  ID bundle of the calling thread; its
 *                       GlobId(sizeof(xGenome)) gives the source offset.
 * @param __g_ucGenomes  base pointer of the genome bytes to read from.
 *
 * No bounds checking is done on the source buffer; the caller must make sure
 * it covers offset + mAlignedByteLengthGenome bytes.
 */
__device__ void xGenome::CopyFromGlobal(xThreadInfo __xThreadInfo,
                                        unsigned char *__g_ucGenomes) {
    // The source offset does not depend on the loop index, so compute it once.
    const unsigned short sourceOffset = __xThreadInfo.GlobId(sizeof(xGenome));

    for (int i = 0; i < mAlignedByteLengthGenome; ++i) { // LINE 100!!! (location reported by GDB)
        this->data.one_d[i] = __g_ucGenomes[sourceOffset + i];
    }
}
This is the executed kernel:
/**
 * This is the kernel we execute.
 *
 * Each thread builds its own xThreadInfo and copies its genome from `dest`
 * into a block-shared xGenomeSet. Nothing is written back to `dest`.
 *
 * NOTE(review): the index math in xThreadInfo uses the compile-time m_fit_*
 * dimensions, so the launch configuration presumably has to match them —
 * confirm at the call site.
 */
__global__ void multiply_them(unsigned char *dest)
{
    __shared__ xGenomeSet blockGenomes;  // shared by all threads of the block

    xThreadInfo self(threadIdx.x, threadIdx.y, blockIdx.x, blockIdx.y);
    blockGenomes.CopyFromGlobal(self, dest);
}
So that seems to mean that “this” is not a valid memory address, but I do not understand why that is.
I am very frustrated at the moment, as my code contains a lot of nested unions.
Please help.
Thanks and regards
Christian