I have run into this rather strange problem with cudaMemcpyToSymbol while working on a very large code. I noticed that some of the device constants were getting populated with zeroes. The strangest part is that it was only a few of the device constants that were getting populated with zeros, some were populated with the correct data. Initially all of the calls to cudaMemcpyToSymbol were wrapped in CUDA_SAFE_CALL macros. However I noticed that if I remove the CUDA_SAFE_CALL wrapper from the call the data was transferred correctly. The most peculiar aspect of this is that when compiling in release mode the CUDA_SAFE_CALL should expand to nothing. However, the presence of CUDA_SAFE_CALL, even when compiling in release mode, still populated device constant memory with zeros.
No error messages are returned in any case, the data is simply not there. I am assuming that this must be some error in my code that I am overlooking but I am at a loss to find it. If anyone can reproduce this error I would be very interested to hear about it. In fact any insight would be appreciated.
The following is the output of the deviceQuery sdk example.
There is 1 device supporting CUDA
[codebox]Device 0: “Quadro FX 3600M”
Major revision number: 1
Minor revision number: 1
Total amount of global memory: 536543232 bytes
Number of multiprocessors: 16
Number of cores: 128
Total amount of constant memory: 65536 bytes
Total amount of shared memory per block: 16384 bytes
Total number of registers available per block: 8192
Warp size: 32
Maximum number of threads per block: 512
Maximum sizes of each dimension of a block: 512 x 512 x 64
Maximum sizes of each dimension of a grid: 65535 x 65535 x 1
Maximum memory pitch: 262144 bytes
Texture alignment: 256 bytes
Clock rate: 1.25 GHz
Concurrent copy and execution: Yes
Test PASSED
Press ENTER to exit…
[/codebox]
I am developing on windows XP SP3, on an Intel Core2Duo CPU, T9300 @2.5GHz, 3.00 GB of Ram. I am using Visual Studio 2005 professional. The following code was inserted into the template project provided with the SDK and compiled in both release mode and debug mode. In both modes, on my machine, if compiled and run with CUDA_SAFE_CALL present the output does not match, If CUDA_SAFE_CALL is removed the output matched. I have cuda 2.0 installed.
[codebox]// macros
#define ABS(x) ((x<0)?-x:x);
#define error(…) {\
fprintf(stderr, __VA_ARGS__);\
exit(1);\
}
#define ERR strerror(errno)
/other usefull parameters/
#define M 10
#define NC0_B 7
#define NC0 (1<<NC0_B)
// includes, system
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
// includes, project
#include <cutil.h>
//device version of data
device constant short int d_constant[NC0*M];
//HOST version of data
short int h_var[NC0][M] = { /* Q13 */
{ 1486, 2168, 3751, 9074, 12134, 13944, 17983, 19173, 21190, 21820},
{ 1730, 2640, 3450, 4870, 6126, 7876, 15644, 17817, 20294, 21902},
{ 1568, 2256, 3088, 4874, 11063, 13393, 18307, 19293, 21109, 21741},
{ 1733, 2512, 3357, 4708, 6977, 10296, 17024, 17956, 19145, 20350},
{ 1744, 2436, 3308, 8731, 10432, 12007, 15614, 16639, 21359, 21913},
{ 1786, 2369, 3372, 4521, 6795, 12963, 17674, 18988, 20855, 21640},
{ 1631, 2433, 3361, 6328, 10709, 12013, 13277, 13904, 19441, 21088},
{ 1489, 2364, 3291, 6250, 9227, 10403, 13843, 15278, 17721, 21451},
{ 1869, 2533, 3475, 4365, 9152, 14513, 15908, 17022, 20611, 21411},
{ 2070, 3025, 4333, 5854, 7805, 9231, 10597, 16047, 20109, 21834},
{ 1910, 2673, 3419, 4261, 11168, 15111, 16577, 17591, 19310, 20265},
{ 1141, 1815, 2624, 4623, 6495, 9588, 13968, 16428, 19351, 21286},
{ 2192, 3171, 4707, 5808, 10904, 12500, 14162, 15664, 21124, 21789},
{ 1286, 1907, 2548, 3453, 9574, 11964, 15978, 17344, 19691, 22495},
{ 1921, 2720, 4604, 6684, 11503, 12992, 14350, 15262, 16997, 20791},
{ 2052, 2759, 3897, 5246, 6638, 10267, 15834, 16814, 18149, 21675},
{ 1798, 2497, 5617, 11449, 13189, 14711, 17050, 18195, 20307, 21182},
{ 1009, 1647, 2889, 5709, 9541, 12354, 15231, 18494, 20966, 22033},
{ 3016, 3794, 5406, 7469, 12488, 13984, 15328, 16334, 19952, 20791},
{ 2203, 3040, 3796, 5442, 11987, 13512, 14931, 16370, 17856, 18803},
{ 2912, 4292, 7988, 9572, 11562, 13244, 14556, 16529, 20004, 21073},
{ 2861, 3607, 5923, 7034, 9234, 12054, 13729, 18056, 20262, 20974},
{ 3069, 4311, 5967, 7367, 11482, 12699, 14309, 16233, 18333, 19172},
{ 2434, 3661, 4866, 5798, 10383, 11722, 13049, 15668, 18862, 19831},
{ 2020, 2605, 3860, 9241, 13275, 14644, 16010, 17099, 19268, 20251},
{ 1877, 2809, 3590, 4707, 11056, 12441, 15622, 17168, 18761, 19907},
{ 2107, 2873, 3673, 5799, 13579, 14687, 15938, 17077, 18890, 19831},
{ 1612, 2284, 2944, 3572, 8219, 13959, 15924, 17239, 18592, 20117},
{ 2420, 3156, 6542, 10215, 12061, 13534, 15305, 16452, 18717, 19880},
{ 1667, 2612, 3534, 5237, 10513, 11696, 12940, 16798, 18058, 19378},
{ 2388, 3017, 4839, 9333, 11413, 12730, 15024, 16248, 17449, 18677},
{ 1875, 2786, 4231, 6320, 8694, 10149, 11785, 17013, 18608, 19960},
{ 679, 1411, 4654, 8006, 11446, 13249, 15763, 18127, 20361, 21567},
{ 1838, 2596, 3578, 4608, 5650, 11274, 14355, 15886, 20579, 21754},
{ 1303, 1955, 2395, 3322, 12023, 13764, 15883, 18077, 20180, 21232},
{ 1438, 2102, 2663, 3462, 8328, 10362, 13763, 17248, 19732, 22344},
{ 860, 1904, 6098, 7775, 9815, 12007, 14821, 16709, 19787, 21132},
{ 1673, 2723, 3704, 6125, 7668, 9447, 13683, 14443, 20538, 21731},
{ 1246, 1849, 2902, 4508, 7221, 12710, 14835, 16314, 19335, 22720},
{ 1525, 2260, 3862, 5659, 7342, 11748, 13370, 14442, 18044, 21334},
{ 1196, 1846, 3104, 7063, 10972, 12905, 14814, 17037, 19922, 22636},
{ 2147, 3106, 4475, 6511, 8227, 9765, 10984, 12161, 18971, 21300},
{ 1585, 2405, 2994, 4036, 11481, 13177, 14519, 15431, 19967, 21275},
{ 1778, 2688, 3614, 4680, 9465, 11064, 12473, 16320, 19742, 20800},
{ 1862, 2586, 3492, 6719, 11708, 13012, 14364, 16128, 19610, 20425},
{ 1395, 2156, 2669, 3386, 10607, 12125, 13614, 16705, 18976, 21367},
{ 1444, 2117, 3286, 6233, 9423, 12981, 14998, 15853, 17188, 21857},
{ 2004, 2895, 3783, 4897, 6168, 7297, 12609, 16445, 19297, 21465},
{ 1495, 2863, 6360, 8100, 11399, 14271, 15902, 17711, 20479, 22061},
{ 2484, 3114, 5718, 7097, 8400, 12616, 14073, 14847, 20535, 21396},
{ 2424, 3277, 5296, 6284, 11290, 12903, 16022, 17508, 19333, 20283},
{ 2565, 3778, 5360, 6989, 8782, 10428, 14390, 15742, 17770, 21734},
{ 2727, 3384, 6613, 9254, 10542, 12236, 14651, 15687, 20074, 21102},
{ 1916, 2953, 6274, 8088, 9710, 10925, 12392, 16434, 20010, 21183},
{ 3384, 4366, 5349, 7667, 11180, 12605, 13921, 15324, 19901, 20754},
{ 3075, 4283, 5951, 7619, 9604, 11010, 12384, 14006, 20658, 21497},
{ 1751, 2455, 5147, 9966, 11621, 13176, 14739, 16470, 20788, 21756},
{ 1442, 2188, 3330, 6813, 8929, 12135, 14476, 15306, 19635, 20544},
{ 2294, 2895, 4070, 8035, 12233, 13416, 14762, 17367, 18952, 19688},
{ 1937, 2659, 4602, 6697, 9071, 12863, 14197, 15230, 16047, 18877},
{ 2071, 2663, 4216, 9445, 10887, 12292, 13949, 14909, 19236, 20341},
{ 1740, 2491, 3488, 8138, 9656, 11153, 13206, 14688, 20896, 21907},
{ 2199, 2881, 4675, 8527, 10051, 11408, 14435, 15463, 17190, 20597},
{ 1943, 2988, 4177, 6039, 7478, 8536, 14181, 15551, 17622, 21579},
{ 1825, 3175, 7062, 9818, 12824, 15450, 18330, 19856, 21830, 22412},
{ 2464, 3046, 4822, 5977, 7696, 15398, 16730, 17646, 20588, 21320},
{ 2550, 3393, 5305, 6920, 10235, 14083, 18143, 19195, 20681, 21336},
{ 3003, 3799, 5321, 6437, 7919, 11643, 15810, 16846, 18119, 18980},
{ 3455, 4157, 6838, 8199, 9877, 12314, 15905, 16826, 19949, 20892},
{ 3052, 3769, 4891, 5810, 6977, 10126, 14788, 15990, 19773, 20904},
{ 3671, 4356, 5827, 6997, 8460, 12084, 14154, 14939, 19247, 20423},
{ 2716, 3684, 5246, 6686, 8463, 10001, 12394, 14131, 16150, 19776},
{ 1945, 2638, 4130, 7995, 14338, 15576, 17057, 18206, 20225, 20997},
{ 2304, 2928, 4122, 4824, 5640, 13139, 15825, 16938, 20108, 21054},
{ 1800, 2516, 3350, 5219, 13406, 15948, 17618, 18540, 20531, 21252},
{ 1436, 2224, 2753, 4546, 9657, 11245, 15177, 16317, 17489, 19135},
{ 2319, 2899, 4980, 6936, 8404, 13489, 15554, 16281, 20270, 20911},
{ 2187, 2919, 4610, 5875, 7390, 12556, 14033, 16794, 20998, 21769},
{ 2235, 2923, 5121, 6259, 8099, 13589, 15340, 16340, 17927, 20159},
{ 1765, 2638, 3751, 5730, 7883, 10108, 13633, 15419, 16808, 18574},
{ 3460, 5741, 9596, 11742, 14413, 16080, 18173, 19090, 20845, 21601},
{ 3735, 4426, 6199, 7363, 9250, 14489, 16035, 17026, 19873, 20876},
{ 3521, 4778, 6887, 8680, 12717, 14322, 15950, 18050, 20166, 21145},
{ 2141, 2968, 6865, 8051, 10010, 13159, 14813, 15861, 17528, 18655},
{ 4148, 6128, 9028, 10871, 12686, 14005, 15976, 17208, 19587, 20595},
{ 4403, 5367, 6634, 8371, 10163, 11599, 14963, 16331, 17982, 18768},
{ 4091, 5386, 6852, 8770, 11563, 13290, 15728, 16930, 19056, 20102},
{ 2746, 3625, 5299, 7504, 10262, 11432, 13172, 15490, 16875, 17514},
{ 2248, 3556, 8539, 10590, 12665, 14696, 16515, 17824, 20268, 21247},
{ 1279, 1960, 3920, 7793, 10153, 14753, 16646, 18139, 20679, 21466},
{ 2440, 3475, 6737, 8654, 12190, 14588, 17119, 17925, 19110, 19979},
{ 1879, 2514, 4497, 7572, 10017, 14948, 16141, 16897, 18397, 19376},
{ 2804, 3688, 7490, 10086, 11218, 12711, 16307, 17470, 20077, 21126},
{ 2023, 2682, 3873, 8268, 10255, 11645, 15187, 17102, 18965, 19788},
{ 2823, 3605, 5815, 8595, 10085, 11469, 16568, 17462, 18754, 19876},
{ 2851, 3681, 5280, 7648, 9173, 10338, 14961, 16148, 17559, 18474},
{ 1348, 2645, 5826, 8785, 10620, 12831, 16255, 18319, 21133, 22586},
{ 2141, 3036, 4293, 6082, 7593, 10629, 17158, 18033, 21466, 22084},
{ 1608, 2375, 3384, 6878, 9970, 11227, 16928, 17650, 20185, 21120},
{ 2774, 3616, 5014, 6557, 7788, 8959, 17068, 18302, 19537, 20542},
{ 1934, 4813, 6204, 7212, 8979, 11665, 15989, 17811, 20426, 21703},
{ 2288, 3507, 5037, 6841, 8278, 9638, 15066, 16481, 21653, 22214},
{ 2951, 3771, 4878, 7578, 9016, 10298, 14490, 15242, 20223, 20990},
{ 3256, 4791, 6601, 7521, 8644, 9707, 13398, 16078, 19102, 20249},
{ 1827, 2614, 3486, 6039, 12149, 13823, 16191, 17282, 21423, 22041},
{ 1000, 1704, 3002, 6335, 8471, 10500, 14878, 16979, 20026, 22427},
{ 1646, 2286, 3109, 7245, 11493, 12791, 16824, 17667, 18981, 20222},
{ 1708, 2501, 3315, 6737, 8729, 9924, 16089, 17097, 18374, 19917},
{ 2623, 3510, 4478, 5645, 9862, 11115, 15219, 18067, 19583, 20382},
{ 2518, 3434, 4728, 6388, 8082, 9285, 13162, 18383, 19819, 20552},
{ 1726, 2383, 4090, 6303, 7805, 12845, 14612, 17608, 19269, 20181},
{ 2860, 3735, 4838, 6044, 7254, 8402, 14031, 16381, 18037, 19410},
{ 4247, 5993, 7952, 9792, 12342, 14653, 17527, 18774, 20831, 21699},
{ 3502, 4051, 5680, 6805, 8146, 11945, 16649, 17444, 20390, 21564},
{ 3151, 4893, 5899, 7198, 11418, 13073, 15124, 17673, 20520, 21861},
{ 3960, 4848, 5926, 7259, 8811, 10529, 15661, 16560, 18196, 20183},
{ 4499, 6604, 8036, 9251, 10804, 12627, 15880, 17512, 20020, 21046},
{ 4251, 5541, 6654, 8318, 9900, 11686, 15100, 17093, 20572, 21687},
{ 3769, 5327, 7865, 9360, 10684, 11818, 13660, 15366, 18733, 19882},
{ 3083, 3969, 6248, 8121, 9798, 10994, 12393, 13686, 17888, 19105},
{ 2731, 4670, 7063, 9201, 11346, 13735, 16875, 18797, 20787, 22360},
{ 1187, 2227, 4737, 7214, 9622, 12633, 15404, 17968, 20262, 23533},
{ 1911, 2477, 3915, 10098, 11616, 12955, 16223, 17138, 19270, 20729},
{ 1764, 2519, 3887, 6944, 9150, 12590, 16258, 16984, 17924, 18435},
{ 1400, 3674, 7131, 8718, 10688, 12508, 15708, 17711, 19720, 21068},
{ 2322, 3073, 4287, 8108, 9407, 10628, 15862, 16693, 19714, 21474},
{ 2630, 3339, 4758, 8360, 10274, 11333, 12880, 17374, 19221, 19936},
{ 1721, 2577, 5553, 7195, 8651, 10686, 15069, 16953, 18703, 19929}
};
////////////////////////////////////////////////////////////////////////////////
// declaration, forward
void runTest( int argc, char** argv);
void print_Word16_GPU_Constant(const char*var_name,int nElems);
void compare_device_host_constants(short inth_var,const char var_name,int nElems);
////////////////////////////////////////////////////////////////////////////////
// Program main
////////////////////////////////////////////////////////////////////////////////
int
main( int argc, char** argv)
{
runTest( argc, argv);
}
////////////////////////////////////////////////////////////////////////////////
//! Run a simple test for CUDA
////////////////////////////////////////////////////////////////////////////////
void
runTest( int argc, char** argv)
{
CUT_DEVICE_INIT(argc, argv);
short int *temp;
temp = &(h_var[0][0]);
(cudaMemcpyToSymbol("d_constant", temp, // flattened from a 2D array for use with cuda
NC0*M*sizeof(short int), 0, cudaMemcpyHostToDevice));
printf("\n***********************\n\nComparing Host and Device Constants\n\n************************\n\n");
// compare the device and host data
compare_device_host_constants(&(h_var[0][0]),"d_constant",NC0*M);
//print the device data
print_Word16_GPU_Constant("d_constant",NC0*M);
//check for errors
printf("GetLastError returned: %s\n",cudaGetErrorString(cudaGetLastError()));
}
void compare_device_host_constants(short inth_var,const char var_name,int nElems){
short int* h_constant;
int diff = 0;
int temp = 0;
h_constant = (short int*)malloc(nElems*sizeof(short int));
if (!h_constant) {
error("malloc h_constant: %s\n", ERR);
}
CUDA_SAFE_CALL(cudaMemcpyFromSymbol(h_constant, var_name,
nElems*sizeof(short int), 0, cudaMemcpyDeviceToHost));
for(int i = 0;i<nElems;i++){
temp = h_constant[i]-h_var[i];
diff = diff+ABS(temp);
}
if(diff != 0){
printf("***%s does not match Host\n",var_name);
}else{
printf("%s matches Host\n",var_name);
}
free(h_constant);
}
void print_Word16_GPU_Constant(const char*var_name,int nElems){
//bring all the data back from the gpu to the cpu and print it allong with the cpu version
short int* h_constant;
h_constant = (short int*)malloc(nElems*sizeof(short int));
if (!h_constant) {
error("malloc h_constant: %s\n", ERR);
}
CUDA_SAFE_CALL(cudaMemcpyFromSymbol(h_constant, var_name,
nElems*sizeof(short int), 0, cudaMemcpyDeviceToHost));
printf("\ndevice %s\n",var_name);
for(int i = 0;i<nElems;i++){
printf("%d ,", h_constant[i]);
}
printf("\n");
free(h_constant);
}
[/codebox]
This version of the code is the working version. To get the device memory populated with zeros, change the line
[codebox] (cudaMemcpyToSymbol(“d_constant”, temp, // flattened from a 2D array for use with cuda
NC0*M*sizeof(short int), 0, cudaMemcpyHostToDevice));
[/codebox]
to
[codebox] CUDA_SAFE_CALL(cudaMemcpyToSymbol(“d_constant”, temp, // flattened from a 2D array for use with cuda
NC0*M*sizeof(short int), 0, cudaMemcpyHostToDevice));
[/codebox]