compiler weirdness...

After adding this debug argument to a function, I get “LaunchError: cuMemcpyDtoH failed: launch failed”. Most of my code has bounds and loop-iteration checks in it, so there shouldn’t be any memory corruption within the kernels. I’m sure I’ve cast pointers to ints before. I’m interested to hear whether anyone has seen this before. Apologies in advance if it’s my bug.

With all of the debug code turned on (which is what I’m running now), the kernel uses 88 [words per thread?] of local memory and 11816 [bytes?] of shared memory. I capped the register count at 16 (the launch shouldn’t fail regardless).

diff -r 96ab2b4fba37 src/kernels/encode/compress_values.h

--- a/src/kernels/encode/compress_values.h	Fri Apr 10 10:36:02 2009 -0700

+++ b/src/kernels/encode/compress_values.h	Fri Apr 10 10:38:07 2009 -0700

@@ -32,7 +32,7 @@

 };

// nbits will always be > 0

-__device__ uint8_t quantize(float v, float quality, int nbits, int y_idx) {

+__device__ uint8_t quantize(float v, float quality, int nbits, int y_idx, int ptr) {

	 int quantized = __float2int_rn(v * quality);

	 if (nbits >= 8) {

		 quantized = (quantized >> (nbits - 8)) + 127;

@@ -53,6 +53,7 @@

			 _failure_info_i_[6] = write_values_indices[tid].get_offset();

			 _failure_info_i_[7] = bitmap.block_width;

			 _failure_info_i_[8] = bitmap.block_height;

+			_failure_info_i_[9] = ptr;

			 _failure_info_f_[0] = v;

			 float quantizedf = roundf(v * quality);

@@ -95,7 +96,8 @@

		 float *ptr = write_value_info.get_ptr(offset);

		 // TODO - save values from scan kernel.

		 quantized_in[y_idx][tid] = quantize(*ptr, quality,

-			write_values_indices[tid].get_value_nbits(), y_idx);

+			write_values_indices[tid].get_value_nbits(), y_idx,

+			((long)ptr) );

		 y_idx++;

		 y++;