cudaErrorInvalidResourceHandle on cudaCreateTextureObject

Hi,

I am trying to create simple textures to store data that does not change over the course of the program. I followed the example given in

https://devblogs.nvidia.com/parallelforall/cuda-pro-tip-kepler-texture-objects-improve-performance-and-flexibility/

However, I am getting an error while trying to create one of the texture objects. The ones created before the problematic one (highlighted in the code) are created just fine.

Thank you for any advice, as this is my first experience using textures in code.

Error:

========= Program hit cudaErrorInvalidResourceHandle (error 33) due to "invalid resource handle" on CUDA API call to cudaCreateTextureObject. 
=========     Saved host backtrace up to driver entry point at error
=========     Host Frame:/usr/lib/x86_64-linux-gnu/libcuda.so.1 [0x984bf3]

Code:

int *d_idxs_m;
	int *mask;
	float *weight;
	float *data1d_managed;

	/*
	 * Byte counts of the individual buffers allocated for the device.
	 * They are tracked because the number of threads has to be limited
	 * according to the current memory usage.
	 */
	size_t memDataSz = 0;
	size_t memMaskSz = 0;
	size_t memWtSz = 0;
	size_t memIdxSz = 0;
	size_t memdlObjSz = 0;
	size_t memNNObjSz = 0;
	size_t memRsltTSz = 0;

	int TOTAL_GLOBAL_MEMORY = 0;


	// Host-side result buffer holding one Node per element.
	Node *result = (Node*) malloc(nelements * sizeof(Node));

	// Size every buffer in bytes, then allocate it as managed memory.
	// The (size_t) casts on the nrows * ncolumns products force the
	// multiplication to be carried out in size_t; without them the product of
	// two int operands would be evaluated in int first and could overflow
	// for large matrices.

	// One int per row.
	memIdxSz = (size_t) nrows * sizeof(int);
	CUDA_CHECK_RETURN(cudaMallocManaged(&d_idxs_m, memIdxSz));

	// nrows x ncolumns floats, filled from the caller-provided `data`.
	memDataSz = (size_t) nrows * (size_t) ncolumns * sizeof(float);
	CUDA_CHECK_RETURN(cudaMallocManaged(&data1d_managed, memDataSz));

	CUDA_CHECK_RETURN(cudaMemcpy(data1d_managed, data, memDataSz, cudaMemcpyHostToDevice));

	// nrows x ncolumns ints, same layout as the data buffer.
	memMaskSz = (size_t) nrows * (size_t) ncolumns * sizeof(int);
	CUDA_CHECK_RETURN(cudaMallocManaged(&mask, memMaskSz));

	// One float per column.
	memWtSz = (size_t) ncolumns * sizeof(float);
	CUDA_CHECK_RETURN(cudaMallocManaged(&weight, memWtSz));

	// One Node per element.
	memRsltTSz = nelements * sizeof(Node);
	CUDA_CHECK_RETURN(cudaMallocManaged(&resultLocal, memRsltTSz));

	// One int per element.
	memdlObjSz = nelements * sizeof(int);
	CUDA_CHECK_RETURN(cudaMallocManaged(&delObjIdxs, memdlObjSz));

	// One int per element.
	memNNObjSz = nelements * sizeof(int);
	CUDA_CHECK_RETURN(cudaMallocManaged(&nnObjIdxs, memNNObjSz));

	// Every column starts with a weight of one.
	for (int i = 0; i < ncolumns; i++)
		weight[i] = 1.0f;

	assert(mask != NULL);

	// mask[k] is 1 where the corresponding data value is non-zero, else 0.
	// The flat index is built in size_t so that i * ncolumns cannot overflow
	// int when the matrix holds more than INT_MAX cells.
	for (int i = 0; i < nrows; i++) {
		const size_t rowBase = (size_t) i * (size_t) ncolumns;
		for (int j = 0; j < ncolumns; j++) {
			const size_t cell = rowBase + (size_t) j;
			mask[cell] = (data1d_managed[cell] != 0) ? 1 : 0;
		}
	}

	// Identity mapping: entry i holds the row index i.
	for (int i = 0; i < nrows; i++) {
		d_idxs_m[i] = i;
	}



	// Resource descriptors: each one describes a linear buffer with a single
	// 32-bit channel (float or signed int) that a texture object will read.
	// Every descriptor is zeroed first so that unused fields (the y/z/w
	// channel widths, for example) are well defined.

	// Data values: 32-bit float.
	cudaResourceDesc resDescData;
	memset(&resDescData, 0, sizeof(resDescData));
	resDescData.resType = cudaResourceTypeLinear;
	resDescData.res.linear.devPtr = data1d_managed;
	resDescData.res.linear.desc.f = cudaChannelFormatKindFloat;
	resDescData.res.linear.desc.x = 32; // bits per channel
	resDescData.res.linear.sizeInBytes = memDataSz;

	// Mask values: 32-bit signed int.
	cudaResourceDesc resDescMask;
	memset(&resDescMask, 0, sizeof(resDescMask));
	resDescMask.resType = cudaResourceTypeLinear;
	resDescMask.res.linear.devPtr = mask;
	resDescMask.res.linear.desc.f = cudaChannelFormatKindSigned;
	resDescMask.res.linear.desc.x = 32; // bits per channel
	resDescMask.res.linear.sizeInBytes = memMaskSz;

	// Weights: 32-bit float.
	cudaResourceDesc resDescWt;
	memset(&resDescWt, 0, sizeof(resDescWt));
	resDescWt.resType = cudaResourceTypeLinear;
	resDescWt.res.linear.devPtr = weight;
	resDescWt.res.linear.desc.f = cudaChannelFormatKindFloat;
	resDescWt.res.linear.desc.x = 32; // bits per channel
	resDescWt.res.linear.sizeInBytes = memWtSz;

	// Row indices: 32-bit signed int.
	// The memset must target resDescIdx itself. Zeroing resDescWt here would
	// wipe the weight descriptor that was just filled in (turning it into an
	// array-type resource with a null handle) and leave resDescIdx with
	// uninitialised fields.
	cudaResourceDesc resDescIdx;
	memset(&resDescIdx, 0, sizeof(resDescIdx));
	resDescIdx.resType = cudaResourceTypeLinear;
	resDescIdx.res.linear.devPtr = d_idxs_m;
	resDescIdx.res.linear.desc.f = cudaChannelFormatKindSigned;
	resDescIdx.res.linear.desc.x = 32; // bits per channel
	resDescIdx.res.linear.sizeInBytes = memIdxSz;

	// Texture descriptors, one per texture object. Each is zeroed and then
	// set to return elements in their stored type (cudaReadModeElementType).
	cudaTextureDesc texDescData, texDescMask, texDescWt, texDescIdx;

	memset(&texDescData, 0, sizeof(cudaTextureDesc));
	memset(&texDescMask, 0, sizeof(cudaTextureDesc));
	memset(&texDescWt, 0, sizeof(cudaTextureDesc));
	memset(&texDescIdx, 0, sizeof(cudaTextureDesc));

	texDescData.readMode = cudaReadModeElementType;
	texDescMask.readMode = cudaReadModeElementType;
	texDescWt.readMode = cudaReadModeElementType;
	texDescIdx.readMode = cudaReadModeElementType;

	// Create the four texture objects; this only has to be done once.
	//
	// The return code of each cudaCreateTextureObject call is checked
	// directly, so a failure is attributed to the call that produced it
	// rather than to whatever error the runtime recorded last. On failure the
	// texture objects created so far are destroyed, the recorded error is
	// cleared with cudaGetLastError(), and NULL is returned.
	// NOTE(review): buffers allocated earlier in this function are not
	// released on these error paths — confirm who owns them.
	cudaTextureObject_t texData = 0;
	err = cudaCreateTextureObject(&texData, &resDescData, &texDescData, NULL);
	if (cudaSuccess != err) {
		printf("\n@texData error: %s\n",	cudaGetErrorString(err));
		(void) cudaGetLastError();
		return NULL;
	}

	// NOTE: this is the call at which the failure
	// (cudaErrorInvalidResourceHandle) was reported.
	cudaTextureObject_t texMask = 0;
	err = cudaCreateTextureObject(&texMask, &resDescMask, &texDescMask, NULL);
	if (cudaSuccess != err) {
		printf("\n@texMask error: %s\n", cudaGetErrorString(err));
		cudaDestroyTextureObject(texData);
		(void) cudaGetLastError();
		return NULL;
	}

	cudaTextureObject_t texWt = 0;
	err = cudaCreateTextureObject(&texWt, &resDescWt, &texDescWt, NULL);
	if (cudaSuccess != err) {
		printf("\n@texWt error: %s\n", cudaGetErrorString(err));
		cudaDestroyTextureObject(texMask);
		cudaDestroyTextureObject(texData);
		(void) cudaGetLastError();
		return NULL;
	}

	cudaTextureObject_t texIdx = 0;
	err = cudaCreateTextureObject(&texIdx, &resDescIdx, &texDescIdx, NULL);
	if (cudaSuccess != err) {
		printf("\n@texIdx error: %s\n", cudaGetErrorString(err));
		cudaDestroyTextureObject(texWt);
		cudaDestroyTextureObject(texMask);
		cudaDestroyTextureObject(texData);
		(void) cudaGetLastError();
		return NULL;
	}