#include #define N (size_t)200 __device__ char sharedMem[N]; __device__ void upperCase(size_t i){ sharedMem[i] -= 32; } __device__ void initSharedMem(size_t i){ sharedMem[i]=(char)'-'; } __global__ void initializeElements( char *d_a) { size_t i = blockIdx.x * blockDim.x +threadIdx.x; if ((i<(size_t)123) &&(i>(size_t)96)) { d_a[i] = (char)i; printf("Block %d Thread %d: init globMem[%zu]=%c\n",blockIdx.x,threadIdx.x,i,(char)d_a[i]); initSharedMem(i); __syncthreads(); printf("Block %d Thread %d: shared mem[%zu]=%c\n",blockIdx.x,threadIdx.x,i,(char)sharedMem[i]); sharedMem[i]=d_a[i]; printf("Block %d Thread %d: after copy globMem to shared mem, shared mem[%zu]=%c\n",blockIdx.x,threadIdx.x,i,sharedMem[i]); __syncthreads(); upperCase(i); printf("Block %d Thread %d: after doing UpperCase(), shared mem[%zu]=%c\n",blockIdx.x,threadIdx.x,i,sharedMem[i]); __syncthreads(); d_a[i]=sharedMem[i]; printf("Block %d Thread %d: glob mem[%zu]=%c\n",blockIdx.x,threadIdx.x,i,d_a[i]); } } int main() { char *d_a,*a; size_t size = N * sizeof(char); cudaMalloc(&d_a, size); a=(char *)malloc(size); size_t threads_per_block = 128; size_t number_of_blocks = (size_t)((N + threads_per_block-1)/threads_per_block); initializeElementsTo<<>>(d_a); cudaDeviceSynchronize(); cudaMemcpy(a,d_a,size,cudaMemcpyDeviceToHost); for (size_t i = 0; i < N; ++i) if ((i<(size_t)123) &&(i>(size_t)96)) printf("HostMem[%zu]=%c\n",i,a[i]); cudaFree(d_a); free(a); } Output: (see below) From the output seen below, the "if" in the device code and in the host code work well since the output displayed is for the specific range of addresses that the "if" allows. But the contents of the global memory and shared memory are incorrect as displayed (compare this to the file "int.txt" where you can see the correctly displayed characters). Interestingly, the Host memory still ends up having the correct character values. So perhaps the problem is the "printf" statements in the GPU code? I am using %zu for the format since from what I can tell is needed to display a size_t value. Block 0 Thread 97: init globMem[97]= Block 0 Thread 98: init globMem[98]= Block 0 Thread 99: init globMem[99]= Block 0 Thread 100: init globMem[100]= Block 0 Thread 101: init globMem[101]= Block 0 Thread 102: init globMem[102]= Block 0 Thread 103: init globMem[103]= Block 0 Thread 104: init globMem[104]= Block 0 Thread 105: init globMem[105]= Block 0 Thread 106: init globMem[106]= Block 0 Thread 107: init globMem[107]= Block 0 Thread 108: init globMem[108]= Block 0 Thread 109: init globMem[109]= Block 0 Thread 110: init globMem[110]= Block 0 Thread 111: init globMem[111]= Block 0 Thread 112: init globMem[112]= Block 0 Thread 113: init globMem[113]= Block 0 Thread 114: init globMem[114]= Block 0 Thread 115: init globMem[115]= Block 0 Thread 116: init globMem[116]= Block 0 Thread 117: init globMem[117]= Block 0 Thread 118: init globMem[118]= Block 0 Thread 119: init globMem[119]= Block 0 Thread 120: init globMem[120]= Block 0 Thread 121: init globMem[121]= Block 0 Thread 122: init globMem[122]= Block 0 Thread 97: shared mem[97]= Block 0 Thread 98: shared mem[98]= Block 0 Thread 99: shared mem[99]= Block 0 Thread 100: shared mem[100]= Block 0 Thread 101: shared mem[101]= Block 0 Thread 102: shared mem[102]= Block 0 Thread 103: shared mem[103]= Block 0 Thread 104: shared mem[104]= Block 0 Thread 105: shared mem[105]= Block 0 Thread 106: shared mem[106]= Block 0 Thread 107: shared mem[107]= Block 0 Thread 108: shared mem[108]= Block 0 Thread 109: shared mem[109]= Block 0 Thread 110: shared mem[110]= Block 0 Thread 111: shared mem[111]= Block 0 Thread 112: shared mem[112]= Block 0 Thread 113: shared mem[113]= Block 0 Thread 114: shared mem[114]= Block 0 Thread 115: shared mem[115]= Block 0 Thread 116: shared mem[116]= Block 0 Thread 117: shared mem[117]= Block 0 Thread 118: shared mem[118]= Block 0 Thread 119: shared mem[119]= Block 0 Thread 120: shared mem[120]= Block 0 Thread 121: shared mem[121]= Block 0 Thread 122: shared mem[122]= Block 0 Thread 97: after copy globMem to shared mem, shared mem[97]= Block 0 Thread 98: after copy globMem to shared mem, shared mem[98]= Block 0 Thread 99: after copy globMem to shared mem, shared mem[99]= Block 0 Thread 100: after copy globMem to shared mem, shared mem[100]= Block 0 Thread 101: after copy globMem to shared mem, shared mem[101]= Block 0 Thread 102: after copy globMem to shared mem, shared mem[102]= Block 0 Thread 103: after copy globMem to shared mem, shared mem[103]= Block 0 Thread 104: after copy globMem to shared mem, shared mem[104]= Block 0 Thread 105: after copy globMem to shared mem, shared mem[105]= Block 0 Thread 106: after copy globMem to shared mem, shared mem[106]= Block 0 Thread 107: after copy globMem to shared mem, shared mem[107]= Block 0 Thread 108: after copy globMem to shared mem, shared mem[108]= Block 0 Thread 109: after copy globMem to shared mem, shared mem[109]= Block 0 Thread 110: after copy globMem to shared mem, shared mem[110]= Block 0 Thread 111: after copy globMem to shared mem, shared mem[111]= Block 0 Thread 112: after copy globMem to shared mem, shared mem[112]= Block 0 Thread 113: after copy globMem to shared mem, shared mem[113]= Block 0 Thread 114: after copy globMem to shared mem, shared mem[114]= Block 0 Thread 115: after copy globMem to shared mem, shared mem[115]= Block 0 Thread 116: after copy globMem to shared mem, shared mem[116]= Block 0 Thread 117: after copy globMem to shared mem, shared mem[117]= Block 0 Thread 118: after copy globMem to shared mem, shared mem[118]= Block 0 Thread 119: after copy globMem to shared mem, shared mem[119]= Block 0 Thread 120: after copy globMem to shared mem, shared mem[120]= Block 0 Thread 121: after copy globMem to shared mem, shared mem[121]= Block 0 Thread 122: after copy globMem to shared mem, shared mem[122]= Block 0 Thread 97: after doing UpperCase(), shared mem[97]= Block 0 Thread 98: after doing UpperCase(), shared mem[98]= Block 0 Thread 99: after doing UpperCase(), shared mem[99]= Block 0 Thread 100: after doing UpperCase(), shared mem[100]= Block 0 Thread 101: after doing UpperCase(), shared mem[101]= Block 0 Thread 102: after doing UpperCase(), shared mem[102]= Block 0 Thread 103: after doing UpperCase(), shared mem[103]= Block 0 Thread 104: after doing UpperCase(), shared mem[104]= Block 0 Thread 105: after doing UpperCase(), shared mem[105]= Block 0 Thread 106: after doing UpperCase(), shared mem[106]= Block 0 Thread 107: after doing UpperCase(), shared mem[107]= Block 0 Thread 108: after doing UpperCase(), shared mem[108]= Block 0 Thread 109: after doing UpperCase(), shared mem[109]= Block 0 Thread 110: after doing UpperCase(), shared mem[110]= Block 0 Thread 111: after doing UpperCase(), shared mem[111]= Block 0 Thread 112: after doing UpperCase(), shared mem[112]= Block 0 Thread 113: after doing UpperCase(), shared mem[113]= Block 0 Thread 114: after doing UpperCase(), shared mem[114]= Block 0 Thread 115: after doing UpperCase(), shared mem[115]= Block 0 Thread 116: after doing UpperCase(), shared mem[116]= Block 0 Thread 117: after doing UpperCase(), shared mem[117]= Block 0 Thread 118: after doing UpperCase(), shared mem[118]= Block 0 Thread 119: after doing UpperCase(), shared mem[119]= Block 0 Thread 120: after doing UpperCase(), shared mem[120]= Block 0 Thread 121: after doing UpperCase(), shared mem[121]= Block 0 Thread 122: after doing UpperCase(), shared mem[122]= Block 0 Thread 97: glob mem[97]= Block 0 Thread 98: glob mem[98]= Block 0 Thread 99: glob mem[99]= Block 0 Thread 100: glob mem[100]= Block 0 Thread 101: glob mem[101]= Block 0 Thread 102: glob mem[102]= Block 0 Thread 103: glob mem[103]= Block 0 Thread 104: glob mem[104]= Block 0 Thread 105: glob mem[105]= Block 0 Thread 106: glob mem[106]= Block 0 Thread 107: glob mem[107]= Block 0 Thread 108: glob mem[108]= Block 0 Thread 109: glob mem[109]= Block 0 Thread 110: glob mem[110]= Block 0 Thread 111: glob mem[111]= Block 0 Thread 112: glob mem[112]= Block 0 Thread 113: glob mem[113]= Block 0 Thread 114: glob mem[114]= Block 0 Thread 115: glob mem[115]= Block 0 Thread 116: glob mem[116]= Block 0 Thread 117: glob mem[117]= Block 0 Thread 118: glob mem[118]= Block 0 Thread 119: glob mem[119]= Block 0 Thread 120: glob mem[120]= Block 0 Thread 121: glob mem[121]= Block 0 Thread 122: glob mem[122]= HostMem[97]=A HostMem[98]=B HostMem[99]=C HostMem[100]=D HostMem[101]=E HostMem[102]=F HostMem[103]=G HostMem[104]=H HostMem[105]=I HostMem[106]=J HostMem[107]=K HostMem[108]=L HostMem[109]=M HostMem[110]=N HostMem[111]=O HostMem[112]=P HostMem[113]=Q HostMem[114]=R HostMem[115]=S HostMem[116]=T HostMem[117]=U HostMem[118]=V HostMem[119]=W HostMem[120]=X HostMem[121]=Y HostMem[122]=Z