CUDA beginner: can't get data back from the GPU

Hello all,
I am having trouble retrieving data from the GPU. My host code looks as follows, very simple:

// includes, system
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>

// includes, project
#include <cutil.h>

#include <helloWorld_kernel.cu>

void run(int argc, char** argv);

// Program entry point: hands the command line to run(), then lets the
// cutil CUT_EXIT macro finish the program.
int main(int argc, char** argv)
{
    run(argc, argv);

    CUT_EXIT(argc, argv);
}

// Host driver for the example.
//
// Fills a 32-element host array with -1, copies it into a device buffer,
// launches helloWorld_kern with one block of 32 threads, copies the device
// buffer back into the host array and prints every element.
//
// argc/argv are accepted for symmetry with main() but are not used here.
void run(int argc, char** argv)
{
    CUT_DEVICE_INIT();

    const int count = 32;                   // one element per launched thread
    int location[count];                    // host-side array
    const size_t bytes = sizeof(location);  // derived size instead of a hard-coded 128
    int *d_location = NULL;                 // device-side buffer
    int i;
    cudaError_t err;

    CUDA_SAFE_CALL(cudaMalloc((void **) &d_location, bytes));
    for (i = 0; i < count; i++)
        location[i] = -1;

    // Report any error recorded so far, before the host-to-device copy.
    err = cudaGetLastError();
    if (err != cudaSuccess)
        printf("ERROR - CUDA error before host-to-device copy: %s\n", cudaGetErrorString(err));
    CUDA_SAFE_CALL(cudaMemcpy(d_location, location, bytes, cudaMemcpyHostToDevice));

    // execution parameters
    dim3 grid(1);
    dim3 threads(count);
    // NOTE(review): `location` is the host array; the kernel needs the device
    // pointer `d_location`. This is the bug the question is about (see the
    // reply at the end of the thread), so it is left as originally posted.
    helloWorld_kern<<< grid, threads >>>(location);
    cudaThreadSynchronize();

    // check if kernel execution generated an error
    CUT_CHECK_ERROR("Kernel execution failed");
    CUDA_SAFE_CALL(cudaMemcpy(location, d_location, bytes, cudaMemcpyDeviceToHost));

    for (i = 0; i < count; i++)
        printf("%d\n", location[i]);

    // Release the device buffer allocated above.
    CUDA_SAFE_CALL(cudaFree(d_location));
}

And my kernel code is shown below; it is very simple as well:

////////////////////////////////////////////////////////////////////////////////
// Each thread stores its own thread index into positions[threadIdx.x].
//
// Only threadIdx.x is used, so the kernel is meant for a single 1-D block;
// positions must be a device pointer with room for at least blockDim.x ints.
__global__ void
helloWorld_kern( int *positions )
{
    // Block index, threads
    //int bx = blockIdx.x;
    int tx = threadIdx.x;

    // No data is exchanged between threads in this kernel, so the barriers
    // are not required for correctness; they are kept as originally posted.
    __syncthreads();
    positions[tx] = tx;
    __syncthreads();

}

When the code runs, the output is all -1, which is the value I initialized the array to. Why are my values unchanged?

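You are passing location (the host array) instead of d_location (the device pointer) to the kernel. The kernel therefore never writes to the device buffer, so the final cudaMemcpy just copies back the -1 values you uploaded.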

Try:
helloWorld_kern<<< grid, threads >>>(d_location);

Thanks! A silly error that I just overlooked.