CUDA beginner: can't get data back from the GPU

Hello all,
I am having trouble retrieving data from the GPU. My host code looks as follows, very simple:

// includes, system
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>

// includes, project
#include <cutil.h>

#include <>

void run(int argc, char** argv);

/**
 * Program entry point.
 *
 * Forwards the command-line arguments to run() and then invokes CUT_EXIT
 * with the same arguments.
 *
 * @param argc  number of command-line arguments
 * @param argv  command-line argument strings
 */
int main(int argc, char** argv)
{
    run(argc, argv);

    // NOTE(review): CUT_EXIT is not defined in this file; presumably it is the
    // exit helper macro supplied by <cutil.h> -- confirm against that header.
    CUT_EXIT(argc, argv);
}

// Host-side driver: fills a 32-element host array with -1, uploads it to a
// device buffer, launches helloWorld_kern with 1 block of 32 threads, copies
// the device buffer back into the host array and prints from it.
// argc / argv are accepted but not used in the lines shown here.
// NOTE(review): the opening/closing braces of this function (and the loop that
// presumably wrapped the final printf) are missing in this copy of the post.
void run(int argc, char** argv)
int i;
cudaError_t err;

// Host-side array and the pointer that will receive the device allocation.
int location[32];
int *d_location;

// Allocate 128 bytes on the device (32 ints, assuming sizeof(int) == 4), then
// initialise every host element to -1 so untouched entries are recognisable.
// NOTE(review): CUDA_SAFE_CALL is not defined here; presumably the error-checking
// wrapper macro from <cutil.h> -- confirm.
CUDA_SAFE_CALL(cudaMalloc((void **) &d_location, 128));
for(i=0;i<32;i++) location[i]=-1;

// Query the last CUDA error. The message text mentions a memcpy, but this check
// runs before the first cudaMemcpy below.
// NOTE(review): the string literal on the printf line is cut off (no closing
// quote or argument list) in this copy of the post.
err = cudaGetLastError();
if(err!=0) printf("ERROR - memcpy for xyz/alpha not worked:   
// Upload the 128 bytes of -1 values from the host array into the device buffer.
CUDA_SAFE_CALL(cudaMemcpy(d_location, location, 128, cudaMemcpyHostToDevice));

// Execution parameters: a single block of 32 threads.
dim3 grid(1);
dim3 threads(32);
// The argument passed here is `location`, the host array declared above, not
// the device pointer `d_location` that was allocated and filled.
helloWorld_kern<<< grid, threads >>>(location);

// Check whether the kernel execution generated an error.
// NOTE(review): CUT_CHECK_ERROR is presumably a <cutil.h> macro -- confirm.
CUT_CHECK_ERROR("Kernel execution failed");
// Copy 128 bytes from the device buffer back into the host array.
CUDA_SAFE_CALL(cudaMemcpy(location, d_location, 128, cudaMemcpyDeviceToHost));

// NOTE(review): as shown, `i` is still 32 from the initialisation loop, which is
// one past the last valid index of location[32]; a print loop over i was
// presumably lost from this copy -- confirm against the original post.
  printf("%d\n", location[i]);


My kernel code, shown below, is very simple as well:

/**
 * Each thread stores its own x-index within the block into the matching
 * slot of `positions` (positions[threadIdx.x] = threadIdx.x).
 *
 * Launch layout: indexing uses threadIdx.x only, so with more than one block
 * every block writes the same slots; the caller shown launches 1 block of
 * 32 threads.
 *
 * @param positions  output array; must point to memory the device can write
 *                   and hold at least blockDim.x ints (no bounds check is
 *                   performed here).
 */
__global__ void
helloWorld_kern( int *positions )
{
    // Block index, threads
    //int bx = blockIdx.x;
    int tx = threadIdx.x;

    positions[tx] = tx;
}


When the code runs, the output is all -1, which is the value I initialized the array to. Why are my values unchanged?

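You are passing location (the host array) instead of d_location (the device pointer) to the kernel, so the kernel never writes into the device buffer and the final cudaMemcpy simply copies back the -1 values you uploaded. The launch should be: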

helloWorld_kern<<< grid, threads >>>(d_location);

Thanks! A silly error that I just overlooked.