In-function initialization of a shared struct produces a memcheck error

I’ve observed a strange issue: when I initialize a shared-memory struct variable inside a function called from the main kernel, the program runs fine on its own, but cuda-memcheck reports an invalid global write. Outside of cuda-memcheck the initialization works and the value is readable; under cuda-memcheck the kernel fails with the error shown below. Here is a simple example that reproduces the problem.
In my (much bigger) code I’m trying to move initialization into its own function, but the cuda-memcheck errors don’t inspire confidence.
I use CUDA 5.0 on CentOS 6.3 with driver version 304.54.
Please let me know what you think.

//main.cpp
#include "gpu.h"

/*
 * Program entry point: runs the example kernel once via run_kernel().
 *
 * argc and argv are unused. The second parameter uses the standard
 * `char** argv` form; the pasted `char* argv` is not a valid signature
 * for main.
 */
int main(int argc, char** argv){
    (void)argc;  // unused
    (void)argv;  // unused
    run_kernel();
    return 0;
}

//gpu.h
// Shared declarations for the host code (main.cpp) and the device code (gpu.cu).
#ifndef GPU_H_
#define GPU_H_

// Host-side entry point, defined in gpu.cu: launches the compute kernel and
// waits for it to finish.
void run_kernel();

// Minimal model state used by the example; gpu.cu keeps one instance in
// shared memory and sets `size` to 5 during initialization.
struct cuda_model{
int size;
};
#endif /* GPU_H_ */

//gpu.cu
#include <stdio.h>
#include "gpu.h"

#define FUNCTION 0

// Block-shared model instance: each thread block gets its own copy, and its
// contents are uninitialized until a thread writes them (see init() and
// compute()). The qualifiers need their double underscores to compile.
__device__ __shared__ cuda_model model;

/*
 * Initializes the model pointed to by `model`.
 *
 * Threads whose threadIdx.x is 0 (a single thread for a 1-D block) write the
 * fields; the block-wide barrier that follows orders that write before any
 * later access by the other threads of the block.
 *
 * Because of the __syncthreads(), every thread of the block must call this
 * function -- do not call it from inside a thread-dependent branch.
 *
 * model: destination struct. The caller in this file passes the address of
 *        the block-shared `model` variable; inside this function the
 *        parameter shadows that variable.
 */
__device__ void init(cuda_model* model){
    if (threadIdx.x == 0){
        // NOTE(review): this is the store cuda-memcheck flags in the report
        // (gpu.cu:11) when `model` points at the __shared__ instance.
        model->size = 5;
    }
    __syncthreads();
}

/*
 * Example kernel: initializes the block-shared `model` in one of two ways,
 * selected at compile time by the FUNCTION macro.
 *
 *   FUNCTION != 0 : initialization through init(&model)   (helper function)
 *   FUNCTION == 0 : initialization written directly in the kernel body
 *
 * Both paths set model.size to 5 from the thread(s) with threadIdx.x == 0 and
 * then hit a block-wide barrier, so they are equivalent from the point of
 * view of the rest of the block. FUNCTION is a compile-time constant, so the
 * outer branch is uniform across the block and the barriers are reached by
 * all threads.
 *
 * Launched in this file with a single block of a single thread.
 */
__global__ void compute(){
    if (FUNCTION){
        init(&model);
    }
    else{
        if (threadIdx.x == 0) model.size = 5;
        // Mirror the barrier that init() performs so the two paths match.
        __syncthreads();
    }
}

/*
 * Host entry point: launches compute() with one block of one thread and
 * waits for it to finish.
 *
 * Kernel launches do not return a status, so launch-configuration errors are
 * picked up with cudaGetLastError(), and faults raised while the kernel runs
 * are picked up from cudaDeviceSynchronize(). Either kind of failure is
 * reported on stderr; the function still returns normally so existing
 * callers are unaffected.
 */
__host__ void run_kernel(){
    compute<<<1,1>>>();

    // Launch-time errors (bad configuration, earlier sticky error).
    cudaError_t err = cudaGetLastError();
    if (err != cudaSuccess){
        fprintf(stderr, "compute launch failed: %s\n", cudaGetErrorString(err));
        return;
    }

    // Execution-time errors surface at the next synchronizing call.
    err = cudaDeviceSynchronize();
    if (err != cudaSuccess){
        fprintf(stderr, "compute execution failed: %s\n", cudaGetErrorString(err));
    }
}

[sasha@gpudev Debug]$ cuda-memcheck ./Test
========= CUDA-MEMCHECK
========= Invalid global write of size 4
========= at 0x00000088 in /home/sasha/cuda-workspace/Test/Debug/…/gpu.cu:11:init(cuda_model*)
========= by thread (0,0,0) in block (0,0,0)
========= Address 0x01000000 is out of bounds
========= Device Frame:/home/sasha/cuda-workspace/Test/Debug/…/gpu.cu:19:compute(void) (compute(void) : 0x100)
========= Saved host backtrace up to driver entry point at kernel launch time
========= Host Frame:/usr/lib64/libcuda.so (cuLaunchKernel + 0x3dc) [0xc9edc]
========= Host Frame:/usr/local/cuda-5.0/lib64/libcudart.so.5.0 [0x11d54]
========= Host Frame:/usr/local/cuda-5.0/lib64/libcudart.so.5.0 (cudaLaunch + 0x182) [0x38152]
========= Host Frame:./Test [0x92a]
========= Host Frame:./Test [0x9b9]
========= Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xfd) [0x1ecdd]
========= Host Frame:./Test [0x749]

========= Program hit error 4 on CUDA API call to cudaDeviceSynchronize
========= Saved host backtrace up to driver entry point at error
========= Host Frame:/usr/lib64/libcuda.so [0x26a180]
========= Host Frame:/usr/local/cuda-5.0/lib64/libcudart.so.5.0 (cudaDeviceSynchronize + 0x1dd) [0x412dd]
========= Host Frame:./Test [0x9b9]
========= Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xfd) [0x1ecdd]
========= Host Frame:./Test [0x749]

========= ERROR SUMMARY: 2 errors