Cannot access OpenGL texture from CUDA

I am trying to render an image to a texture and then perform some computation on the result, but I constantly get garbage instead of the texture contents. I have checked the contents of "tex" after rendering to it via glGetTexImage — it contains the correct data — but on the CUDA side something goes wrong:

glGenTextures(1, &tex); 

    glBindTexture(GL_TEXTURE_2D, tex);

    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_REPEAT);

    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_REPEAT);

    glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); 

    glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); 

glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, CurrentWidth, CurrentHeight, 0, GL_RGBA, GL_FLOAT, 0); 

    cudaSetDevice(0);

    cudaGLSetGLDevice(0);

    cudaGraphicsGLRegisterImage(&resource, tex, GL_TEXTURE_2D, cudaGraphicsMapFlagsReadOnly);

    glBindTexture(GL_TEXTURE_2D, 0);

fbo = new GLFBO(GL_COLOR_ATTACHMENT0, tex);

    vao = new GLVAO(1, &BufferIds[0]);

    vao->Bind();

    vbo = new GLVBO(2, &BufferIds[1]);

    vbo->Bind(GL_ARRAY_BUFFER);

    vbo->SetData(GL_ARRAY_BUFFER, sizeof(GLVertex)*(mesh->nVertices+1), vertices, GL_DYNAMIC_DRAW);

    vbo->Bind(GL_ELEMENT_ARRAY_BUFFER);

    vbo->SetData(GL_ELEMENT_ARRAY_BUFFER, sizeof(GLuint)*mesh->nElements, indices, GL_DYNAMIC_DRAW);

    vao->Unbind();

// ... Render image

void GLMain::_RenderScene()

{

    ++FrameCount;

    //render at maximum resolution to render buffer

    glViewport(0, 0, rboWidth, rboHeight);

if(!bDone)

    {

        fbo->Bind();

        glClearColor( 0.0, 0.0, 0.0, 1.0 );

        glClear( GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT );

        DrawMesh();

        cudaGraphicsMapResources(1, &resource, 0);

        fbo->Unbind();

        cudaArray* ar;

        cudaGraphicsSubResourceGetMappedArray(&ar, resource, 0, 0);

        size_t size = sizeof(float)*rboWidth*rboHeight*4;

        float* initData = (float*)malloc(size);

        memset(initData, 0, size);

        ProcessImage(ar, initData, rboWidth, rboHeight);

    }

//Render to screen

}

//...... Process_Image.cu

texture <float4, cudaTextureType2D, cudaReadModeElementType> tex;

extern "C" void ProcessImage(cudaArray* in, float* _inData, int w, int h )

{   

    cudaBindTextureToArray(tex, in);

    size_t size = sizeof(float)*w*h*4;

    float* inData;

    cudaMalloc(&inData, size);

    dim3 threads(16, 16);

    dim3 blocks(w/threads.x, h/threads.y);

    cuProcessKernel<<<blocks, threads>>>(inData, w, h);

    cudaThreadSynchronize();

    cudaUnbindTexture(tex);

    cudaMemcpy(_inData, inData, size, cudaMemcpyDeviceToHost);

    cudaFree(inData);

}

//...Kernel

texture <float4, cudaTextureType2D, cudaReadModeElementType> tex;

__global__ void cuProcessKernel(float* initialData, int w, int h)

{

    int x = blockIdx.x*blockDim.x + threadIdx.x;

    int y = blockIdx.y*blockDim.y + threadIdx.y;

    int idx = y * w + x;

    float4 pix = tex2D(tex, x, y);

    initialData[idx*4] = pix.x;

    initialData[idx*4+1] = pix.y;

    initialData[idx*4+2] = pix.z;

    initialData[idx*4+3] = pix.w;

}

The documentation says that binding to a texture is very simple, and there are a lot of examples, but I am failing for some reason. Could someone tell me what I am doing wrong?