CUDA RC 7.0 compilation error c++11 (ubuntu 14.04)

Compilation error of sample from http://devblogs.nvidia.com/parallelforall/cuda-7-release-candidate-feature-overview/

c++11_cuda.cu

#include <initializer_list>
#include <iostream>
#include <cstring>

// Generic parallel find routine. Threads search through the
// array in parallel. A thread returns the index of the 
// first value it finds that satisfies predicate `p`, or -1.
template <typename T, typename Predicate>
__device__ int find(T *data, int n, Predicate p)
{
    for (int i = blockIdx.x * blockDim.x + threadIdx.x;
         i < n;
         i += blockDim.x * gridDim.x)
    {
        if (p(data[i])) return i;
    }
    return -1;
}

// Use find with a lambda function that searches for x, y, z
// or w. Note the use of range-based for loop and 
// initializer_list inside the functor, and auto means we 
// don't have to know the type of the lambda or the array
__global__
void xyzw_frequency(unsigned int *count, char *data, int n)
{
    auto match_xyzw = [](char c) {
      for (const auto x : { 'x','y','z','w' }) 
        if (c == x) return true;
      return false;
    };

    int i = find(data, n, match_xyzw);

    if (i >= 0) atomicAdd(count, 1);
}

int main(void)
{   
    char text[] = "zebra xylophone wax";
    char *d_text;

    cudaMalloc(&d_text, sizeof(text));
    cudaMemcpy(d_text, text, sizeof(text), 
               cudaMemcpyHostToDevice);
    
    unsigned int *d_count;
    cudaMalloc(&d_count, sizeof(unsigned int));
    cudaMemset(d_count, 0, sizeof(unsigned int));

    xyzw_frequency<<<1, 64>>>(d_count, d_text, 
                              strlen(text));

    unsigned int count;
    cudaMemcpy(&count, d_count, sizeof(unsigned int), 
               cudaMemcpyDeviceToHost);

    std::cout << count << " instances of 'x', 'y', 'z', "
              << "in " << text << std::endl;

    cudaFree(d_count);
    cudaFree(d_text);

    return 0;
}

$ /usr/local/cuda-7.0/bin/nvcc -std=c++11 c++11_cuda.cu -o c++11_cuda
c++11_cuda.cu(28): internal error: assertion failed: remove_from_variables_list: not found (/dvs/p4/build/sw/rel/gpu_drv/r346/r346_00/drivers/compiler/edg/EDG_4.9/src/il.c, line 13467)

1 catastrophic error detected in the compilation of “/tmp/tmpxft_00001d59_00000000-9_c++11_cuda.cpp1.ii”.
Compilation aborted.
Aborted (core dumped)


Compilation without error
test_auto.cu

#include <cuda.h>
#include <iostream>

__host__ void test() {
  float a = 42;
  auto b = a;
  std::cout << b << std::endl;
}

int main()
{
  test();
  return 0;
}

/usr/local/cuda-7.0/bin/nvcc -std=c++11 test_auto.cu -o test_auto ./test_auto
42

============================================
GPU

CUDA Device Query (Runtime API) version (CUDART static linking)

Detected 1 CUDA Capable device(s)

Device 0: “Quadro K600”
CUDA Driver Version / Runtime Version 7.0 / 7.0
CUDA Capability Major/Minor version number: 3.0
Total amount of global memory: 1023 MBytes (1073020928 bytes)
( 1) Multiprocessors, (192) CUDA Cores/MP: 192 CUDA Cores
GPU Clock rate: 876 MHz (0.88 GHz)
Memory Clock rate: 891 Mhz
Memory Bus Width: 128-bit
L2 Cache Size: 262144 bytes
Maximum Texture Dimension Size (x,y,z) 1D=(65536), 2D=(65536, 65536), 3D=(4096, 4096, 4096)
Maximum Layered 1D Texture Size, (num) layers 1D=(16384), 2048 layers
Maximum Layered 2D Texture Size, (num) layers 2D=(16384, 16384), 2048 layers
Total amount of constant memory: 65536 bytes
Total amount of shared memory per block: 49152 bytes
Total number of registers available per block: 65536
Warp size: 32
Maximum number of threads per multiprocessor: 2048
Maximum number of threads per block: 1024
Max dimension size of a thread block (x,y,z): (1024, 1024, 64)
Max dimension size of a grid size (x,y,z): (2147483647, 65535, 65535)
Maximum memory pitch: 2147483647 bytes
Texture alignment: 512 bytes
Concurrent copy and kernel execution: Yes with 1 copy engine(s)
Run time limit on kernels: Yes
Integrated GPU sharing Host Memory: No
Support host page-locked memory mapping: Yes
Alignment requirement for Surfaces: Yes
Device has ECC support: Disabled
Device supports Unified Addressing (UVA): Yes
Device PCI Domain ID / Bus ID / location ID: 0 / 1 / 0
Compute Mode:
< Default (multiple host threads can use ::cudaSetDevice() with device simultaneously) >

deviceQuery, CUDA Driver = CUDART, CUDA Driver Version = 7.0, CUDA Runtime Version = 7.0, NumDevs = 1, Device0 = Quadro K600
Result = PASS

system

Linux sneg 3.13.0-44-generic #73-Ubuntu SMP Tue Dec 16 00:22:43 UTC 2014 x86_64 x86_64 x86_64 GNU/Linux

Distributor ID: Ubuntu
Description: Ubuntu 14.04.1 LTS
Release: 14.04
Codename: trusty

g++ -v
Using built-in specs.
COLLECT_GCC=g++
COLLECT_LTO_WRAPPER=/usr/lib/gcc/x86_64-linux-gnu/4.8/lto-wrapper
Target: x86_64-linux-gnu
Configured with: …/src/configure -v --with-pkgversion=‘Ubuntu 4.8.2-19ubuntu1’ --with-bugurl=file:///usr/share/doc/gcc-4.8/README.Bugs --enable-languages=c,c++,java,go,d,fortran,objc,obj-c++ --prefix=/usr --program-suffix=-4.8 --enable-shared --enable-linker-build-id --libexecdir=/usr/lib --without-included-gettext --enable-threads=posix --with-gxx-include-dir=/usr/include/c++/4.8 --libdir=/usr/lib --enable-nls --with-sysroot=/ --enable-clocale=gnu --enable-libstdcxx-debug --enable-libstdcxx-time=yes --enable-gnu-unique-object --disable-libmudflap --enable-plugin --with-system-zlib --disable-browser-plugin --enable-java-awt=gtk --enable-gtk-cairo --with-java-home=/usr/lib/jvm/java-1.5.0-gcj-4.8-amd64/jre --enable-java-home --with-jvm-root-dir=/usr/lib/jvm/java-1.5.0-gcj-4.8-amd64 --with-jvm-jar-dir=/usr/lib/jvm-exports/java-1.5.0-gcj-4.8-amd64 --with-arch-directory=amd64 --with-ecj-jar=/usr/share/java/eclipse-ecj.jar --enable-objc-gc --enable-multiarch --disable-werror --with-arch-32=i686 --with-abi=m64 --with-multilib-list=m32,m64,mx32 --with-tune=generic --enable-checking=release --build=x86_64-linux-gnu --host=x86_64-linux-gnu --target=x86_64-linux-gnu
Thread model: posix
gcc version 4.8.2 (Ubuntu 4.8.2-19ubuntu1)

install

sudo dpkg -i cuda-repo-ubuntu1404-7-0-rc_7.0-18_amd64.deb
sudo apt-get update
sudo apt-get install cuda

Access Denied

You don’t have permission to access “http://developer.nvidia.com/nvbugs/cuda/add” on this server.
Reference #18.1e841402.1422481441.16a44b84