作者:makeSomeThingWt
链接:利用英伟达jetson TX1搭建TensorFlow玩flappy Bird - 知乎
来源:知乎
著作权归作者所有,转载请联系作者获得授权。
TensorFlow搭建网上已经有很多教程了。但是基于英伟达的TX1芯片几乎没有。所以本教程基本是我搭建环境几个星期踩的坑。平时都是下班折腾的,所以花的时间比较长。
先做好基本环境的搭建,CUDA8.0、cudnn这些都是tensorflow基本要的。这些用英伟达提供的工具包JetPack 2.3 L4T就可以安装。简单说一下,jetson TX1开发板如何刷机:先断电拔掉电源,用usb线连接到你的电脑上,上电。按一下电源键后,马上按住REC键,中间按一下RST键,两秒后释放REC键,就进入刷机模式。
基本环境搭建好后,因为tx1是arm架构,tensorflow还不支持,需要源码编译安装。那么编译tensorflow之前需要先安装bazel和protobuf这两个工具。bazel是用来编译tensorflow的。具体可以在github里找,这里不做详述。
安装protobuf
install deps
cd ~
sudo add-apt-repository ppa:webupd8team/java
sudo apt-get update
sudo apt-get install oracle-java8-installer
sudo apt-get install git zip unzip autoconf automake libtool curl zlib1g-dev maven swig bzip2
#build protobuf 3.0.0-beta-2 jar
git clone https://github.com/google/protobuf.git
cd protobuf
# autogen.sh downloads broken gmock.zip in d5fb408d
git checkout master
./autogen.sh
git checkout d5fb408d
./configure --prefix=/usr
make -j 4
sudo make install
cd java
#如果下载包很慢,可以百度一下mvn切换oschina源
mvn package
#注意要切换到0.2.1版本这个分支,因为没有grpc的bug
git clone https://github.com/bazelbuild/bazel.git
cd bazel
git checkout 0.2.1
cp /usr/bin/protoc third_party/protobuf/protoc-linux-arm32.exe
cp ../protobuf/java/target/protobuf-java-3.0.0-beta-2.jar third_party/protobuf/protobuf-java-3.0.0-beta-1.jar
因为github源被GFW限速原因,全程保持翻墙状态味道更佳。注意一点是bazel不支持arm架构需要在源码做改动。
改动bazel源码:找到/src/main/java/com/google/devtools/build/lib/util/CPU.java该目录,看下面代码中+号开头是新增修改的,-号开头是要删掉的。不过玩git的大家伙应该都懂,记得把前面+ -号去掉。
@@ -25,7 +25,7 @@ import java.util.Set;
public enum CPU {
X86_32("x86_32", ImmutableSet.of("i386", "i486", "i586", "i686", "i786", "x86")),
X86_64("x86_64", ImmutableSet.of("amd64", "x86_64", "x64")),
- ARM("arm", ImmutableSet.of("arm", "armv7l")),
+ ARM("arm", ImmutableSet.of("arm", "armv7l", "aarch64")),
UNKNOWN("unknown", ImmutableSet.of());
然后bazel当前目录执行
./compile.sh
编译
克隆tensorflow项目
git clone -b r0.9 https://github.com/tensorflow/tensorflow.git
./configure
#--jobs 3 控制当前执行任务数量,多了cpu吃不消
#--local_resources 2048,.5,1.0 限制占用内存数量,以免溢出
#把你编译好的bazel二进制文件,在bazel项目文件夹的out目录,拷贝到tensorflow
./bazel build -c opt --config=cuda --jobs 3 --verbose_failures --local_resources 2048,.5,1.0 //tensorflow/tools/pip_package:build_pip_package
执行完会报错,我们需要替换一下.cache里的两个文件
cd ~
wget -O config.guess 'http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.guess;hb=HEAD'
wget -O config.sub 'http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.sub;hb=HEAD'
# 注意!!!进到cd .cache/bazel目录下查看你系统对应的目录
cp config.guess ./.cache/bazel/_bazel_socialh/742c01ff0765b098544431b60b1eed9f/external/farmhash_archive/farmhash-34c13ddfab0e35422f4c3979f360635a8c050260/config.guess
cp config.sub ./.cache/bazel/_bazel_socialh/742c01ff0765b098544431b60b1eed9f/external/farmhash_archive/farmhash-34c13ddfab0e35422f4c3979f360635a8c050260/config.sub
编译还不止这些,你需要修改tensorflow一些代码才可行
--- a/tensorflow/core/kernels/BUILD
+++ b/tensorflow/core/kernels/BUILD
@@ -985,7 +985,7 @@ tf_kernel_libraries(
"reduction_ops",
"segment_reduction_ops",
"sequence_ops",
- "sparse_matmul_op",
+ #DC "sparse_matmul_op",
],
deps = [
“:bounds_check”,
--- a/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc
+++ b/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc
@@ -888,6 +888,9 @@ CudaContext* CUDAExecutor::cuda_context() { return context_; }
// For anything more complicated/prod-focused than this, you'll likely want to
// turn to gsys' topology modeling.
static int TryToReadNumaNode(const string &pci_bus_id, int device_ordinal) {
+ // DC - make this clever later. ARM has no NUMA node, just return 0
+ LOG(INFO) << "ARM has no NUMA node, hardcoding to return zero";
+ return 0;
#if defined(__APPLE__)
LOG(INFO) << "OS X does not support NUMA - returning NUMA node zero";
return 0;
--- a/tensorflow/stream_executor/cuda/cuda_blas.cc
+++ b/tensorflow/stream_executor/cuda/cuda_blas.cc
@@ -25,6 +25,12 @@ limitations under the License.
#define EIGEN_HAS_CUDA_FP16
#endif
+#if CUDA_VERSION >= 8000
+#define SE_CUDA_DATA_HALF CUDA_R_16F
+#else
+#define SE_CUDA_DATA_HALF CUBLAS_DATA_HALF
+#endif
+
#include "tensorflow/stream_executor/cuda/cuda_blas.h"
#include <complex>
@@ -1680,10 +1686,10 @@ bool CUDABlas::DoBlasGemm(
return DoBlasInternal(
dynload::cublasSgemmEx, stream, true /* = pointer_mode_host */,
CUDABlasTranspose(transa), CUDABlasTranspose(transb), m, n, k, &alpha,
- CUDAMemory(a), CUBLAS_DATA_HALF, lda,
- CUDAMemory(b), CUBLAS_DATA_HALF, ldb,
+ CUDAMemory(a), SE_CUDA_DATA_HALF, lda,
+ CUDAMemory(b), SE_CUDA_DATA_HALF, ldb,
&beta,
- CUDAMemoryMutable(c), CUBLAS_DATA_HALF, ldc);
+ CUDAMemoryMutable(c), SE_CUDA_DATA_HALF, ldc);
#else
LOG(ERROR) << "fp16 sgemm is not implemented in this cuBLAS version "
<< "(need at least CUDA 7.5)";
--- a/tensorflow/core/kernels/sparse_tensor_dense_matmul_op_gpu.cu.cc
+++ b/tensorflow/core/kernels/sparse_tensor_dense_matmul_op_gpu.cu.cc
@@ -104,9 +104,17 @@ struct SparseTensorDenseMatMulFunctor {
int n = (ADJ_B) ? b.dimension(0) : b.dimension(1);
#if !defined(EIGEN_HAS_INDEX_LIST)
- Eigen::Tensor<int, 2>::Dimensions matrix_1_by_nnz{{ 1, nnz }};
- Eigen::array<int, 2> n_by_1{{ n, 1 }};
- Eigen::array<int, 1> reduce_on_rows{{ 0 }};
+ //DC Eigen::Tensor<int, 2>::Dimensions matrix_1_by_nnz{{ 1, nnz }};
+ Eigen::Tensor<int, 2>::Dimensions matrix_1_by_nnz;
+ matrix_1_by_nnz[0] = 1;
+ matrix_1_by_nnz[1] = nnz;
+ //DC Eigen::array<int, 2> n_by_1{{ n, 1 }};
+ Eigen::array<int, 2> n_by_1;
+ n_by_1[0] = n;
+ n_by_1[1] = 1;
+ //DC Eigen::array<int, 1> reduce_on_rows{{ 0 }};
+ Eigen::array<int, 1> reduce_on_rows;
+ reduce_on_rows[0] = 0;
#else
Eigen::IndexList<Eigen::type2index<1>, int> matrix_1_by_nnz;
matrix_1_by_nnz.set(1, nnz);
--- a/tensorflow/core/kernels/cwise_op_gpu_select.cu.cc
+++ b/tensorflow/core/kernels/cwise_op_gpu_select.cu.cc
@@ -43,8 +43,14 @@ struct BatchSelectFunctor {
const int all_but_batch = then_flat_outer_dims.dimension(1);
#if !defined(EIGEN_HAS_INDEX_LIST)
- Eigen::array<int, 2> broadcast_dims{{ 1, all_but_batch }};
- Eigen::Tensor<int, 2>::Dimensions reshape_dims{{ batch, 1 }};
+ //DC Eigen::array<int, 2> broadcast_dims{{ 1, all_but_batch }};
+ Eigen::array<int, 2> broadcast_dims;
+ broadcast_dims[0] = 1;
+ broadcast_dims[1] = all_but_batch;
+ //DC Eigen::Tensor<int, 2>::Dimensions reshape_dims{{ batch, 1 }};
+ Eigen::Tensor<int, 2>::Dimensions reshape_dims;
+ reshape_dims[0] = batch;
+ reshape_dims[1] = 1;
#else
Eigen::IndexList<Eigen::type2index<1>, int> broadcast_dims;
broadcast_dims.set(1, all_but_batch);
--- a/tensorflow/python/BUILD
+++ b/tensorflow/python/BUILD
@@ -1110,7 +1110,7 @@ medium_kernel_test_list = glob([
"kernel_tests/seq2seq_test.py",
"kernel_tests/slice_op_test.py",
"kernel_tests/sparse_ops_test.py",
- "kernel_tests/sparse_matmul_op_test.py",
+ #DC "kernel_tests/sparse_matmul_op_test.py",
“kernel_tests/sparse_tensor_dense_matmul_op_test.py”,
])
代码全部修改完后,再次执行 ./bazel build -c opt --config=cuda --jobs 3 --verbose_failures --local_resources 2048,.5,1.0 //tensorflow/tools/pip_package:build_pip_package 。这里强烈建议在翻墙模式下进行,不然虽然也能装,但过程很恶心。Fu*k GFW。执行完我们要打包出pip安装包:bazel-bin/tensorflow/tools/pip_package/build_pip_package /tmp/tensorflow_pkg ,然后 cd /tmp/tensorflow_pkg ,执行 sudo pip install tensorflow-0.9.0-py2-none-any.whl。到这里tensorflow就算安装好了。
为了能利用tensorflow实现深度学习玩flappy Bird,我们需要安装pygame和opencv
安装opencv
sudo apt-get install python-opencv
安装pygame
wget http://www.pygame.org/ftp/pygame-1.9.1release.tar.gz 下载pygame,下载完成后解压:tar -zxvf pygame-1.9.1release.tar.gz
sudo apt-get install libsdl1.2-dev (SDL安装)
sudo pip install numpy
cd pygame-1.9.1release
python config.py
python setup.py install
克隆flappy Bird
git clone --recursive https://github.com/yenchenlin/DeepLearningFlappyBird.git
cd DeepLearningFlappyBird
python deep_q_network.py
#如果提示出现 linux/videodev.h:No such file or directory error
sudo apt-get install libv4l-dev
cd /usr/include/linux
sudo ln -s ../libv4l1-videodev.h videodev.h