Hi, I think the cuDNN GRU path has a bug: the following code produces different results for the native NumPy implementation and the cuDNN kernel. Please take a look — thanks.
import numpy as np
import tensorflow as tf
from tensorflow.contrib.cudnn_rnn.python.ops import cudnn_rnn_ops
# Clamp bounds for the sigmoid pre-activation: inputs outside [S_MIN, S_MAX]
# are saturated before calling np.exp to avoid overflow.
S_MIN=-40
S_MAX=13
# Upper clamp on the exponent used inside the hand-rolled tanh below.
EXP_MAX=40
def native_gru(input, w, s_min=-40.0, s_max=13.0, exp_max=40.0):
    """NumPy reference GRU using the cuDNN canonical flat parameter layout.

    Args:
        input: array of shape (seq_len, batch_size, hidden_size).
            NOTE(review): assumes input size == hidden size, as in the
            cuDNN comparison below — confirm for other configurations.
        w: flat 1-D parameter vector laid out as six square gate matrices
            (W_u, W_r, W_c, R_u, R_r, R_c), each stored row-major and
            transposed on load, followed by six bias vectors
            (bx_u, bx_r, bx_c, bh_u, bh_r, bh_c).
        s_min, s_max: clamp bounds for the sigmoid pre-activation
            (defaults match the module constants S_MIN / S_MAX).
        exp_max: clamp bound for the exponent inside tanh (matches EXP_MAX).

    Returns:
        Array of shape (seq_len, batch_size, hidden_size) holding the
        hidden state after every time step.
    """
    seq_len, batch_size, hidden_size = input.shape

    # Slice the six square gate matrices out of the flat parameter vector.
    sq = hidden_size * hidden_size
    offset = 0
    mats = []
    for _ in range(6):
        mats.append(w[offset:offset + sq].reshape(
            (hidden_size, hidden_size)).transpose())
        offset += sq
    wu, wr, wc, ru, rr, rc = mats

    # Then the six bias vectors.
    biases = []
    for _ in range(6):
        biases.append(w[offset:offset + hidden_size])
        offset += hidden_size
    bx_u, bx_r, bx_c, bh_u, bh_r, bh_c = biases

    def sigmoid(x):
        # Saturate the pre-activation so np.exp cannot overflow.
        y = np.clip(x, s_min, s_max)
        return 1. / (1. + np.exp(-y))

    def tanh(x):
        # tanh(x) = 2*sigmoid(2x) - 1; clamp the exponent to avoid overflow.
        y = np.minimum(-2. * x, exp_max)
        return (2. / (1. + np.exp(y))) - 1.

    output = []
    pre_h = np.zeros((batch_size, hidden_size), dtype=input.dtype)
    for t in range(seq_len):
        x_t = input[t]
        update_gate = sigmoid(
            np.matmul(x_t, wu) + np.matmul(pre_h, ru) + bx_u + bh_u)
        reset_gate = sigmoid(
            np.matmul(x_t, wr) + np.matmul(pre_h, rr) + bx_r + bh_r)
        # cuDNN GRU variant ("linear before reset"): the reset gate scales
        # (h @ R_c + bh_c) before adding the input projection.
        h_tilde = tanh(
            np.matmul(x_t, wc)
            + reset_gate * (np.matmul(pre_h, rc) + bh_c)
            + bx_c)
        pre_h = update_gate * pre_h + (1. - update_gate) * h_tilde
        output.append(pre_h)

    # Stack per-step states into (seq_len, batch, hidden); equivalent to the
    # original concatenate/reshape/transpose sequence.
    return np.stack(output, axis=0)
def TestGRU():
    """Compare the NumPy reference GRU against TensorFlow's cuDNN GRU.

    Builds one random input/parameter pair, runs both implementations, and
    prints their outputs for manual comparison (this is a bug-repro script,
    so it prints rather than asserts).
    """
    num_steps = 2
    batch_size = 1
    hidden_size = 1
    # cuDNN GRU canonical size for input_size == hidden_size:
    # six square gate matrices plus six bias vectors.
    # (The original paste had lost the '*' characters here.)
    input_w_size = 6 * hidden_size * hidden_size + 6 * hidden_size

    x = np.random.uniform(
        low=-0.1, high=0.1,
        size=(num_steps, batch_size, hidden_size)).astype(np.float32)
    flat_w = np.random.uniform(
        low=-0.1, high=0.1, size=(input_w_size,)).astype(np.float32)

    out = native_gru(x, flat_w)

    n_layer = 1
    rnn_cudnn = cudnn_rnn_ops.CudnnGRU(
        n_layer, hidden_size, hidden_size, 'linear_input')
    param_cudnn = tf.Variable(flat_w)
    # CudnnGRU expects time-major input (seq_len, batch, input_size),
    # which is already x's layout.
    y_cudnn, state_cudnn = rnn_cudnn(
        x, tf.zeros([n_layer, batch_size, hidden_size]), param_cudnn)

    print("native gru:" + str(out))
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        print("cudnn gru:" + str(sess.run([y_cudnn])))
# Script entry point. (The paste had dropped the dunder underscores and used
# smart quotes: `if name == ‘main’:` is a NameError/SyntaxError.)
if __name__ == '__main__':
    TestGRU()