Hey!
I’ve decided to print out what I am finding in my observation buffer, and I am finding that the values I am getting are consistently NaN on the second step it calls; otherwise, it seems that the system initialises.
Here are the print statements in my get_observation(self) method that show all these values:
print(self.obs_type)
print(self.obs_buf.shape)
print(type(self.obs_buf))
print("unscale(self.hand_dof_pos, self.hand_dof_lower_limits, self.hand_dof_upper_limits)")
print(self.obs_buf[:, 0:self.num_hand_dofs].shape)
print(self.obs_buf[:, 0:self.num_hand_dofs])
print(unscale(self.hand_dof_pos,
self.hand_dof_lower_limits, self.hand_dof_upper_limits).shape)
print(unscale(self.hand_dof_pos,
self.hand_dof_lower_limits, self.hand_dof_upper_limits))
print("self.object_pos")
print(self.obs_buf[:, 19:22])
print(self.object_pos)
print("self.object_rot")
print(self.obs_buf[:, 22:26])
print(self.object_rot)
print("self.goal_pos")
print(self.obs_buf[:, 26:29])
print(self.goal_pos)
print("self.goal_rot")
print(self.obs_buf[:, 29:33])
print(self.goal_rot)
print("quat_mul(self.object_rot, quat_conjugate(self.goal_rot))")
print(self.obs_buf[:, 33:37])
print(quat_mul(self.object_rot, quat_conjugate(self.goal_rot)))
print("self.fingertip_pos.reshape(self.num_envs, 3*self.num_fingertips)")
print(self.obs_buf[:, 37:52].shape)
print(self.obs_buf[:, 37:52])
print(self.fingertip_pos.reshape(self.num_envs, 3*self.num_fingertips).shape)
print(self.fingertip_pos.reshape(self.num_envs, 3*self.num_fingertips))
print("self.actions")
print(self.obs_buf[:, 52:70].shape)
print(self.obs_buf[:, 52:70])
print(self.actions.shape)
print(self.actions)
And this is the output I get:
[2023-10-26 10:57:12] Running RL reset
full
torch.Size([512, 157])
<class 'torch.Tensor'>
unscale(self.hand_dof_pos, self.hand_dof_lower_limits, self.hand_dof_upper_limits)
torch.Size([512, 19])
tensor([[-2.7106e+19, -4.6850e+18, -1.4326e+20, ..., -5.3496e+18,
6.5728e+19, -4.2172e+18],
[-2.7284e+19, -7.4511e+18, -1.5910e+20, ..., -1.3316e+19,
7.2489e+19, -1.0030e+18],
[-8.4951e+19, -1.9560e+19, -4.5132e+20, ..., -1.8580e+19,
2.2441e+20, -7.2675e+18],
...,
[-1.9062e+18, -3.4884e+17, -9.9040e+18, ..., -2.9903e+17,
4.5146e+18, -2.2122e+17],
[ 7.0171e+18, 1.8247e+18, 2.8405e+19, ..., 2.1454e+18,
-1.2537e+19, 7.2946e+17],
[ 2.3270e+18, 4.8319e+17, 9.9849e+18, ..., 5.9163e+17,
-4.7495e+18, 2.8657e+17]], device='cuda:0')
torch.Size([512, 19])
tensor([[-2.7106e+19, -4.6850e+18, -1.4326e+20, ..., -5.3496e+18,
6.5728e+19, -4.2172e+18],
[-2.7284e+19, -7.4511e+18, -1.5910e+20, ..., -1.3316e+19,
7.2489e+19, -1.0030e+18],
[-8.4951e+19, -1.9560e+19, -4.5132e+20, ..., -1.8580e+19,
2.2441e+20, -7.2675e+18],
...,
[-1.9062e+18, -3.4884e+17, -9.9040e+18, ..., -2.9903e+17,
4.5146e+18, -2.2122e+17],
[ 7.0171e+18, 1.8247e+18, 2.8405e+19, ..., 2.1454e+18,
-1.2537e+19, 7.2946e+17],
[ 2.3270e+18, 4.8319e+17, 9.9849e+18, ..., 5.9163e+17,
-4.7495e+18, 2.8657e+17]], device='cuda:0')
self.object_pos
tensor([[-8.4833e+21, -7.3835e+20, -2.2198e+22],
[-7.9510e+21, -1.1100e+21, -2.3025e+22],
[-2.6600e+22, -3.0766e+21, -7.0042e+22],
...,
[-5.9590e+20, -5.4812e+19, -1.5350e+21],
[ 2.2780e+21, 2.9395e+20, 4.7164e+21],
[ 7.5931e+20, 7.7700e+19, 1.6592e+21]], device='cuda:0')
tensor([[ 0.0023, -0.2398, 0.5959],
[-0.0006, -0.2216, 0.6022],
[-0.0063, -0.2329, 0.6030],
...,
[ 0.0044, -0.2347, 0.5892],
[-0.0076, -0.2301, 0.5947],
[ 0.0027, -0.2242, 0.5920]], device='cuda:0')
self.object_rot
tensor([[ 5.8964e+22, 1.5363e+22, 1.3487e+23, -2.1843e+23],
[ 7.3229e+22, -4.3851e+22, 7.3476e+23, -4.9108e+23],
[ 1.7563e+23, 6.9460e+22, 4.4650e+23, -7.0023e+23],
...,
[ 2.8840e+21, 2.1888e+21, 9.9195e+21, -1.3566e+22],
[-1.0801e+22, -1.0295e+21, -3.4238e+22, 1.1092e+23],
[-3.9277e+21, -3.1532e+21, -8.1248e+21, 3.3907e+22]], device='cuda:0')
tensor([[ 0.0595, -0.4677, -0.1114, 0.8748],
[ 0.1791, -0.5222, 0.2705, -0.7887],
[ 0.4778, -0.3883, 0.6115, -0.4970],
...,
[ 0.1742, -0.4895, -0.2864, 0.8050],
[ 0.3821, -0.0880, 0.8965, -0.2064],
[ 0.8855, -0.2252, -0.3938, 0.1002]], device='cuda:0')
self.goal_pos
tensor([[ 5.9488e+21, -2.6233e+22, 6.9139e+22],
[ 5.6819e+21, -7.2834e+22, 3.7922e+23],
[ 1.6076e+22, -7.4109e+22, 1.8002e+23],
...,
[ 5.3549e+20, -3.9982e+20, 5.3106e+21],
[-6.2756e+20, 4.4690e+21, -2.0789e+22],
[-2.6929e+20, 6.9501e+20, -6.8168e+20]], device='cuda:0')
tensor([[ 0.0000, -0.2300, 0.5600],
[ 0.0000, -0.2300, 0.5600],
[ 0.0000, -0.2300, 0.5600],
...,
[ 0.0000, -0.2300, 0.5600],
[ 0.0000, -0.2300, 0.5600],
[ 0.0000, -0.2300, 0.5600]], device='cuda:0')
self.goal_rot
tensor([[-1.7945e+23, 6.4769e+23, -7.4232e+21, -2.2230e+20],
[-1.4681e+24, 1.2916e+24, -7.4893e+21, -4.9259e+20],
[-6.1295e+23, 2.0695e+24, -2.4071e+22, 2.7901e+16],
...,
[-1.4459e+22, 4.0402e+22, -4.6500e+20, -2.1988e+19],
[ 5.9020e+22, -2.6373e+23, 1.0983e+21, 3.0378e+21],
[ 1.1347e+22, -8.1807e+22, 4.8356e+20, -2.7648e+17]], device='cuda:0')
tensor([[ 0.0152, 0.3937, -0.0355, -0.9184],
[ 0.6872, 0.6637, 0.2125, 0.2053],
[ 0.5651, 0.6532, 0.3297, 0.3811],
...,
[ 0.1421, 0.7993, -0.1022, -0.5749],
[ 0.2919, -0.1083, 0.8909, -0.3306],
[ 0.1964, -0.1767, -0.7170, 0.6451]], device='cuda:0')
quat_mul(self.object_rot, quat_conjugate(self.goal_rot))
tensor([[-7.8576e+19, -1.6329e+21, -8.5455e+20, 1.0128e+22],
[ 1.3534e+23, 7.8607e+20, -2.0361e+21, 1.0344e+22],
[-2.1815e+20, -3.4277e+20, -2.9518e+21, 3.4718e+22],
...,
[-6.6304e+19, -1.5157e+18, -4.7317e+19, 6.9647e+20],
[ 2.5963e+19, 2.1001e+18, 3.3997e+20, -2.1074e+21],
[ 4.1819e+18, 7.5831e+18, 9.4548e+19, -7.9512e+20]], device='cuda:0')
tensor([[-0.9827, -0.1639, 0.0855, 0.0076],
[-0.3280, -0.7009, 0.5641, -0.2882],
[ 0.0286, -0.9284, 0.3647, 0.0645],
...,
[-0.8000, -0.4557, -0.3849, -0.0645],
[ 0.9880, 0.1282, -0.0719, 0.0473],
[ 0.5607, 0.2945, 0.4300, -0.6434]], device='cuda:0')
self.fingertip_pos.reshape(self.num_envs, 3*self.num_fingertips)
torch.Size([512, 15])
tensor([[-6.7231e+20, 0.0000e+00, 0.0000e+00, ..., -2.3980e-01,
5.9592e-01, 5.9541e-02],
[-1.2883e+20, 0.0000e+00, 0.0000e+00, ..., -2.2159e-01,
6.0221e-01, 1.7909e-01],
[-1.1470e+21, 0.0000e+00, 0.0000e+00, ..., -2.3290e-01,
6.0295e-01, 4.7780e-01],
...,
[-3.5815e+19, 0.0000e+00, 0.0000e+00, ..., -2.3466e-01,
5.8924e-01, 1.7419e-01],
[ 1.1671e+20, 0.0000e+00, 0.0000e+00, ..., -2.3005e-01,
5.9474e-01, 3.8206e-01],
[ 4.6321e+19, 0.0000e+00, 0.0000e+00, ..., -2.2423e-01,
5.9203e-01, 8.8552e-01]], device='cuda:0')
torch.Size([512, 15])
tensor([[-0.0361, -0.2985, 0.5130, ..., -0.0840, -0.2049, 0.5105],
[-0.0361, -0.2985, 0.5130, ..., -0.0840, -0.2049, 0.5105],
[-0.0361, -0.2985, 0.5130, ..., -0.0840, -0.2049, 0.5105],
...,
[-0.0361, -0.2985, 0.5130, ..., -0.0840, -0.2049, 0.5105],
[-0.0361, -0.2985, 0.5130, ..., -0.0840, -0.2049, 0.5105],
[-0.0361, -0.2985, 0.5130, ..., -0.0840, -0.2049, 0.5105]],
device='cuda:0')
self.actions
torch.Size([512, 18])
tensor([[-0.4677, -0.1114, 0.8748, ..., -0.9184, -0.9827, -0.1639],
[-0.5222, 0.2705, -0.7887, ..., 0.2053, -0.3280, -0.7009],
[-0.3883, 0.6115, -0.4970, ..., 0.3811, 0.0286, -0.9284],
...,
[-0.4895, -0.2864, 0.8050, ..., -0.5749, -0.8000, -0.4557],
[-0.0880, 0.8965, -0.2064, ..., -0.3306, 0.9880, 0.1282],
[-0.2252, -0.3938, 0.1002, ..., 0.6451, 0.5607, 0.2945]],
device='cuda:0')
torch.Size([512, 18])
tensor([[0., 0., 0., ..., 0., 0., 0.],
[0., 0., 0., ..., 0., 0., 0.],
[0., 0., 0., ..., 0., 0., 0.],
...,
[0., 0., 0., ..., 0., 0., 0.],
[0., 0., 0., ..., 0., 0., 0.],
[0., 0., 0., ..., 0., 0., 0.]], device='cuda:0')
full
torch.Size([512, 157])
<class 'torch.Tensor'>
unscale(self.hand_dof_pos, self.hand_dof_lower_limits, self.hand_dof_upper_limits)
torch.Size([512, 19])
tensor([[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan],
...,
[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan]], device='cuda:0')
torch.Size([512, 19])
tensor([[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan],
...,
[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan]], device='cuda:0')
self.object_pos
tensor([[nan, nan, nan],
[nan, nan, nan],
[nan, nan, nan],
...,
[nan, nan, nan],
[nan, nan, nan],
[nan, nan, nan]], device='cuda:0')
tensor([[ 0.0023, -0.2398, 0.5912],
[-0.0006, -0.2216, 0.5974],
[-0.0063, -0.2329, 0.5982],
...,
[ 0.0044, -0.2347, 0.5845],
[-0.0076, -0.2301, 0.5900],
[ 0.0027, -0.2242, 0.5873]], device='cuda:0')
self.object_rot
tensor([[nan, nan, nan, nan],
[nan, nan, nan, nan],
[nan, nan, nan, nan],
...,
[nan, nan, nan, nan],
[nan, nan, nan, nan],
[nan, nan, nan, nan]], device='cuda:0')
tensor([[ 0.0595, -0.4677, -0.1114, 0.8748],
[ 0.1791, -0.5222, 0.2705, -0.7887],
[ 0.4778, -0.3883, 0.6115, -0.4970],
...,
[ 0.1742, -0.4895, -0.2864, 0.8050],
[ 0.3821, -0.0880, 0.8965, -0.2064],
[ 0.8855, -0.2252, -0.3938, 0.1002]], device='cuda:0')
self.goal_pos
tensor([[nan, nan, nan],
[nan, nan, nan],
[nan, nan, nan],
...,
[nan, nan, nan],
[nan, nan, nan],
[nan, nan, nan]], device='cuda:0')
tensor([[ 0.0000, -0.2300, 0.5600],
[ 0.0000, -0.2300, 0.5600],
[ 0.0000, -0.2300, 0.5600],
...,
[ 0.0000, -0.2300, 0.5600],
[ 0.0000, -0.2300, 0.5600],
[ 0.0000, -0.2300, 0.5600]], device='cuda:0')
self.goal_rot
tensor([[nan, nan, nan, nan],
[nan, nan, nan, nan],
[nan, nan, nan, nan],
...,
[nan, nan, nan, nan],
[nan, nan, nan, nan],
[nan, nan, nan, nan]], device='cuda:0')
tensor([[ 0.0152, 0.3937, -0.0355, -0.9184],
[ 0.6872, 0.6637, 0.2125, 0.2053],
[ 0.5651, 0.6532, 0.3297, 0.3811],
...,
[ 0.1421, 0.7993, -0.1022, -0.5749],
[ 0.2919, -0.1083, 0.8909, -0.3306],
[ 0.1964, -0.1767, -0.7170, 0.6451]], device='cuda:0')
quat_mul(self.object_rot, quat_conjugate(self.goal_rot))
tensor([[nan, nan, nan, nan],
[nan, nan, nan, nan],
[nan, nan, nan, nan],
...,
[nan, nan, nan, nan],
[nan, nan, nan, nan],
[nan, nan, nan, nan]], device='cuda:0')
tensor([[-0.9827, -0.1639, 0.0855, 0.0076],
[-0.3280, -0.7009, 0.5641, -0.2882],
[ 0.0286, -0.9284, 0.3647, 0.0645],
...,
[-0.8000, -0.4557, -0.3849, -0.0645],
[ 0.9880, 0.1282, -0.0719, 0.0473],
[ 0.5607, 0.2945, 0.4300, -0.6434]], device='cuda:0')
self.fingertip_pos.reshape(self.num_envs, 3*self.num_fingertips)
torch.Size([512, 15])
tensor([[ nan, 0.0000, 0.0000, ..., -0.2398, 0.5912, 0.0595],
[ nan, 0.0000, 0.0000, ..., -0.2216, 0.5974, 0.1791],
[ nan, 0.0000, 0.0000, ..., -0.2329, 0.5982, 0.4778],
...,
[ nan, 0.0000, 0.0000, ..., -0.2347, 0.5845, 0.1742],
[ nan, 0.0000, 0.0000, ..., -0.2301, 0.5900, 0.3821],
[ nan, 0.0000, 0.0000, ..., -0.2242, 0.5873, 0.8855]],
device='cuda:0')
torch.Size([512, 15])
tensor([[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan],
...,
[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan]], device='cuda:0')
self.actions
torch.Size([512, 18])
tensor([[-0.4677, -0.1114, 0.8748, ..., -0.9184, -0.9827, -0.1639],
[-0.5222, 0.2705, -0.7887, ..., 0.2053, -0.3280, -0.7009],
[-0.3883, 0.6115, -0.4970, ..., 0.3811, 0.0286, -0.9284],
...,
[-0.4895, -0.2864, 0.8050, ..., -0.5749, -0.8000, -0.4557],
[-0.0880, 0.8965, -0.2064, ..., -0.3306, 0.9880, 0.1282],
[-0.2252, -0.3938, 0.1002, ..., 0.6451, 0.5607, 0.2945]],
device='cuda:0')
torch.Size([512, 18])
tensor([[-0.5776, 1.0000, 0.5270, ..., 0.2500, -1.0000, -0.9111],
[ 0.0017, -0.4264, 1.0000, ..., 1.0000, -1.0000, -0.2598],
[-0.4309, 0.3819, 0.1305, ..., 0.4660, 0.4254, -1.0000],
...,
[ 0.4382, 1.0000, -0.7657, ..., 1.0000, -0.4103, -0.3327],
[ 0.0828, 0.1702, 0.4420, ..., -0.0640, 0.1705, 0.7118],
[-0.3522, -0.5169, 0.8220, ..., 0.8664, 1.0000, -0.6398]],
device='cuda:0')
Error executing job with overrides: ['task=RH8D', 'num_envs=512', 'headless=False']
Is this more likely to be a bad set-up of the physical parameters themselves, or an issue with the custom classes for the articulation and/or articulation view being set up incorrectly? My interpretation is that hand_dof_pos and fingertip_pos are not being read properly.
I’ve done a bit more testing and I find that slightly increasing the time step (dt) value can reduce the number of NaNs found in the observation buffer. I’m wondering if it’s something to do with the speed at which the observation buffer can be updated, or if it’s just an unstable simulation.