Sorry for the late reply. Here are my main Caffe2 scripts:
from caffe2.python import brew, core, model_helper, optimizer, workspace
# Depending on the Caffe2 version this may be exported as PseudoFP16Initializer instead.
from caffe2.python.modeling.initializers import pFP16Initializer

def create_model_FP16(m, device_opts, dtype):
    with core.DeviceScope(device_opts):
        initializer = pFP16Initializer
        with brew.arg_scope([brew.conv, brew.fc],
                            WeightInitializer=initializer,
                            BiasInitializer=initializer,
                            enable_tensor_core=True):
            conv1 = brew.conv(m, 'data', 'conv1', dim_in=1, dim_out=96, kernel=11, stride=4)
            relu1 = brew.relu(m, conv1, 'conv1')
            norm1 = brew.lrn(m, relu1, 'norm1', size=5, alpha=0.0001, beta=0.75)
            pool1 = brew.max_pool(m, norm1, 'pool1', kernel=3, stride=2)
            conv2 = brew.conv(m, pool1, 'conv2', dim_in=96, dim_out=256, kernel=5)
            relu2 = brew.relu(m, conv2, 'conv2')
            norm2 = brew.lrn(m, relu2, 'norm2', size=5, alpha=0.0001, beta=0.75)
            pool2 = brew.max_pool(m, norm2, 'pool2', kernel=3, stride=2)
            conv3 = brew.conv(m, pool2, 'conv3', dim_in=256, dim_out=384, kernel=3)
            relu3 = brew.relu(m, conv3, 'conv3')
            conv4 = brew.conv(m, relu3, 'conv4', dim_in=384, dim_out=384, kernel=3)
            relu4 = brew.relu(m, conv4, 'conv4')
            conv5 = brew.conv(m, relu4, 'conv5', dim_in=384, dim_out=256, kernel=3)
            relu5 = brew.relu(m, conv5, 'conv5')
            pool5 = brew.max_pool(m, relu5, 'pool5', kernel=3, stride=2)
            fc6 = brew.fc(m, pool5, 'fc6', dim_in=256 * 2 * 2, dim_out=4096)  # dim_in = flattened pool5 size
            relu6 = brew.relu(m, fc6, 'fc6')
            dropout1 = brew.dropout(m, relu6, 'dropout1', ratio=0.5, is_test=0)
            fc7 = brew.fc(m, dropout1, 'fc7', dim_in=4096, dim_out=4096)
            relu7 = brew.relu(m, fc7, 'fc7')
            dropout2 = brew.dropout(m, relu7, 'dropout2', ratio=0.5, is_test=0)
            fc8 = brew.fc(m, dropout2, 'fc8', dim_in=4096, dim_out=1000)
            softmax = brew.softmax(m, fc8, 'softmax')
            m.net.AddExternalOutput(softmax)
        # New addition: cast the fp16 softmax back to fp32 before the loss ops
        softmax = m.net.HalfToFloat(softmax, str(softmax) + '_fp32')
        xent = m.LabelCrossEntropy([softmax, "label"], 'xent')
        loss = m.AveragedLoss(xent, "loss")
        brew.accuracy(m, [softmax, "label"], "accuracy")
        m.AddGradientOperators([loss])
        opt = optimizer.build_sgd(m, base_learning_rate=0.01, policy="step", stepsize=1, gamma=0.999)
    return softmax
#########
Here is how I run the function "create_model_FP16":
workspace.FeedBlob("data", data, device_option=device_opts)
workspace.FeedBlob("label", label, device_option=device_opts)
train_model = model_helper.ModelHelper(name="train_net")
softmax = create_model_FP16(train_model, device_opts=device_opts, dtype=dtype)
with core.DeviceScope(device_opts):
    brew.add_weight_decay(train_model, 0.001)  # does this have any effect?

workspace.RunNetOnce(train_model.param_init_net)
workspace.CreateNet(train_model.net)
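For completeness, the loop I then use to actually train is roughly the following (the iteration count and the per-iteration data/label refresh here are placeholders, not my exact code):

num_iters = 100  # placeholder iteration count
for i in range(num_iters):
    # in the real script, data and label are refilled from the dataset each iteration
    workspace.FeedBlob("data", data, device_option=device_opts)
    workspace.FeedBlob("label", label, device_option=device_opts)
    workspace.RunNet(train_model.net)
    if i % 10 == 0:
        print("iter {}: loss = {}".format(i, workspace.FetchBlob("loss")))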
This code runs fine on my desktop GPU (GTX 1050 Ti), but on the NVIDIA Jetson TX2 it fails with the following error:
"Cannot create operator of type 'CreateMutex' on the device 'CUDA'"
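To narrow down which net asks for the offending op, I can dump every operator together with the device it was assigned to; this is only a diagnostic sketch, not part of the training script:

from caffe2.proto import caffe2_pb2

def dump_ops(net):
    # Print each op type and the device type recorded in its device_option.
    for op in net.Proto().op:
        dev = "CUDA" if op.device_option.device_type == caffe2_pb2.CUDA else "CPU"
        print("{:20s} -> {}".format(op.type, dev))

dump_ops(train_model.param_init_net)
dump_ops(train_model.net)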
Thanks in advance for any reply.