VGG Caffe is not working

Hello everyone.
I’m doing facial recognition using the Caffe framework and a pre-trained VGG model.
When I run training with the train_val.prototxt below, the process stops right after this output:

I0125 19:34:31.965451  5184 net.cpp:746] Ignoring source layer fc8
I0125 19:34:31.965508  5184 net.cpp:746] Ignoring source layer prob
I0125 19:34:32.007084  5184 solver.cpp:57] Solver scaffolding done.
I0125 19:34:32.011937  5184 caffe.cpp:239] Starting Optimization
I0125 19:34:32.011982  5184 solver.cpp:289] Solving Face_Recognition_VGG_NET
I0125 19:34:32.011994  5184 solver.cpp:290] Learning Rate Policy: step
I0125 19:34:32.246888  5184 solver.cpp:347] Iteration 0, Testing net (#0)
nvidia@tegra-ubuntu:~$
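For completeness: the train_val.prototxt is not run on its own; I start training through a solver file with the caffe binary, along the lines of "caffe train -solver solver.prototxt -weights vgg_pretrained.caffemodel" (file names illustrative). A minimal sketch of such a solver, with illustrative values rather than my exact settings:

net: "train_val.prototxt"
test_iter: 100
test_interval: 1000
base_lr: 0.001
lr_policy: "step"    # matches the "Learning Rate Policy: step" line in the log
gamma: 0.1
stepsize: 10000
max_iter: 50000
momentum: 0.9
weight_decay: 0.0005
solver_mode: GPU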

I’m using this train_val.prototxt:

name: "Face_Recognition_VGG_NET"
layer {
  name: "data"
  type: "Data"
  include {
    phase: TRAIN
  }
  transform_param {
    crop_size: 224
    mean_file: "/home/nvidia/Face-Recognition-using-VGG_FaceNet/mean_train.binaryproto"
    mirror: true
  }
  data_param {
    source: "/home/nvidia/caffe/examples/imagenet/ilsvrc12_train_lmdb"
    batch_size: 256
    backend: LMDB
  }
  top: "data"
  top: "label"
}
layer {
  name: "data"
  type: "Data"
  include {
    phase: TEST
  }
  transform_param {
    crop_size: 224
    mean_file: "/home/nvidia/Face-Recognition-using-VGG_FaceNet/mean_val.binaryproto"
    mirror: false
  }
  data_param {
    source: "/home/nvidia/caffe/examples/imagenet/ilsvrc12_val_lmdb"
    batch_size: 50
    backend: LMDB
  }
  top: "data"
  top: "label"
}
layer {
  bottom: "data"
  top: "conv1_1"
  name: "conv1_1"
  type: "Convolution"
  convolution_param {
    num_output: 64
    pad: 1
    kernel_size: 3
    stride: 1
  }
}
layer {
  bottom: "conv1_1"
  top: "conv1_1"
  name: "relu1_1"
  type: "ReLU"
}
layer {
  bottom: "conv1_1"
  top: "conv1_2"
  name: "conv1_2"
  type: "Convolution"
  convolution_param {
    num_output: 64
    pad: 1
    kernel_size: 3
    stride: 1
  }
}
layer {
  bottom: "conv1_2"
  top: "conv1_2"
  name: "relu1_2"
  type: "ReLU"
}
layer {
  bottom: "conv1_2"
  top: "pool1"
  name: "pool1"
  type: "Pooling"
  pooling_param {
    pool: MAX
    kernel_size: 2
    stride: 2
  }
}
layer {
  bottom: "pool1"
  top: "conv2_1"
  name: "conv2_1"
  type: "Convolution"
  convolution_param {
    num_output: 128
    pad: 1
    kernel_size: 3
    stride: 1
  }
}
layer {
  bottom: "conv2_1"
  top: "conv2_1"
  name: "relu2_1"
  type: "ReLU"
}
layer {
  bottom: "conv2_1"
  top: "conv2_2"
  name: "conv2_2"
  type: "Convolution"
  convolution_param {
    num_output: 128
    pad: 1
    kernel_size: 3
    stride: 1
  }
}
layer {
  bottom: "conv2_2"
  top: "conv2_2"
  name: "relu2_2"
  type: "ReLU"
}
layer {
  bottom: "conv2_2"
  top: "pool2"
  name: "pool2"
  type: "Pooling"
  pooling_param {
    pool: MAX
    kernel_size: 2
    stride: 2
  }
}
layer {
  bottom: "pool2"
  top: "conv3_1"
  name: "conv3_1"
  type: "Convolution"
  convolution_param {
    num_output: 256
    pad: 1
    kernel_size: 3
    stride: 1
  }
}
layer {
  bottom: "conv3_1"
  top: "conv3_1"
  name: "relu3_1"
  type: "ReLU"
}
layer {
  bottom: "conv3_1"
  top: "conv3_2"
  name: "conv3_2"
  type: "Convolution"
  convolution_param {
    num_output: 256
    pad: 1
    kernel_size: 3
    stride: 1
  }
}
layer {
  bottom: "conv3_2"
  top: "conv3_2"
  name: "relu3_2"
  type: "ReLU"
}
layer {
  bottom: "conv3_2"
  top: "conv3_3"
  name: "conv3_3"
  type: "Convolution"
  convolution_param {
    num_output: 256
    pad: 1
    kernel_size: 3
    stride: 1
  }
}
layer {
  bottom: "conv3_3"
  top: "conv3_3"
  name: "relu3_3"
  type: "ReLU"
}
layer {
  bottom: "conv3_3"
  top: "pool3"
  name: "pool3"
  type: "Pooling"
  pooling_param {
    pool: MAX
    kernel_size: 2
    stride: 2
  }
}
layer {
  bottom: "pool3"
  top: "conv4_1"
  name: "conv4_1"
  type: "Convolution"
  convolution_param {
    num_output: 512
    pad: 1
    kernel_size: 3
    stride: 1
  }
}
layer {
  bottom: "conv4_1"
  top: "conv4_1"
  name: "relu4_1"
  type: "ReLU"
}
layer {
  bottom: "conv4_1"
  top: "conv4_2"
  name: "conv4_2"
  type: "Convolution"
  convolution_param {
    num_output: 512
    pad: 1
    kernel_size: 3
    stride: 1
  }
}
layer {
  bottom: "conv4_2"
  top: "conv4_2"
  name: "relu4_2"
  type: "ReLU"
}
layer {
  bottom: "conv4_2"
  top: "conv4_3"
  name: "conv4_3"
  type: "Convolution"
  convolution_param {
    num_output: 512
    pad: 1
    kernel_size: 3
    stride: 1
  }
}
layer {
  bottom: "conv4_3"
  top: "conv4_3"
  name: "relu4_3"
  type: "ReLU"
}
layer {
  bottom: "conv4_3"
  top: "pool4"
  name: "pool4"
  type: "Pooling"
  pooling_param {
    pool: MAX
    kernel_size: 2
    stride: 2
  }
}
layer {
  bottom: "pool4"
  top: "conv5_1"
  name: "conv5_1"
  type: "Convolution"
  convolution_param {
    num_output: 512
    pad: 1
    kernel_size: 3
    stride: 1
  }
}
layer {
  bottom: "conv5_1"
  top: "conv5_1"
  name: "relu5_1"
  type: "ReLU"
}
layer {
  bottom: "conv5_1"
  top: "conv5_2"
  name: "conv5_2"
  type: "Convolution"
  convolution_param {
    num_output: 512
    pad: 1
    kernel_size: 3
    stride: 1
  }
}
layer {
  bottom: "conv5_2"
  top: "conv5_2"
  name: "relu5_2"
  type: "ReLU"
}
layer {
  bottom: "conv5_2"
  top: "conv5_3"
  name: "conv5_3"
  type: "Convolution"
  convolution_param {
    num_output: 512
    pad: 1
    kernel_size: 3
    stride: 1
  }
}
layer {
  bottom: "conv5_3"
  top: "conv5_3"
  name: "relu5_3"
  type: "ReLU"
}

layer {
  bottom: "conv5_3"
  top: "conv5_4"
  name: "conv5_4"
  type: "Convolution"
  convolution_param {
    num_output: 512
    pad: 1
    kernel_size: 3
    stride: 1
  }
}
layer {
  bottom: "conv5_4"
  top: "conv5_4"
  name: "relu5_4"
  type: "ReLU"
}
layer {
  bottom: "conv5_4"
  top: "pool5"
  name: "pool5"
  type: "Pooling"
  pooling_param {
    pool: MAX
    kernel_size: 2
    stride: 2
  }
}
layer {
  bottom: "pool5"
  top: "fc6"
  name: "fc6"
  type: "InnerProduct"
  inner_product_param {
    num_output: 4096
  }
}
layer {
  bottom: "fc6"
  top: "fc6"
  name: "relu6"
  type: "ReLU"
}
layer {
  bottom: "fc6"
  top: "fc6"
  name: "drop6"
  type: "Dropout"
  dropout_param {
    dropout_ratio: 0.5
  }
}
layer {
  name: "fc7"
  type: "InnerProduct"
  bottom: "fc6"
  top: "fc7"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  inner_product_param {
    num_output: 4096
    weight_filler {
      type: "gaussian"
      std: 0.005
    }
    bias_filler {
      type: "constant"
      value: 0.1
    }
  }
}
layer {
  name: "relu7"
  type: "ReLU"
  bottom: "fc7"
  top: "fc7"
}
layer {
  name: "drop7"
  type: "Dropout"
  bottom: "fc7"
  top: "fc7"
  dropout_param {
    dropout_ratio: 0.5
  }
}
layer {
  name: "my-fc8"
  type: "InnerProduct"
  bottom: "fc7"
  top: "fc8"
  inner_product_param {
    num_output: 28
    weight_filler {
        type: "xavier"
    }
    bias_filler {
        type: "constant"
        value: 0.1
    }
  }
}
layer {
  name: "accuracy"
  type: "Accuracy"
  bottom: "fc8"
  bottom: "label"
  top: "accuracy"
  include {
    phase: TEST
  }
}
layer {
  name: "loss"
  type: "SoftmaxWithLoss"
  bottom: "fc8"
  bottom: "label"
  top: "loss"
}

Please help me.
I’m using a Jetson TX2.

Hi,

What is your use case? Do you want to re-train the model or run inference?

By the way, could you point out the error message? We cannot find an obvious error in your log:

I0125 19:34:31.965451  5184 net.cpp:746] Ignoring source layer fc8
I0125 19:34:31.965508  5184 net.cpp:746] Ignoring source layer prob
I0125 19:34:32.007084  5184 solver.cpp:57] Solver scaffolding done.
I0125 19:34:32.011937  5184 caffe.cpp:239] Starting Optimization
I0125 19:34:32.011982  5184 solver.cpp:289] Solving Face_Recognition_VGG_NET
I0125 19:34:32.011994  5184 solver.cpp:290] Learning Rate Policy: step
I0125 19:34:32.246888  5184 solver.cpp:347] Iteration 0, Testing net (#0)
nvidia@tegra-ubuntu:~$
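One note: the "Ignoring source layer fc8" and "Ignoring source layer prob" lines are expected when fine-tuning. Caffe copies pre-trained weights by layer name, and since you renamed the last layer to "my-fc8", it starts from the fresh xavier initialization in your prototxt instead of the pre-trained fc8 weights. If you want that new layer to adapt faster than the pre-trained ones, a common optional tweak (illustrative, not required) is to give it higher learning-rate multipliers:

layer {
  name: "my-fc8"
  type: "InnerProduct"
  bottom: "fc7"
  top: "fc8"
  param { lr_mult: 10 decay_mult: 1 }  # new weights learn 10x faster than the base layers
  param { lr_mult: 20 decay_mult: 0 }  # bias, no weight decay
  inner_product_param {
    num_output: 28
    weight_filler { type: "xavier" }
    bias_filler { type: "constant" value: 0.1 }
  }
}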

Thanks.

Hello. Thanks for your reply, and sorry for my late answer; I was looking for more information about this.
I’m trying to fine-tune the 19-layer VGG model.
As you can see in the log, the training process ends abruptly right after "Iteration 0, Testing net (#0)".
I think it’s because the Jetson TX2 doesn’t have enough memory: with a batch size of 256, VGG activations alone take tens of gigabytes (roughly 100 MB per image for the forward pass), while the TX2 has 8 GB of memory shared between CPU and GPU.

Hi,

Yes. The TX2 is designed for inference and is not suitable for training due to its hardware limitations.
It’s recommended to run the training job on a desktop GPU instead.
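If you still want to experiment on the TX2 itself, the main lever is the batch size in the data layers, since activation memory scales roughly linearly with it. An illustrative change to your train_val.prototxt (we have not verified that even this fits in TX2 memory):

  data_param {
    source: "/home/nvidia/caffe/examples/imagenet/ilsvrc12_train_lmdb"
    batch_size: 16    # was 256; cuts activation memory roughly 16x
    backend: LMDB
  }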

Thanks.