PyTorch "Segmentation fault (core dumped)" After Forward Propagation

I have this model that I'm running on some sample batches from the Fashion-MNIST dataset:

import torchvision
import torchvision.transforms as transforms
import torch
import matplotlib.pyplot as plt 
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

trainset = torchvision.datasets.FashionMNIST(root = "./data", train = True, download = True, transform = transforms.ToTensor())
testset = torchvision.datasets.FashionMNIST(root = "./data", train = False, download = True, transform = transforms.ToTensor())

trainloader = torch.utils.data.DataLoader(trainset, batch_size = 8, shuffle = True)
testloader = torch.utils.data.DataLoader(testset, batch_size= 8, shuffle = False)
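
Just for reference, a single batch from these loaders has the shapes below (with batch_size = 8 and Fashion-MNIST's single-channel 28x28 images), which is why the first conv layer takes 1 input channel:

images, labels = next(iter(trainloader))
print(images.shape)   # torch.Size([8, 1, 28, 28])
print(labels.shape)   # torch.Size([8])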

device = torch.device("cuda:0")
print(device)

class vgg16(nn.Module):
    def __init__(self):
        super(vgg16, self).__init__()

        ## note that vgg always does same padding on convolutions
        ## dec img size by pooling and inc channels using kernels
        self.cnn_block = nn.Sequential(
            nn.Conv2d(1, 64, 3, padding = 1),
            nn.ReLU(),
            nn.Conv2d(64, 64, 3, padding = 1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            
            nn.Conv2d(64, 256, 3, padding = 1),
            nn.ReLU(),
            nn.Conv2d(256, 256, 3, padding = 1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),

            nn.Conv2d(256, 512, 3, padding = 1),
            nn.ReLU(),
            nn.Conv2d(512, 512, 3, padding = 1),
            nn.ReLU(),
            nn.Conv2d(512, 512, 3, padding = 1),
            nn.ReLU(),
            nn.MaxPool2d(2, 1)
            # out = 6x6 img
        )

        self.fc_block = nn.Sequential(
            # 6x6x512 = 18432
            nn.Linear(18432, 4096),
            nn.ReLU(),
            nn.Linear(4096, 1024),
            nn.ReLU(),
            nn.Linear(1024, 256),
            nn.ReLU(),
            nn.Linear(256, 64),
            nn.ReLU(),
            nn.Linear(64, 10),
            # nn.Softmax(dim = 1) 
        )

    def forward(self, x):
        x = self.cnn_block(x)
        x = x.view(x.size(0), -1)
        x = self.fc_block(x)
        return x
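
As a quick sanity check of the hard-coded 18432 in the first Linear layer, this snippet (just an illustration, not part of the training script) confirms that cnn_block turns a 28x28 input into a 512 x 6 x 6 feature map:

# verify the flattened feature size that feeds the first Linear layer
check = vgg16()
with torch.no_grad():
    feats = check.cnn_block(torch.zeros(1, 1, 28, 28))
print(feats.shape)                  # torch.Size([1, 512, 6, 6])
print(feats.view(1, -1).size(1))    # 18432 = 512 * 6 * 6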

I seem to be getting a segmentation fault after the forward passes complete. The program runs many forward passes successfully, but once it finishes I get the segmentation fault. I do not have this problem when I set the device to "cpu"; the error only occurs when I set the device to "cuda:0", and it is the only error I get after forward prop completes successfully.

Here is the snippet for the forward pass (running it twice):

net = vgg16().to(device)
loss_func = nn.CrossEntropyLoss()
opt = optim.Adam(net.parameters(), lr = 0.0001)

dat = next(iter(testloader))[0].to(device)
print(dat.size())
out = net(dat)
print(out)

dat = next(iter(testloader))[0].to(device)
print(dat.size())
out = net(dat)
print(out)
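
To narrow down where the crash happens, something like the following can be used (the faulthandler call and the explicit synchronize/empty_cache at the end are debugging additions on top of the snippet above, not part of the model itself):

import faulthandler
faulthandler.enable()                 # print a Python traceback if the process segfaults

net = vgg16().to(device)
dat = next(iter(testloader))[0].to(device)
out = net(dat)
print(out.size())

torch.cuda.synchronize()              # make sure all queued CUDA work has finished
del out, dat, net
torch.cuda.empty_cache()              # release cached GPU memory before the interpreter exits

The idea is that if the segfault still only shows up after the last line, the forward pass itself is not the culprit.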

Environment:

  • Python 3.6
  • PyTorch 1.6
  • L4T 32.4.3
  • JetPack 4.4
  • CUDA: 10.2.89
  • cuDNN: 8.0.0.180

Can anybody point out what's wrong? Thanks so much!

I found something that pretty much answers my post. Here it is:
