如何解决Pytorch 中非常高的验证损失/小火车损失,同时对 resnet 50 进行微调
我正在训练模型来对 2 种类型的图像进行分类。我决定采用迁移学习方法,冻结 resnet50 和新层的每个部分并开始微调过程。我的数据集并不完全平衡,但我为此使用了权重。请查看验证损失与训练损失图。这似乎非常不一致。你能看看我的代码吗?我是 Pytorch 的新手,也许我的方法和代码有问题。在测试集上测试的最终准确率为 86%。谢谢!
learning_rate = 1e-1
num_epochs = 100
patience = 10
batch_size = 100
weights = [4,1]
model = models.resnet50(pretrained=True)
# Replace last layer
num_features = model.fc.in_features
model.fc = nn.Sequential(
nn.Linear(num_features,512),nn.ReLU(inplace=True),nn.Linear(512,64),nn.Dropout(0.5,inplace=True),nn.Linear(64,2))
class_weights = torch.FloatTensor(weights).cuda()
criterion = nn.CrossEntropyLoss(weight=class_weights)
optimizer = torch.optim.SGD(model.parameters(),lr=learning_rate)
running_loss = 0
losses = []
# To freeze the residual layers
for param in model.parameters():
param.requires_grad = False
for param in model.fc.parameters():
param.requires_grad = True
# Find total parameters and trainable parameters
total_params = sum(p.numel() for p in model.parameters())
print(f'{total_params:,} total parameters.')
total_trainable_params = sum(
p.numel() for p in model.parameters() if p.requires_grad)
print(f'{total_trainable_params:,} training parameters.')
总共 24,590,082 个参数。 1,082,050 个训练参数。
# initialize the early_stopping object
early_stopping = pytorchtools.EarlyStopping(patience=patience,verbose=True)
for epoch in range(num_epochs):
##########################
#######TRAIN MODEL########
##########################
epochs_loss=0
##Switch to train mode
model.train()
for i,(images,labels) in enumerate(train_dl):
# Move tensors to the configured device
images = images.to(device)
labels = labels.to(device)
# Forward pass
# Backprpagation and optimization
optimizer.zero_grad()
outputs = model(images).to(device)
loss = criterion(outputs,labels)
loss.backward()
optimizer.step()
#calculate train_loss
train_losses.append(loss.item())
##########################
#####VALIDATE MODEL#######
##########################
model.eval()
for images,labels in val_dl:
images = images.to(device)
labels = labels.to(device)
outputs = model(images).to(device)
loss = criterion(outputs,labels)
valid_losses.append(loss.item())
# print training/validation statistics
# calculate average loss over an epoch
train_loss = np.average(train_losses)
valid_loss = np.average(valid_losses)
# print(train_loss)
avg_train_losses.append(train_loss)
avg_valid_losses.append(valid_loss)
print_msg = (f'train_loss: {train_loss:.5f} ' + f'valid_loss: {valid_loss:.5f}')
print(print_msg)
# clear lists to track next epoch
train_losses = []
valid_losses = []
early_stopping(valid_loss,model)
print(epoch)
if early_stopping.early_stop:
print("Early stopping")
break
版权声明:本文内容由互联网用户自发贡献,该文观点与技术仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 dio@foxmail.com 举报,一经查实,本站将立刻删除。