如何解决在test_loader和整个数据集上评估时,准确性不同
我在 NumPy 中实现了一个 ANN 模型。在训练循环中,我会在测试数据集(来自 torchvision.datasets 的 MNIST)上评估模型——一个循环遍历 epoch,一个用于训练,一个用于测试(因此模型进入测试循环时,参数已经更新过)。问题是:当我在整个测试集上一次性评估模型(使用 x = mnist_test.data.reshape(x.shape[0],-1).numpy() 和 y = mnist_test.targets.numpy()),
与使用来自 torch.utils.data 的 DataLoader()
分批评估时,得到了两个不同的结果。下面是代码和结果:
import numpy as np
import matplotlib.pyplot as plt
from torchvision import datasets
from torchvision.transforms import Compose,Normalize,ToTensor
from torch.utils.data import DataLoader
from tqdm.notebook import tqdm
# Both splits need ToTensor(): it converts PIL images to float tensors AND
# scales pixels from [0, 255] to [0.0, 1.0]. The original omitted the
# transform on the test split, so test_loader would yield raw PIL images
# (which default collation cannot batch) and, in any case, data on a
# different scale than the training inputs — both splits must match.
mnist_train = datasets.MNIST('.', train=True, transform=Compose([ToTensor()]), download=True)
mnist_test = datasets.MNIST('.', train=False, transform=Compose([ToTensor()]), download=True)
train_loader = DataLoader(mnist_train, batch_size=60, shuffle=True)
# shuffle=False keeps test batches in dataset order, so preds/true line up.
test_loader = DataLoader(mnist_test, batch_size=100, shuffle=False)
class BCE_Loss():
    """Binary cross-entropy with logits, applied one-vs-all over classes.

    Expects logits z of shape (num_classes, batch) and either a target of
    the same shape or a 1-D vector of integer labels, which is one-hot
    encoded via vectorize().
    """
    def __init__(self):
        pass

    def __call__(self, z, y):
        """Return the batch-mean BCE-with-logits loss (a scalar)."""
        if z.shape != y.shape:
            y = self.vectorize(z.shape[0], y)
        m = y.shape[1]
        # Numerically stable form: max(z,0) - z*y + log(1 + exp(-|z|)).
        return (1 / m) * np.sum(np.maximum(z, 0) - z * y + np.log(1 + np.exp(-np.abs(z))))

    def backward(self, z, y):
        """Return dLoss/dz, shape (num_classes, batch); caches the loss for item().

        Fixed: the original signature `def backward(self,y)` was missing both
        the colon and the `z` parameter that the body (and the training-loop
        call `loss_fn.backward(out, y)`) requires.
        """
        # One-hot encode BEFORE reading y.shape[1]; a 1-D label vector has
        # no second axis, so the original order raised IndexError.
        if z.shape != y.shape:
            y = self.vectorize(z.shape[0], y)
        m = y.shape[1]
        self.loss = self(z, y)  # cache scalar loss for item()
        # d(BCE-with-logits)/dz = sigmoid(z) - y, averaged over the batch.
        return (1 / m) * (Sigmoid()(z) - y)

    def item(self):
        """Return the loss value cached by the most recent backward() call."""
        return self.loss

    def vectorize(self, num_classes, y):
        """Convert integer labels y (shape (batch,)) to a one-hot array of
        shape (num_classes, batch): 1 at each column's label index."""
        y_vectorized = np.zeros((num_classes, y.shape[0]))
        for i, y_ in enumerate(y):
            y_vectorized[y_][i] = 1
        return y_vectorized
class Sigmoid():
    """Logistic activation; caches its output so backward() can reuse it."""

    def __init__(self):
        pass

    def __call__(self, x):
        self.z_prev = x                       # cached pre-activation
        self.a = 1.0 / (1.0 + np.exp(-x))     # sigma(x), cached for backward()
        return self.a

    def backward(self, grad):
        # sigma'(x) = sigma(x) * (1 - sigma(x)); uses the cached forward output.
        self.grad = grad * self.a * (1.0 - self.a)
        return self.grad

    def __str__(self):
        # Parameter-free layers report 'activation' so update steps skip them.
        return 'activation'
class Linear():
    """Fully-connected layer computing z = W @ x + b.

    Weights use He-style scaling (sqrt(2 / fan_in)) to keep activation
    variance roughly constant across layers.
    """

    def __init__(self, input_size, output_size):
        self.w = np.random.randn(output_size, input_size) * np.sqrt(2 / input_size)
        self.b = np.random.randn(output_size, 1)

    def __call__(self, x):
        """Forward pass.

        x: input of shape (input_size, batch_size).
        Returns w @ x + b with shape (output_size, batch_size).
        """
        self.prev_a = x                 # kept for the weight gradient in backward()
        self.z = self.w @ x + self.b
        return self.z

    def backward(self, grad):
        """Accumulate parameter gradients and return the gradient for the
        previous layer."""
        self.grad_w = grad @ self.prev_a.T
        self.grad_b = grad.sum(axis=1, keepdims=True)
        self.grad = self.w.T @ grad     # propagated upstream
        return self.grad

    def update_params(self, lr):
        """One plain SGD step using the gradients from the last backward()."""
        self.w -= lr * self.grad_w
        self.b -= lr * self.grad_b

    def __str__(self):
        return 'linear'
# Fix the RNG seed before the network is constructed below so the
# Linear-layer weight initialization is reproducible across runs.
np.random.seed(23)
class ANN():
    """A plain sequential network: layers are applied in order on the forward
    pass and in reverse order on the backward pass."""

    def __init__(self, layers):
        self.layers = layers
        self.num_layers = len(layers)

    def __call__(self, x):
        out = x
        for layer in self.layers:
            out = layer(out)
        return out

    def backward(self, grad):
        for layer in reversed(self.layers):
            grad = layer.backward(grad)

    def update_params(self, lr=1e-4):
        # Activation layers identify themselves via __str__ and carry no
        # parameters, so they are skipped.
        for layer in self.layers:
            if str(layer) != 'activation':
                layer.update_params(lr)
# Network: 784-pixel input -> 256 hidden units (sigmoid) -> 10 raw logits.
ann = ANN([Linear(784, 256), Sigmoid(), Linear(256, 10)])
loss_fn = BCE_Loss()

# TRAINING
epochs = 10
# Per-epoch history, collected for inspection/plotting.
train_loss = []
train_acc = []
val_loss = []
val_acc = []
lr = 3e-1
# Standalone sigmoid used only to turn logits into scores for accuracy.
sigmoid = Sigmoid()
for epoch in range(epochs):
    # ---- training pass ----
    running_loss = 0.
    total = 0.
    correct = 0
    for i, (x, y) in enumerate(tqdm(train_loader), 1):
        x = x.reshape(x.shape[0], -1).numpy()   # flatten (B,1,28,28) -> (B,784)
        y = y.numpy()
        out = ann(x.T)                          # network expects (features, batch)
        loss = loss_fn.backward(out, y)         # gradient of BCE w.r.t. logits
        ann.backward(loss)
        ann.update_params(lr)
        running_loss += loss_fn.item()
        total += y.shape[0]
        correct += (sigmoid(out.T).argmax(axis=1) == y).sum()
        if i % 250 == 0:
            print(f'Iter: {i} - Loss: {running_loss / i}')
    train_loss.append(running_loss / len(train_loader))
    train_acc.append(correct / total)

    # ---- validation pass (no parameter updates) ----
    # Fixed: the original test loop header was a garbled paste
    # (`for i,y) in enumerate(tqdm(test_loader),-1).numpy()`); reconstructed
    # to mirror the training loop above.
    running_loss = 0.
    total = 0.
    correct = 0
    for i, (x, y) in enumerate(tqdm(test_loader), 1):
        x = x.reshape(x.shape[0], -1).numpy()
        y = y.numpy()
        out = ann(x.T)
        loss = loss_fn(out, y)
        running_loss += loss
        total += y.shape[0]
        correct += (sigmoid(out.T).argmax(axis=1) == y).sum()
    val_loss.append(running_loss / len(test_loader))
    val_acc.append(correct / total)
    print(f'Epoch: {epoch+1} - Loss: {train_loss[-1]} - Accuracy {train_acc[-1]} - Val Loss: {val_loss[-1]} - Val Accuracy: {val_acc[-1]}')
# For 10th epoch:
# Epoch: 10 - Loss: 0.1848173206776424 - Accuracy 0.9755166666666667 - Val Loss: 0.1995236210747004 - Val Accuracy: 0.971
# CHECKING ACCURACY ON TEST SET (batched, via the DataLoader)
total = 0.
correct = 0.
preds = []
true = []
for x, y in tqdm(test_loader):
    x = x.reshape(x.shape[0], -1).numpy()
    y = y.numpy()
    out = ann(x.T)
    total += y.shape[0]
    batch_pred = sigmoid(out.T).argmax(axis=1)  # predicted class per sample
    preds.append(batch_pred)
    true.append(y)
    correct += (batch_pred == y).sum()
correct / total  # 0.971
# Evaluate on the raw test tensors in one shot. The DataLoader path above
# goes through ToTensor(), which scales pixels from [0, 255] to [0.0, 1.0];
# mnist_test.data is the raw uint8 tensor, so it must be scaled the same way.
# Without the division the network sees inputs 255x larger than it was
# trained on — that is why the two accuracies (0.971 vs 0.9557) disagreed.
x = mnist_test.data.numpy().astype(np.float32) / 255.0
x = x.reshape(x.shape[0], -1)
y = mnist_test.targets.numpy()
out = ann(x.T)
(sigmoid(out.T).argmax(axis=1) == y).sum() / len(x)  # now matches the loader result
版权声明:本文内容由互联网用户自发贡献,该文观点与技术仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 dio@foxmail.com 举报,一经查实,本站将立刻删除。