How do I debug a backpropagation implementation written from scratch?
I created a simple neural network for binary classification from scratch (the implementation is inspired by one of Andrew Ng's courses). However, I think I got the backprop part wrong somewhere, because gradient descent fails to minimize the cost. In this example, after roughly iteration 1300, dJ/dW becomes NaN (and subsequently W becomes NaN as well). I have double-checked the equations, but I can't see where I made a mistake. Any ideas?
My code:
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image

class BinaryClassifier:
    def __init__(self,X,Y,hidden_layers,num_iterations,learning_rate=1.2):
        np.random.seed(1)
        self.X = X
        self.Y = Y
        self.Z = {}
        self.A = {}
        self.W = {}
        self.b = {}
        self.dZ = {} # dJ/dZ (derivative with respect to Z)
        self.dA = {} # dJ/dA (derivative with respect to A)
        self.dW = {} # dJ/dW (derivative with respect to W)
        self.db = {} # dJ/db (derivative with respect to b)
        self.m = self.Y.shape[1] # number of training examples

        # hyperparameters:
        self.layers = hidden_layers + [1] # the final layer in logistic regression will be a single logistic unit
        self.L = len(self.layers) # number of layers (not counting the input layer)
        self.num_iterations = num_iterations
        self.learning_rate = learning_rate

        ##### initialize parameters: #####
        nodes_prev_layer = self.X.shape[0] # get number of nodes from input layer
        for layer,nodes in enumerate(self.layers):
            # n.b. scale `W` with Xavier/He initialization:
            self.W[layer+1] = np.random.randn(nodes,nodes_prev_layer) * np.sqrt(2/nodes_prev_layer)
            self.b[layer+1] = np.zeros((nodes,1))
            nodes_prev_layer = nodes

    ##### utility functions: #####
    def relu_function(self,Z):
        return np.maximum(Z,0)

    def sigmoid_function(self,Z):
        return 1/(1 + np.exp(-Z))

    def relu_gradient(self,Z):
        return np.where(Z > 0,1,0)

    def sigmoid_gradient(self,Z):
        return self.sigmoid_function(Z) * (1 - self.sigmoid_function(Z))
    ##### forward propagation steps: #####
    def linear_forward(self,A_prev,W,b,activation):
        """ Forward step (linear + activation) for a single layer.
        """
        Z = np.dot(W,A_prev) + b
        if activation == 'relu':
            A = self.relu_function(Z)
        elif activation == 'sigmoid':
            A = self.sigmoid_function(Z)
        else:
            raise ValueError('Invalid activation function: %s' % activation)
        assert A.shape == Z.shape
        return A,Z

    def forward_propagation(self):
        """ Feed forward through all layers.
        """
        # the 'activated' unit for layer 0 is just the input:
        self.A[0] = np.copy(self.X)
        # propagate and compute activations for hidden layers
        for l in range(1,self.L+1):
            if l < self.L:
                activation = 'relu'
            # use logistic activation for the last layer:
            else:
                activation = 'sigmoid'
            self.A[l],self.Z[l] = self.linear_forward(self.A[l-1],self.W[l],self.b[l],activation)
        AL = self.A[self.L]
        return AL

    def compute_cost(self,Y_hat):
        cost = -1/self.m * np.sum( (self.Y*np.log(Y_hat)) + ((1-self.Y) * np.log(1-Y_hat)) )
        cost = np.squeeze(cost)
        assert(cost.shape == ())
        return cost
    ##### backward propagation steps: #####
    def linear_backward(self,A_prev,dA,W,Z,b,activation='relu'):
        """ Backward propagation (activation + linear) for a single layer.
        """
        if activation == 'relu':
            dZ = dA * self.relu_gradient(Z)
        elif activation == 'sigmoid':
            dZ = dA * self.sigmoid_gradient(Z)
        else:
            raise ValueError('Invalid activation function: %s' % activation)
        dW = 1/self.m * np.dot(dZ,A_prev.T)
        db = 1/self.m * np.sum(dZ,axis=1,keepdims=True)
        dA_prev = np.dot(W.T,dZ) # dA for the previous layer (dA[l-1])
        assert dA_prev.shape == A_prev.shape
        assert dW.shape == W.shape
        return dA_prev,dZ,dW,db

    def backward_propagation(self):
        """ Backward propagation for all layers.
        """
        for l in reversed(range(1,self.L+1)):
            if l == self.L:
                # dJ/dA for the output layer, from the cross-entropy cost:
                self.dA[l] = -(np.divide(self.Y,self.A[l]) - np.divide(1-self.Y,1-self.A[l]))
                activation = 'sigmoid'
            else:
                activation = 'relu'
            self.dA[l-1],self.dZ[l],self.dW[l],self.db[l] = self.linear_backward(self.A[l-1],self.dA[l],self.W[l],self.Z[l],self.b[l],activation)
    def update_parameters(self):
        """ Updates W and b parameters after a single iteration of backprop.
        """
        for l in range(1,self.L+1):
            self.W[l] -= (self.learning_rate * self.dW[l])
            self.b[l] -= (self.learning_rate * self.db[l])

    ##### train/predict methods: #####
    def train_binary_classification_model(self,print_cost=True):
        """ Trains model and updates parameters.
        """
        np.random.seed(1)
        for i in range(self.num_iterations):
            AL = self.forward_propagation()
            if print_cost and i % 500 == 0:
                cost = self.compute_cost(AL)
                print('cost at %s iterations: %s' % (i,cost))
            self.backward_propagation()
            self.update_parameters()

    def predict(self):
        AL = self.forward_propagation()
        return np.where(AL > 0.5,1,0)
Generate the sample data and train the model:
def generate_data():
    np.random.seed(1)
    m = 400 # number of examples
    N = int(m/2) # number of points per class
    D = 2 # dimensionality
    X = np.zeros((m,D)) # data matrix where each row is a single example
    Y = np.zeros((m,1),dtype='uint8') # labels vector (0 for red,1 for blue)
    a = 4 # maximum ray of the flower
    for j in range(2):
        ix = range(N*j,N*(j+1))
        t = np.linspace(j*3.12,(j+1)*3.12,N) + np.random.randn(N)*0.2 # theta
        r = a*np.sin(4*t) + np.random.randn(N)*0.2 # radius
        X[ix] = np.c_[r*np.sin(t),r*np.cos(t)]
        Y[ix] = j
    X = X.T
    Y = Y.T
    return X,Y
########################################
# main:
########################################
X,Y = generate_data()
# train a binary classification model with a single hidden layer (4 nodes):
planar_network = BinaryClassifier(X,Y,[4],4000,learning_rate=1.2)
planar_network.train_binary_classification_model()
# output:
# cost at 0 iterations: 0.9897586239010666
# cost at 500 iterations: 0.5513227406119928
# cost at 1000 iterations: 0.5457089978185676
# cost at 1500 iterations: nan
# cost at 2000 iterations: nan
# ...
Solution
You get cost = nan because at some step the computation runs into an invalid value.
In this network there are two possible causes:

- log(0)
- number/0 (division by zero)
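
A minimal sketch (my own illustration, not part of the fix below) of how these two cases silently turn into nan/inf in NumPy, plus a np.seterr switch that is handy for pinpointing the first offending operation while debugging:

import numpy as np

# log(0) is -inf, and 0 * -inf is nan -- this is what happens in the
# cross-entropy cost once a sigmoid output saturates to exactly 0 or 1:
with np.errstate(divide='ignore', invalid='ignore'):
    print(np.log(0.0))          # -inf
    print(0.0 * np.log(0.0))    # nan
    print(np.divide(1.0, 0.0))  # inf (the number/0 case)

# while debugging, make NumPy raise FloatingPointError at the first
# invalid operation instead of silently propagating nan:
np.seterr(divide='raise', invalid='raise')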
So you need to change the following things:

- Add a variable to your class:

      self.epsilon = 0.00001

- Change the compute_cost function:

      def compute_cost(self,Y_hat):
          cost = -1/self.m * np.sum( (self.Y*np.log(Y_hat+self.epsilon)) + ((1-self.Y) * np.log(1-Y_hat+self.epsilon)) )
          cost = np.squeeze(cost)
          assert(cost.shape == ())
          return cost
- Change the backward_propagation function:

      def backward_propagation(self):
          """ Backward propagation for all layers.
          """
          for l in reversed(range(1,self.L+1)):
              if l == self.L:
                  self.dA[l] = -(np.divide(self.Y,self.A[l]+self.epsilon) - np.divide(1-self.Y,1-self.A[l]+self.epsilon))
                  activation = 'sigmoid'
              else:
                  activation = 'relu'
              self.dA[l-1],self.dZ[l],self.dW[l],self.db[l] = self.linear_backward(self.A[l-1],self.dA[l],self.W[l],self.Z[l],self.b[l],activation)
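
As a quick sanity check (my own example with made-up numbers, not from the original post), the epsilon-patched cost stays finite even for fully saturated predictions, which is exactly where the unpatched version produced nan:

import numpy as np

Y     = np.array([[1,0,1,0]])          # labels
Y_hat = np.array([[1.0,0.0,0.5,1.0]])  # saturated predictions
m,eps = Y.shape[1],0.00001

# unpatched: the 0*log(0) and 1*log(0) terms yield nan/-inf;
# patched: every log argument is at least eps, so the result is finite
cost = -1/m * np.sum(Y*np.log(Y_hat+eps) + (1-Y)*np.log(1-Y_hat+eps))
print(np.isfinite(cost))  # True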
I added self.epsilon at every place where an invalid value could appear.
Now your code works.
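
A common alternative worth knowing (my addition, not part of the answer above) is to clip the network output away from exactly 0 and 1 once, instead of adding epsilon inside each formula; the 1e-8 bound here is an arbitrary choice:

def compute_cost(self,Y_hat):
    # clip predictions into (0,1) so log() and the divisions stay finite:
    Y_hat = np.clip(Y_hat,1e-8,1 - 1e-8)
    cost = -1/self.m * np.sum( (self.Y*np.log(Y_hat)) + ((1-self.Y) * np.log(1-Y_hat)) )
    return np.squeeze(cost)

The same clipping can be applied to self.A[self.L] before computing self.dA[self.L] in backward_propagation, which keeps the output-layer gradient bounded as well.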