How to fix: why are my neural network's weights extremely low after a few epochs?
I just started learning about neural networks and this is my first one. The problem is that the more data I have, the lower the weights become after 2-3 epochs, which is not normal, and this makes my NN learn nothing.
To reproduce: in the DataSet class, find the function CreateData and change nbofexample to 20; if you print the weights you will see that they are in a normal range (uniformly distributed between -1 and 1). But if you set nbofexample to something like 200, then after only 2 or 3 epochs most of the weights of the last layer get extremely close to 0, and they stay stuck in that region for the rest of the training. Obviously, this causes the NN to fail.
By the way, my NN basically takes arrays of digits between 0 and 9, divided by 10 as normalization, and checks whether the array is sorted. I put a lot of comments in the code below, so it should be easy to understand.
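To make the encoding concrete, here is what one training example looks like (just an illustration, not part of the network code below):

import numpy as np

raw = [1,3,2,4]                #digits between 0 and 9
x = np.dot(raw, 1/10)          #normalized input: [0.1, 0.3, 0.2, 0.4]
y = int(sorted(raw) == raw)    #expected output: 0, because [1,3,2,4] is not sorted
print(x, y)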
There is probably a simple fix, but I just don't get it :(
Here is the full code if you want to try it (it's in Python, btw):
import numpy as np
import time
import random
#This class is only used for creating the data if needed
class DataSet():
    #check if sorted
    @staticmethod
    def checkPossibility(A):
        return sorted(A) == A

    #will be used later for more complex problems (taken from the fastest answer of a coding challenge on LeetCode)
    #def checkPossibility(A):
    #    p = None
    #    for i in range(len(A) - 1):
    #        if A[i] > A[i+1]:
    #            if p is not None:
    #                return False
    #            p = i
    #    return (p is None or p == 0 or p == len(A)-2 or
    #            A[p-1] <= A[p+1] or A[p] <= A[p+2])
    #returns inputs and outputs using my poorly written algorithm
    @staticmethod
    def CreateData():
        #settings
        nbofchar = 4
        nbofexample = 200
        #initialize arrays
        inputs = [0]*nbofchar
        output = [1]
        #handling dumbness
        if nbofexample > pow(10,nbofchar):
            print("Too much data... resizing to max data")
            nbofexample = pow(10,nbofchar)
        elif nbofexample == 0:
            print("You need examples to train! (Error nbofexample==0)")
        #if more than half of the max possible examples are requested, create all possible examples and delete randomly until the requested size is reached
        if nbofexample > pow(10,nbofchar)/2:
            #creating all possible examples
            for i in range(1,pow(10,nbofchar)):
                new_ex = [int(a) for a in str(i)]
                while len(new_ex) < nbofchar:
                    new_ex = [0]+new_ex
                inputs = np.vstack((inputs,np.dot(new_ex,1/10))) #normalization /10 so the value is between 0 and 1 ¯\_(ツ)_/¯
                output = np.vstack((output,[int(DataSet.checkPossibility(new_ex))]))
            #deleting (axis=0 so whole rows are removed, not single values)
            while len(inputs) > nbofexample:
                index = random.randint(0,len(inputs)-1)
                inputs = np.delete(inputs,index,axis=0)
                output = np.delete(output,index,axis=0)
            return inputs,output
        #if less than half (or exactly half) is requested, create examples randomly until the requested size is reached
        else:
            i = 1
            while i < nbofexample:
                new_ex = [random.randint(0,9) for a in range(nbofchar)]
                new_row = np.dot(new_ex,1/10) #normalization /10 so the value is between 0 and 1 ¯\_(ツ)_/¯
                #only add the example if it is not already in inputs
                if not np.any(np.all(np.asarray(inputs) == new_row, axis=-1)):
                    i += 1
                    inputs = np.vstack((inputs,new_row))
                    output = np.vstack((output,[int(DataSet.checkPossibility(new_ex))]))
            return inputs,output
#assigning weights to each layer
class NeuLayer():
    def __init__(self,nbofneuron,inputsperneuron):
        #weights uniformly distributed between -1 and 1
        self.weight = 2 * np.random.random((inputsperneuron,nbofneuron)) - 1

#the actual neural network
class NeuNet():
    def __init__(self,layers):
        self.layers = layers

    def _sigmoid(self,x):
        k = 1
        return 1 / (1+np.exp(-x/k))

    def _sigmoid_derivative(self,x):
        #x is expected to already be a sigmoid output, so this is s*(1-s)
        return x * (1-x)
    def train(self,training_set_inputs,training_set_outputs,nboftime):
        #debug
        timer1 = 0
        if len(self.layers) < 2: return
        for iteration in range(nboftime):
            delta = [0] * len(self.layers)
            error = [0] * len(self.layers)
            outputlayers = self.think(training_set_inputs)
            #find the delta of each layer "i" (to be able to properly change the weights)
            for i in range(len(self.layers)-1,-1,-1):
                if i == len(self.layers)-1:
                    error[i] = training_set_outputs - outputlayers[i]
                else:
                    error[i] = np.dot(delta[i+1],self.layers[i+1].weight.T)
                delta[i] = error[i] * self._sigmoid_derivative(outputlayers[i])
            #update the weights of each layer "i"
            for i in range(len(self.layers)):
                if i == 0:
                    self.layers[0].weight += np.dot(training_set_inputs.T,delta[0])
                else:
                    self.layers[i].weight += np.dot(outputlayers[i-1].T,delta[i])
            #display progression and the test result
            if Display_progression:
                if timer1 < time.time():
                    timer1 = time.time() + delay
                    value = ((iteration+1)/nboftime)*100
                    test_input = np.array([.1,.2,.1,.1])
                    print('%.2f'%value + "% test_input = " + str(test_input) + " test_output = " + str(self.think(test_input)[-1]))
    #return the output of each layer for a given input
    def think(self,input):
        outforlayers = [None]*len(self.layers)
        outforlayer = input
        for i in range(len(self.layers)):
            outforlayer = self._sigmoid(np.dot(outforlayer,self.layers[i].weight))
            outforlayers[i] = outforlayer
        return outforlayers
#datamaker
creating_data = True
train = True

if creating_data:
    #creates files with the inputs and their expected output
    print("Start creating data...")
    input,output = DataSet.CreateData()
    print("Data created!")
    file = open("data_input","wb")
    np.save(file,input)
    file.close()
    file = open("data_output","wb")
    np.save(file,output)
    file.close()
if train:
    default_data_set = False
    if default_data_set:
        #default (tiny) training set
        inp_training = np.array([[0.1,0,0],[0,0.1,0.1],[0,0,0.1]])
        out_training = np.array([[0,1,1]]).T
    else:
        print("Loading data files...")
        file = open("data_input","rb")
        inp_training = np.load(file)
        file.close()
        file = open("data_output","rb")
        out_training = np.load(file)
        file.close()
        print("Done reading from data files!")

    #debug
    Display_progression = True
    delay = 1 #seconds
    #initialize
    np.random.seed(5)
    netlayer_input = NeuLayer(10,len(inp_training[0]))
    netlayer2 = NeuLayer(10,10)
    netlayer3 = NeuLayer(10,10)
    netlayer4 = NeuLayer(10,10)
    netlayer_out = NeuLayer(len(out_training[0]),10)
    All_layers = [netlayer_input,netlayer2,netlayer3,netlayer4,netlayer_out]
    brain = NeuNet(All_layers)

    #train
    print("Start training...")
    brain.train(inp_training,out_training,100000)
    print("Done!")

    #final test (the input length must match nbofchar used to create the data)
    outputfinal = brain.think(np.array([0,.1,.3,.7]))

    #output
    a = outputfinal[-1] #[-1] so we get the last layer's output(s)
    print(a)
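To actually watch the collapse I described above, one can print per-layer weight statistics between training runs. A small helper I use for debugging (the inspect_weights name is my own, not part of the network itself):

def inspect_weights(net):
    #print the mean/min/max of the absolute weights for each layer
    for i, layer in enumerate(net.layers):
        w = np.abs(layer.weight)
        print("layer %d: mean|w|=%.4f  min|w|=%.4f  max|w|=%.4f"
              % (i, w.mean(), w.min(), w.max()))

inspect_weights(brain)  #with nbofexample=200 the last layer's weights collapse toward 0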
Note: this is my first time asking a question on Stack Overflow, so please tell me if I'm missing key information.
Solution
Neural networks can suffer from the vanishing gradient problem when they use the more classical activations such as sigmoid or tanh.
In layman's terms, activations like sigmoid and tanh essentially squash the input, right? For example, sigmoid(10) and sigmoid(100) are .9999 and 1 respectively. Even though the inputs changed a lot, the outputs barely changed - the function is effectively constant at that point. And where a function is almost constant, its derivative tends to zero (or a very small value). These very small derivatives/gradients get multiplied with one another during backpropagation and effectively become zero, so your model cannot learn anything at all - the weights get stuck and stop updating.
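For a quick numerical check of how fast the sigmoid's gradient dies off (just an illustration):

import numpy as np

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

for x in [0, 2, 10, 100]:
    s = sigmoid(x)
    #the sigmoid's derivative expressed through its output: s*(1-s)
    print("x=%5.1f  sigmoid=%.6f  derivative=%.6f" % (x, s, s*(1-s)))

#x=0 gives derivative 0.25 (the maximum); x=10 already gives ~0.000045.
#Chaining 5 layers multiplies such factors: 0.000045**5 is effectively zero.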
I suggest some further reading on this at your own pace. Among several solutions, one way to address this problem is to use a different activation, such as ReLU.
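As an illustration of that suggestion (a sketch, not a tested drop-in fix for your code): you could keep the sigmoid on the output layer, since the targets are 0/1, and swap only the hidden-layer activation for ReLU, whose gradient is 1 for positive inputs and therefore does not shrink when multiplied across layers:

import numpy as np

def relu(x):
    return np.maximum(0, x)

def relu_derivative(x):
    #the gradient is 1 wherever the value is positive, 0 elsewhere,
    #so it does not vanish when multiplied across many layers
    return (x > 0).astype(float)

Inside think(), the hidden layers would call relu instead of _sigmoid, and inside train() their deltas would use relu_derivative; scaling the weight updates by a small learning rate usually becomes necessary as well to keep training stable.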