Why are my neural network weights extremely low after only a few epochs?

I just started learning about neural networks and this is my first one. The problem is that the more data I have, the lower the weights get after 2-3 epochs, which is not normal, and it leaves my NN learning nothing.

In the DataSet class, find the function CreateData and change nbofexample to 20; if you print the weights you will see that they are in a normal range (uniformly distributed between -1 and 1). But if you set nbofexample to a value like 200, then after only 2 or 3 epochs most of the weights of the last layer get extremely close to 0, and they stay in that region for the rest of the training. Obviously, this makes the NN fail.

By the way, my NN basically analyzes arrays of digits from 0 to 9, divided by 10 as normalization, to check whether the array is sorted. I left plenty of comments in the code below, so it should be easy to understand.
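To make the data format concrete, here is a minimal sketch of what one example looks like (illustrative only; the real generation code is in DataSet.CreateData below):

```python
arr = [3, 1, 4, 2]                  # one example: digits from 0 to 9
normalized = [a / 10 for a in arr]  # network input: [0.3, 0.1, 0.4, 0.2]
label = int(sorted(arr) == arr)     # expected output: 0 (not sorted)
```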

There is probably a simple fix, but I just don't get it :(

Here is the full code if you want to try it (it's in Python, btw):

import numpy as np
import time
import random

#This class is only used for creating the data if needed
class DataSet():
    
    #check if sorted
    def checkPossibility(A):
        return sorted(A) == A

    #will be used later for more complex problems (taken from the faster answer of a coding challenge on LeetCode)
    #def checkPossibility(A):
    #    p = None
    #    for i in range(len(A) - 1):
    #        if A[i] > A[i+1]:
    #            if p is not None:
    #                return False
    #            p = i
    #    return (p is None or p == 0 or p == len(A)-2 or
    #            A[p-1] <= A[p+1] or A[p] <= A[p+2])
    

    #returns inputs and outputs using my poorly written algorithm
    def CreateData():
        
        #settings
        nbofchar=4
        nbofexample=200
        
        #initialize arrays
        inputs = [0]*nbofchar;
        output = [1]
        
        #handling dumbness
        if nbofexample>pow(10,nbofchar): 
            print("Too much data... resizing to max data")
            nbofexample=pow(10,nbofchar)
        elif nbofexample==0:
            print("You need examples to train! (Error nbofexample==0)")
        
        #if more than half of the max possible examples are requested, create all possible examples and delete randomly until the set is the requested size
        if nbofexample>pow(10,nbofchar)/2:
            
            #creating all possible examples
            for i in range(1,pow(10,nbofchar)): 
                new_ex = [int(a) for a in str(i)]
                while len(new_ex)<nbofchar:
                    new_ex=[0]+new_ex
                inputs = np.vstack((inputs,np.dot(new_ex,1/10)))  #normalization /10 so the value is between 0 and 1 ¯\_(ツ)_/¯
                output = np.vstack((output,[int(DataSet.checkPossibility(new_ex))]))
            
            #deleting     
            while len(inputs)>nbofexample:
                index = random.randint(0,len(inputs)-1)
                inputs = np.delete(inputs,index,axis=0)   #axis=0: remove a whole row instead of flattening the array
                output = np.delete(output,index,axis=0)

            return inputs,output
        
        #if half or fewer are requested, create examples randomly until the set is the requested size
        else:
            i=1
            while i < nbofexample: 
                new_ex = [random.randint(0,9) for a in range(nbofchar)]
                #skip duplicates: only add the example if it is not already in the data set
                if not any(np.array_equal(row,np.dot(new_ex,1/10)) for row in np.atleast_2d(inputs)):
                    i+=1
                    inputs = np.vstack((inputs,np.dot(new_ex,1/10)))    #normalization /10 so the value is between 0 and 1 ¯\_(ツ)_/¯
                    output = np.vstack((output,[int(DataSet.checkPossibility(new_ex))]))
            return inputs,output

#assigning weights to each layer
class NeuLayer():
    def __init__(self,nbofneuron,inputsperneuron):
        self.weight = 2 * np.random.random((inputsperneuron,nbofneuron))-1

#the actual neural network
class NeuNet():    

        def __init__(self,layers):
            self.layers = layers

        def _sigmoid(self,x):
            k = 1
            return 1 / (1+np.exp(-x/k))

        def _sigmoid_derivative(self,x):
            return x * (1-x)

        def train(self,training_set_inputs,training_set_outputs,nboftime):

            #debug
            timer1 = 0


            if len(self.layers)<2: return

            for iteration in range(nboftime):
                
                delta = [0] * len(self.layers)
                error = [0] * len(self.layers)
                outputlayers = self.think(training_set_inputs)
                
                #find deltas for each layer "i" (to be able to properly change weights)
                for i in range(len(self.layers)-1,-1,-1):
                    if i==len(self.layers)-1:
                        error[i] = training_set_outputs - outputlayers[i]                      
                    else:
                        error[i] = np.dot(delta[i+1],self.layers[i+1].weight.T)
                    delta[i] = error[i] * self._sigmoid_derivative(outputlayers[i])              


                #update weights for each layer "i"
                for i in range(len(self.layers)):
                   if i==0:
                       self.layers[0].weight += np.dot(training_set_inputs.T,delta[0])
                   else:
                       self.layers[i].weight += np.dot(outputlayers[i-1].T,delta[i])

                #display progression and the test result
                if Display_progression: 
                    if timer1<time.time():
                        timer1=time.time()+delay
                        value = ((iteration+1)/nboftime)*100
                        test_input = np.array([.1,.2,.1,.1])
                        print('%.2f'%value+"%     test_input = " + str(test_input) + "     test_output = "+ str(self.think(test_input)[-1]))

        #return output of each layer from an input
        def think(self,input):
            outforlayers = [None]*len(self.layers)
            outforlayer = input
            for i in range(len(self.layers)):
                outforlayer = self._sigmoid(np.dot(outforlayer,self.layers[i].weight))
                outforlayers[i] = outforlayer
            return outforlayers

#datamaker
creating_data=True
train = True

if creating_data:
    
    #creates files with inputs and their expected output
    print("Start creating data...")
    input,output = DataSet.CreateData()
    print("Data created!")
    file = open("data_input","wb")
    np.save(file,input)
    file.close()
    file = open("data_output","wb")
    np.save(file,output)
    file.close()

if train:

    default_data_set=False

    if default_data_set:
        #default training set
        inp_training = np.array([[0.1,0,0],[0,0.1,0.1],[0,0.1,0.2]])   #first row unsorted, the other two sorted, matching out_training below
        out_training = np.array([[0,1,1]]).T

    else:
        print("Loading data files...")
        file = open("data_input","rb")
        inp_training = np.load(file)
        file.close()
        file = open("data_output","rb")
        out_training = np.load(file)
        file.close()
        print("Done reading from data files!")


    #debug
    Display_progression = True;
    delay = 1   #seconds

    #initialize
    np.random.seed(5)
    netlayer_input = NeuLayer(10,len(inp_training[0]))
    netlayer2 = NeuLayer(10,10)
    netlayer3 = NeuLayer(10,10)
    netlayer4 = NeuLayer(10,10)
    netlayer_out = NeuLayer(len(out_training[0]),10)
    All_layers = [netlayer_input,netlayer2,netlayer3,netlayer4,netlayer_out]
    brain = NeuNet(All_layers)

    #train
    print("Start training...")
    brain.train(inp_training,out_training,100000)
    print("Done!")


    #final test
    outputfinal = brain.think(np.array([0,.3,.7,.9]))   #4 values to match nbofchar=4 inputs


    #output
    a = outputfinal[-1] #[-1] so we get the last layer's output(s)
    print(a)


Note: this is my first time asking a question on Stack Overflow, so please tell me if any key information is missing from the question.

Solution

A neural network can suffer from the Vanishing Gradient Problem because of more classical activations like sigmoid or tanh.

In layman's terms: activations like sigmoid and tanh essentially squash their input, right? For example, sigmoid(10) and sigmoid(100) are .9999 and 1 respectively. Even though the inputs differ greatly, the outputs barely change at all; the function is practically constant at that point. And where a function is nearly constant, its derivative tends to zero (or a very small value). These very small derivatives/gradients get multiplied with one another and effectively become zero, so your model cannot learn anything at all: the weights get stuck and stop updating.
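You can see this saturation numerically with a quick sketch (the helper names here are illustrative, using the same s*(1-s) derivative form your network uses):

```python
import numpy as np

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def sigmoid_derivative_from_output(s):
    # same s*(1-s) form the network above uses (s is the sigmoid OUTPUT)
    return s * (1 - s)

# for large inputs the output flattens out and the gradient collapses to ~0
for x in [0, 2, 10, 100]:
    s = sigmoid(x)
    print(f"x={x:>3}  sigmoid(x)={s:.6f}  gradient={sigmoid_derivative_from_output(s):.2e}")
```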

I'd suggest doing some further reading on this at your own pace. Among several solutions, one way to address this problem is to use a different activation, such as ReLU.
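As a rough sketch of that suggestion (these helpers are hypothetical, not from the code above), a ReLU pair that could replace the hidden layers' activation looks like this; the output layer can stay sigmoid since the target is 0/1:

```python
import numpy as np

def relu(x):
    # passes positive values through unchanged, zeroes out the rest
    return np.maximum(0, x)

def relu_derivative(x):
    # 1 where the unit is active, 0 otherwise: the gradient does not shrink
    # toward zero for large inputs the way the sigmoid gradient does
    return (x > 0).astype(float)
```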
