在函数中运行时 Flux 模型未更新

如何解决在函数中运行时 Flux 模型未更新的问题

我正在用一些数据训练神经网络。我在 REPL 中运行了全部代码，一切都按预期进行；但是当我把代码包装进一个函数（function）之后，model(x) 的输出就不再更新了：

所以，我的代码如下：

using Flux;
using Flux.Optimise: update!;
using Flux: normalise;
using Flux: onecold;
using Flux: onehotbatch;
using Flux: @epochs;
using Flux: throttle;

# `Random.seed!` lives in the Random standard library, which `using Flux` does not
# bring into scope; without this line the seed call fails with an UndefVarError.
using Random

Random.seed!(125);

# ---- Hyper-parameters and run configuration ----
ep_max  = 2;      # number of epochs
batch   = 100;    # batch size for training
lr      = 0.001;  # learning rate
spt     = 0.01;   # split ratio: fraction of the data to be used as test data
opt     = ADAM(lr, (0.9, 0.8)); # optimizer
time_show = 5;    # throttle interval passed to the training callback
dat_groups = 1:10;
dat_num    = 100;
creating   = false;
reading    = true;

if creating
    data_creator(dat_groups, dat_num); # create data and store it
end
if reading
    xtrain, ytrain = data_reader(dat_groups); # read data
end

# Batching data
datatrain, datatest = getdata(xtrain', ytrain', spt, batch); # DataLoader function in here

xtr, ytr = recoverdata(datatrain); # recovering training data, to be used if needed
xts, yts = recoverdata(datatest);  # recovering test data, to be used if needed
m = layers(size(xtr, 1), size(ytr, 1)); # creates layers (6 layers, tanh)
ps = Flux.params(m);                    # parameters of `m` that `train!` will update

trainmode!(m, true)
evalcb = () -> @show(loss_all(datatrain, m))

# NOTE: `loss` and `accuracy` are defined further down in this file; they must
# already exist (e.g. from an earlier REPL evaluation) when this loop runs.
for i in 1:ep_max # run ep_max times for a single batch
    println()
    println("**************")
    println(i)
    println()
    Flux.train!(loss, ps, datatrain, opt, cb = throttle(evalcb, time_show));
    println()
    @show accuracy(datatest, m)
end

"""
    accuracy(dataloader, model)

Return the average, over all batches in `dataloader`, of the fraction of
predictions that match the targets.

For every `(x, y)` batch the model output and `y` are passed through `cpu` and
decoded with `onecold`; the number of matching labels is divided by `size(x, 2)`
and accumulated. The accumulated sum is finally divided by `length(dataloader)`.

As a side effect, a blank line and the raw model output (`mod_x`) are printed
for every batch.
"""
function accuracy(dataloader, model)
    total = 0
    for (x, y) in dataloader
        println()
        # The name `mod_x` is kept because `@show` below prints it verbatim.
        mod_x = model(x)

        predicted = onecold(cpu(mod_x))
        expected = onecold(cpu(y))

        @show mod_x # HERE IS WHERE THINGS GO WRONG
        hits = sum(predicted .== expected)
        total += hits * 1 / size(x, 2)
    end
    return total / length(dataloader)
end

"""
    loss(x, y)

Mean-squared error between the model output `m(x)` and the target `y`.

`m` is not an argument: it is looked up in the scope where this function is
defined, so the model being evaluated is whatever `m` is bound to there.
"""
function loss(x, y)
    prediction = m(x)
    return Flux.mse(prediction, y)
end

例如，当我用这段代码训练 2 个 epoch 时，输出如下：

julia> include("main.jl")
size of X data is :(1000,63)
size of Y data is :(1000,6)


[ Info: Batching data...
[ Info: splitting into 990.0,10.0

[ Info: Batching train data...
[ Info: Batching test data...
┌ Warning: Number of data points less than batchsize,decreasing the batchsize to 10
└ @ Flux.Data ~/.julia/packages/Flux/Fj3bt/src/data/dataloader.jl:64
[ Info: layers created....


**************
1

loss_all(datatrain,m) = 0.3405524244181338


mod_x = Float32[0.21500134 0.25191692 0.28280517 0.14269947 0.12386108 0.22535957 0.38209096 0.21966429 0.061912186 0.32045293; 0.30720016 0.36394575 0.23585278 0.019663436 0.033996515 0.37338153 0.22447488 0.21927631 0.22822481 0.124495685; 0.039474137 -0.13775912 -0.0623653 0.021980956 -0.028107032 -0.027529262 -0.06072978 -0.13554919 -0.04740917 -0.020533875; 0.05143341 0.13719048 0.08347133 0.008867923 0.09923494 0.058163155 0.13347353 0.14189252 0.001730077 0.14392109; 0.119510576 0.07049953 0.05730217 0.5498258 -0.33574563 0.32612923 0.3832937 -0.06748764 0.2360552 0.15549593; 0.33197474 0.16447222 0.27249426 -0.15527818 0.2785189 0.34654236 0.124443345 0.18982176 0.26248497 0.16329157]
acc += (sum(one_cpu_mod_x .== one_cpu_y) * 1) / size(x,2) = 0.2
accuracy(datatest,m) = 0.2

**************
2

loss_all(datatrain,m) = 0.22800623235686412


mod_x = Float32[0.45594802 0.4247107 0.42235023 0.27602637 0.38965002 0.37095627 0.46256495 0.4262407 0.25281948 0.53176546; 0.31144667 0.32339665 0.24235432 0.16192524 0.18050455 0.41499415 0.21660031 0.38733715 0.43207392 0.23064193; 0.16366918 0.008529371 0.036853492 0.018185081 0.057695292 0.12094624 0.07630184 -0.011614937 0.012737181 0.173724; 0.14474952 0.19103736 0.1090886 0.08852501 0.14772236 0.10033486 0.12594518 0.16158527 0.08090371 0.16053662; 0.32059592 0.13817215 0.25556487 0.3619385 0.1361927 0.34184596 0.42664242 0.20382118 0.15213369 0.428005; 0.38914698 0.34429085 0.43361163 0.1414494 0.38538712 0.49637955 0.32894653 0.38855922 0.5757681 0.2794177]
acc += (sum(one_cpu_mod_x .== one_cpu_y) * 1) / size(x,2) = 0.3
accuracy(datatest,m) = 0.3

可以看到，model(x) 的输出确实发生了变化（模型被更新了），损失函数也确实在收敛。然而，如果我把上面的所有代码（main.jl）放进这样一个函数中：

# Wraps the whole of main.jl (shown above) in a single function so that nothing
# runs at global scope; only the training loop is spelled out here.
function all_the_code()
    # ... (everything from main.jl above, unchanged) ...
    for i = 1:ep_max # run ep_max times for a single batch
        println()
        println("**************")
        println(i)
        println()
        # Same call as in the top-level script version.
        Flux.train!(loss, ps, datatrain, opt, cb = throttle(evalcb, time_show));
        println()
        @show accuracy(datatest, m)
    end
    return
end

我得到的输出是：

julia> all_the_code()
size of X data is :(1000,63)
size of Y data is :(1000,6)

...

**************
1

loss_all(datatrain,m) = 0.36721910246630546


mod_x = Float32[-0.23175366 -0.0057259724 0.082216755 -0.028256565 0.0046515726 5.131215f-5 0.24094917 0.2069467 -0.12277043 0.25271556; 0.3987115 0.698753 0.41566908 0.04473591 -0.25956866 0.115343675 0.5015237 -0.32195306 0.14039147 -0.6866529; -0.061519355 -0.40676624 -0.23660009 -0.04070711 0.07426359 -0.058668774 -0.32793295 0.096719205 -0.103397384 0.3026058; -0.19165385 0.047038484 0.08517692 -0.10537018 0.107321024 -0.033743735 0.14160846 0.16544174 -0.061927572 0.08228015; 0.080324054 0.095603675 -0.37848675 0.8972529 -0.84439826 0.3704224 0.25929335 -0.44578144 0.2532668 0.09203011; 0.2906073 0.16897422 0.2946054 -0.3970689 0.16302669 0.034337603 0.072528295 -0.09895017 -0.008754879 -0.30081]
acc += (sum(one_cpu_mod_x .== one_cpu_y) * 1) / size(x,2) = 0.2
accuracy(datatest,m) = 0.2

**************
2

loss_all(datatrain,m) = 0.3670469086875309


mod_x = Float32[-0.23175366 -0.0057259724 0.082216755 -0.028256565 0.0046515726 5.131215f-5 0.24094917 0.2069467 -0.12277043 0.25271556; 0.3987115 0.698753 0.41566908 0.04473591 -0.25956866 0.115343675 0.5015237 -0.32195306 0.14039147 -0.6866529; -0.061519355 -0.40676624 -0.23660009 -0.04070711 0.07426359 -0.058668774 -0.32793295 0.096719205 -0.103397384 0.3026058; -0.19165385 0.047038484 0.08517692 -0.10537018 0.107321024 -0.033743735 0.14160846 0.16544174 -0.061927572 0.08228015; 0.080324054 0.095603675 -0.37848675 0.8972529 -0.84439826 0.3704224 0.25929335 -0.44578144 0.2532668 0.09203011; 0.2906073 0.16897422 0.2946054 -0.3970689 0.16302669 0.034337603 0.072528295 -0.09895017 -0.008754879 -0.30081]
acc += (sum(one_cpu_mod_x .== one_cpu_y) * 1) / size(x,m) = 0.2

可以看到，模型没有更新（损失函数始终停留在 0.36 附近），无论迭代多少次都是如此。这是怎么回事？

我不想继续依赖 REPL。我一向尽量避免使用全局变量，所以需要把代码放进函数里，但我完全不明白为什么会出现这种情况。

附注：两次实验使用的数据相同。

在函数中运行时 Flux 模型未更新

如何解决在函数中运行时 Flux 模型未更新的问题

相关推荐