模型的验证准确率停在 46-49% 不再提高，似乎出现了过拟合

如何解决模型在 46-49% 时停止提高验证准确性，它似乎过度拟合

我正在处理这个数据集：

https://www.kaggle.com/dionyshsmiaris/xrays 该数据集包含以下类别的 X 光图像：

0="正常"
1="pneumonia_cause_viral_infection"
2="pneumonia_cause_bacteria"

我使用的是 depth=50 的自定义 ResNet（也尝试过 20、34、101），但结果都相同。

这是我导入的数据：

# Google Drive folders holding the training and test X-ray images.
train_dir = "/content/gdrive/MyDrive/Xraydataset/train_images/"
test_dir =  "/content/gdrive/MyDrive/Xraydataset/test_images/"
def get_data(folder, labels_csv='/content/gdrive/MyDrive/Xraydataset/labels_train.csv'):
    """Load every image in *folder* as a 224x224 grayscale array, plus labels.

    Args:
        folder: Directory whose entries are all image files.
        labels_csv: CSV file with a ``class_id`` column. Defaults to the
            training labels, so pass a different file (or ignore the
            returned labels) when loading a folder that file does not
            describe.

    Returns:
        ``(X, y)`` where ``X`` stacks the images in sorted file-name order
        and ``y`` is the ``class_id`` column as an ``(n_rows, 1)`` array.
    """
    X = []
    # os.listdir returns entries in arbitrary order; sort them so the image
    # order is reproducible across runs and machines.
    # NOTE(review): labels are still paired with images purely by position.
    # Confirm that the CSV rows follow the same sorted file-name order, or
    # join on the CSV's file-name column if it has one.
    for image_filename in tqdm(sorted(os.listdir(folder))):
        img_file = tf.keras.preprocessing.image.load_img(
            os.path.join(folder, image_filename), color_mode="grayscale")
        if img_file is not None:
            # The second argument is passed through as the resampling filter.
            img_file = img_file.resize((224, 224), 1)
            X.append(np.asarray(img_file))
    label = pd.read_csv(labels_csv, usecols=['class_id'])
    return np.asarray(X), np.asarray(label)
# Load both image folders into memory.
# NOTE(review): both calls read their labels from labels_train.csv, so y_test
# holds the training labels, not labels for the test images.
X_train,y_train = get_data(train_dir)
X_test,y_test= get_data(test_dir)

预处理、归一化、标签独热编码（to_categorical）：

# Add a trailing channel axis, cast to float32 and divide pixel values by 255.
X_train = np.expand_dims(X_train, axis=3).astype('float32') / 255
X_test = np.expand_dims(X_test, axis=3).astype('float32') / 255

# Centre both sets on the per-pixel mean of the full training array.
# NOTE(review): the mean is taken before the hold-out split below, so the
# held-out images contribute to it.
X_train_mean = np.mean(X_train, axis=0)
X_train -= X_train_mean
X_test -= X_train_mean

print('x_train shape:', X_train.shape)
print(X_train.shape[0], 'train samples')
print(X_test.shape[0], 'test samples')

# The first `split_at` samples are used for fitting; the remainder of the
# training folder is held out for validation (no shuffling is applied).
split_at = 3700
xtrain, xtest = X_train[:split_at], X_train[split_at:]
ytrain, ytest = y_train[:split_at], y_train[split_at:]

# One-hot encode the three class ids.
t_train = keras.utils.to_categorical(ytrain, 3)
t_test = keras.utils.to_categorical(ytest, 3)

Resnet 层：

def resnet_layer(inputs, num_filters=16, kernel_size=3, strides=1,
                 activation='relu', batch_normalization=True, conv_first=True):
    """Apply one Conv2D / BatchNormalization / Activation unit to *inputs*.

    Args:
        inputs: Input tensor.
        num_filters: Number of convolution filters.
        kernel_size: Side length of the square convolution kernel.
        strides: Convolution stride.
        activation: Activation name, or None to skip the activation.
        batch_normalization: Whether to insert a BatchNormalization layer.
        conv_first: If True the order is conv -> BN -> activation,
            otherwise BN -> activation -> conv.

    Returns:
        The output tensor of the unit.
    """
    convolution = Conv2D(num_filters,
                         kernel_size=kernel_size,
                         strides=strides,
                         padding='same',
                         kernel_initializer='he_normal',
                         kernel_regularizer=l2(1e-4))

    def _normalize_and_activate(tensor):
        # Optional BN followed by the optional non-linearity.
        if batch_normalization:
            tensor = BatchNormalization()(tensor)
        if activation is not None:
            tensor = Activation(activation)(tensor)
        return tensor

    if conv_first:
        return _normalize_and_activate(convolution(inputs))
    return convolution(_normalize_and_activate(inputs))

ResnetV1：

def resnet_v1(input_shape, depth, num_classes=3):
    """Build a ResNet v1 classifier from three stacks of residual blocks.

    Each stack holds ``(depth - 2) // 6`` residual blocks of two
    ``resnet_layer`` units. The filter count starts at 16 and doubles after
    every stack; the first block of the second and third stacks downsamples
    with stride 2.

    Args:
        input_shape: Shape of one input image tensor (without batch axis).
        depth: Network depth; must be of the form ``6n + 2``.
        num_classes: Number of output classes of the softmax head.

    Returns:
        The (uncompiled) Keras ``Model``.

    Raises:
        ValueError: If ``depth`` is not of the form ``6n + 2``.
    """
    if (depth - 2) % 6 != 0:
        raise ValueError('depth should be 6n+2 (eg 20,32,44 in [a])')
    # Start model definition.
    num_filters = 16
    num_res_blocks = (depth - 2) // 6

    inputs = Input(shape=input_shape)
    x = resnet_layer(inputs=inputs)
    # Instantiate the stack of residual units.
    for stack in range(3):
        for res_block in range(num_res_blocks):
            # First block of every stack except the first one downsamples.
            downsample = stack > 0 and res_block == 0
            strides = 2 if downsample else 1
            y = resnet_layer(inputs=x, num_filters=num_filters, strides=strides)
            # Both convolutions of a block must use the stack's filter count,
            # otherwise the residual addition below sees mismatched channels.
            y = resnet_layer(inputs=y, num_filters=num_filters, activation=None)
            if downsample:
                # Linear projection shortcut: it has to apply the same stride
                # and filter count as the residual branch so that both
                # tensors have identical shapes when they are added.
                x = resnet_layer(inputs=x,
                                 num_filters=num_filters,
                                 kernel_size=2,  # originally: 1
                                 strides=strides,
                                 activation=None,
                                 batch_normalization=False)
            x = keras.layers.add([x, y])
            x = Activation('relu')(x)
        num_filters *= 2

    # Add classifier on top.
    # v1 does not use BN after last shortcut connection-ReLU
    x = AveragePooling2D(pool_size=8)(x)
    y = Flatten()(x)
    outputs = Dense(num_classes, activation='softmax', kernel_initializer='he_normal')(y)

    # Instantiate model.
    model = Model(inputs=inputs, outputs=outputs)
    print('Model parameters: {:d}'.format(model.count_params()))
    return model

学习率：

def lr_schedule(epoch):
    """Return the learning rate to use for the given epoch.

    The base rate of 1e-3 is scaled down once the epoch passes each
    milestone: x1e-1 after 80, x1e-2 after 120, x1e-3 after 160 and
    x0.5e-3 after 180.

    Args:
        epoch: Epoch number.

    Returns:
        The learning rate as a float.
    """
    base_rate = 1e-3
    # (epoch milestone, multiplier), checked from the latest milestone down
    # so that the first match is the most advanced stage reached.
    milestones = ((180, 0.5e-3), (160, 1e-3), (120, 1e-2), (80, 1e-1))
    for milestone, factor in milestones:
        if epoch > milestone:
            return base_rate * factor
    return base_rate

编译：

# Network depth; resnet_v1 requires a value of the form 6n+2.
depth = 50
# Per-image input shape: everything after the batch axis of X_train.
input_shape = X_train.shape[1:]

model = resnet_v1(input_shape=input_shape, depth=depth)

# `learning_rate` is the supported argument name; `lr` is a deprecated alias
# that newer Keras releases no longer accept.
model.compile(loss="categorical_crossentropy",
              optimizer=Adam(learning_rate=lr_schedule(0)),
              metrics=['acc'])

Datagen、批量大小、保存模型、ReduceLROnPlateau :

# Training hyper-parameters. The original paper trained with batch_size=128;
# the author's note says 128 crashed in this setup.
batch_size = 16
epochs = 200

# Checkpoint file-name template; the placeholders are filled in by
# ModelCheckpoint for every saved epoch.
model_name = 'resnet50F1-e{epoch:04d}-loss{loss:.3f}-acc{acc:.3f}-valloss{val_loss:.3f}-valacc{val_acc:.3f}.h5'
# Create the output directory if it is missing.
os.makedirs(save_dir, exist_ok=True)
filepath = os.path.join(save_dir, model_name)

# Keep only checkpoints that improve the validation accuracy.
checkpoint = ModelCheckpoint(
    filepath=filepath,
    monitor='val_acc',
    verbose=1,
    save_best_only=True,
)

# Epoch-based step schedule defined by lr_schedule.
lr_scheduler = LearningRateScheduler(lr_schedule)

# Additionally shrink the learning rate by sqrt(0.1) after 5 epochs without
# improvement of the monitored quantity, down to a floor of 0.5e-6.
lr_reducer = ReduceLROnPlateau(
    factor=np.sqrt(0.1),
    patience=5,
    cooldown=0,
    min_lr=0.5e-6,
)

# This will do preprocessing and realtime data augmentation:
# Generator that normalizes and augments the training batches on the fly.
# NOTE(review): featurewise and samplewise normalization are both enabled
# here, in addition to the manual scaling and mean subtraction applied to
# X_train earlier in this file - confirm that this stacking is intended.
datagen = ImageDataGenerator(
    # set input mean to 0 over the dataset
    featurewise_center=True,
    # set each sample mean to 0
    samplewise_center=True,
    # divide inputs by std of dataset
    featurewise_std_normalization=True,
    # divide each input by its std
    samplewise_std_normalization=True,
    # apply ZCA whitening
    zca_whitening=False,
    # epsilon for ZCA whitening
    zca_epsilon=1e-06,
    # randomly rotate images in the range (deg 0 to 180)
    rotation_range=0.,
    # randomly shift images horizontally
    width_shift_range=0.,
    # randomly shift images vertically
    height_shift_range=0.,
    # set range for random shear
    shear_range=0.,
    # set range for random zoom
    zoom_range=0.,
    # set range for random channel shifts
    channel_shift_range=0,
    # set mode for filling points outside the input boundaries
    fill_mode='nearest',
    # value used for fill_mode = "constant"
    cval=0.,
    # randomly flip images horizontally
    horizontal_flip=True,
    # randomly flip images vertically
    vertical_flip=False,
    # set rescaling factor (applied before any other transformation)
    rescale=None,
    # set function that will be applied on each input
    preprocessing_function=None,
    # image data format,either "channels_first" or "channels_last"
    data_format=None,
    # fraction of images reserved for validation (strictly between 0 and 1)
    validation_split=0.0)

# Compute quantities required for featurewise normalization
# (std,mean,and principal components if ZCA whitening is applied).
datagen.fit(xtrain)

训练：

# The training batches are normalized by `datagen` (featurewise and
# samplewise centering / std scaling). The held-out data must receive the
# same normalization, otherwise the model is validated on inputs from a
# different distribution than it was trained on. Use a second generator
# with the same normalization flags but no augmentation, fitted on the same
# training data so that it derives the same featurewise statistics.
val_datagen = ImageDataGenerator(
    featurewise_center=True,
    samplewise_center=True,
    featurewise_std_normalization=True,
    samplewise_std_normalization=True)
val_datagen.fit(xtrain)
# shuffle=False keeps the held-out samples in a fixed order.
val_flow = val_datagen.flow(xtest, t_test, batch_size=batch_size, shuffle=False)

history = model.fit(
    datagen.flow(xtrain, t_train, batch_size=batch_size),
    validation_data=val_flow,
    epochs=epochs,
    verbose=0,
    workers=4,
    steps_per_epoch=xtrain.shape[0] // batch_size,
    callbacks=[lr_reducer, lr_scheduler, MyCallback(), checkpoint])


# Score trained model on the held-out split, using the same normalization
# as during training.
scores = model.evaluate(val_flow, verbose=1)
print('Test loss:',scores[0])
print('Test accuracy:',scores[1])

模型在 46-49% 时停止提高验证准确性，它似乎过度拟合

如何解决模型在 46-49% 时停止提高验证准确性，它似乎过度拟合

相关推荐