如何解决模型的验证准确率停滞在 46–49% 不再提高的问题?模型似乎出现了过拟合
我正在处理这个数据集:
https://www.kaggle.com/dionyshsmiaris/xrays ,其中包含以下类别的 X 光片:
- 0="正常"
- 1="pneumonia_cause_viral_infection"
- 2="pneumonia_cause_bacteria"
我使用的是 depth=50 的自定义 ResNet(也尝试过 20、34、101),但结果都一样。
下面是我导入数据的代码:
train_dir = "/content/gdrive/MyDrive/Xraydataset/train_images/"
test_dir = "/content/gdrive/MyDrive/Xraydataset/test_images/"


def get_data(folder, labels_csv='/content/gdrive/MyDrive/Xraydataset/labels_train.csv'):
    """Load every image in *folder* as a 224x224 grayscale array, plus class ids.

    Args:
        folder: Directory whose entries are all handed to ``load_img``.
        labels_csv: CSV file providing a ``class_id`` column. Defaults to the
            training label file, which is what this function always read.

    Returns:
        A tuple ``(X, y)``: ``X`` stacks the resized images in sorted filename
        order, ``y`` is the ``class_id`` column as an ``(n_rows, 1)`` array.
    """
    X = []
    # os.listdir() yields entries in arbitrary order; sorting makes the image
    # order reproducible from run to run.
    # NOTE(review): images and labels are paired purely by position. This
    # presumes the CSV rows follow sorted filename order -- confirm, or join
    # on the CSV's filename column instead.
    for image_filename in tqdm(sorted(os.listdir(folder))):
        img_file = tf.keras.preprocessing.image.load_img(
            os.path.join(folder, image_filename), color_mode="grayscale")
        if img_file is None:
            continue
        # Second argument is the resample filter code, kept as in the original.
        img_file = img_file.resize((224, 224), 1)
        X.append(np.asarray(img_file))

    label = pd.read_csv(labels_csv, usecols=['class_id'])
    X = np.asarray(X)
    y = np.asarray(label)
    return X, y


X_train, y_train = get_data(train_dir)
# NOTE(review): labels_csv still defaults to the *training* labels here, so
# y_test does not describe X_test; pass the matching label file if one exists.
X_test, y_test = get_data(test_dir)
预处理、归一化、标签的 one-hot 编码:
# Append a channel axis at position 3 (assumes the loaded arrays are
# (n, height, width) -- TODO confirm against get_data's output).
X_train = np.expand_dims(X_train, axis=3)
X_test = np.expand_dims(X_test, axis=3)

# Divide the pixel values by 255 as float32.
X_train = X_train.astype('float32') / 255
X_test = X_test.astype('float32') / 255

# Rows before this index train the model; the remaining rows are held out.
TRAIN_SPLIT = 3700

# Compute the mean image from the training rows only, so that no statistics
# of the held-out rows leak into the normalisation, then centre everything
# with that same mean.
X_train_mean = np.mean(X_train[:TRAIN_SPLIT], axis=0)
X_train -= X_train_mean
X_test -= X_train_mean

print('x_train shape:', X_train.shape)
print(X_train.shape[0], 'train samples')
print(X_test.shape[0], 'test samples')

# Split the labelled data into a training part and a held-out part.
# NOTE(review): the split is positional (no shuffling or stratification).
xtrain = X_train[:TRAIN_SPLIT]
ytrain = y_train[:TRAIN_SPLIT]
xtest = X_train[TRAIN_SPLIT:]
ytest = y_train[TRAIN_SPLIT:]

# One-hot encode the three class ids.
t_train = keras.utils.to_categorical(ytrain, 3)
t_test = keras.utils.to_categorical(ytest, 3)
Resnet 层:
def resnet_layer(inputs, num_filters=16, kernel_size=3, strides=1,
                 activation='relu', batch_normalization=True, conv_first=True):
    """Apply one convolution plus optional batch norm and activation.

    Args:
        inputs: Tensor fed into the layer stack.
        num_filters: Number of Conv2D filters.
        kernel_size: Conv2D kernel size.
        strides: Conv2D strides.
        activation: Activation name, or None to skip the activation.
        batch_normalization: Whether to insert a BatchNormalization layer.
        conv_first: True for conv -> BN -> activation, False for
            BN -> activation -> conv.

    Returns:
        The output tensor of the last layer applied.
    """
    convolution = Conv2D(
        num_filters,
        kernel_size=kernel_size,
        strides=strides,
        padding='same',
        kernel_initializer='he_normal',
        kernel_regularizer=l2(1e-4),
    )

    def _normalize_and_activate(tensor):
        # Both orderings share this BN -> activation pair; each part is optional.
        if batch_normalization:
            tensor = BatchNormalization()(tensor)
        if activation is not None:
            tensor = Activation(activation)(tensor)
        return tensor

    if conv_first:
        return _normalize_and_activate(convolution(inputs))
    return convolution(_normalize_and_activate(inputs))
ResnetV1:
def resnet_v1(input_shape, depth, num_classes=3):
    """Build a ResNet v1 style classifier out of ``resnet_layer`` blocks.

    Three stacks of residual blocks are created. The filter count starts at
    16 and doubles after each stack; the first block of the second and third
    stacks uses stride 2 and a projection shortcut.

    Args:
        input_shape: Shape of one input sample, passed to ``Input``.
        depth: Network depth; must be of the form 6n+2.
        num_classes: Size of the softmax output layer.

    Returns:
        The assembled, uncompiled ``Model``.

    Raises:
        ValueError: If ``depth`` is not of the form 6n+2.
    """
    if (depth - 2) % 6 != 0:
        raise ValueError('depth should be 6n+2 (eg 20,32,44 in [a])')

    # Start model definition.
    num_filters = 16
    num_res_blocks = (depth - 2) // 6

    inputs = Input(shape=input_shape)
    x = resnet_layer(inputs=inputs)

    # Instantiate the stack of residual units.
    for stack in range(3):
        for res_block in range(num_res_blocks):
            # First block of every stack except the first one downsamples.
            downsample = stack > 0 and res_block == 0
            strides = 2 if downsample else 1

            y = resnet_layer(inputs=x, num_filters=num_filters, strides=strides)
            # The second convolution must keep the stack's filter count;
            # without num_filters it would fall back to the default of 16.
            y = resnet_layer(inputs=y, num_filters=num_filters, activation=None)

            if downsample:
                # Linear projection shortcut: it needs the same filter count
                # and stride as the residual branch, otherwise the two
                # tensors cannot be added.
                x = resnet_layer(
                    inputs=x,
                    num_filters=num_filters,
                    kernel_size=2,  # originally: 1
                    strides=strides,
                    activation=None,
                    batch_normalization=False,
                )

            x = keras.layers.add([x, y])
            x = Activation('relu')(x)
        num_filters *= 2

    # Add classifier on top.
    # v1 does not use BN after last shortcut connection-ReLU.
    x = AveragePooling2D(pool_size=8)(x)
    y = Flatten()(x)
    outputs = Dense(num_classes, activation='softmax',
                    kernel_initializer='he_normal')(y)

    # Instantiate model.
    model = Model(inputs=inputs, outputs=outputs)
    print('Model parameters: {:d}'.format(model.count_params()))
    return model
学习率:
def lr_schedule(epoch):
    """Return the learning rate to use for *epoch*.

    The rate is 1e-3 up to and including epoch 80 and is then multiplied by
    1e-1, 1e-2, 1e-3 and 0.5e-3 once the epoch exceeds 80, 120, 160 and 180
    respectively.

    Args:
        epoch: Epoch index.

    Returns:
        The learning rate as a float.
    """
    base_rate = 1e-3
    # (epoch threshold, multiplier) pairs, latest stage first so that the
    # first matching entry is the one that applies.
    decay_steps = ((180, 0.5e-3), (160, 1e-3), (120, 1e-2), (80, 1e-1))
    for threshold, multiplier in decay_steps:
        if epoch > threshold:
            return base_rate * multiplier
    return base_rate
编译:
# Build the network for the shape of one (already channel-expanded) sample.
depth = 50
input_shape = X_train.shape[1:]
model = resnet_v1(input_shape=input_shape, depth=depth)

model.compile(
    loss="categorical_crossentropy",
    # `learning_rate` replaces the deprecated `lr` argument name; the value
    # is the schedule's rate for epoch 0.
    optimizer=Adam(learning_rate=lr_schedule(0)),
    # The metric is registered as 'acc', which is the name the checkpoint
    # filename template and its `val_acc` monitor refer to.
    metrics=['acc'],
)
Datagen、批量大小、保存模型、ReduceLROnPlateau :
# The original paper trained all networks with batch_size=128; the author's
# note says 128 crashes here, so a smaller batch is used (32 was also tried).
batch_size = 16
epochs = 200

# Prepare the model saving directory and the checkpoint filename template.
model_name = 'resnet50F1-e{epoch:04d}-loss{loss:.3f}-acc{acc:.3f}-valloss{val_loss:.3f}-valacc{val_acc:.3f}.h5'
os.makedirs(save_dir, exist_ok=True)
filepath = os.path.join(save_dir, model_name)

# Callbacks for model saving and for learning rate adjustment.
checkpoint = ModelCheckpoint(
    filepath=filepath,
    monitor='val_acc',
    verbose=1,
    save_best_only=True,
)
lr_scheduler = LearningRateScheduler(lr_schedule)
lr_reducer = ReduceLROnPlateau(
    factor=np.sqrt(0.1),
    cooldown=0,
    patience=5,
    min_lr=0.5e-6,
)
# Preprocessing and realtime data augmentation settings, grouped by purpose.
datagen_options = dict(
    # --- normalisation ---
    featurewise_center=True,             # set input mean to 0 over the dataset
    samplewise_center=True,              # set each sample mean to 0
    featurewise_std_normalization=True,  # divide inputs by std of dataset
    samplewise_std_normalization=True,   # divide each input by its std
    zca_whitening=False,                 # apply ZCA whitening
    zca_epsilon=1e-06,                   # epsilon for ZCA whitening
    rescale=None,                        # rescaling factor (applied before any other transformation)
    # --- random transformations ---
    rotation_range=0.,                   # randomly rotate images in the range (deg 0 to 180)
    width_shift_range=0.,                # randomly shift images horizontally
    height_shift_range=0.,               # randomly shift images vertically
    shear_range=0.,                      # range for random shear
    zoom_range=0.,                       # range for random zoom
    channel_shift_range=0,               # range for random channel shifts
    horizontal_flip=True,                # randomly flip images horizontally
    vertical_flip=False,                 # randomly flip images vertically
    # --- filling of points outside the input boundaries ---
    fill_mode='nearest',
    cval=0.,                             # value used for fill_mode = "constant"
    # --- miscellaneous ---
    preprocessing_function=None,         # function that will be applied on each input
    data_format=None,                    # either "channels_first" or "channels_last"
    validation_split=0.0,                # fraction of images reserved for validation
)
datagen = ImageDataGenerator(**datagen_options)

# Compute quantities required for featurewise normalization
# (std, mean, and principal components if ZCA whitening is applied).
datagen.fit(xtrain)
训练:
# Training batches come out of datagen.flow(), which normalises each image
# with datagen's centering/std settings. Arrays passed directly as validation
# data bypass the generator, so the held-out images are standardised here,
# one image at a time (as flow() does), to give the model the same input
# distribution at validation time. standardize() works in place, hence the
# copy.
xval = np.stack([datagen.standardize(image.copy()) for image in xtest])

history = model.fit(
    datagen.flow(xtrain, t_train, batch_size=batch_size),
    validation_data=(xval, t_test),
    epochs=epochs,
    verbose=0,
    workers=4,
    steps_per_epoch=xtrain.shape[0] // batch_size,
    callbacks=[lr_reducer, lr_scheduler, MyCallback(), checkpoint],
)

# Score trained model on the same standardised held-out images.
scores = model.evaluate(xval, t_test, verbose=1)
print('Test loss:', scores[0])
print('Test accuracy:', scores[1])
版权声明:本文内容由互联网用户自发贡献,该文观点与技术仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 dio@foxmail.com 举报,一经查实,本站将立刻删除。