如何解决LSTM NN给出了到达时间的裤子预测,我哪里做错了?
这是我第一次使用NN进行时间序列预测,我试图按小时预测下周或下个月的到达时间,但结果却很差, 进口
%matplotlib inline
import pandas as pd
from datetime import datetime
from dateutil.relativedelta import relativedelta
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator # Generates batches for sequence data
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense,LSTM,Dropout
from tensorflow.keras.callbacks import EarlyStopping
**Data processing**
def parser(x):
return datetime.strptime(x,'%Y-%m-%d %H:%M:%S')
cols= ['datetime','arrivals']#,'departures','occupancy']
df = pd.read_csv('/Users/mikesmith/Downloads/last3wkOcc.csv',usecols=cols,parse_dates=['datetime'],index_col=['datetime'],date_parser=parser)
print(df.head())
print(df.tail())
print(df.dtypes)
arrivals
datetime
2020-07-22 11:00:00 7.0
2020-07-22 12:00:00 9.0
2020-07-22 13:00:00 4.0
2020-07-22 14:00:00 6.0
2020-07-22 15:00:00 15.0
arrivals
datetime
2020-08-12 07:00:00 2.0
2020-08-12 08:00:00 4.0
2020-08-12 09:00:00 2.0
2020-08-12 10:00:00 4.0
2020-08-12 11:00:00 0.0
arrivals float64
dtype: object
print('Number of 60min Buckets ',len(df))# 505
test_pecent = 0.3 # 30 percent of data
len(df)*test_pecent
test_point = np.round(len(df)*test_pecent)
test_index =int(len(df) - test_point)
print('test index',test_index)
print('test date start',df.index[test_index])
train = df.iloc[:test_index]
test = df.iloc[test_index:]
Number of 60min Buckets 505
test index 353
test date start 2020-08-06 04:00:00
缩放数据
scaler = MinMaxScaler()
scaler.fit(train)
MinMaxScaler(copy=True,feature_range=(0,1))
scaled_train = scaler.transform(train)
scaled_test = scaler.transform(test)
LSTM模型
early_stop = EarlyStopping(monitor='val_loss',patience=2)
length = 84
batch_size = 1
n_features = 1
generator = TimeseriesGenerator(scaled_train,scaled_train,length=length,batch_size=batch_size)
validation_generator = TimeseriesGenerator(scaled_test,scaled_test,batch_size=batch_size)
model = Sequential()
model.add(LSTM(512,activation = 'relu',input_shape=(length,n_features)))
model.add(Dropout(0.2))
model.add(Dense(1))
model.compile(optimizer = 'adam',loss='mse',metrics=['accuracy'])
model.fit(generator,epochs=20,validation_data=validation_generator,callbacks=[early_stop])
model.summary()
WARNING:tensorflow:sample_weight modes were coerced from
...
to
['...']
WARNING:tensorflow:sample_weight modes were coerced from
...
to
['...']
Train for 269 steps,validate for 68 steps
Epoch 1/20
269/269 [==============================] - 42s 157ms/step - loss: 0.0175 - accuracy: 0.2342 - val_loss: 0.0032 - val_accuracy: 0.3235
Epoch 2/20
269/269 [==============================] - 34s 125ms/step - loss: 0.0149 - accuracy: 0.2342 - val_loss: 0.0051 - val_accuracy: 0.3235
Epoch 3/20
269/269 [==============================] - 37s 137ms/step - loss: 0.0135 - accuracy: 0.2342 - val_loss: 0.0057 - val_accuracy: 0.3235
Model: "sequential_22"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
lstm_22 (LSTM) (None,512) 1052672
_________________________________________________________________
dropout_3 (Dropout) (None,512) 0
_________________________________________________________________
dense_21 (Dense) (None,1) 513
=================================================================
Total params: 1,053,185
Trainable params: 1,185
Non-trainable params: 0
_________________________________________________________________
预测
forecast = []
first_eval_batch = scaled_train[-length:]
current_batch = first_eval_batch.reshape((1,length,n_features))
for i in range(length):
current_pred = model.predict(current_batch)[0]
forecast.append(current_pred)
current_batch = np.append(current_batch[:,1:,:],[[current_pred]],axis = 1)
绘制预测值
df_forecast = pd.DataFrame(scaler.inverse_transform(forecast),index=df[-length:].index,columns = ['predictions'])
df_test = pd.concat ([df,df_forecast],axis =1)
print(df_test.head())
print('---------------------')
print(df_test.tail(7))
print('number of geusses',len((df_test[df_test['predictions'].notna()])))
arrivals predictions
datetime
2020-07-22 11:00:00 7.0 NaN
2020-07-22 12:00:00 9.0 NaN
2020-07-22 13:00:00 4.0 NaN
2020-07-22 14:00:00 6.0 NaN
2020-07-22 15:00:00 15.0 NaN
---------------------
arrivals predictions
datetime
2020-08-12 05:00:00 2.0 3.716657
2020-08-12 06:00:00 4.0 3.715892
2020-08-12 07:00:00 2.0 3.714540
2020-08-12 08:00:00 4.0 3.712937
2020-08-12 09:00:00 2.0 3.711422
2020-08-12 10:00:00 4.0 3.710272
2020-08-12 11:00:00 0.0 3.709655
number of geusses 84
绘制结果
plt.plot(df_test.index,df_test['arrivals'])
plt.plot(df_test.index,df_test['predictions'])
plt.show()
因此,您可以看到结果不是很准确,不确定我犯了什么错误,我尝试了长度和批次大小,任何帮助或评论都非常感谢
版权声明:本文内容由互联网用户自发贡献,该文观点与技术仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 dio@foxmail.com 举报,一经查实,本站将立刻删除。