如何解决R中的时间序列预测;绘制“事件”并在初始预测后以指定的日期范围生成新的预测图
我创建了一个函数,该函数可以让我使用fable
包进行时间序列预测。该功能的想法是分析特定日期/事件后的观测值与预测值。这是一个模拟数据框,它生成一列日期:-
set.seed(1)
df <- data.frame(Date = sort(sample(seq(as.Date('2018/01/01'),as.Date('2020/09/17'),by="day"),1368883,replace = T)))
这是我创建的函数。您指定数据,然后指定事件的日期,然后指定以天为单位的预测期间,最后指定图形的标题。
event_analysis<-function(data,eventdate,period,title){
require(dplyr)
require(tsibble)
require(fable)
require(fabletools)
require(imputeTS)
require(ggplot2)
data_count<-data%>%
group_by(Date)%>%
summarise(Count=n())
data_count<-as_tsibble(data_count)
data_count<-na_mean(data_count)
train <- data_count %>%
#sample_frac(0.8)
filter(Date<=as.Date(eventdate))
fit <- train %>%
model(
ets = ETS(Count),arima = ARIMA(Count),snaive = SNAIVE(Count)
) %>%
mutate(mixed = (ets + arima + snaive) / 3)
fc <- fit %>% forecast(h = period)
forecastplot<-fc %>%
autoplot(data_count,level = NULL)+ggtitle(title)+
geom_vline(xintercept = as.Date(eventdate),linetype="dashed",color="red")+
labs(caption = "Red dashed line = Event occurrence")
fc_accuracy<-accuracy(fc,data_count)
#obs<-data_count
#colnames(obs)[2]<-"Observed"
#obs_pred<-merge(data_count,fc_accuracy,by="Date")
return(list(forecastplot,fc))
}
然后一次运行,我指定df
,事件日期,我要预测的天数(3周),然后指定标题:-
event_analysis(df,"2020-01-01",21,"Event forecast")
哪个将打印此结果并绘图:-
我承认我制作的模拟数据并不完全理想,但该功能在我的真实数据上运行良好。
这是我想要实现的目标。我希望从函数中获得此输出,但是此外,我还希望有一个附加图,它在预测的时间段内“放大”,原因有两个:-
- 为便于解释
- 我希望能够看到N个天数 之前 和N个天数 之后 事件日期(N代表预测期,即21)。
因此,另一幅图(连同原始的完整预测)看起来可能像这样,也许在一个输出中为“ multiplot”样式:-
另一件事是打印另一个输出,该输出显示测试集中的观测值与预测所用模型的预测值。
基本上,这是我想添加到函数中的另外两件事,但是我不确定该怎么做。非常感谢您的任何帮助:)。
解决方法
我想您可以用这种方式重写它。我做了一些调整以帮助您。
set.seed(1)
df <- data.frame(Date = sort(sample(seq(as.Date('2018/01/01'),as.Date('2020/09/17'),by="day"),1368883,replace = T)))
event_analysis <- function(data,eventdate,period,title){
# in the future,you may think to move them out
library(dplyr)
library(tsibble)
library(fable)
library(fabletools)
library(imputeTS)
library(ggplot2)
# convert at the beginning
eventdate <- as.Date(eventdate)
# more compact sintax
data_count <- count(data,Date,name = "Count")
# better specify the date variable to avoid the message
data_count <- as_tsibble(data_count,index = Date)
# you need to complete missing dates,just in case
data_count <- tsibble::fill_gaps(data_count)
data_count <- na_mean(data_count)
train <- data_count %>%
filter(Date <= eventdate)
test <- data_count %>%
filter(Date > eventdate,Date <= (eventdate+period))
fit <- train %>%
model(
ets = ETS(Count),arima = ARIMA(Count),snaive = SNAIVE(Count)
) %>%
mutate(mixed = (ets + arima + snaive) / 3)
fc <- fit %>% forecast(h = period)
# your plot
forecastplot <- fc %>%
autoplot(data_count,level = NULL) +
ggtitle(title) +
geom_vline(xintercept = as.Date(eventdate),linetype = "dashed",color = "red") +
labs(caption = "Red dashed line = Event occurrence")
# plot just forecast and test
zoomfcstplot <- autoplot(fc) + autolayer(test,.vars = Count)
fc_accuracy <- accuracy(fc,data_count)
### EDIT: ###
# results vs test
res <- fc %>%
as_tibble() %>%
select(-Count) %>%
tidyr::pivot_wider(names_from = .model,values_from = .mean) %>%
inner_join(test,by = "Date")
##############
return(list(forecastplot = forecastplot,zoomplot = zoomfcstplot,accuracy = fc_accuracy,forecast = fc,results = res))
}
event_analysis(df,eventdate = "2020-01-01",period = 21,title = "Event forecast")
输出:
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>
#> filter,lag
#> The following objects are masked from 'package:base':
#>
#> intersect,setdiff,setequal,union
#> Carico il pacchetto richiesto: fabletools
#> Registered S3 method overwritten by 'quantmod':
#> method from
#> as.zoo.data.frame zoo
#> $forecastplot
#>
#> $zoomplot
#>
#> $accuracy
#> # A tibble: 4 x 9
#> .model .type ME RMSE MAE MPE MAPE MASE ACF1
#> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 arima Test -16.8 41.8 35.2 -1.31 2.61 0.791 0.164
#> 2 ets Test -16.8 41.8 35.2 -1.31 2.61 0.791 0.164
#> 3 mixed Test -21.9 44.7 36.8 -1.68 2.73 0.825 -0.0682
#> 4 snaive Test -32.1 57.3 46.6 -2.43 3.45 1.05 -0.377
#>
#> $forecast
#> # A fable: 84 x 4 [1D]
#> # Key: .model [4]
#> .model Date Count .mean
#> <chr> <date> <dist> <dbl>
#> 1 ets 2020-01-02 N(1383,1505) 1383.
#> 2 ets 2020-01-03 N(1383,1505) 1383.
#> 3 ets 2020-01-04 N(1383,1505) 1383.
#> 4 ets 2020-01-05 N(1383,1505) 1383.
#> 5 ets 2020-01-06 N(1383,1505) 1383.
#> 6 ets 2020-01-07 N(1383,1505) 1383.
#> 7 ets 2020-01-08 N(1383,1505) 1383.
#> 8 ets 2020-01-09 N(1383,1505) 1383.
#> 9 ets 2020-01-10 N(1383,1505) 1383.
#> 10 ets 2020-01-11 N(1383,1505) 1383.
#> # ... with 74 more rows
#>
#> $results
#> # A tibble: 21 x 6
#> Date ets arima snaive mixed Count
#> <date> <dbl> <dbl> <dbl> <dbl> <int>
#> 1 2020-01-02 1383. 1383. 1386 1384. 1350
#> 2 2020-01-03 1383. 1383. 1366 1377. 1398
#> 3 2020-01-04 1383. 1383. 1426 1397. 1357
#> 4 2020-01-05 1383. 1383. 1398 1388. 1415
#> 5 2020-01-06 1383. 1383. 1431 1399. 1399
#> 6 2020-01-07 1383. 1383. 1431 1399. 1346
#> 7 2020-01-08 1383. 1383. 1350 1372. 1299
#> 8 2020-01-09 1383. 1383. 1386 1384. 1303
#> 9 2020-01-10 1383. 1383. 1366 1377. 1365
#> 10 2020-01-11 1383. 1383. 1426 1397. 1328
#> # ... with 11 more rows
版权声明:本文内容由互联网用户自发贡献,该文观点与技术仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 dio@foxmail.com 举报,一经查实,本站将立刻删除。