执行许多模拟以避免循环

如何解决执行许多模拟以避免循环

我正在尝试模拟拟合值以更改数据的不同子组的模型,这些值再次基于对原始数据帧的另一个子集的随机采样(我为这个问题组成的最小示例忽略了随机采样等,则所有模拟的拟合值都相同,但这无关紧要)。我编写了一个dplyr代码,用于存储每个组的模型,生成用于预测拟合值的新x值,预测它们等,等等。它生成一列拟合值,正是我想要的。但是,我想完成整个过程1000倍。我当然可以使用for循环(如下面的示例中所示)执行此操作,但是是否有可能在dplyr-pipe行中执行此操作?也许可以加快整个过程的速度(我的原始数据集很大,for循环要花很多时间)?

# making up data
dat <- data.frame("species" =  seq(1:20),"col_A" = runif(20,min=1000,max=2500),"col_B" = runif(20,min = 0,max = 1500),"maximum" = rep(2500,20),"minimum" = rep(1000,groups = rep(LETTERS[1:5],each = 4))

# functions to use with purrr
linear_mod <- function(dat) {
  lm(col_A ~ col_B,data = dat)
}

# define parametres and an empty data frame to use in the for loop
runs <- 10
fitted_sim <- data.frame(matrix(data=NA,nrow=20,ncol=runs+1,byrow=FALSE)) #empty dataframe to contain fitted values for each alt
names(fitted_sim) <- as.factor(seq(1:runs+1))

# for-loop around my dplyr-code
for (j in 1:runs){
  simul <- dat %>%
    group_by(groups) %>%
    nest(data = c(col_A,col_B,species)) %>%
    mutate(model = map(data,linear_mod),# add model for every group
           sim_data = list(seq(minimum,maximum,by = 10))) %>% # define new x-values for later predictions
    unnest(sim_data) %>%
    nest(sim_data = sim_data) %>%
    mutate(fitted = map2(model,sim_data,~predict(.x,col_B = sim_data,type = "response")),# predict values
    unnest(fitted) # unnest predicted values to save in data frame
  
  # save newly fitted values in fitted_sim data frame
  fitted_sim[,1] <- simul$groups
  fitted_sim[,1+j] <- simul$fitted
}

感谢每一个提示!

编辑: 这是一个扩展的示例代码,包括上述随机采样,但在我的第一个示例中省略了:

# for-loop around my dplyr-code
for (j in 1:runs) {
  simul <- dat %>%
    group_by(groups) %>%
    rowwise() %>%
    mutate(vector_column = case_when(abs(col_A) == (maximum-minimum) ~ list(col_B),sign(col_A) == 1 ~ list(dat$col_B[dat$col_B <= maximum - col_A]),# using list function to store vectors in a data.frame
                                     sign(col_A) != 1 ~ list(dat$col_B[dat$col_B >= minimum + col_A])),helper = !is_empty(vector_column),# in case some of the vectors are empty so it is not possible to use sample
           col_B_new = ifelse(helper,sample(vector_column,1),NA),helper = NULL,sim_data = list(seq(minimum,by = 10))) %>% # define new x-values for later predictions
    ungroup() %>% # get rid of rowwise()
    group_by(groups) %>%
    unnest(sim_data) %>%
    nest(sim_data = sim_data) %>%
    nest(data = c(col_A,col_B_new,fitted = map2(model,col_B_new = sim_data,type = "response"))) %>% # predict values
    unnest(fitted) # unnest predicted values to save in data frame
           
           # save newly fitted values in fitted_sim data frame
           fitted_sim[,1] <- simul$groups
           fitted_sim[,1+j] <- simul$fitted
}

解决方法

不确定从simul数据框架中到底需要什么,但是各个循环元素似乎彼此独立,因此您可以使用parallel:mclappy在其中运行它们平行。在我的示例中,我只使用lapply,但这已经快了很多-如果可以以任何方式帮助您...

library(tidyverse)
library(data.table)
library(microbenchmark)

# making up data
dat <- data.frame("species" =  seq(1:20),"col_A" = runif(20,min=1000,max=2500),"col_B" = runif(20,min = 0,max = 1500),"maximum" = rep(2500,20),"minimum" = rep(1000,groups = rep(LETTERS[1:5],each = 4))

# functions to use with purrr
linear_mod <- function(dat) {
    lm(col_A ~ col_B,data = dat)
}

# define parametres and an empty data frame to use in the for loop
runs <- 10
fitted_sim <- data.frame(matrix(data=NA,nrow=20,ncol=runs+1,byrow=FALSE)) #empty dataframe to contain fitted values for each alt
names(fitted_sim) <- as.factor(seq(1:runs+1))

# I just wrapped your code in a function for benchmarking
run.simul <- function(){
    for (j in 1:runs){
        simul <- dat %>%
            group_by(groups) %>%
            nest(data = c(col_A,col_B,species)) %>%
            mutate(model = map(data,linear_mod),# add model for every group
                   sim_data = list(seq(minimum,maximum,by = 10))) %>% # define new x-values for later predictions
            unnest(sim_data) %>%
            nest(sim_data = sim_data) %>%
            mutate(fitted = map2(model,sim_data,~predict(.x,col_B = sim_data,type = "response"))) %>% # predict values
            unnest(fitted) # unnest predicted values to save in data frame
        # save newly fitted values in fitted_sim data frame
        fitted_sim[,1] <- simul$groups
        fitted_sim[,1+j] <- simul$fitted
    }
    fitted_sim
}


# my version (sorry for using `data.table`...)
Dat <- data.table(dat,key="groups")
getFits <- function(x){
    x[,fitted := predict(lm(col_A ~ col_B),.(list(seq(minimum[1],maximum[1],by = 10)))),by = groups]
    x[,.(species,groups,fitted)]
}

# get the results into a data.frame; use mclapply instead of lapply if you like
dcast(rbindlist(lapply(seq_len(runs),function(z) getFits(data.table(dat,key="groups"))),idcol = "run"),... ~ run,value.var = "fitted")
#>     species groups        1        2        3        4        5        6
#>  1:       1      A 1767.621 1767.621 1767.621 1767.621 1767.621 1767.621
#>  2:       2      A 1376.679 1376.679 1376.679 1376.679 1376.679 1376.679
#>  3:       3      A 1523.100 1523.100 1523.100 1523.100 1523.100 1523.100
#>  4:       4      A 1794.593 1794.593 1794.593 1794.593 1794.593 1794.593
#>  5:       5      B 1272.408 1272.408 1272.408 1272.408 1272.408 1272.408
#>  6:       6      B 1967.709 1967.709 1967.709 1967.709 1967.709 1967.709
#>  7:       7      B 1792.934 1792.934 1792.934 1792.934 1792.934 1792.934
#>  8:       8      B 2318.699 2318.699 2318.699 2318.699 2318.699 2318.699
#>  9:       9      C 2017.187 2017.187 2017.187 2017.187 2017.187 2017.187
#> 10:      10      C 1899.827 1899.827 1899.827 1899.827 1899.827 1899.827
#> 11:      11      C 1734.953 1734.953 1734.953 1734.953 1734.953 1734.953
#> 12:      12      C 1834.046 1834.046 1834.046 1834.046 1834.046 1834.046
#> 13:      13      D 1797.615 1797.615 1797.615 1797.615 1797.615 1797.615
#> 14:      14      D 1915.832 1915.832 1915.832 1915.832 1915.832 1915.832
#> 15:      15      D 1841.489 1841.489 1841.489 1841.489 1841.489 1841.489
#> 16:      16      D 1798.442 1798.442 1798.442 1798.442 1798.442 1798.442
#> 17:      17      E 1641.359 1641.359 1641.359 1641.359 1641.359 1641.359
#> 18:      18      E 1631.253 1631.253 1631.253 1631.253 1631.253 1631.253
#> 19:      19      E 1634.197 1634.197 1634.197 1634.197 1634.197 1634.197
#> 20:      20      E 1634.991 1634.991 1634.991 1634.991 1634.991 1634.991
#>            7        8        9       10
#>  1: 1767.621 1767.621 1767.621 1767.621
#>  2: 1376.679 1376.679 1376.679 1376.679
#>  3: 1523.100 1523.100 1523.100 1523.100
#>  4: 1794.593 1794.593 1794.593 1794.593
#>  5: 1272.408 1272.408 1272.408 1272.408
#>  6: 1967.709 1967.709 1967.709 1967.709
#>  7: 1792.934 1792.934 1792.934 1792.934
#>  8: 2318.699 2318.699 2318.699 2318.699
#>  9: 2017.187 2017.187 2017.187 2017.187
#> 10: 1899.827 1899.827 1899.827 1899.827
#> 11: 1734.953 1734.953 1734.953 1734.953
#> 12: 1834.046 1834.046 1834.046 1834.046
#> 13: 1797.615 1797.615 1797.615 1797.615
#> 14: 1915.832 1915.832 1915.832 1915.832
#> 15: 1841.489 1841.489 1841.489 1841.489
#> 16: 1798.442 1798.442 1798.442 1798.442
#> 17: 1641.359 1641.359 1641.359 1641.359
#> 18: 1631.253 1631.253 1631.253 1631.253
#> 19: 1634.197 1634.197 1634.197 1634.197
#> 20: 1634.991 1634.991 1634.991 1634.991

run.simul()
#>    1        2        3        4        5        6        7        8        9
#> 1  A 1767.621 1767.621 1767.621 1767.621 1767.621 1767.621 1767.621 1767.621
#> 2  A 1376.679 1376.679 1376.679 1376.679 1376.679 1376.679 1376.679 1376.679
#> 3  A 1523.100 1523.100 1523.100 1523.100 1523.100 1523.100 1523.100 1523.100
#> 4  A 1794.593 1794.593 1794.593 1794.593 1794.593 1794.593 1794.593 1794.593
#> 5  B 1272.408 1272.408 1272.408 1272.408 1272.408 1272.408 1272.408 1272.408
#> 6  B 1967.709 1967.709 1967.709 1967.709 1967.709 1967.709 1967.709 1967.709
#> 7  B 1792.934 1792.934 1792.934 1792.934 1792.934 1792.934 1792.934 1792.934
#> 8  B 2318.699 2318.699 2318.699 2318.699 2318.699 2318.699 2318.699 2318.699
#> 9  C 2017.187 2017.187 2017.187 2017.187 2017.187 2017.187 2017.187 2017.187
#> 10 C 1899.827 1899.827 1899.827 1899.827 1899.827 1899.827 1899.827 1899.827
#> 11 C 1734.953 1734.953 1734.953 1734.953 1734.953 1734.953 1734.953 1734.953
#> 12 C 1834.046 1834.046 1834.046 1834.046 1834.046 1834.046 1834.046 1834.046
#> 13 D 1797.615 1797.615 1797.615 1797.615 1797.615 1797.615 1797.615 1797.615
#> 14 D 1915.832 1915.832 1915.832 1915.832 1915.832 1915.832 1915.832 1915.832
#> 15 D 1841.489 1841.489 1841.489 1841.489 1841.489 1841.489 1841.489 1841.489
#> 16 D 1798.442 1798.442 1798.442 1798.442 1798.442 1798.442 1798.442 1798.442
#> 17 E 1641.359 1641.359 1641.359 1641.359 1641.359 1641.359 1641.359 1641.359
#> 18 E 1631.253 1631.253 1631.253 1631.253 1631.253 1631.253 1631.253 1631.253
#> 19 E 1634.197 1634.197 1634.197 1634.197 1634.197 1634.197 1634.197 1634.197
#> 20 E 1634.991 1634.991 1634.991 1634.991 1634.991 1634.991 1634.991 1634.991
#>          10       NA
#> 1  1767.621 1767.621
#> 2  1376.679 1376.679
#> 3  1523.100 1523.100
#> 4  1794.593 1794.593
#> 5  1272.408 1272.408
#> 6  1967.709 1967.709
#> 7  1792.934 1792.934
#> 8  2318.699 2318.699
#> 9  2017.187 2017.187
#> 10 1899.827 1899.827
#> 11 1734.953 1734.953
#> 12 1834.046 1834.046
#> 13 1797.615 1797.615
#> 14 1915.832 1915.832
#> 15 1841.489 1841.489
#> 16 1798.442 1798.442
#> 17 1641.359 1641.359
#> 18 1631.253 1631.253
#> 19 1634.197 1634.197
#> 20 1634.991 1634.991

# benchmarking
microbenchmark(
    a=dcast(rbindlist(lapply(seq_len(runs),value.var = "fitted"),b=run.simul(),times= 10L
    )
#> Unit: milliseconds
#>  expr       min        lq      mean    median       uq      max neval cld
#>     a  46.46851  48.65027  51.09618  49.88775  54.7177  56.2888    10  a 
#>     b 389.76893 410.57588 418.16993 415.74493 424.2042 448.4604    10   b

reprex package(v0.3.0)于2020-08-13创建

版权声明:本文内容由互联网用户自发贡献,该文观点与技术仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 dio@foxmail.com 举报,一经查实,本站将立刻删除。

相关推荐


依赖报错 idea导入项目后依赖报错,解决方案:https://blog.csdn.net/weixin_42420249/article/details/81191861 依赖版本报错:更换其他版本 无法下载依赖可参考:https://blog.csdn.net/weixin_42628809/a
错误1:代码生成器依赖和mybatis依赖冲突 启动项目时报错如下 2021-12-03 13:33:33.927 ERROR 7228 [ main] o.s.b.d.LoggingFailureAnalysisReporter : *************************** APPL
错误1:gradle项目控制台输出为乱码 # 解决方案:https://blog.csdn.net/weixin_43501566/article/details/112482302 # 在gradle-wrapper.properties 添加以下内容 org.gradle.jvmargs=-Df
错误还原:在查询的过程中,传入的workType为0时,该条件不起作用 &lt;select id=&quot;xxx&quot;&gt; SELECT di.id, di.name, di.work_type, di.updated... &lt;where&gt; &lt;if test=&qu
报错如下,gcc版本太低 ^ server.c:5346:31: 错误:‘struct redisServer’没有名为‘server_cpulist’的成员 redisSetCpuAffinity(server.server_cpulist); ^ server.c: 在函数‘hasActiveC
解决方案1 1、改项目中.idea/workspace.xml配置文件,增加dynamic.classpath参数 2、搜索PropertiesComponent,添加如下 &lt;property name=&quot;dynamic.classpath&quot; value=&quot;tru
删除根组件app.vue中的默认代码后报错:Module Error (from ./node_modules/eslint-loader/index.js): 解决方案:关闭ESlint代码检测,在项目根目录创建vue.config.js,在文件中添加 module.exports = { lin
查看spark默认的python版本 [root@master day27]# pyspark /home/software/spark-2.3.4-bin-hadoop2.7/conf/spark-env.sh: line 2: /usr/local/hadoop/bin/hadoop: No s
使用本地python环境可以成功执行 import pandas as pd import matplotlib.pyplot as plt # 设置字体 plt.rcParams[&#39;font.sans-serif&#39;] = [&#39;SimHei&#39;] # 能正确显示负号 p
错误1:Request method ‘DELETE‘ not supported 错误还原:controller层有一个接口,访问该接口时报错:Request method ‘DELETE‘ not supported 错误原因:没有接收到前端传入的参数,修改为如下 参考 错误2:cannot r
错误1:启动docker镜像时报错:Error response from daemon: driver failed programming external connectivity on endpoint quirky_allen 解决方法:重启docker -&gt; systemctl r
错误1:private field ‘xxx‘ is never assigned 按Altʾnter快捷键,选择第2项 参考:https://blog.csdn.net/shi_hong_fei_hei/article/details/88814070 错误2:启动时报错,不能找到主启动类 #
报错如下,通过源不能下载,最后警告pip需升级版本 Requirement already satisfied: pip in c:\users\ychen\appdata\local\programs\python\python310\lib\site-packages (22.0.4) Coll
错误1:maven打包报错 错误还原:使用maven打包项目时报错如下 [ERROR] Failed to execute goal org.apache.maven.plugins:maven-resources-plugin:3.2.0:resources (default-resources)
错误1:服务调用时报错 服务消费者模块assess通过openFeign调用服务提供者模块hires 如下为服务提供者模块hires的控制层接口 @RestController @RequestMapping(&quot;/hires&quot;) public class FeignControl
错误1:运行项目后报如下错误 解决方案 报错2:Failed to execute goal org.apache.maven.plugins:maven-compiler-plugin:3.8.1:compile (default-compile) on project sb 解决方案:在pom.
参考 错误原因 过滤器或拦截器在生效时,redisTemplate还没有注入 解决方案:在注入容器时就生效 @Component //项目运行时就注入Spring容器 public class RedisBean { @Resource private RedisTemplate&lt;String
使用vite构建项目报错 C:\Users\ychen\work&gt;npm init @vitejs/app @vitejs/create-app is deprecated, use npm init vite instead C:\Users\ychen\AppData\Local\npm-