r中具有不同变量条件的循环线性模型

如何解决r中具有不同变量条件的循环线性模型

我想为线性模型做一个循环，但是遇到问题。

首先，我编写一个循环（无法运行）以提取所需的beta值。

y <- c('y1','y2')
x1 <- c('a1','a2','a3')
x2 <- c('A','B')

for (y in y) {
  for (x1 in x1) {
    for (x2 in x2) {
      m <- lm(as.name(y) ~ as.name(x1) + as.name(x2),data = dat) %>% summary() %>% .$coefficients %>% .[2,1]
      
    }
    
  }
  
}

for循环中y，x1和x2的组合不是我的期望。 lm模型中的公式如下：expand.grid(y,x1,x2)。我所期望的是在独立变量位置上相同字母的所有组合。这是我的原始代码：

m1 <- lm(y1 ~ a1 + A,dat) %>% summary() %>% .$coefficients %>% .[2,1]
m2 <- lm(y1 ~ a2 + A,1]
m3 <- lm(y1 ~ a3 + A,1]
m4 <- lm(y2 ~ a1 + A,1]
m5 <- lm(y2 ~ a2 + A,1]
m6 <- lm(y2 ~ a3 + A,1]
m7 <- lm(y1 ~ b1 + B,1]
m8 <- lm(y1 ~ b2 + B,1]
m9 <- lm(y1 ~ b3 + B,1]
m10 <- lm(y2 ~ b1 + B,1]
m11 <- lm(y2 ~ b2 + B,1]
m12 <- lm(y2 ~ b3 + B,1]```

这是我的数据。

任何帮助将不胜感激！

dat <- structure(list(y1 = c(0.838141931,0.174850172,0.116144283,0.113778511,0.494270733,0.874482265,0.325621743,0.661045636,0.452396096),y2 = c(0.487877797,0.360726955,0.380614137,0.169760207,0.359371965,0.743837108,0.156535906,0.995989192,0.331058618
),a1 = c(0.336537159,0.446060609,0.57586382,0.09629329,0.491634112,0.988226873,0.929105257,0.605957031,0.470720774),a2 = c(0.128615421,0.831986313,0.267777151,0.313178319,0.7776461,0.863337292,0.042818986,0.830029959,0.901586271),a3 = c(0.291053766,0.546719865,0.918797744,0.976353885,0.193777436,0.953859399,0.963312236,0.191449484,0.825034161),b1 = c(0.31510338,0.5441007,0.515466925,0.030702511,0.020932599,0.334734486,0.586588252,0.562970761,0.848337089),b2 = c(0.426787995,0.350719803,0.706471337,0.346462166,0.099766511,0.219781154,0.565047862,0.50282167,0.727813725
),b3 = c(0.799666435,0.07225825,0.409411895,0.701122141,0.529991257,0.478439097,0.79467065,0.442156618,0.026693511
),A = c(0.43143391,0.662313075,0.584967093,0.866110621,0.598682492,0.14665666,0.454315631,0.448968611,0.238969939),B = c(0.060625179,0.410312393,0.614411256,0.127343899,0.90370096,0.882024428,0.681389602,0.56535592,0.850829599)),class = c("spec_tbl_df","tbl_df","tbl","data.frame"),row.names = c(NA,-9L),spec = structure(list(
    cols = list(y1 = structure(list(),class = c("collector_double","collector")),y2 = structure(list(),a1 = structure(list(),a2 = structure(list(),a3 = structure(list(),b1 = structure(list(),b2 = structure(list(),b3 = structure(list(),A = structure(list(),B = structure(list(),"collector"))),default = structure(list(),class = c("collector_guess",skip = 1),class = "col_spec"))

解决方法

我不确定我是否正确，但是看来您整理数据的方式有误。也许这就是您可能想要的：

new_dat = data.frame(
  y = c(rep(dat$y1,3 + 3),rep(dat$y2,3 + 3)),x = c(rep(c(dat$a1,dat$a2,dat$a3),2),rep(c(dat$b1,dat$b2,dat$b3),2)),z = c(rep(dat$A,rep(dat$B,3 + 3))
)

models = with(new_dat,mapply(function(y,x,z) model = lm(y ~ x + z),y,z,SIMPLIFY = FALSE))

让我们知道这是否正是您想要的。如果没有，请详细说明...

使用expand.grid可以创建y，x1和x2的所有组合。使用sprintf将公式创建为字符串。

df <- expand.grid(y,x1,x2)
formula_strings <- sprintf('%s ~ %s + %s',df$Var1,df$Var2,df$Var3)
formula_strings

# [1] "y1 ~ a1 + A" "y2 ~ a1 + A" "y1 ~ a2 + A" "y2 ~ a2 + A"
# [5] "y1 ~ a3 + A" "y2 ~ a3 + A" "y1 ~ a1 + B" "y2 ~ a1 + B"
# [9] "y1 ~ a2 + B" "y2 ~ a2 + B" "y1 ~ a3 + B" "y2 ~ a3 + B"

使用sapply应用模型并从每个模型中提取系数。

values <- sapply(formula_strings,function(x) 
                 lm(x,dat) %>% summary() %>% .$coefficients %>% .[2,1])
values

#y1 ~ a1 + A y2 ~ a1 + A y1 ~ a2 + A y2 ~ a2 + A y1 ~ a3 + A y2 ~ a3 + A 
#  -0.254943   -0.005956   -0.006177    0.286670   -0.393284   -0.387501 
#y1 ~ a1 + B y2 ~ a1 + B y1 ~ a2 + B y2 ~ a2 + B y1 ~ a3 + B y2 ~ a3 + B 
#   0.506848    0.371615    0.182370    0.435684   -0.370723   -0.380668

如果您需要：y〜b [1-3] + B和y〜a [1-3] + A并且不需要y〜a [1-3] + B和y〜b [1- 3] + A，则可以像这样设置data.frame：

library(dplyr)

res = rbind(
expand.grid(response=c('y1','y2'),pre1=c('a1','a2','a3'),pre2='A',stringsAsFactors = FALSE),expand.grid(response=c('y1',pre1=c('b1','b2','b3'),pre2='B',stringsAsFactors = FALSE)
)

从代码的某些部分来看，似乎您只需要第二个系数，因此一种简单的基本R方法将是使用reformulate来为每一行构造公式：

res$coef = sapply(1:nrow(res),function(i){
predictor = as.character(res[i,c("pre1","pre2")])
response = res$response[i]
f = reformulate(predictor,response=response)
coefficients(lm(f,data=dat))[2]
})

       head(res)
  response pre1 pre2         coef
1       y1   a1    A -0.254942513
2       y2   a1    A -0.005955600
3       y1   a2    A -0.006177156
4       y2   a2    A  0.286669786
5       y1   a3    A -0.393284033
6       y2   a3    A -0.387500900

一个整洁的解决方案将是这样，我们首先将模型嵌套在其中

library(purrr)
library(broom)
library(tidyr)

res = rbind(
expand.grid(response=c('y1',stringsAsFactors = FALSE)
)

res = res %>% 
mutate(model=1:n()) %>% 
nest(param=c(c(response,pre1,pre2))) %>% 
mutate(
fit = map(param,~
lm(reformulate(c(.$pre1,.$pre2),response=.$response),data=dat)),tidied=map(fit,tidy)
)

模型和结果嵌套在小块中：

    res
    # A tibble: 12 x 4
   model param            fit    tidied          
   <int> <list>           <list> <list>          
 1     1 <tibble [1 × 3]> <lm>   <tibble [3 × 5]>
 2     2 <tibble [1 × 3]> <lm>   <tibble [3 × 5]>
 3     3 <tibble [1 × 3]> <lm>   <tibble [3 × 5]>
 4     4 <tibble [1 × 3]> <lm>   <tibble [3 × 5]>

如果需要第二个系数：

res %>% unnest(c(param,tidied)) %>% filter(term==pre1)
    # A tibble: 12 x 10
   model response pre1  pre2  fit    term  estimate std.error statistic p.value
   <int> <chr>    <chr> <chr> <list> <chr>    <dbl>     <dbl>     <dbl>   <dbl>
 1     1 y1       a1    A     <lm>   a1    -0.255       0.390   -0.653   0.538 
 2     2 y2       a1    A     <lm>   a1    -0.00596     0.479   -0.0124  0.990 
 3     3 y1       a2    A     <lm>   a2    -0.00618     0.246   -0.0251  0.981 
 4     4 y2       a2    A     <lm>   a2     0.287       0.268    1.07    0.325 
 5     5 y1       a3    A     <lm>   a3    -0.393       0.176   -2.24    0.0664
 6     6 y2       a3    A     <lm>   a3    -0.388       0.233   -1.66    0.148 
 7     7 y1       a1    B     <lm>   a1     0.507       0.541    0.937   0.385 
 8     8 y2       a1    B     <lm>   a1     0.372       0.510    0.729   0.493 
 9     9 y1       a2    B     <lm>   a2     0.182       0.391    0.466   0.657 
10    10 y2       a2    B     <lm>   a2     0.436       0.319    1.37    0.221 
11    11 y1       a3    B     <lm>   a3    -0.371       0.310   -1.19    0.277 
12    12 y2       a3    B     <lm>   a3    -0.381       0.276   -1.38    0.217

r中具有不同变量条件的循环线性模型

如何解决r中具有不同变量条件的循环线性模型

解决方法

相关推荐