如何解决r中具有不同变量条件的循环线性模型
我想为线性模型做一个循环,但是遇到问题。
首先,我编写一个循环(无法运行)以提取所需的beta值。
y <- c('y1','y2')
x1 <- c('a1','a2','a3')
x2 <- c('A','B')
for (y in y) {
for (x1 in x1) {
for (x2 in x2) {
m <- lm(as.name(y) ~ as.name(x1) + as.name(x2),data = dat) %>% summary() %>% .$coefficients %>% .[2,1]
}
}
}
for循环中y
,x1
和x2
的组合不是我的期望。 lm
模型中的公式如下:expand.grid(y,x1,x2)
。我所期望的是在独立变量位置上相同字母的所有组合。这是我的原始代码:
m1 <- lm(y1 ~ a1 + A,dat) %>% summary() %>% .$coefficients %>% .[2,1]
m2 <- lm(y1 ~ a2 + A,1]
m3 <- lm(y1 ~ a3 + A,1]
m4 <- lm(y2 ~ a1 + A,1]
m5 <- lm(y2 ~ a2 + A,1]
m6 <- lm(y2 ~ a3 + A,1]
m7 <- lm(y1 ~ b1 + B,1]
m8 <- lm(y1 ~ b2 + B,1]
m9 <- lm(y1 ~ b3 + B,1]
m10 <- lm(y2 ~ b1 + B,1]
m11 <- lm(y2 ~ b2 + B,1]
m12 <- lm(y2 ~ b3 + B,1]```
这是我的数据。
任何帮助将不胜感激!
dat <- structure(list(y1 = c(0.838141931,0.174850172,0.116144283,0.113778511,0.494270733,0.874482265,0.325621743,0.661045636,0.452396096),y2 = c(0.487877797,0.360726955,0.380614137,0.169760207,0.359371965,0.743837108,0.156535906,0.995989192,0.331058618
),a1 = c(0.336537159,0.446060609,0.57586382,0.09629329,0.491634112,0.988226873,0.929105257,0.605957031,0.470720774),a2 = c(0.128615421,0.831986313,0.267777151,0.313178319,0.7776461,0.863337292,0.042818986,0.830029959,0.901586271),a3 = c(0.291053766,0.546719865,0.918797744,0.976353885,0.193777436,0.953859399,0.963312236,0.191449484,0.825034161),b1 = c(0.31510338,0.5441007,0.515466925,0.030702511,0.020932599,0.334734486,0.586588252,0.562970761,0.848337089),b2 = c(0.426787995,0.350719803,0.706471337,0.346462166,0.099766511,0.219781154,0.565047862,0.50282167,0.727813725
),b3 = c(0.799666435,0.07225825,0.409411895,0.701122141,0.529991257,0.478439097,0.79467065,0.442156618,0.026693511
),A = c(0.43143391,0.662313075,0.584967093,0.866110621,0.598682492,0.14665666,0.454315631,0.448968611,0.238969939),B = c(0.060625179,0.410312393,0.614411256,0.127343899,0.90370096,0.882024428,0.681389602,0.56535592,0.850829599)),class = c("spec_tbl_df","tbl_df","tbl","data.frame"),row.names = c(NA,-9L),spec = structure(list(
cols = list(y1 = structure(list(),class = c("collector_double","collector")),y2 = structure(list(),a1 = structure(list(),a2 = structure(list(),a3 = structure(list(),b1 = structure(list(),b2 = structure(list(),b3 = structure(list(),A = structure(list(),B = structure(list(),"collector"))),default = structure(list(),class = c("collector_guess",skip = 1),class = "col_spec"))
解决方法
我不确定我是否正确,但是看来您整理数据的方式有误。也许这就是您可能想要的:
new_dat = data.frame(
y = c(rep(dat$y1,3 + 3),rep(dat$y2,3 + 3)),x = c(rep(c(dat$a1,dat$a2,dat$a3),2),rep(c(dat$b1,dat$b2,dat$b3),2)),z = c(rep(dat$A,rep(dat$B,3 + 3))
)
models = with(new_dat,mapply(function(y,x,z) model = lm(y ~ x + z),y,z,SIMPLIFY = FALSE))
让我们知道这是否正是您想要的。如果没有,请详细说明...
,使用expand.grid
可以创建y
,x1
和x2
的所有组合。使用sprintf
将公式创建为字符串。
df <- expand.grid(y,x1,x2)
formula_strings <- sprintf('%s ~ %s + %s',df$Var1,df$Var2,df$Var3)
formula_strings
# [1] "y1 ~ a1 + A" "y2 ~ a1 + A" "y1 ~ a2 + A" "y2 ~ a2 + A"
# [5] "y1 ~ a3 + A" "y2 ~ a3 + A" "y1 ~ a1 + B" "y2 ~ a1 + B"
# [9] "y1 ~ a2 + B" "y2 ~ a2 + B" "y1 ~ a3 + B" "y2 ~ a3 + B"
使用sapply
应用模型并从每个模型中提取系数。
values <- sapply(formula_strings,function(x)
lm(x,dat) %>% summary() %>% .$coefficients %>% .[2,1])
values
#y1 ~ a1 + A y2 ~ a1 + A y1 ~ a2 + A y2 ~ a2 + A y1 ~ a3 + A y2 ~ a3 + A
# -0.254943 -0.005956 -0.006177 0.286670 -0.393284 -0.387501
#y1 ~ a1 + B y2 ~ a1 + B y1 ~ a2 + B y2 ~ a2 + B y1 ~ a3 + B y2 ~ a3 + B
# 0.506848 0.371615 0.182370 0.435684 -0.370723 -0.380668
,
如果您需要:y〜b [1-3] + B和y〜a [1-3] + A并且不需要y〜a [1-3] + B和y〜b [1- 3] + A,则可以像这样设置data.frame:
library(dplyr)
res = rbind(
expand.grid(response=c('y1','y2'),pre1=c('a1','a2','a3'),pre2='A',stringsAsFactors = FALSE),expand.grid(response=c('y1',pre1=c('b1','b2','b3'),pre2='B',stringsAsFactors = FALSE)
)
从代码的某些部分来看,似乎您只需要第二个系数,因此一种简单的基本R方法将是使用reformulate
来为每一行构造公式:
res$coef = sapply(1:nrow(res),function(i){
predictor = as.character(res[i,c("pre1","pre2")])
response = res$response[i]
f = reformulate(predictor,response=response)
coefficients(lm(f,data=dat))[2]
})
head(res)
response pre1 pre2 coef
1 y1 a1 A -0.254942513
2 y2 a1 A -0.005955600
3 y1 a2 A -0.006177156
4 y2 a2 A 0.286669786
5 y1 a3 A -0.393284033
6 y2 a3 A -0.387500900
一个整洁的解决方案将是这样,我们首先将模型嵌套在其中
library(purrr)
library(broom)
library(tidyr)
res = rbind(
expand.grid(response=c('y1',stringsAsFactors = FALSE)
)
res = res %>%
mutate(model=1:n()) %>%
nest(param=c(c(response,pre1,pre2))) %>%
mutate(
fit = map(param,~
lm(reformulate(c(.$pre1,.$pre2),response=.$response),data=dat)),tidied=map(fit,tidy)
)
模型和结果嵌套在小块中:
res
# A tibble: 12 x 4
model param fit tidied
<int> <list> <list> <list>
1 1 <tibble [1 × 3]> <lm> <tibble [3 × 5]>
2 2 <tibble [1 × 3]> <lm> <tibble [3 × 5]>
3 3 <tibble [1 × 3]> <lm> <tibble [3 × 5]>
4 4 <tibble [1 × 3]> <lm> <tibble [3 × 5]>
如果需要第二个系数:
res %>% unnest(c(param,tidied)) %>% filter(term==pre1)
# A tibble: 12 x 10
model response pre1 pre2 fit term estimate std.error statistic p.value
<int> <chr> <chr> <chr> <list> <chr> <dbl> <dbl> <dbl> <dbl>
1 1 y1 a1 A <lm> a1 -0.255 0.390 -0.653 0.538
2 2 y2 a1 A <lm> a1 -0.00596 0.479 -0.0124 0.990
3 3 y1 a2 A <lm> a2 -0.00618 0.246 -0.0251 0.981
4 4 y2 a2 A <lm> a2 0.287 0.268 1.07 0.325
5 5 y1 a3 A <lm> a3 -0.393 0.176 -2.24 0.0664
6 6 y2 a3 A <lm> a3 -0.388 0.233 -1.66 0.148
7 7 y1 a1 B <lm> a1 0.507 0.541 0.937 0.385
8 8 y2 a1 B <lm> a1 0.372 0.510 0.729 0.493
9 9 y1 a2 B <lm> a2 0.182 0.391 0.466 0.657
10 10 y2 a2 B <lm> a2 0.436 0.319 1.37 0.221
11 11 y1 a3 B <lm> a3 -0.371 0.310 -1.19 0.277
12 12 y2 a3 B <lm> a3 -0.381 0.276 -1.38 0.217
版权声明:本文内容由互联网用户自发贡献,该文观点与技术仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 dio@foxmail.com 举报,一经查实,本站将立刻删除。