根据在R中包含字符串的任何变量上获取值来对变量进行突变

如何解决根据在R中包含字符串的任何变量上获取值来对变量进行突变

我有一个数据框，我想新建（mutate）一个变量：只要其他若干列中有任意一列取值为1，该变量就取值为1。

基本上，我有多个名称中包含`resavoid1`的变量，我想创建一个新列`avoid`：如果某一行中名称包含`resavoid1`的任意一列的值为1，则该列等于1，否则为0。

这是我的数据帧的少量数据：

resavoid1

我想要的是，如果包含structure(list(pidp = structure(c(22445,280165,387605,541285,599765,665045,813285,1731965,1833965,2297045,2853965,3565925,3663845,3667245,3705325,4091565,4454005,4473725,4626045,4626725),label = "cross-wave person identifier (public release) ",format.stata = "%12.0g"),pid = structure(c(10127798,12430439,13361163,14396769,14757249,15270041,16441141,45754268,50832336,77185978,96577029,118692798,119065835,119074613,119277506,135447429,154358304,154588539,164246088,164246118),label = "personal identifier (BHPS cohort)",format.stata = "%12.0g",labels = c(missing = -9,inapplicable = -8,proxy = -7,refusal = -2,`don't know` = -1 ),class = c("haven_labelled","vctrs_vctr","double")),e_fnpid = structure(c(-8,-8,341490565,137280445,544803765,409500085,435306085,489864529,-8),label = "Natural father: PIDP",`don't know` = -1),e_mnpid = structure(c(272012925,750828805,341490569,137280449,205591205,205598685,544803769,477995125,748184965,3564565,435306089,299941205,489864525,label = "Natural mother: PIDP",e_pn1pid = structure(c(272012925,label = "Natural parent 1: PIDP",e_pn2pid = structure(c(-8,label = "Natural parent 2: PIDP",e_pns1pid = structure(c(272012925,label = "Nat/step/adopt parent 1: PIDP",e_pns2pid = structure(c(-8,205598689,477995137,label = "Nat/step/adopt parent 2: PIDP",e_grfpid = structure(c(-8,label = "Grandfather: PIDP",e_grmpid = structure(c(-8,409500089,label = "Grandmother: PIDP",e_hrpid = structure(c(272012925,690118488,164178525,4626045),label = "Household reference person: PIDP",e_ppid = structure(c(-8,411978685,4626725,4626045 ),label = "partner's person identifier: PIDP",e_sppid = structure(c(-8,label = "Spouse's person identifier: PIDP",e_fnspid = structure(c(-8,label = "Nat/step/adopt father: PIDP",e_mnspid = structure(c(272012925,label = "Nat/step/adopt mother: PIDP",e_sex = structure(c(2,2,1,2),label = "sex",format.stata = "%8.0g",`don't know` = -1,male = 1,female = 2),e_bosssex = structure(c(-8,-7,label = "sex of boss ",e_cowosexn = 
structure(c(-8,-8 ),label = "number of co-workers female ",e_cowosexp = structure(c(-8,label = "proportion of co-workers female ",`0 in 10` = 0,`1 in 10` = 1,`2 in 10` = 2,`3 in 10` = 3,`4 in 10` = 4,`5 in 10` = 5,`6 in 10` = 6,`7 in 10` = 7,`8 in 10` = 8,`9 in 10` = 9,`10 in 10` = 10,`can t say` = 98 ),e_sexuor = structure(c(-8,label = "sexual orientation ",`heterosexual or straight` = 1,`gay or lesbian` = 2,bisexual = 3,other = 4,`prefer not to say` = 5),e_scwhorusex = structure(c(1,4,3,8,1),label = "important who you are: gender ",`very important to my sense of who i am` = 1,`fairly important to my sense of who i am` = 2,`not very important to my sense of who i am` = 3,`not at all important to my sense of who i am` = 4,`don't know/doesn't apply` = 8),e_ppsex = structure(c(-8,label = "Partner's sex","double" )),e_pn1sex = structure(c(2,label = "Natural parent 1: Sex",e_pn2sex = structure(c(-8,label = "Natural parent 2: Sex",e_pns1sex = structure(c(2,label = "Nat/step/adopt parent 1: Sex",e_pns2sex = structure(c(-8,label = "Nat/step/adopt parent 2: Sex",e_sex_dv = structure(c(2,label = "Sex,derived",inconsistent = 0,Male = 1,Female = 2),e_dvage = structure(c(28,34,25,27,26,31,43,22,48,16,23,29,59,35,68,57,77,75),label = "age from date of birth or ageif ",e_maage = structure(c(-8,54,86,label = "mother s age",e_paage = structure(c(62,64,52,47,label = "father s age",e_ageret = structure(c(-8,label = "age expected to retire ",e_heritage1 = structure(c(0,-9,0),label = "historical sites: a city or town with historic character",`not mentioned` = 0,mentioned = 1),e_heritage2 = structure(c(1,label = "historical sites: a historic building open to the public (non-religious)",e_heritage3 = structure(c(0,label = "historical sites: a historic park or garden open to the public ",e_heritage4 = structure(c(0,label = "historical sites: a place connected with industrial history (e.g. 
an old fact ",e_heritage5 = structure(c(0,label = "historical sites: a historic place of worship attended as a visitor (not to w ",e_heritage6 = structure(c(1,label = "historical sites: a monument such as a castle,fort or ruin ",e_heritage7 = structure(c(0,label = "historical sites: a site of archaeological interest (e.g. roman villa,ancien ",e_heritage8 = structure(c(0,label = "historical sites: a site connected with sports heritage (e.g. wimbledon) (not ",e_heritage96 = structure(c(0,label = "historical sites: none of these things ",e_scwhoruage = structure(c(1,label = "important who you are: age/life stage ",e_scage1drnk = structure(c(11,13,15,18,17,12,19,19),label = "age of first alcoholic drink",e_age_if = structure(c(0,label = "Imputation flag for age_dv",`nothing imputed` = 0,`(partly) imputed` = 1),e_age_dv = structure(c(28,label = "Age,derived from dob_dv and intdat_dv",e_agegr5_dv = structure(c(6,7,6,9,5,10,14,15 ),label = "Age group (age_dv): 5 year intervals",`0-4 years old` = 1,`5-9 years old` = 2,`10-14 years old` = 3,`15-19 years old` = 4,`20-24 years old` = 5,`25-29 years old` = 6,`30-34 years old` = 7,`35-39 years old` = 8,`40-44 years old` = 9,`45-49 years old` = 10,`50-54 years old` = 11,`55-59 years old` = 12,`60-64 years old` = 13,`65-69 years old` = 14,`70 years or older` = 15),e_agegr10_dv = structure(c(3,8),label = "Age group (age_dv): 10 year intervals",`0-9 years old` = 1,`10-19 years old` = 2,`20-29 years old` = 3,`30-39 years old` = 4,`40-49 years old` = 5,`50-59 years old` = 6,`60-69 years old` = 7,`70 years or older` = 8),e_agegr13_dv = structure(c(5,11,13),label = "Age group (age_dv): 13 categories",`0-15 years old` = 1,`16-17 years old` = 2,`18-19 years old` = 3,`20-24 years old` = 4,`25-29 years old` = 5,`30-34 years old` = 6,`35-39 years old` = 7,`40-44 years old` = 8,`45-49 years old` = 9,`50-54 years old` = 10,`55-59 years old` = 11,`60-64 years old` = 12,`65 years or older` = 13 ),e_resavoid1_1 = structure(c(-8,label = "avoid 
place no. 1: reasons avoided places: your sex",e_resavoid1_2 = structure(c(-8,label = "avoid place no. 2: reasons avoided places: your sex",e_resavoid1_3 = structure(c(-8,label = "avoid place no. 3: reasons avoided places: your sex",e_resavoid1_4 = structure(c(-8,label = "avoid place no. 4: reasons avoided places: your sex",e_resavoid1_5 = structure(c(-8,label = "avoid place no. 5: reasons avoided places: your sex",e_resavoid1_6 = structure(c(-8,label = "avoid place no. 6: reasons avoided places: your sex",e_resavoid1_7 = structure(c(-8,label = "avoid place no. 7: reasons avoided places: your sex",e_resavoid1_8 = structure(c(-8,label = "avoid place no. 8: reasons avoided places: your sex",e_resavoid1_9 = structure(c(-8,label = "avoid place no. 9: reasons avoided places: your sex",e_resavoid1_10 = structure(c(-8,label = "avoid place no. 10: reasons avoided places: your sex",e_resavoid1_11 = structure(c(-8,label = "avoid place no. 11: reasons avoided places: your sex",e_resavoid1_12 = structure(c(-8,label = "avoid place no. 12: reasons avoided places: your sex","double"))),row.names = c(NA,-20L),class = c("tbl_df","tbl","data.frame")) == 1的任何变量的值都为1，否则为0。

我尝试了其他类似的事情：

resavoid1

但是我从来没有完全得到想要的东西，否则就会遇到错误。

感谢您的帮助！谢谢！

解决方法

ifelse的问题在于，与if(){}else{}不同，它是向量化的——输出的长度与输入测试条件的长度相同。您的测试条件是any()，其结果长度为1，因此从ifelse中只能得到1个值，而您需要的是每行一个值。

我认为这应该可行：

# Flag each row with 1 when any column whose name matches "resavoid" equals 1,
# and with 0 otherwise. Requires dplyr to be attached for %>%, mutate(),
# select() and matches().
# The comparison `== 1` is applied before summing so that negative
# missing-value codes (e.g. -8) cannot cancel out a genuine 1; NA cells are
# treated as "not 1" via na.rm = TRUE.
df <- df %>%
  mutate(
    avoid = as.integer(
      rowSums(select(., matches("resavoid")) == 1, na.rm = TRUE) > 0
    )
  )

不过我无法完全确定——您分享的59列数据有点让人应接不暇，而且乍看之下，您分享的resavoid列中没有任何一列包含值1。

我建议使用一个小的数据样本，例如包含2个非resavoid列，以及2到3个含有一些1的resavoid列，这样您就能轻松地看清发生了什么：

# Small reproducible example: two unrelated columns plus two columns whose
# names contain "resavoid". Every column has four values (data.frame() needs
# columns of compatible length); rows 3 and 4 hold a 1 in at least one of the
# "resavoid" columns.
df_test <- data.frame(
  x = 1:4,
  hello = 4:1,
  e_resavoid1_12 = c(0, 0, 1, 0),
  e_eresavoid1_8 = c(0, 0, 1, 1)
)

## looks right: rows 3 and 4 hold a 1 in a "resavoid" column
# Compare with 1 before summing so that negative codes in other columns cannot
# mask a real 1; NA cells count as "not 1". Requires dplyr to be attached.
df_test %>%
  mutate(
    avoid = as.integer(
      rowSums(select(., matches("resavoid")) == 1, na.rm = TRUE) > 0
    )
  )
#   x hello e_resavoid1_12 e_eresavoid1_8 avoid
# 1 1     4              0              0     0
# 2 2     3              0              0     0
# 3 3     2              1              1     1
# 4 4     1              0              1     1

我赞成@Gregor的回答，但对于使用新版dplyr across语义的人来说，下面是等效的解决方案：

# Row-wise variant: avoid is 1 when any column whose name contains "resavoid"
# equals 1 in that row, and 0 otherwise. Requires dplyr (>= 1.0.0) for
# rowwise() and c_across().
# Testing `== 1` (rather than summing the raw values) keeps negative codes
# from cancelling out a real 1; na.rm = TRUE treats NA cells as "not 1".
# ungroup() drops the row-wise grouping so later verbs act on whole columns.
df %>%
  rowwise() %>%
  mutate(
    avoid = as.integer(any(c_across(contains("resavoid")) == 1, na.rm = TRUE))
  ) %>%
  ungroup()

在base R中，我们可以使用rowSums

 # Base R: per row, count the columns named like "resavoid" that equal 1 and
 # flag the row when that count is positive. Comparing with 1 first keeps
 # negative codes from cancelling a real 1; NA cells count as "not 1".
 df_test$avoid <- as.integer(
   rowSums(df_test[grepl("resavoid", names(df_test))] == 1, na.rm = TRUE) > 0
 )

根据在R中包含字符串的任何变量上获取值来对变量进行突变

如何解决根据在R中包含字符串的任何变量上获取值来对变量进行突变

解决方法

相关推荐