如何在 R 中循环遍历数据框的各列
我想统计每一列中“S”出现的次数:第 1 到 10 行记为“下游”(downstream),第 15 到 25 行记为“上游”(upstream)。
然后我想把结果保存到文本文件中。我已经对单独一列实现了这个计算,但在对所有列进行循环并保存结果时遇到了问题。本例中有 5 列,但列数会随文件不同而变化。
# Lookup table: one row per amino-acid letter. `scale` is 1 for "S" and 0 for
# every other letter, so summing `scale` after a join counts the "S" entries.
# NOTE(review): the scale vector previously held 3 values for 20 letters, which
# data.frame() rejects; the 0/1 layout below is reconstructed from the stated
# goal of counting "S" ("S" is the 16th letter) - confirm against the real data.
S <- data.frame(
  scale = c(rep(0, 15), 1, rep(0, 4)),
  aa = c(
    "A", "C", "D", "E", "F", "G", "H", "I", "K", "L",
    "M", "N", "P", "Q", "R", "S", "T", "V", "W", "Y"
  )
)
#input (example)
V1 V2 V3 V4 V5
1 C D E R N
2 C A M K P
3 V T Q Q E
4 A T S S S
5 C D E R N
6 C A M K P
7 V T Q Q E
8 A T S S S
9 R V D S A
10 W R H I C
11 S N I P T
12 Q A S D E
13 C D E R N
14 C A M K P
15 V T Q Q E
16 A T S S S
17 C D E R N
18 C A M K P
19 V T Q Q E
20 A T S S S
21 R V D S A
22 W R H I C
23 S N I P T
24 G A D S S
25 N T T S A
# Match every cell of the input against the lookup table `S`.
# Steps: reshape to long form (one row per cell, `name` holds the source
# column), replace "-" entries with the empty string, attach `scale` from `S`,
# then number the rows within each source column so the table can be widened
# again.
df11 <- df_trial %>%
  pivot_longer(cols = everything(), values_to = "aa") %>%
  mutate(aa = replace(aa, aa == "-", "")) %>%
  left_join(S, by = "aa") %>%
  arrange(name) %>%
  group_by(name) %>%
  mutate(row = row_number()) %>%
  ungroup() # drop the grouping so later verbs do not silently run per `name`
view(df11)

# Back to wide form: one `scale_<column>` and one `aa_<column>` column per
# input column; the helper `row` index is no longer needed afterwards.
values_for_all <- df11 %>%
  pivot_wider(names_from = name, values_from = c(scale, aa)) %>%
  select(-row)
view(values_for_all)
# class(values_for_all)
#class(values_for_all)
# Sum the 0/1 `scale` flags for every input column (not only V1):
# "downstream" = rows 1 to 10, "upstream" = rows 15 to 25.
scale_cols <- as.data.frame(
  values_for_all[, startsWith(names(values_for_all), "scale_"), drop = FALSE]
)

# Letters with no match in the lookup table get an NA `scale`; they are not
# "S", so they are skipped instead of turning the whole sum into NA.
downstream <- colSums(scale_cols[1:10, , drop = FALSE], na.rm = TRUE)
# view(downstream)
upstream <- colSums(scale_cols[15:25, , drop = FALSE], na.rm = TRUE)
# view(upstream)

# One row per input column, with the columns `downstream` and `upstream`
res <- cbind(downstream, upstream)

# Transposed view: one column per input column
res_trial <- as.data.frame(t(res))
view(res_trial)
# class(res_trial)

# converting the matrix to a data frame
res_final <- as.data.frame(res)
view(res_final)
# class(res_final)

# saving to a text file
write.table(
  res_final,
  "~/Desktop/R_work/test.txt",
  sep = "\t",
  row.names = FALSE
)
#expected outcome (example):
downstream upstream
2 0
0 0
谢谢您的帮助!
解决方法
先按行取子集,然后对比较结果使用 colSums 即可:
# Count "S" per column inside a block of rows: compare every cell with "S",
# then add up the TRUE values column by column.

# rows 1 to 10
colSums(df_trial[seq_len(10), ] == "S")
# V1 V2 V3 V4 V5
#  0  0  2  3  2

# rows 15 to 25
colSums(df_trial[seq(15, 25), ] == "S")
# V1 V2 V3 V4 V5
#  1  0  2  5  3
或者使用 dplyr:
library(dplyr)

# For every column, count "S" in rows 1 to 10 (downstream) and in rows 15 to 25
# (upstream); the two counts become the two rows of the result.
df_trial %>%
  summarise(
    across(everything(), ~ c(sum(.[1:10] == "S"), sum(.[15:25] == "S")))
  ) %>%
  # Labels follow the order of the counts above: rows 1-10 first, then 15-25.
  mutate(categ = c("downstream", "upstream"), .before = 1)
#        categ V1 V2 V3 V4 V5
# 1 downstream  0  0  2  3  2
# 2   upstream  1  0  2  5  3
数据
# Example input: 25 rows x 5 columns of single-letter codes, rebuilt from the
# table shown in the question (rows 1 to 25, columns V1 to V5).
df_trial <- structure(
  list(
    V1 = c(
      "C", "C", "V", "A", "C", "C", "V", "A", "R", "W", "S", "Q", "C",
      "C", "V", "A", "C", "C", "V", "A", "R", "W", "S", "G", "N"
    ),
    V2 = c(
      "D", "A", "T", "T", "D", "A", "T", "T", "V", "R", "N", "A", "D",
      "A", "T", "T", "D", "A", "T", "T", "V", "R", "N", "A", "T"
    ),
    V3 = c(
      "E", "M", "Q", "S", "E", "M", "Q", "S", "D", "H", "I", "S", "E",
      "M", "Q", "S", "E", "M", "Q", "S", "D", "H", "I", "D", "T"
    ),
    V4 = c(
      "R", "K", "Q", "S", "R", "K", "Q", "S", "S", "I", "P", "D", "R",
      "K", "Q", "S", "R", "K", "Q", "S", "S", "I", "P", "S", "S"
    ),
    V5 = c(
      "N", "P", "E", "S", "N", "P", "E", "S", "A", "C", "T", "E", "N",
      "P", "E", "S", "N", "P", "E", "S", "A", "C", "T", "S", "A"
    )
  ),
  class = "data.frame",
  row.names = c(
    "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13",
    "14", "15", "16", "17", "18", "19", "20", "21", "22", "23", "24", "25"
  )
)
,
下面的方法可行:
> S_count <- data.frame(downstream = sum(sapply(S[1:10,],function(x) sum(grepl('S',x)))),
+ upstream = sum(sapply(S[15:25,],function(x) sum(grepl('S',x)))),stringsAsFactors = F)
> S_count
downstream upstream
1 7 11
>
以矩阵形式:
> S_count_vec <- rbind(downstream = sapply(S[1:10,],function(x) sum(grepl('S',x))),
+ upstream = sapply(S[15:25,],function(x) sum(grepl('S',x))))
> S_count_vec
V1 V2 V3 V4 V5
downstream 0 0 2 3 2
upstream 1 0 2 5 3
>
使用的数据:
> dput(S)
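structure(list(V1 = c("C","C","V","A","C","C","V","A","R","W","S","Q","C","C","V","A","C","C","V","A","R","W","S","G","N"),V2 = c("D","A","T","T","D","A","T","T","V","R","N","A","D","A","T","T","D","A","T","T","V","R","N","A","T"),V3 = c("E","M","Q","S","E","M","Q","S","D","H","I","S","E","M","Q","S","E","M","Q","S","D","H","I","D","T"),V4 = c("R","K","Q","S","R","K","Q","S","S","I","P","D","R","K","Q","S","R","K","Q","S","S","I","P","S","S"),V5 = c("N","P","E","S","N","P","E","S","A","C","T","E","N","P","E","S","N","P","E","S","A","C","T","S","A")),row.names = c(NA,-25L
),class = c("tbl_df","tbl","data.frame"))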
版权声明:本文内容由互联网用户自发贡献,该文观点与技术仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 dio@foxmail.com 举报,一经查实,本站将立刻删除。