如何解决在R中的不同列中拆分值 数据
数据集中的一列包含类似值
utm_source=google&utm_medium=cpc&utm_campaign=1234567&utm_term=brand%20&utm_content=Brand&gclid=ERtyuiipotf_YTj
如何将其拆分为R中的值?
utm_source utm_medium utm_campaign utm_brand utm_content
google cpc 1234567 brand%20 Brand
dput(column)
给出以下输出
structure(list("null","gclid=ertyyhglkdl-kjkY","utm_source=google&utm_medium=cpc&utm_campaign=1234556&utm_term=brand%20shirts&utm_content=Brand&gclid=jhajsgjdgd_ajs","utm_source=google&utm_medium=cpc&utm_campaign=1674814043&utm_term=brand%20shirts&utm_content=Brand&gclid=KvgMsEAAYASAAEgLq6vD_BwE","null","utm_source=fb&utm_medium=ctw&utm_campaign=Shirt_rem&utm_content=CasciaShirt"),class = c("extracted","list"))
解决方法
以OP更新后的示例作为`list`,我们遍历该`list`:`if`元素不是"null",就创建一个`tibble`,用`separate_rows`在`&`处把该列拆分成多行,再用`separate`把该列拆分为两列(键和值),然后用`deframe`将其转换为命名向量,最后用`as_tibble_row`由该命名向量创建单行tibble。
代码及输出如下:
library(dplyr)
library(tidyr)
library(tibble)
library(purrr)
# Walk over lst1; every element other than the literal "null" is parsed into a
# one-row tibble, and map_dfr() row-binds the per-element results.
map_dfr(lst1, function(query) {
  # Elements equal to "null" produce NULL and contribute no row.
  if (query == "null") {
    return(NULL)
  }
  # One row per "key=value" pair, then one column each for key and value.
  pairs <- tibble(col1 = query) %>%
    separate_rows(col1, sep = "&") %>%
    separate(col1, into = c("col1", "col2"), sep = "\\=")
  # Two-column tibble -> named vector (name = key) -> single-row tibble.
  as_tibble_row(deframe(pairs))
})
# A tibble: 4 x 6
# gclid utm_source utm_medium utm_campaign utm_term utm_content
# <chr> <chr> <chr> <chr> <chr> <chr>
#1 ertyyhglkdl-kjkY <NA> <NA> <NA> <NA> <NA>
#2 jhajsgjdgd_ajs google cpc 1234556 brand%20shirts Brand
#3 KvgMsEAAYASAAEgLq6vD_BwE google cpc 1674814043 brand%20shirts Brand
#4 <NA> fb ctw Shirt_rem <NA> CasciaShirt
或者,我们可以将`list`转换为`data.frame`中的一列,而不必循环执行,只需执行一次并转换为宽格式即可。
数据
library(data.table)
# Non-looping variant: drop the "null" entries, flatten the remaining elements
# to a character vector, and reshape everything in a single pass.
keep(lst1, ~ .x != "null") %>%
  flatten_chr() %>%
  tibble(col1 = .) %>%
  # rn tags each original string so that its key/value pairs land on the same
  # output row after pivoting.
  mutate(rn = row_number()) %>%
  separate_rows(col1, sep = "&") %>%
  # `into` is mandatory for separate(); col2 (the value part) is what
  # pivot_wider() reads below.
  separate(col1, into = c("col1", "col2"), sep = "\\=") %>%
  pivot_wider(names_from = col1, values_from = col2) %>%
  select(-rn)
# A tibble: 4 x 6
# gclid utm_source utm_medium utm_campaign utm_term utm_content
# <chr> <chr> <chr> <chr> <chr> <chr>
#1 ertyyhglkdl-kjkY <NA> <NA> <NA> <NA> <NA>
#2 jhajsgjdgd_ajs google cpc 1234556 brand%20shirts Brand
#3 KvgMsEAAYASAAEgLq6vD_BwE google cpc 1674814043 brand%20shirts Brand
#4 <NA> fb ctw Shirt_rem <NA> CasciaShirt
我不确定这是否是预期的输出。下面是一个可能符合您目标的基础R(base R)方案
# Build a one-row data.frame per element of `column` (names = keys, cells =
# values) and fold them together with an outer merge so that every key seen in
# any element becomes a column.
Reduce(
  function(...) merge(..., all = TRUE),
  lapply(
    column,
    function(x) {
      # Split the query string into its "key=value" pieces.
      u <- unlist(strsplit(x, "&"))
      # Values: strip everything up to "="; keys: strip everything from "=" on.
      # A piece without "=" (such as "null") is left unchanged by both calls,
      # so it becomes a column named after itself.
      setNames(data.frame(as.list(gsub(".*=", "", u))), gsub("=.*", "", u))
    }
  )
)
给出
utm_source utm_medium utm_campaign utm_content null gclid
1 fb ctw Shirt_rem CasciaShirt <NA> <NA>
2 google cpc 1234556 Brand <NA> jhajsgjdgd_ajs
3 google cpc 1674814043 Brand <NA> KvgMsEAAYASAAEgLq6vD_BwE
4 <NA> <NA> <NA> <NA> null ertyyhglkdl-kjkY
utm_term
1 <NA>
2 brand%20shirts
3 brand%20shirts
4 <NA>
更新
如果您想保留所有数据(即使某些元素是"null"),可以尝试下面的代码
# Variant that keeps a placeholder for "null" elements: each element of
# `column` becomes either NA (for "null") or a one-row data.frame of its
# key/value pairs, and the pieces are folded together pairwise.
Reduce(
  function(x, y) {
    # When either side is entirely NA there is nothing to join on, so the
    # two pieces are stacked instead.
    if (all(is.na(x)) || all(is.na(y))) {
      return(rbind(x, y))
    }
    dplyr::full_join(x, y)
  },
  lapply(
    column,
    function(x) {
      if (x == "null") {
        return(NA)
      }
      # Split the query string into its "key=value" pieces.
      u <- unlist(strsplit(x, "&"))
      # Values: strip everything up to "="; keys: strip everything from "=" on.
      setNames(data.frame(as.list(gsub(".*=", "", u))), gsub("=.*", "", u))
    }
  )
)
给出
gclid utm_source utm_medium utm_campaign utm_term
1 <NA> <NA> <NA> <NA> <NA>
2 ertyyhglkdl-kjkY <NA> <NA> <NA> <NA>
3 jhajsgjdgd_ajs google cpc 1234556 brand%20shirts
4 KvgMsEAAYASAAEgLq6vD_BwE google cpc 1674814043 brand%20shirts
5 <NA> <NA> <NA> <NA> <NA>
6 <NA> <NA> <NA> <NA> <NA>
7 <NA> <NA> <NA> <NA> <NA>
8 <NA> <NA> <NA> <NA> <NA>
9 <NA> <NA> <NA> <NA> <NA>
10 <NA> <NA> <NA> <NA> <NA>
11 <NA> <NA> <NA> <NA> <NA>
12 <NA> <NA> <NA> <NA> <NA>
13 <NA> <NA> <NA> <NA> <NA>
14 <NA> <NA> <NA> <NA> <NA>
15 <NA> fb ctw Shirt_rem <NA>
utm_content
1 <NA>
2 <NA>
3 Brand
4 Brand
5 <NA>
6 <NA>
7 <NA>
8 <NA>
9 <NA>
10 <NA>
11 <NA>
12 <NA>
13 <NA>
14 <NA>
15 CasciaShirt
版权声明:本文内容由互联网用户自发贡献,该文观点与技术仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 dio@foxmail.com 举报,一经查实,本站将立刻删除。