如何解决如何按组查找一栏中的前3个滚动值? Q如何为ID创建最多3个国家/地区的最后3列?
数据框具有 3列
-----------------------------------------
| Id | Country | Date |
-----------------------------------------
这三列记录了该人的旅行历史。
还需要再创建3列,以表示此人(ID)在行日期之前最常旅行的前3个国家/地区。
(如果两个国家出现平局,则以最近旅行的国家为准。)
mydata <- data.frame(ID = c('A1B1','A1B1','A2B2','A2B2'),Country = c('Japan','USA','Germany','Japan','France','UK','Spain','Brazil'),Date = as.Date(c('2010/01/02','2010/04/18','2011/03/22','2011/11/23','2012/05/09','2012/09/11','2014/01/06','2015/12/11','2010/04/03','2010/05/11','2011/05/01','2012/03/01','2013/01/03','2014/01/04')))
# final data should look like below
#ID Country Date Pref1 Pref2 Pref3
#A1B1 Japan 2010-01-02 NA NA NA
#A1B1 USA 2010-04-18 Japan NA NA
#A1B1 USA 2011-03-22 USA Japan NA
#A1B1 USA 2011-11-23 USA Japan NA
#A1B1 Germany 2012-05-09 USA Japan NA
#A1B1 Germany 2012-09-11 USA Germany Japan
#A1B1 Japan 2014-01-06 USA Germany Japan
#A1B1 France 2015-12-11 USA Japan Germany
#A2B2 UK 2010-04-03 NA NA NA
#A2B2 Spain 2010-05-11 UK NA NA
#A2B2 Spain 2011-05-01 Spain UK NA
#A2B2 UK 2012-03-01 Spain UK NA
#A2B2 UK 2013-01-03 UK Spain NA
#A2B2 Brazil 2014-01-04 UK Spain NA
Q。如何为ID创建最多3个国家/地区的最后3列?
解决方法
这是一种为每个ID
每行获取最后3个唯一国家的方法。
library(dplyr)
mydata %>%
group_by(ID) %>%
mutate(data = purrr::map(row_number(),~{
un_country <- Country[seq_len(.x - 1)]
if(.x == 1) un_country <- NA
else un_country <- names(sort(table(un_country),decreasing = TRUE))[1:3]
data.frame(t(un_country[1:3]))
})) %>%
tidyr::unnest_wider(data)
# ID Country Date X1 X2 X3
# <chr> <chr> <date> <chr> <chr> <chr>
# 1 A1B1 Japan 2010-01-02 NA NA NA
# 2 A1B1 USA 2010-04-18 Japan NA NA
# 3 A1B1 USA 2011-03-22 Japan USA NA
# 4 A1B1 USA 2011-11-23 USA Japan NA
# 5 A1B1 Germany 2011-05-09 USA Japan NA
# 6 A1B1 Germany 2012-09-11 USA Germany Japan
# 7 A1B1 Japan 2014-01-06 USA Germany Japan
# 8 A1B1 France 2015-12-11 USA Germany Japan
# 9 A2B2 UK 2010-04-03 NA NA NA
#10 A2B2 Spain 2010-05-11 UK NA NA
#11 A2B2 Spain 2011-05-01 Spain UK NA
#12 A2B2 UK 2012-03-01 Spain UK NA
#13 A2B2 UK 2013-01-03 Spain UK NA
#14 A2B2 Brazil 2014-01-04 UK Spain NA
,
这不是一个超级干净的答案。希望它可以帮助您与您亲密接触。
library(readr)
df <- readr::read_table(
"ID Country Date
A1B1 Japan 2010-01-02
A1B1 USA 2010-04-18
A1B1 USA 2011-03-22
A1B1 USA 2011-11-23
A1B1 Germany 2012-05-09
A1B1 Germany 2012-09-11
A1B1 Japan 2014-01-06
A1B1 France 2015-12-11
A2B2 UK 2010-04-03
A2B2 Spain 2010-05-11
A2B2 Spain 2011-05-01
A2B2 UK 2012-03-01
A3B2 UK 2013-01-03
A3B2 Brazil 2014-01-04")
df
library(tidyverse)
rankings <- df %>%
group_by(ID,Country) %>%
summarise(obs = n(),last_dt = max(Date)) %>%
arrange(ID,-obs,desc(last_dt)) %>%
mutate(rank = 1:n()) %>% print() %>%
filter(rank <= 3) %>%
pivot_wider(
names_from = rank,values_from = Country,names_prefix = "rank_",id_cols = ID
) %>% print()
#> `summarise()` regrouping output by 'ID' (override with `.groups` argument)
#> # A tibble: 8 x 5
#> # Groups: ID [3]
#> ID Country obs last_dt rank
#> <chr> <chr> <int> <date> <int>
#> 1 A1B1 USA 3 2011-11-23 1
#> 2 A1B1 Japan 2 2014-01-06 2
#> 3 A1B1 Germany 2 2012-09-11 3
#> 4 A1B1 France 1 2015-12-11 4
#> 5 A2B2 UK 2 2012-03-01 1
#> 6 A2B2 Spain 2 2011-05-01 2
#> 7 A3B2 Brazil 1 2014-01-04 1
#> 8 A3B2 UK 1 2013-01-03 2
#> # A tibble: 3 x 4
#> # Groups: ID [3]
#> ID rank_1 rank_2 rank_3
#> <chr> <chr> <chr> <chr>
#> 1 A1B1 USA Japan Germany
#> 2 A2B2 UK Spain <NA>
#> 3 A3B2 Brazil UK <NA>
df %>% left_join(rankings,by = "ID")
#> # A tibble: 14 x 6
#> ID Country Date rank_1 rank_2 rank_3
#> <chr> <chr> <date> <chr> <chr> <chr>
#> 1 A1B1 Japan 2010-01-02 USA Japan Germany
#> 2 A1B1 USA 2010-04-18 USA Japan Germany
#> 3 A1B1 USA 2011-03-22 USA Japan Germany
#> 4 A1B1 USA 2011-11-23 USA Japan Germany
#> 5 A1B1 Germany 2012-05-09 USA Japan Germany
#> 6 A1B1 Germany 2012-09-11 USA Japan Germany
#> 7 A1B1 Japan 2014-01-06 USA Japan Germany
#> 8 A1B1 France 2015-12-11 USA Japan Germany
#> 9 A2B2 UK 2010-04-03 UK Spain <NA>
#> 10 A2B2 Spain 2010-05-11 UK Spain <NA>
#> 11 A2B2 Spain 2011-05-01 UK Spain <NA>
#> 12 A2B2 UK 2012-03-01 UK Spain <NA>
#> 13 A3B2 UK 2013-01-03 Brazil UK <NA>
#> 14 A3B2 Brazil 2014-01-04 Brazil UK <NA>
由reprex package(v0.3.0)于2020-08-29创建
,这是一个凌乱的Base R解决方案:
rlln_rnk_df <- do.call("rbind",lapply(split(mydata,mydata$ID),function(x){
y <- do.call("rbind",lapply(seq_len(nrow(x)),function(i){
tmp <- x[x$Date <= x$Date[i],]
tmp1 <- cbind(head(tmp[order(tmp$Date,decreasing = TRUE),],1),rnk = t(names(sort(table(tmp$Country),decreasing = TRUE))))
tmp1 <- setNames(tmp1,c(names(tmp),paste0("rnk.",1:(ncol(tmp1) - ncol(tmp)))))
tmp1[,setdiff(paste0("rnk.",1:(length(unique(mydata$Country)))),names(tmp1))] <- NA_character_
tmp1
}
)
)
z <- y[order(y$Date),]
cbind(ID = z$ID,Country = z$Country,Date = z$Date,z[match(z$Date,z$Date[2:nrow(z)]),(grep("rnk",names(z),value = TRUE))])
}
)
)
df_clean <- data.frame(rlln_rnk_df[,colSums(is.na(rlln_rnk_df)) < nrow(rlln_rnk_df)],row.names = NULL)
,
我认为这样做。我在其中加入了<div class="col-lg-4 mb-4" >
<div class="item" data-id="'.$rowia['id'].'">
<div class="card h-100 shadow"><a href="../product/'.$rowia['id'].'"><img class="card-img-top" style="width: 100%;height: 15vw;object-fit: contain;" src="//website-img.arab-group-eg.com/p/'.$rowia['image'].'" alt="'.$rowia['name'].'" /></a>
';
if(check_product_in_cart($rowia['id']) == 1){
echo '
<div class="card-add-success">تمت إضافة هذا المنتج إلى العربة</div>
';
}
echo '
<div class="card-body">
<a href="../product/'.$rowia['id'].'"><h6 class="card-title">'.$rowia['name'].'</h6></a>
<p class="card-price"><span class="font-weight-bold t-c-i">'.(int)cul_installment($rowia['price'],12).'</span> ج.م / 12 شهر</p>
<div class="card-button text-center"><a style="background: #162937;" class="btn btn-primary installment btn-block" data-price="'.$rowia['price'].'" data-name="'.$rowia['name'].'" data-toggle="modal" data-target=".show_data">قسط</a>
';
if(check_product_in_cart($rowia['id']) == 0){
echo '
<a class="btn btn-primary add-to-cart btn-block" href="javascript:void(0)">أضف للعربة</a>
';
}elseif(check_product_in_cart($rowia['id']) == 1){
echo '
<a class="btn btn-primary remove-from-cart btn-block" style="background-color: #FE7C96;" href="javascript:void(0)">× حذف</a>
';
}
echo '
</div>
</div>
</div>
</div>
</div>
,因为我认为其中一个日期有错字。
mydata
如果需要,您可以从原始mydata <- data.frame(ID = c('A1B1','A1B1','A2B2','A2B2'),Country = c('Japan','USA','Germany','Japan','France','UK','Spain','Brazil'),Date = as.Date(c('2010/01/02','2010/04/18','2011/03/22','2011/11/23','2012/05/09','2012/09/11','2014/01/06','2015/12/11','2010/04/03','2010/05/11','2011/05/01','2012/03/01','2013/01/03','2014/01/04')))
library(data.table)
setDT(mydata)
mydata[order(Date),`:=`(num_v = seq_len(.N),last_v = Date),.(ID,Country)]
x <- mydata[
mydata[,CJ(Country = unique(Country),Date = unique(Date)),ID],on=c('ID','Country','Date'),roll=Inf]
x[,`:=`(num_v = shift(num_v),last_v = shift(last_v)),Country)]
x[is.na(num_v),Country := NA]
y <- x[,.SD[order(-num_v,-last_v)][1:3,.(Pref = paste0('Pref',1:3),Country)],Date)]
dcast(y,ID+Date~Pref,value.var = 'Country')
#> ID Date Pref1 Pref2 Pref3
#> 1: A1B1 2010-01-02 <NA> <NA> <NA>
#> 2: A1B1 2010-04-18 Japan <NA> <NA>
#> 3: A1B1 2011-03-22 USA Japan <NA>
#> 4: A1B1 2011-11-23 USA Japan <NA>
#> 5: A1B1 2012-05-09 USA Japan <NA>
#> 6: A1B1 2012-09-11 USA Germany Japan
#> 7: A1B1 2014-01-06 USA Germany Japan
#> 8: A1B1 2015-12-11 USA Japan Germany
#> 9: A2B2 2010-04-03 <NA> <NA> <NA>
#> 10: A2B2 2010-05-11 UK <NA> <NA>
#> 11: A2B2 2011-05-01 Spain UK <NA>
#> 12: A2B2 2012-03-01 Spain UK <NA>
#> 13: A2B2 2013-01-03 UK Spain <NA>
#> 14: A2B2 2014-01-04 UK Spain <NA>
重新加入Country
。
版权声明:本文内容由互联网用户自发贡献,该文观点与技术仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 dio@foxmail.com 举报,一经查实,本站将立刻删除。