如何解决在R中创建分组的滞后指标
我试图基于另一个变量的滞后值在R中创建一个分组的新变量。
我的data.frame看起来像这样:
str(parlgov)
tibble [8,623 × 16] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
$ country_name_short : chr [1:8623] "AUS" "AUS" "AUS" "AUS" ...
$ country_name : chr [1:8623] "Australia" "Australia" "Australia" "Australia" ...
$ election_type : chr [1:8623] "parliament" "parliament" "parliament" "parliament" ...
$ election_date : Date[1:8623],format: "1901-03-30" "1901-03-30" "1901-03-30" ...
$ vote_share : num [1:8623] 44.4 34.2 19.4 1.4 0.6 29.7 34.4 31 0.5 4.5 ...
$ seats : num [1:8623] 32 26 15 1 1 26 25 23 1 0 ...
$ seats_total : num [1:8623] 75 75 75 75 75 75 75 75 75 75 ...
$ party_name_short : chr [1:8623] "PP" "FTP" "ALP" "none" ...
$ party_name : chr [1:8623] "Protectionist Party" "Free Trade Party" "Australian Labor Party" "no party affiliation" ...
$ party_name_english : chr [1:8623] "Protectionist Party" "Free Trade Party" "Australian Labor Party" "no party affiliation" ...
$ left_right : num [1:8623] 7.4 6 3.88 NA NA ...
$ country_id : num [1:8623] 33 33 33 33 33 33 33 33 33 33 ...
$ election_id : num [1:8623] 731 731 731 731 731 730 730 730 730 730 ...
$ previous_parliament_election_id: num [1:8623] NA NA NA NA NA 731 731 731 731 731 ...
$ previous_cabinet_id : num [1:8623] NA NA NA NA NA 997 997 997 997 997 ...
$ party_id : num [1:8623] 1898 1938 1253 1396 2299 ...
由于 data.frame 太大,下面给出从中随机抽取 100 行样本的 dput 输出:
dfsample <- sample_n(parlgov,100)
dput(dfsample)
structure(list(country_name_short = c("GBR","LVA","FRA","CHE","DNK","CAN","PRT","CYP","ISR","SVN","LUX","MLT","AUS","ITA","FIN","SVK","LTU","DEU","CZE","NLD","EST","GBR","BEL","GRC","AUT","ROU","JPN","NOR","HRV","HUN","NZL","ISL","BGR","IRL","IRL"),country_name = c("United Kingdom","Latvia","France","Switzerland","Denmark","Canada","Portugal","Cyprus","Israel","Slovenia","Luxembourg","Malta","Australia","Italy","Finland","Slovakia","Lithuania","Germany","Czech Republic","Netherlands","Estonia","United Kingdom","Belgium","Greece","Austria","Romania","Japan","Norway","Croatia","Hungary","New Zealand","Iceland","Bulgaria","Ireland","Ireland"),election_type = c("ep","parliament","ep","parliament"),election_date = structure(c(18039,9404,7108,12344,2602,-14402,14402,7808,5317,17685,-7059,-18282,6704,17594,3364,16866,15627,-18610,16003,8487,12694,-18147,18039,-14826,-795,18042,14399,6170,-13399,4189,9789,15690,10755,-15417,15409,13681,10959,5043,-13570,7188,16215,17629,-18278,-9630,10397,12582,-2887,-6815,9645,3447,-2398,15822,12959,17420,-8614,8192,18189,16460,11824,-17999,3582,2124,5281,17146,-23800,-15736,13235,-5273,8928,-16657,15998,17055,-8838,13474,2810,18040,-1522,-7388,9810,11965,6459,13842,18323,7650,18321,4710),class = "Date"),vote_share = c(14.1,1.49,23.61,23.3,3.3,1.24,28.41,35.8,31.9,1.09,5.1,5.99,42.5,4.7,18.76,1.2,5.61,7.31,10.26,14.91,1.8,6.82,0.9,12.18,26.1,23.44,21.14,1.4,1.23,1.01,43.1,1,4.22,3.4,36,2.2,5.69,17.34,36.84,8.55,2.47,1.9,NA,1.75,3.26,0.44,4.38,6.69,9.98,4.93,3.5,8.6,22.09,0.3,35.47,7.4,12.6,28.2,26.7,5.19,10.3,18.93,5.87,15.11,2.46,34,3.7,21.1,1.44,8.87,6.27,9.39,30.14,0.34,20.6,9.5,8.2,2.95,10.79,9.82,20.28,32.18,23.77,10.4,5.89,4.2,11.37,6.14,1.15,2.4,5.98,21.5,8.3,39.22),seats = c(10,22,52,6,2,7,20,41,26,9,112,10,12,33,4,59,5,3,80,268,15,16,8,19,13,104,14,29,51,46,30,39,134,70),seats_total = 
c(73,100,81,200,179,245,56,120,90,60,40,75,630,150,141,423,577,139,615,24,21,72,183,474,480,87,151,73,584,199,63,240,169,556,300,166,148,329,25,214,574,522,155,11,35,166),party_name_short = c("Lab","TPA","PS","SP-PS","RF","L","DISY","ZS","KPL","DAP","ALP","CD","PD","POP","S","TT-LDP","DNVP","KSCM","MP","SLS","no-seat","CDA","EK","Lib","CVP|CD&V","DG","LAOS","NO2EU","SPO","CDU","none","A","LDP","TPJ","DS","DNA","OLaNO","droite","HSU","one-seat","BVP","SP","NCD","AGL-Gr","FKgP","BE","LCL","US","LSAP","AS","PN","Meretz","F","Sj","BNS","Sp","PCI","HSD-SMS","FDP-PRD","KIDISO","V","VS","KK/CVP","DP","USR","SNS","VB","NZLP","KF","Shas","AhAv","FN","EDEK","PCF","SPH","VIHR","PVV","SV","Green","FeBo","SOPS","LPP","Jf","PIN","YaToMe","SDAP","SF","SPOLU","FG"
),party_name = c("Labour Party","Tautsaimnieku politisk? apvien?ba","Parti socialiste","Sozialdemokratische Partei der Schweiz \x96 Parti Socialiste Suisse","Retsforbund","Labour Party","Partido Socialista","Dimokratik\xf3s Sinayerm\xf3s","Likud","Zeleni Slovenije","Kommunistesch Partei L\xebtzebuerg","Democratic Action Party","Australian Labor Party","Centrumdemokraterne","Partito Democratico","Perustuslaillinen Oikeistopuolue \x96 Konstitutionella h\xf6gerpartiet","Sie?","Tvarka ir teisingumas \x96 Liberal? demokrat? partija","Deutschnationale Volkspartei","Komunistick\xe1 strana Cech a Moravy","Parti de la Majorit\xe9 pr\xe9sidentielle","Slovenska ljudska stranka","no seat","Christen Democratisch App\xe8l","Eesti Keskerakond","Liberals","Christelijke Volkspartij | Christen-Democratisch en Vlaams","Gruppe der Demokraten \x96 Groupe des democrates","La\xefk\xf3s Orth\xf3doxos Synagerm\xf3s","NO2EU \x96 Yes to Democracy","Sozialdemokratische Partei \xd6sterreichs","Christelijk-Democratische Unie","no party affiliation","Alternativet","Lietuvos demokrat? 
partija","Nippon Mirai no T?","Democratici di Sinistra","Det norske Arbeiderparti","Oby?ajn\xed ?udia a nez\xe1visl\xe9 osobnosti","divers droite","Hrvatska stranka umirovljenika","one seat","Sinistra","Bayerische Volkspartei","Socialistiese Partij","Nuovo Centrodestra | Alternativa Popolare","Agalev \x96 Groen","F\xfcggetlen Kisgazda P\xe1rt","Bloco de Esquerda","Liberal and Country League","Unie svobody","L\xebtzebuerger Sozialistesch Aarbechterpartei","\xc5l\xe4ndsk Samlingin \xe4\xe4nt\xe4 \x96 \xc5l\xe4ndsk Samling","Partit Nazzjonalista","Frams\xf3knarflokkurinn","Sj\xe1lfst\xe6\xf0isflokkurinn","Balgarski Naroden Suyuz","Senterpartiet","Partito Comunista Italiano","Hnut\xed za samospr\xe1vnou demokracii \x96 Spole?nost pro Moravu a Slezsko","Freisinnig-Demokratische Partei der Schweiz \x96 Parti Radical-Democratique Suisse","Kinima Dimokraton Sosialiston","Venstre","Venstresocialisterne","Katholische Konservative / Christlichdemokratische Volkspartei \x96 Conservateurs catholiques / Parti d\xe9mocrate-chr\xe9tien","Democrazia Proletaria","Uniunea Salva?i Rom\xe2nia","Slovenska nacionalna stranka","Vlaams Blok","New Zealand Liberal Party","Konservative","Shomrei Sfarad","Ahdut HaAvoda","Front National","Kinima Sosialdimokraton EDEK","Parti communiste fran\xe7aise","Socijaldemokratska partija Hrvatske","Vihre\xe4 Liitto \x96 Gr\xf6na F\xf6rbundet","Partij voor de Vrijheid","Sosialistisk Venstreparti","Green Party \x96 Comhaontas Glas","Felleslister borgerlige","Slovenska obrtno podjetni\x9aka stranka","Latvijas Pirm? 
partija","Javna\xf0arflokkurin","Partidul Ini?iativa Na?ional?","Yahadut HaTora HaMeuhedet","Sociaal Democratische Arbeiders Partij","Socialistisk Folkeparti","SPOLU \x96 ob?ianska demokracia","Fine Gael"),party_name_english = c("Labour","Political Union of Economists","Socialist Party","Social Democratic Party of Switzerland","Justice Party","Democratic Rally","The Consolidation","Greens of Slovenia","Communist Party of Luxembourg","Centre Democrats","Democratic Party","Constitutional People's Party","Network","Order and Justice -- Liberal Democratic Party","German National People's Party","Communist Party of Bohemia and Moravia","Party of Presidential Majority","Slovenian People's Party","Christian Democratic Appeal","Estonian Centre Party","Flemish Christian Peoples Party | Christian Democrats & Flemish","Democratic Group","Popular Orthodox Rally","NO2EU -- Yes to Democracy","Social Democratic Party of Austria","Christian Democratic Union","The Alternative","Lithuanian Democratic Party","Tomorrow Party of Japan","Democrats of the Left","Norwegian Labour Party","Ordinary People and Independent","other right","Croatian Party of Pensioners","Left","Bavarian People's Party","New Centre-Right | Popular Alternative","Agalev -- Green","Independent Small Holders Party","Bloc of the Left","Freedom Union","Luxembourg Socialist Workers' Party","Aland Coalition","Nationalist Party","Energy","Progressive Party","Independence Party","Bulgarian People's Union","Centre Party","Communist Party","Movement for Self-Governing Democracy -- Society for Moravia and Silesia","Radical Democratic Party","Movement of Democratic Socialists","Liberal Party","Left Socialists","Catholic Conservative / Christian Democratic Peoples Party","Proletarian Democracy","Save Romania Union","Slovenian National Party","Flemish Block","Conservatives","Sfarad's guards of the Torah","Labour Unity","National Front","Movement for Social Democracy EDEK","French Communist Party","Social Democratic Party of 
Croatia","Green League","Party for Freedom","Socialist Left Party","Green Party","Electoral lists Conservatives","Slovenian Craftsmen and Entreprenerial Party","Latvia's First Party","Social Democratic Party (Faroe Islands)","National Initiative Party","United Torah Judaism","Social Democratic Workers' Party","Socialist Peoples Party","TOGETHER -- Civic Democracy","Fine Gael (Familiy of the Irish)"),left_right = c(4.3562,3.2493,1.8319,4.0492,8.6842,6.6788,2.5,1.3,3.8833,5.5609,2.6216,5.2955,8.8,0.7498,6.6953,5.9376,3.9562,4.2875,5.7667,9.11,3.7293,6.2,5.8333,2.6277,3.3706,7.6583,1.3682,2.5762,9.0186,1.6425,7.2204,3.2895,5.7143,1.7786,5.0365,7.4854,5.8,4.6565,1.6,6.3249,7.292,0.8,4.7288,0.5,4.7941,9.6622,7.2186,7.0417,9.655,1.3741,3.2456,3.6465,1.5839,2.435,6.3377,6.9396,2.1347,6.4372),country_id = c(44,55,43,67,54,68,44,64,23,62,37,37),election_id = c(1051,534,360,206,409,858,342,714,1025,526,135,723,387,1024,84,1003,786,1031,805,664,893,1052,395,909,553,373,1060,410,235,941,229,1049,566,788,363,867,781,171,797,631,1053,1039,627,834,832,1023,954,838,737,27,593,354,244,717,379,798,618,1017,503,1083,851,408,894,242,371,458,1015,624,884,896,720,706,536,397,991,803,1007,471,198,578,499,1061,709,478,338,260,226,676,1090,157,1089,307),previous_parliament_election_id = c(1013,564,643,427,464,859,210,127,713,817,550,750,789,98,646,599,525,892,1012,83,908,191,108,992,233,456,940,319,976,496,439,327,551,951,524,403,435,1038,486,814,953,702,738,301,299,616,716,243,320,681,801,995,784,316,1000,176,617,787,332,883,895,719,705,418,339,990,172,1001,983,253,335,523,1002,708,220,382,597,165,1079,228),previous_cabinet_id = c(1507,201,712,50,1167,1114,1144,175,1470,1010,837,1501,492,1027,1026,1062,292,1481,1530,902,1291,457,513,1593,119,828,1258,1497,1148,91,969,1231,982,807,1532,1580,987,1104,1320,917,76,196,598,1471,960,273,331,651,575,1065,262,1212,126,1249,824,79,334,1197,95,1466,1246,1129,928,1354,1063,1408,759,65,520,1511,935,58,562,195,118,1613,57,1529,54),party_id = 
c(1556,278,1539,1606,2427,725,1575,678,1619,816,1253,1324,1389,2624,1421,2692,1173,2260,2058,1137,659,886,1179,2147,973,2542,761,2567,2785,809,1759,285,2134,2303,465,2697,357,2268,1594,870,1262,557,1919,688,701,66,984,1419,1455,1342,1375,1088,2407,1605,189,531,1424,2646,981,993,2469,1189,590,1343,945,686,1493,1573,2308,2194,2275,1043,1894,1589,1303,1451,1644,2752,1393)),class = c("spec_tbl_df","tbl_df","tbl","data.frame"),row.names = c(NA,-100L),spec = structure(list(
cols = list(country_name_short = structure(list(),class = c("collector_character","collector")),country_name = structure(list(),election_type = structure(list(),election_date = structure(list(format = ""),class = c("collector_date",vote_share = structure(list(),class = c("collector_double",seats = structure(list(),seats_total = structure(list(),party_name_short = structure(list(),party_name = structure(list(),party_name_english = structure(list(),left_right = structure(list(),country_id = structure(list(),election_id = structure(list(),previous_parliament_election_id = structure(list(),previous_cabinet_id = structure(list(),party_id = structure(list(),"collector"))),default = structure(list(),class = c("collector_guess",skip = 1),class = "col_spec"))
我想做的是创建一个新变量:对于 country_id 和 election_id 的每一组,它等于 election_type == "ep" 的 election_date 的滞后值(即上一次 ep 选举的日期)。
我尝试了很多类似的事情:
# Attempt from the question, kept verbatim: keep only the "ep" and
# "parliament" rows, group them by country_id and election_type, sort by
# election_date, and let lag() take the election_date of the preceding row.
# As the question explains right after this snippet, the result is a
# row-by-row lag rather than one value per country_id / election_id group.
parlgov%>%
filter(election_type %in% c("ep","parliament"))%>%
group_by(country_id,election_type)%>%
arrange(election_date)%>%
mutate(prev_election_ep = lag(election_date))
但是我得到最多的是每一行的滞后值,而不是country_id
和election_id
的每一组。
基本上,我想要的是一个新变量,其中每个country_id
和election_id
的组都取election_type == "ep"
的最后一个election_date
的值。这样的东西(虚拟数据):
country_name_short country_id election_type election_date prev_election_ep
1 CZE 5 parliamentary 1999-03-30 <NA>
2 CZE 5 ep 2000-03-10 <NA>
3 CZE 5 parliamentary 2004-03-12 2000-03-10
4 CZE 5 ep 2005-05-01 2000-03-10
5 CZE 5 parliamentary 2006-06-03 2005-05-01
6 DNK 10 parliamentary 2000-03-10 2000-03-10
7 DNK 10 ep 2000-03-10 <NA>
8 DNK 10 parliamentary 2005-03-02 2000-03-10
9 DNK 10 parliamentary 2005-05-01 2000-03-10
10 DNK 10 ep 2009-09-10 2000-03-10
11 ESP 55 parliamentary 1982-03-30 <NA>
12 ESP 55 ep 1985-05-10 <NA>
13 ESP 55 parliamentary 1989-09-31 1985-05-10
14 ESP 55 ep 1990-02-12 1985-05-10
15 ESP 55 parliamentary 1991-04-20 1990-02-12
解决方法
考虑下面这个函数:
# For every position in `x`, return the element of `x` located at the closest
# earlier position where `cond` is TRUE. Positions that have no earlier TRUE
# entry get a missing value.
#
# x:    vector to pick values from (for example election dates, already
#       sorted in the order that defines "earlier").
# cond: logical vector flagging the candidate positions; NA entries are
#       ignored, exactly as which() ignores them.
#
# Returns a vector of the same length and type as `x`.
last_where <- function(x, cond) {
  positions <- seq_along(x)
  lookup <- rep(NA_integer_, length(positions))
  if (length(positions) > 0L) {
    flagged <- positions %in% which(cond)
    latest <- NA_integer_
    for (pos in positions) {
      # Store the most recent flagged index seen strictly before `pos`;
      # only afterwards may `pos` itself become a candidate for later rows.
      lookup[[pos]] <- latest
      if (flagged[[pos]]) {
        latest <- pos
      }
    }
  }
  # Indexing with NA_integer_ yields a missing value of x's own type.
  x[lookup]
}
然后该管道为您提供所需的输出
# Attach to every row the election_date of the most recent earlier row whose
# election_type is "ep", using the last_where() helper defined above.
# Rows are sorted by date (ties broken by election_type) inside each group so
# that "earlier row" means "earlier election".
# NOTE(review): the check on sample data that follows groups by country_id
# only; confirm that also grouping by election_id is what is intended here.
parlgov %>%
  group_by(country_id, election_id) %>%
  arrange(election_date, election_type, .by_group = TRUE) %>%
  mutate(
    # Column name spelled as requested in the question (prev_election_ep).
    prev_election_ep = last_where(election_date, election_type == "ep")
  ) %>%
  # Drop the grouping so later steps do not silently operate per group.
  ungroup()
使用样本数据进行测试
> df %>%
+ group_by(country_id) %>%
+ arrange(election_date,.by_group = TRUE) %>%
+ mutate(prev_elecion_ep1 = last_where(election_date,election_type == "ep"))
country_name_short country_id election_type election_date prev_election_ep prev_elecion_ep1
<chr> <dbl> <chr> <date> <chr> <date>
1 CZE 5 parliamentary 1999-03-30 <NA> NA
2 CZE 5 ep 2000-03-10 <NA> NA
3 CZE 5 parliamentary 2004-03-12 2000-03-10 2000-03-10
4 CZE 5 ep 2005-05-01 2000-03-10 2000-03-10
5 CZE 5 parliamentary 2006-06-03 2005-05-01 2005-05-01
6 DNK 10 ep 2000-03-10 <NA> NA
7 DNK 10 parliamentary 2000-03-10 2000-03-10 2000-03-10
8 DNK 10 parliamentary 2005-03-02 2000-03-10 2000-03-10
9 DNK 10 parliamentary 2005-05-01 2000-03-10 2000-03-10
10 DNK 10 ep 2009-09-10 2000-03-10 2000-03-10
11 ESP 55 parliamentary 1982-03-30 <NA> NA
12 ESP 55 ep 1985-05-10 <NA> NA
13 ESP 55 ep 1990-02-12 1985-05-10 1985-05-10
14 ESP 55 parliamentary 1991-04-20 1990-02-12 1990-02-12
15 ESP 55 parliamentary NA 1985-05-10 1990-02-12
,
尝试使用此:
library(dplyr)

# Step 1: number the EP elections within each country. Every "ep" row starts
# a new block (grp), and the rows that follow it share that block. The counter
# restarts for every country so blocks never leak from one country into the
# next; %in% treats a missing election_type as "not ep" instead of turning the
# running count into NA.
dfsample <- dfsample %>%
  arrange(country_name, election_date) %>%
  group_by(country_name) %>%
  mutate(grp = cumsum(election_type %in% "ep")) %>%
  ungroup()

# Step 2: one row per country/block holding the date of the EP election that
# opened the block. first() yields NA for block 0 (rows before a country's
# first EP election), which has no "ep" row. The dates are then shifted one
# block back with lag() inside each country.
# NOTE(review): every "ep" row opens its own block, so data with several rows
# per election (e.g. one per party) should be reduced to one row per election
# first -- confirm against the real data.
ep_dates <- dfsample %>%
  group_by(country_name, grp) %>%
  summarise(
    prev_election_ep = first(election_date[election_type %in% "ep"])
  ) %>%
  mutate(prev_election_ep = lag(prev_election_ep)) %>%
  ungroup()

# Step 3: join the lagged dates back onto the data. The data is the left-hand
# side of the join, so every original row is kept.
dfsample %>%
  left_join(ep_dates, by = c("country_name", "grp"))
我们没有足够的数据进行测试,但这里的逻辑是:数据中每出现一个 ep 行就开始一个新的分组,由此创建一个临时的 grp 列。对于每个分组,我们取 election_type == 'ep' 的 election_date,然后使用 lag 将该值后移一步,最后把结果与原始数据框连接起来。
版权声明:本文内容由互联网用户自发贡献,该文观点与技术仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 dio@foxmail.com 举报,一经查实,本站将立刻删除。