-
Notifications
You must be signed in to change notification settings - Fork 12
/
ge2017_dataprep.R
40 lines (33 loc) · 1.29 KB
/
ge2017_dataprep.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
library(tidyverse)
library(readxl)
# Load the results file. The columns used later in this script (`ons_id`,
# `country_name`, `party_abbreviation`, `votes`) suggest one row per
# candidate -- TODO confirm against the source data.
# Strings are kept as character vectors; `FALSE` is spelled out because the
# `F` shorthand is an ordinary variable that can be reassigned.
raw <- read.csv("voting-full.csv", stringsAsFactors = FALSE)

# Party abbreviations whose vote counts become columns of the output.
parties <- c("Con", "Lab", "LD", "UKIP", "Green", "SNP")
# Votes for the parties listed in `parties`, reshaped to wide form: rows for
# Northern Ireland and for any other party are dropped, the candidate-level
# columns are removed, and each party abbreviation becomes its own column
# holding that party's votes. The result has one row per distinct combination
# of the remaining columns (expected to be one row per `ons_id`); a party with
# no row in a given combination gets NA.
ge2017 <- raw %>%
  filter(country_name != "Northern Ireland") %>%
  filter(party_abbreviation %in% parties) %>%
  select(-party_name, -(firstname:former_mp), -(share:change)) %>%
  spread(key = party_abbreviation, value = votes)
# Rank every candidate outside Northern Ireland within their constituency
# (`ons_id`) by votes, highest first. `row_number()` gives each candidate a
# distinct rank, so tied candidates are separated by their order in `raw` and
# a constituency can never contribute more than one first-placed or one
# second-placed row. Candidates with missing votes get an NA rank.
ge2017_ranked <- raw %>%
  filter(country_name != "Northern Ireland") %>%
  group_by(ons_id) %>%
  mutate(vote_rank = row_number(desc(votes))) %>%
  ungroup()

# Party of the first-placed candidate, keyed by constituency.
ge2017_winner <- ge2017_ranked %>%
  filter(vote_rank == 1) %>%
  select(ons_id, Winner = party_abbreviation)

# Party of the second-placed candidate, keyed by constituency.
ge2017_second <- ge2017_ranked %>%
  filter(vote_rank == 2) %>%
  select(ons_id, Second = party_abbreviation)

# Margin of victory: highest vote count minus second-highest vote count.
# NA when a constituency has fewer than two candidates.
ge2017_margin <- ge2017_ranked %>%
  group_by(ons_id) %>%
  summarise(Margin = max(votes) - sort(votes, decreasing = TRUE)[2])

# Attach winner, runner-up and margin to the per-party vote table by matching
# on `ons_id`, so the result does not depend on the tables sharing the same
# row order or row count. Rows and row order follow `ge2017`.
final <- ge2017 %>%
  left_join(ge2017_winner, by = "ons_id") %>%
  left_join(ge2017_second, by = "ons_id") %>%
  left_join(ge2017_margin, by = "ons_id") %>%
  select(
    PCONCODE = ons_id,
    CON_2017 = Con,
    LAB_2017 = Lab,
    LD_2017 = LD,
    UKP_2017 = UKIP,
    GREEN_2017 = Green,
    SNP_2017 = SNP,
    WINNER_2017 = Winner,
    SECOND_2017 = Second,
    MAJ_2017 = Margin
  )

# A party with no candidate in a constituency has no vote count; record that
# as 0. Only the vote columns are filled, so a missing winner, runner-up or
# margin stays NA instead of being overwritten with 0.
vote_cols <- c(
  "CON_2017", "LAB_2017", "LD_2017", "UKP_2017", "GREEN_2017", "SNP_2017"
)
final[vote_cols][is.na(final[vote_cols])] <- 0

# Row names are written as the first (unnamed) column, as before.
write.csv(final, "ge2017.csv")