### Load standard packages
library(tidyverse) # Collection of all the good stuff like dplyr, ggplot2 etc.
library(magrittr) # For extra piping operators (e.g. %<>%)
library(tidygraph) # Graph manipulation used below (as_tbl_graph, %N>%, centrality_*, group_louvain)
library(ggraph) # Network plotting used below (ggraph, geom_edge_link, geom_node_*)
#remotes::install_github("antongrau/eliter") # Somehow doesn't work
#library(eliter)
# Download the elite_den17 position data (one row per person-affiliation position)
dk_elites <- read_csv('https://github.com/SDS-AAU/SDS-master/raw/master/00_data/networks/elite_den17.csv')
Rows: 56849 Columns: 16
── Column specification ───────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (7): NAME, AFFILIATION, ROLE, TAGS, SECTOR, TYPE, DESCRIPTION
dbl (6): POSITION_ID, ID, CVR_PERSON, CVR_AFFILIATION, PERSON_ID, AFFILIATION_ID
lgl (1): ARCHIVED
dttm (2): CREATED, LAST_CHECKED
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Work on a copy so the raw download in `dk_elites` stays untouched.
data <- dk_elites

# Quick look at the first rows and at the column types.
head(data)
glimpse(data)
Rows: 56,849
Columns: 16
$ NAME <chr> "Aage Almtoft", "Aage B. Andersen", "Aage Christensen", "Aage Dam", "Aage…
$ AFFILIATION <chr> "Middelfart Sparekasse", "Foreningen Østifterne - Repræsentantskab (Medle…
$ ROLE <chr> "Member", "Member", "Chairman", "Chairman", "Member", "Member", "Member",…
$ TAGS <chr> "Corporation, FINA, Banks, Finance", "Charity, Foundation, Insurance, Soc…
$ POSITION_ID <dbl> 1, 4, 6, 8, 9, 15, 28, 30, 32, 34, 38, 41, 47, 49, 58, 63, 66, 70, 74, 76…
$ ID <dbl> 95023, 67511, 100903, 69156, 72204, 73158, 100249, 3165, 72054, 72759, 86…
$ SECTOR <chr> "Corporations", "NGO", "Foundations", "NGO", "NGO", "Parliament", "Corpor…
$ TYPE <chr> NA, "Organisation", NA, "Organisation", "Stat", NA, NA, NA, NA, NA, NA, N…
$ DESCRIPTION <chr> "Automatisk CVR import at 2016-03-12 18:01:28: BESTYRELSE i Middelfart Sp…
$ CREATED <dttm> 2016-03-12 18:01:28, 2016-02-05 14:45:10, 2016-03-12 18:08:31, 2016-02-1…
$ ARCHIVED <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ LAST_CHECKED <dttm> 2017-11-09 15:38:01, 2016-02-12 14:41:09, 2017-11-09 15:50:09, 2016-02-1…
$ CVR_PERSON <dbl> 4003983591, NA, 4000054465, NA, NA, NA, 4003907021, NA, NA, NA, 400008157…
$ CVR_AFFILIATION <dbl> 24744817, NA, 29094411, NA, 43232010, NA, 25952200, NA, NA, 81191158, 666…
$ PERSON_ID <dbl> 1, 3, 4, 5, 5, 9, 16, 18, 20, 21, 23, 25, 30, 31, 36, 38, 40, 43, 46, 47,…
$ AFFILIATION_ID <dbl> 3687, 2528, 237, 469, 1041, 1781, 4878, 1038, 3535, 2733, 1532, 3509, 346…
# Use lower-case column names throughout the rest of the script.
names(data) <- str_to_lower(names(data))

# Frequency of each role, most common first.
count(data, role, sort = TRUE)

# Keep only positions whose role is one of the selected ones.
role_selected <- c(
  "Member",
  "Chairman",
  "Vice chairman",
  "Chief executive",
  "Executive"
)
data <- filter(data, role %in% role_selected)

# Frequency of each affiliation type (type filter currently disabled).
count(data, type, sort = TRUE)
# type_select <- c('Organisation', 'Virksomhed (CVR)')
# data %<>% filter(type %in% type_select)

# Frequency of each sector, then restrict the data to the selected sector(s).
count(data, sector, sort = TRUE)
sector_select <- "Corporations"
data <- filter(data, sector %in% sector_select)
# Two-mode edge list: one row per (person, affiliation) position.
el_2m <- data %>%
  select(person_id, affiliation_id)

# Project onto persons: joining the two-mode list with itself on
# affiliation_id puts every pair of people sharing an affiliation on one row
# (person_id.x, person_id.y).
el <- el_2m %>%
  left_join(el_2m, by = "affiliation_id")

# Keep every unordered pair exactly once. The self-join produces both (a, b)
# and (b, a); because the graph is later built with directed = FALSE, keeping
# both rows would turn each tie into two parallel edges (double-counted
# weighted degree, every edge drawn twice). `from < to` also removes the
# self-pairs that `from != to` used to remove.
el %<>%
  select(-affiliation_id) %>%
  rename(from = person_id.x,
         to = person_id.y) %>%
  filter(from < to)
# Collapse repeated pairs: `weight` is the number of rows per (from, to)
# pair, i.e. the number of affiliations the two people share.
el <- count(el, from, to, name = "weight")

# Strongest ties first.
head(arrange(el, desc(weight)))

# Distribution of tie weights.
ggplot(el, aes(x = weight)) +
  geom_histogram()
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Node table: one row per person. `name` holds the person id as a character
# string (the key used to join onto the graph's nodes), `person_name` holds
# the human-readable name.
nodes <- data %>%
  distinct(person_id, .keep_all = TRUE) %>%
  select(name = person_id, person_name = name) %>%
  mutate(name = as.character(name))
# Build the undirected person-person graph from the weighted edge list,
# attach the person names to the nodes, then add weighted degree and
# eigenvector centrality and a Louvain community label per node.
g <- el %>%
  as_tbl_graph(directed = FALSE) %N>%
  left_join(nodes, by = "name") %>%
  mutate(
    cent_dgr = centrality_degree(weights = weight),
    cent_eigen = centrality_eigen(weights = weight)
    # cent_between = centrality_betweenness(weights = weight) # takes long too compute
  ) %>%
  mutate(community = group_louvain(weights = weight))

# Number of nodes in each community.
g %N>%
  as_tibble() %>%
  count(community)
# Get rid of small communities: only community labels 1-10 are kept, every
# other node gets community = NA.
# NOTE(review): this relies on group_louvain() numbering communities from
# largest to smallest - confirm against the tidygraph documentation.
g <- g %N>%
  mutate(community = ifelse(community > 10, NA, community))

# Per-community measures: split the graph by community, compute the weighted
# eigenvector centrality inside each piece (cent_com), flag the node(s) with
# the highest value (com_center) and record the piece's node count (com_n),
# then merge the results back into the full graph.
# The argument is spelled `weights` in full (previously `weight`, which only
# worked through partial argument matching) to match the other centrality
# calls in this script.
g <- g %N>%
  morph(to_split, community) %N>%
  mutate(cent_com = centrality_eigen(weights = weight),
         com_center = cent_com == max(cent_com),
         com_n = n()) %>%
  unmorph()
Subsetting by nodes
# Most central node of every community: sort all nodes by within-community
# centrality (descending), then take the first row of each community group.
g %N>%
  as_tibble() %>%
  group_by(community) %>%
  arrange(desc(cent_com)) %>%
  slice_head(n = 1) %>%
  ungroup()

# Look up the positions whose affiliation mentions Danske Bank.
filter(data, str_detect(affiliation, "Danske Bank"))

# People holding a position in affiliation 932, keyed like the graph nodes
# (`name` = person id as character) and flagged with db_member = TRUE.
db_members <- data %>%
  filter(affiliation_id == 932) %>%
  distinct(person_id) %>%
  transmute(name = as.character(person_id),
            db_member = TRUE)
# Fixed seed, presumably so the 'fr' layout comes out the same on every run.
set.seed(1337)

# Community plot: keep nodes that belong to a retained community, then the
# top 10% by (unweighted) eigenvector centrality within that subgraph, then
# drop nodes left without any edge. Node size shows within-community
# centrality, colour shows the community, and only community centres are
# labelled.
g %N>%
  filter(!is.na(community)) %>%
  mutate(community = as.factor(community)) %>%
  filter(percent_rank(centrality_eigen()) > 0.9) %>%
  filter(!node_is_isolated()) %>%
  # %E>% #filter(weight > 1) %>%
  ggraph(layout = "fr") +
  geom_edge_link(aes(alpha = weight)) +
  geom_node_point(aes(size = cent_com, colour = community)) +
  geom_node_text(aes(label = person_name, filter = com_center),
                 repel = TRUE) +
  theme_graph() +
  theme(legend.position = "bottom")
# Fixed seed, presumably so the 'fr' layout comes out the same on every run.
set.seed(1337)

# Same kind of plot, but highlighting the Danske Bank members: they are kept
# regardless of community and centrality rank, coloured by membership, and
# they are the only labelled nodes.
g %N>%
  left_join(db_members, by = "name") %>%
  # Nodes without a match in db_members are non-members.
  mutate(db_member = coalesce(db_member, FALSE)) %>%
  filter(db_member | !is.na(community)) %>%
  mutate(community = as.factor(community)) %>%
  filter(db_member | percent_rank(centrality_eigen()) > 0.8) %>%
  filter(!node_is_isolated()) %>%
  # %E>% #filter(weight > 1) %>%
  ggraph(layout = "fr") +
  geom_edge_link(aes(alpha = weight)) +
  geom_node_point(aes(size = cent_com, colour = db_member)) +
  geom_node_text(aes(label = person_name, filter = db_member),
                 repel = TRUE) +
  theme_graph() +
  theme(legend.position = "bottom")
Danske Bank seems to be somewhat isolated here…