### Load standard packages
library(tidyverse) # Collection of all the good stuff like dplyr, ggplot2 ect.
library(magrittr) # For extra-piping operators (eg. %<>%)
library(tidygraph)
library(ggraph)
# remotes::install_github("antongrau/eliter") # Somehow doesn't work
# Download the elite_den17.csv network data from the SDS-AAU GitHub repository.
dk_elites <- read_csv(
  file = "https://github.com/SDS-AAU/SDS-master/raw/master/00_data/networks/elite_den17.csv"
)
#> Rows: 56849 Columns: 16
#> ── Column specification ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
#> Delimiter: ","
#> chr (7): NAME, AFFILIATION, ROLE, TAGS, SECTOR, TYPE, DESCRIPTION
#> dbl (6): POSITION_ID, ID, CVR_PERSON, CVR_AFFILIATION, PERSON_ID, AFFILIATION_ID
#> lgl (1): ARCHIVED
#> dttm (2): CREATED, LAST_CHECKED
#> ℹ Use `spec()` to retrieve the full column specification for this data.
#> ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Work on `data` so that the raw download in `dk_elites` stays available.
data <- dk_elites

# Inspect the first rows, then the column names and types.
head(data)
glimpse(data)
#> Rows: 56,849
#> Columns: 16
#> $ NAME <chr> "Aage Almtoft", "Aage B. Andersen", "Aage Christensen", "Aage Dam", "Aage Dam", "Aage Frandsen", "Aage Juhl Jørgensen…
#> $ AFFILIATION <chr> "Middelfart Sparekasse", "Foreningen Østifterne - Repræsentantskab (Medlemmer af delegeretforsamling)", "ÅRHUS SØMAND…
#> $ ROLE <chr> "Member", "Member", "Chairman", "Chairman", "Member", "Member", "Member", "Member", "Member", "Member", "Member", "Me…
#> $ TAGS <chr> "Corporation, FINA, Banks, Finance", "Charity, Foundation, Insurance, Socialomraadet", "Foundation, Marine, Tourism",…
#> $ POSITION_ID <dbl> 1, 4, 6, 8, 9, 15, 28, 30, 32, 34, 38, 41, 47, 49, 58, 63, 66, 70, 74, 76, 78, 80, 96, 104, 113, 115, 131, 133, 135, …
#> $ ID <dbl> 95023, 67511, 100903, 69156, 72204, 73158, 100249, 3165, 72054, 72759, 86531, 71070, 70858, 137228, 100956, 89846, 34…
#> $ SECTOR <chr> "Corporations", "NGO", "Foundations", "NGO", "NGO", "Parliament", "Corporations", "NGO", "NGO", "NGO", "Foundations",…
#> $ TYPE <chr> NA, "Organisation", NA, "Organisation", "Stat", NA, NA, NA, NA, NA, NA, NA, NA, "Netværk (VL-gruppe)", NA, NA, NA, NA…
#> $ DESCRIPTION <chr> "Automatisk CVR import at 2016-03-12 18:01:28: BESTYRELSE i Middelfart Sparekasse (2009-03-31 - ).", "Direktør", "Aut…
#> $ CREATED <dttm> 2016-03-12 18:01:28, 2016-02-05 14:45:10, 2016-03-12 18:08:31, 2016-02-10 15:18:47, 2016-02-16 10:49:01, 2016-02-17 …
#> $ ARCHIVED <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
#> $ LAST_CHECKED <dttm> 2017-11-09 15:38:01, 2016-02-12 14:41:09, 2017-11-09 15:50:09, 2016-02-10 14:19:20, 2016-02-16 11:55:34, 2016-02-17 …
#> $ CVR_PERSON <dbl> 4003983591, NA, 4000054465, NA, NA, NA, 4003907021, NA, NA, NA, 4000081578, NA, NA, 4003899511, 4004108676, 400416014…
#> $ CVR_AFFILIATION <dbl> 24744817, NA, 29094411, NA, 43232010, NA, 25952200, NA, NA, 81191158, 66693511, 37282146, 84414913, NA, 25059115, 107…
#> $ PERSON_ID <dbl> 1, 3, 4, 5, 5, 9, 16, 18, 20, 21, 23, 25, 30, 31, 36, 38, 40, 43, 46, 47, 47, 49, 60, 63, 68, 69, 81, 82, 82, 82, 82,…
#> $ AFFILIATION_ID <dbl> 3687, 2528, 237, 469, 1041, 1781, 4878, 1038, 3535, 2733, 1532, 3509, 3462, 8426, 37, 2372, 1065, 2760, 2202, 5536, 1…
# Use lower-case column names for the rest of the script.
names(data) <- str_to_lower(names(data))

# Tabulate the roles, then keep only rows whose role is in `role_selected`.
count(data, role, sort = TRUE)
role_selected <- c(
  "Member", "Chairman", "Vice chairman", "Chief executive", "Executive"
)
data <- data %>% filter(role %in% role_selected)

# Tabulate the affiliation types, then keep only the types in `type_select`.
count(data, type, sort = TRUE)
type_select <- c("Organisation", "Virksomhed (CVR)")
data <- data %>% filter(type %in% type_select)
# Two-mode edge list: one row per unique person-affiliation pair.
# Duplicate pairs (presumably one person holding several selected roles in the
# same affiliation -- confirm) are dropped so they cannot inflate tie weights.
el_2m <- data %>%
  select(person_id, affiliation_id) %>%
  distinct()

# One-mode projection: joining the list with itself on `affiliation_id` pairs
# up every two persons who share an affiliation.
el <- el_2m %>%
  left_join(el_2m, by = "affiliation_id") %>%
  select(-affiliation_id) %>%
  rename(from = person_id.x, to = person_id.y) %>%
  # The self-join yields (a, a) as well as both (a, b) and (b, a). Keeping only
  # from < to removes the self-pairs and keeps every undirected tie exactly
  # once; otherwise the undirected graph built from `el` would contain two
  # parallel edges per tie.
  filter(from < to) %>%
  # weight = number of affiliations the two persons share
  count(from, to, name = "weight")
# Show the ties with the highest weights.
el %>%
  arrange(desc(weight)) %>%
  head()

# Histogram of the tie weights.
el %>%
  ggplot(aes(x = weight)) +
  geom_histogram()
#> `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Node list: one row per person. `name` holds the person id as character (the
# key used to join onto the graph nodes), `person_name` the person's name.
nodes <- data %>%
  distinct(person_id, .keep_all = TRUE) %>%
  select(name = person_id, person_name = name) %>%
  mutate(name = as.character(name))
# Build an undirected graph from the edge list and attach the person names to
# its nodes via the shared `name` column.
g <- as_tbl_graph(el, directed = FALSE) %N>%
  left_join(nodes, by = "name")
# Node centralities on the weighted graph, with nodes sorted by betweenness.
g <- g %N>%
  mutate(
    # Degree and eigenvector centrality treat `weight` as tie strength.
    cent_dgr = centrality_degree(weights = weight),
    cent_eigen = centrality_eigen(weights = weight),
    # Betweenness is based on weighted shortest paths, where igraph interprets
    # weights as distances. The co-affiliation count is a strength, so it is
    # inverted: strongly tied persons become close rather than far apart.
    cent_between = centrality_betweenness(weights = 1 / weight)
  ) %>%
  arrange(desc(cent_between))
# Assign every node to a community with the Louvain algorithm, using the tie
# weights.
# NOTE(review): Louvain may be randomised in recent igraph versions, and no
# seed is set before this step -- confirm whether a set.seed() is wanted here.
g <- g %>%
  activate(nodes) %>%
  mutate(community = group_louvain(weights = weight))

# Number of nodes in each community.
g %>%
  activate(nodes) %>%
  as_tibble() %>%
  count(community)

# Keep community ids 1 to 10 and set every higher id to NA.
# NOTE(review): presumably ids are ordered by community size, so this keeps the
# ten largest communities -- confirm against the tidygraph documentation.
g <- g %>%
  activate(nodes) %>%
  mutate(community = replace(community, community > 10, NA))

# Number of nodes per community after the trimming.
g %>%
  activate(nodes) %>%
  as_tibble() %>%
  count(community)
# Eigenvector centrality computed separately within each community: the graph
# is split by `community`, the centrality is calculated in every part, and the
# result is merged back. `com_center` flags the node(s) with the highest
# within-community value.
g <- g %N>%
  morph(to_split, community) %N>%
  mutate(
    # Argument name spelled out in full (`weights`), matching the global
    # centrality calls, instead of relying on partial argument matching.
    cent_com = centrality_eigen(weights = weight),
    com_center = cent_com == max(cent_com)
  ) %>%
  unmorph()
#> Subsetting by nodes
# For every community, show the node with the highest within-community
# eigenvector centrality: sort by `cent_com` descending, then take the first
# row of each community.
g %>%
  activate(nodes) %>%
  as_tibble() %>%
  arrange(desc(cent_com)) %>%
  group_by(community) %>%
  slice(1L) %>%
  ungroup()
# Fixed seed set immediately before the plot is built.
set.seed(1337)

# Network plot of the most central nodes. The filters are applied one after the
# other on purpose: the percent rank is computed among nodes with a community,
# and isolation is judged on the graph that remains after that cut.
g %>%
  activate(nodes) %>%
  filter(!is.na(community)) %>%
  mutate(community = as.factor(community)) %>%
  # keep nodes above the 95th percent rank of global eigenvector centrality
  filter(percent_rank(cent_eigen) > 0.95) %>%
  # drop nodes left without any edge
  filter(!node_is_isolated()) %>%
  # (an optional edge filter, e.g. weight > 1, could be added here)
  ggraph(layout = "fr") +
  geom_edge_link(aes(alpha = weight)) +
  geom_node_point(aes(size = cent_com, colour = community)) +
  # label only the community centres
  geom_node_text(
    aes(label = person_name, filter = com_center),
    repel = TRUE
  ) +
  theme_graph() +
  theme(legend.position = "bottom")