### Load standardpackages
library(tidyverse) # Collection of all the good stuff like dplyr, ggplot2 ect.
library(magrittr) # For extra-piping operators (eg. %<>%)
library(tidygraph)
library(ggraph)
library(igraph)
In this session, you will learn:
We up to now already talked about different ways how networks can be constructed. Up to now, we mainly focussed on:
However, network analysis and modelling is also fully consistent with other structures, which are often a natural outcome of supervised or unsupervised ML exercises:
mtcars
example.mtcars %>% head()
cars_pca <- mtcars[,c(1:7,10,11)] %>%
drop_na() %>%
prcomp(center = TRUE , scale = TRUE)
dist()
) function.cars_dist <- cars_pca$x %>% dist(method = "euclidean")
La voila. Such a distance matrix representas a relational structure and can be modelled as a network.
g <- cars_dist %>%
as.matrix() %>%
as_tbl_graph(directed = FALSE)
g <- g %>% simplify() %>% as_tbl_graph()
g
# A tbl_graph: 32 nodes and 496 edges
#
# An undirected simple graph with 1 component
#
# Node Data: 32 x 1 (active)
name
<chr>
1 Mazda RX4
2 Mazda RX4 Wag
3 Datsun 710
4 Hornet 4 Drive
5 Hornet Sportabout
6 Valiant
# … with 26 more rows
#
# Edge Data: 496 x 3
from to weight
<int> <int> <dbl>
1 1 2 0.408
2 1 3 2.57
3 1 4 3.38
# … with 493 more rows
g <- g %E>%
mutate(weight = max(weight) - weight) %>%
filter(weight >= weight %>% quantile(0.75)) %N>%
filter(!node_is_isolated()) %>%
mutate(community = group_louvain(weights = weight) %>% factor())
Lets take a look!
set.seed(1337)
g %>% ggraph(layout = "nicely") +
geom_node_point(aes(col = community, size = centrality_degree(weights = weight))) +
geom_edge_link(aes(width = weight), alpha = 0.25) +
scale_edge_width(range = c(0.1, 2)) +
geom_node_text(aes(label = name, filter = percent_rank(centrality_degree(weights = weight)) > 0.5), repel = TRUE) +
theme_graph() +
theme(legend.position = 'bottom')
Hirarchical structures are obviously also relational. The difference is, that the connectivity structure tends to be constraint to other levels.
create_tree(20, 3) %>%
mutate(leaf = node_is_leaf(), root = node_is_root()) %>%
ggraph(layout = 'tree') +
geom_edge_diagonal() +
geom_node_point(aes(filter = leaf), colour = 'forestgreen', size = 10) +
geom_node_point(aes(filter = root), colour = 'firebrick', size = 10) +
theme_graph()
cars_hc <- cars_dist %>%
hclust(method = "ward.D2")
Again, this structure can be directly transfered to a graph object.
g <- cars_hc %>% as_tbl_graph()
g
# A tbl_graph: 63 nodes and 62 edges
#
# A rooted tree
#
# Node Data: 63 x 4 (active)
height leaf label members
<dbl> <lgl> <chr> <int>
1 0 TRUE "Porsche 914-2" 1
2 0 TRUE "Lotus Europa" 1
3 1.62 FALSE "" 2
4 0 TRUE "Honda Civic" 1
5 0 TRUE "Fiat X1-9" 1
6 0 TRUE "Fiat 128" 1
# … with 57 more rows
#
# Edge Data: 62 x 2
from to
<int> <int>
1 3 1
2 3 2
3 8 6
# … with 59 more rows
g %>% ggraph(layout = 'dendrogram') +
geom_edge_diagonal(aes(col = .N()$height[from])) +
geom_node_point(aes(col =height)) +
geom_node_text(aes(filter = leaf, label = label), angle=90, hjust=1, nudge_y=-0.1) +
theme_graph() +
theme(legend.position = 'none')
ylim(-0.6, NA)
<ScaleContinuousPosition>
Range:
Limits: 0 -- 1
## Demonstartion
g
# A tbl_graph: 14 nodes and 15 edges
#
# A directed acyclic simple graph with 3 components
#
# Edge Data: 15 x 2 (active)
from to
<int> <int>
1 1 7
2 1 11
3 1 13
4 1 14
5 2 8
6 2 10
# … with 9 more rows
#
# Node Data: 14 x 1
type
<lgl>
1 FALSE
2 FALSE
3 FALSE
# … with 11 more rows
people <- c('Jesper', 'Pernille', 'Morten', 'Lise', 'Christian', 'Mette', 'Casper', 'Dorte', 'Jacob', 'Helle')
places <- c('Yoga House', 'Crossfit', 'Jazz Club', 'Jomfru Anne Gade')
g <- g %N>%
mutate(name = c(places, people))
g
# A tbl_graph: 14 nodes and 15 edges
#
# A directed acyclic simple graph with 3 components
#
# Node Data: 14 x 2 (active)
type name
<lgl> <chr>
1 FALSE Yoga House
2 FALSE Crossfit
3 FALSE Jazz Club
4 FALSE Jomfru Anne Gade
5 TRUE Jesper
6 TRUE Pernille
# … with 8 more rows
#
# Edge Data: 15 x 2
from to
<int> <int>
1 1 7
2 1 11
3 1 13
# … with 12 more rows
set.seed(1337)
p0 <- g %>% ggraph("bipartite") +
geom_edge_link(alpha = 0.25) +
geom_node_point(aes(col = type, size = centrality_degree(mode = 'all'))) +
geom_node_text(aes(label = name), repel = TRUE) +
theme_graph() +
theme(legend.position = 'none') +
labs(title = '2-mode network places-people')
p0
igraph
function bipartite_projection
to create a 2x 1-mode network out of it.g_projected <- g %>% bipartite_projection()
g_projected
$proj1
IGRAPH 8864ec5 UNW- 4 5 -- Full bipartite graph
+ attr: name (g/c), name (v/c), weight (e/n)
+ edges from 8864ec5 (vertex names):
[1] Yoga House--Jomfru Anne Gade Yoga House--Crossfit Yoga House--Jazz Club
[4] Crossfit --Jazz Club Crossfit --Jomfru Anne Gade
$proj2
IGRAPH 357e4d0 UNW- 10 19 -- Full bipartite graph
+ attr: name (g/c), name (v/c), weight (e/n)
+ edges from 357e4d0 (vertex names):
[1] Pernille--Mette Pernille--Casper Pernille--Dorte Pernille--Jacob Morten --Casper Morten --Jacob
[7] Morten --Helle Lise --Mette Lise --Dorte Lise --Jacob Lise --Helle Mette --Dorte
[13] Mette --Jacob Mette --Casper Casper --Jacob Casper --Helle Casper --Dorte Dorte --Jacob
[19] Jacob --Helle
g_places <- g_projected[['proj1']] %>% as_tbl_graph(directed = FALSE)
g_people <- g_projected[['proj2']] %>% as_tbl_graph(directed = FALSE)
set.seed(1337)
library(patchwork)
p1 <- g_places %>% ggraph(layout = "nicely") +
geom_node_point(aes(size = centrality_degree(weights = weight)), col = 'red') +
geom_edge_link(aes(width = weight), alpha = 0.25) +
scale_edge_width(range = c(0.1, 2)) +
geom_node_text(aes(label = name), repel = TRUE) +
theme_graph() +
theme(legend.position = 'none') +
labs(title = '1-mode network places')
p2 <- g_people %>% ggraph(layout = "nicely") +
geom_node_point(aes(size = centrality_degree(weights = weight)), col = 'skyblue2') +
geom_edge_link(aes(width = weight), alpha = 0.25) +
scale_edge_width(range = c(0.1, 2)) +
geom_node_text(aes(label = name), repel = TRUE) +
theme_graph() +
theme(legend.position = 'none') +
labs(title = '1-mode network people')
p0 / (p1 + p2)
el_2m <- g %E>%
mutate(from_name = .N()$name[from],
to_name = .N()$name[to]) %>%
as.tibble() %>%
select(to_name, from_name) %>%
rename(from = to_name, to = from_name)
el_2m
g_2m <- el_2m %>% as_tbl_graph(directed = TRUE) %N>%
mutate(type = name %in% (el_2m %>% pull(from)))
mat_2m <- el_2m %>%
table() %>%
as.matrix()
mat_2m
to
from Crossfit Jazz Club Jomfru Anne Gade Yoga House
Casper 0 0 1 1
Dorte 1 0 1 0
Helle 0 1 0 1
Jacob 1 0 1 1
Lise 1 1 0 0
Mette 1 0 1 0
Morten 0 0 0 1
Pernille 0 0 1 0
library(Matrix)
mat_2m %<>% Matrix(sparse = TRUE)
mat_2m
8 x 4 sparse Matrix of class "dgCMatrix"
to
from Crossfit Jazz Club Jomfru Anne Gade Yoga House
Casper . . 1 1
Dorte 1 . 1 .
Helle . 1 . 1
Jacob 1 . 1 1
Lise 1 1 . .
Mette 1 . 1 .
Morten . . . 1
Pernille . . 1 .
m %*% t(m)
)mat_people <- mat_2m %*% t(mat_2m)
mat_people
8 x 8 sparse Matrix of class "dgCMatrix"
Casper Dorte Helle Jacob Lise Mette Morten Pernille
Casper 2 1 1 2 . 1 1 1
Dorte 1 2 . 2 1 2 . 1
Helle 1 . 2 1 1 . 1 .
Jacob 2 2 1 3 1 2 1 1
Lise . 1 1 1 2 1 . .
Mette 1 2 . 2 1 2 . 1
Morten 1 . 1 1 . . 1 .
Pernille 1 1 . 1 . 1 . 1
t(m) %*% m
)mat_places <- t(mat_2m) %*% mat_2m
mat_places
4 x 4 sparse Matrix of class "dgCMatrix"
Crossfit Jazz Club Jomfru Anne Gade Yoga House
Crossfit 4 1 3 1
Jazz Club 1 2 . 1
Jomfru Anne Gade 3 . 5 2
Yoga House 1 1 2 4
## Helper function
el_to_sparse_matrix <- function(data, # the edgelist
mode_1, # which variable indicates mode 1
mode_2, # which variable indicates mode 2
projection = 'none' # If a pojection should be done, possible is 'none', 'mode1', 'mode2'
){
# Define inputs
i_input <- data %>% pull({{mode_1}})
j_input <- data %>% pull({{mode_2}})
require(Matrix)
mat <- spMatrix(nrow = i_input %>% n_distinct(),
ncol = j_input %>% n_distinct(),
i = i_input %>% factor() %>% as.numeric(),
j = j_input %>% factor() %>% as.numeric(),
x = rep(1, i_input %>% length() ) )
row.names(mat) <- i_input %>% factor() %>% levels()
colnames(mat) <- j_input %>% factor() %>% levels()
# Projection if necessary
if(projection == 'mode1'){mat %<>% tcrossprod()}
if(projection == 'mode2'){mat %<>% crossprod()}
return(mat)
}
mat_people <- el_2m %>% el_to_sparse_matrix(from, to, projection = 'mode1')
mat_places <- el_2m %>% el_to_sparse_matrix(from, to, projection = 'mode2')
mat_people
8 x 8 sparse Matrix of class "dsCMatrix"
Casper Dorte Helle Jacob Lise Mette Morten Pernille
Casper 2 1 1 2 . 1 1 1
Dorte 1 2 . 2 1 2 . 1
Helle 1 . 2 1 1 . 1 .
Jacob 2 2 1 3 1 2 1 1
Lise . 1 1 1 2 1 . .
Mette 1 2 . 2 1 2 . 1
Morten 1 . 1 1 . . 1 .
Pernille 1 1 . 1 . 1 . 1
mat_places
4 x 4 sparse Matrix of class "dsCMatrix"
Crossfit Jazz Club Jomfru Anne Gade Yoga House
Crossfit 4 1 3 1
Jazz Club 1 2 . 1
Jomfru Anne Gade 3 . 5 2
Yoga House 1 1 2 4
el_people <- el_2m %>% left_join(el_2m, by = 'to') %>%
select(-to) %>%
rename(from = from.x, to = from.y) %>%
filter(from != to) %>%
count(from, to, name = 'weight')
el_people
el_places <- el_2m %>% left_join(el_2m, by = 'from') %>%
select(-from) %>%
rename(from = to.x, to = to.y) %>%
filter(from != to) %>%
count(from, to, name = 'weight')
el_places
Lets talk about bibliographic networks. In short, that are networks between documents which cite each others. That can be (commonly) academic publications, but also patents or policy reports. Conceptually, we can see them as 2 mode networks, between articles and their reference. That helps us to apply some interesting metrics, such as:
Interestingly, different projections of this 2-mode network give the whole resulting 1-mode network a different meaning.
We will here do a brief bibliometric network analysis.
While there exist specialized packages to do it more conveniently (eg. bibliometrix), we will for mximum clarity construct everything somewhat by hand.
I will illustrate more in detail in the following. The example is based on some own work, where i here in very simple way recreate some parts of the analysis.
Rakas, M., & Hain, D. S. (2019). The state of innovation system research: What happens beneath the surface?. Research Policy, 45 (9). DOI: https://doi.org/10.1016/j.respol.2019.04.011
network analysis
in their title, abstract, or keywords.TITLE-ABS-KEY ( "network analysis" ) AND ( LIMIT-TO ( DOCTYPE , "ar" ) OR LIMIT-TO ( DOCTYPE , "cp" ) ) AND ( LIMIT-TO ( LANGUAGE , "English" ) ) AND ( LIMIT-TO ( SRCTYPE , "j" ) OR LIMIT-TO ( SRCTYPE , "p" ) ) AND ( LIMIT-TO ( PUBYEAR , 2021 ) OR LIMIT-TO ( PUBYEAR , 2020 ) OR LIMIT-TO ( PUBYEAR , 2019 ) OR LIMIT-TO ( PUBYEAR , 2018 ) OR LIMIT-TO ( PUBYEAR , 2017 ) OR LIMIT-TO ( PUBYEAR , 2016 ) OR LIMIT-TO ( PUBYEAR , 2015 ) )
csv
. We select al possible fields to download.rm(list=ls())
data <- read_csv('https://github.com/SDS-AAU/SDS-master/raw/master/00_data/networks_bibliometrics/biblio_nw.csv')
data %>%
glimpse()
Rows: 2,000
Columns: 43
$ Authors <chr> "Wang D., Cui P., Zhu W.", "Thorsson V., Gibbs D.L., Brown S.D., Wolf D.…
$ `Author(s) ID` <chr> "56780729900;34568700100;7404232311;", "6602742809;57189045500;571991919…
$ Title <chr> "Structural deep network embedding", "The Immune Landscape of Cancer", "…
$ Year <dbl> 2016, 2018, 2015, 2015, 2015, 2015, 2016, 2015, 2015, 2015, 2015, 2017, …
$ `Source title` <chr> "Proceedings of the ACM SIGKDD International Conference on Knowledge Dis…
$ Volume <chr> "13-17-August-2016", "48", "163", "15", "2015-January", "162", "428", "4…
$ Issue <chr> NA, "4", "3", "15", NA, "2", "4", "2", NA, "11", "1", "6343", "7590", "J…
$ `Art. No.` <chr> NA, NA, NA, NA, NA, "8281", NA, NA, "bav028", NA, NA, NA, NA, "386", NA,…
$ `Page start` <chr> "1225", "812", "712", "2597", "2111", "375", "688", "106", NA, "2490", "…
$ `Page end` <chr> "1234", "830.e14", "723", "2601", "2117", "390", "692", "114", NA, "2502…
$ `Page count` <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 16, NA, NA, NA, NA, …
$ `Cited by` <dbl> 781, 607, 507, 468, 418, 404, 378, 377, 363, 356, 354, 347, 347, 328, 32…
$ DOI <chr> "10.1145/2939672.2939753", "10.1016/j.immuni.2018.03.023", "10.1016/j.ce…
$ Link <chr> "https://www.scopus.com/inward/record.uri?eid=2-s2.0-84985034266&doi=10.…
$ Affiliations <chr> "Tsinghua National Laboratory for Information Science and Technology, De…
$ `Authors with affiliations` <chr> "Wang, D., Tsinghua National Laboratory for Information Science and Tech…
$ Abstract <chr> "Network embedding is an important method to learn low-dimensional repre…
$ `Author Keywords` <chr> "Deep learning; Network analysis; Network embedding", "cancer genomics; …
$ `Index Keywords` <chr> "Classification (of information); Data mining; Electric network analysis…
$ `Funding Details` <chr> "National Key Basic Research Program For Youth: 2015CB352300\n\nNational…
$ `Funding Text 1` <chr> "This work was supported by National Program on Key Basic Research Proje…
$ `Funding Text 2` <chr> NA, "Michael Seiler, Peter G. Smith, Ping Zhu, Silvia Buonamici, and Lih…
$ `Funding Text 3` <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ `Funding Text 4` <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ `Funding Text 5` <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ `Funding Text 6` <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ `Funding Text 7` <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ References <chr> "Belkin, M., Niyogi, P., Laplacian eigenmaps for dimensionality reductio…
$ `Correspondence Address` <chr> NA, NA, "Hyman, A.A.; Max Planck Institute of Molecular Cell Biology and…
$ Editors <chr> NA, NA, NA, NA, "Wooldridge M.Yang Q.", NA, NA, NA, NA, NA, NA, NA, NA, …
$ Sponsors <chr> "ACM SIGKDD;ACM SIGMOD", NA, NA, NA, "Alibaba.com;Department of Computer…
$ Publisher <chr> "Association for Computing Machinery", "Cell Press", "Cell Press", "Wile…
$ `Conference name` <chr> "22nd ACM SIGKDD International Conference on Knowledge Discovery and Dat…
$ `Conference date` <chr> "13 August 2016 through 17 August 2016", NA, NA, NA, "25 July 2015 throu…
$ `Conference location` <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ `Conference code` <dbl> 123286, NA, NA, NA, 116754, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ ISSN <chr> NA, "10747613", "00928674", "16159853", "10450823", "00928674", "0022283…
$ ISBN <chr> "9781450342322", NA, NA, NA, "9781577357384", NA, NA, NA, NA, NA, NA, NA…
$ CODEN <chr> NA, "IUNIE", "CELLB", "PROTC", NA, "CELLB", "JMOBA", "NGENE", NA, NA, "N…
$ `Abbreviated Source Title` <chr> "Proc. ACM SIGKDD Int. Conf. Knowl. Discov. Data Min.", "Immunity", "Cel…
$ `Document Type` <chr> "Conference Paper", "Article", "Article", "Article", "Conference Paper",…
$ Source <chr> "Scopus", "Scopus", "Scopus", "Scopus", "Scopus", "Scopus", "Scopus", "S…
$ EID <chr> "2-s2.0-84985034266", "2-s2.0-85044934017", "2-s2.0-84948067587", "2-s2.…
data %<>%
select(EID, Authors, `Author(s) ID`, Title, `Abbreviated Source Title`, Year, Affiliations, `Author Keywords`, `Cited by`, Abstract, References) %>%
rename(author = Authors,
author_id = `Author(s) ID`,
title = Title,
journal = `Abbreviated Source Title`,
year = Year,
affiliation = Affiliations,
keywords = `Author Keywords`,
citations = `Cited by`,
Abstract = Abstract,
references = References)
data[2, 'author']
data[2, 'author_id']
data[2, 'affiliation']
data[2, "keywords"]
data[2, "references"]
data %>% select(title, author, citations) %>%
unnest(author) %>%
head()
data %>% select(title, author, citations) %>%
unnest(author) %>%
group_by(author) %>%
summarise(n = n(),
citations = citations %>% sum(na.rm = TRUE)) %>%
arrange(desc(citations)) %>%
head(10)
I will now transfer them to an article \(\rightarrow\) reference edgelist.
el_2m <- data %>% select(EID, references) %>% unnest(references)
el_2m %>% head()
el_bib <- el_2m %>% left_join(el_2m, by = 'references') %>%
rename(from = EID.x,
to = EID.y) %>%
select(from, to) %>%
filter(from != to) %>%
count(from, to, name = 'weight')
el_bib %>% head()
el_bib %>%
ggplot(aes(x = weight)) +
geom_histogram()
el_bib %<>%
filter(weight >=2)
el_bib %>%
ggplot(aes(x = weight)) +
geom_histogram()
g_bib <- el_bib %>% as_tbl_graph(directed = FALSE) %>%
igraph::simplify() %>%
as_tbl_graph(directed = FALSE)
g_bib
# A tbl_graph: 684 nodes and 1046 edges
#
# An undirected simple graph with 109 components
#
# Node Data: 684 x 1 (active)
name
<chr>
1 2-s2.0-84909633782
2 2-s2.0-84910060076
3 2-s2.0-84912573275
4 2-s2.0-84918503641
5 2-s2.0-84918774371
6 2-s2.0-84919882800
# … with 678 more rows
#
# Edge Data: 1,046 x 3
from to weight
<int> <int> <dbl>
1 1 94 4
2 1 162 6
3 1 255 14
# … with 1,043 more rows
g_bib <- g_bib %N>%
filter(!node_is_isolated()) %N>%
filter(percent_rank(centrality_degree(weights = weight)) >= 0.5)
g_bib <- g_bib %N>%
mutate(com = group_louvain(weights = weight))
morph()
functions of tidygraph
, which basically let you apply group_by
style operations on graph structures, where graph calculations are execited on subgraphs.g_bib <- g_bib %N>%
morph(to_split, com) %>%
mutate(cent_dgr_int = centrality_degree(weights = weight)) %>%
mutate(com_center = cent_dgr_int == max(cent_dgr_int)) %>%
unmorph()
g_bib %N>%
as_tibble() %>%
count(com, sort = TRUE)
g_bib <- g_bib %N>%
add_count(com, name = 'com_n') %>%
mutate(com = ifelse(com_n >= 20, com, NA) ) %>%
select(-com_n)
g_bib <- g_bib %N>%
left_join(data %>% select(EID, title, journal, year, citations), by = c('name' = 'EID')) %>%
mutate(title = title %>% str_trunc(30))
set.seed(1337)
g_bib %>%
ggraph(layout = 'graphopt') +
geom_edge_link(aes(width = weight,
color = .N()$com[from] %>% as.factor()), # Notice that
alpha = 0.5,
show.legend = FALSE) +
scale_edge_width(range = c(0.5, 2)) +
geom_node_point(aes(color = com %>% as.factor(),
size = centrality_degree(weight = weight),
alpha = citations)) +
geom_node_text(aes(label = title, filter = com_center == TRUE), repel = TRUE) +
theme_graph() +
theme(legend.position = 'bottom') +
labs(title = 'Bibliographic Coupling Network',
subtitle = 'Network Analysis 2015-2020',
color = 'Community',
size = 'Degree',
alpha = 'Citations')
data %>%
select(EID, author, year, title, journal, citations) %>%
inner_join(g_bib %N>% as_tibble() %>% select(name, com, cent_dgr_int), by = c('EID' = 'name')) %>%
group_by(com) %>%
arrange(desc(cent_dgr_int)) %>%
slice(1:10) %>%
ungroup() %>%
select(com, title, cent_dgr_int, citations) %>%
mutate(title = title %>% str_trunc(75))
el_cit %>%
ggplot(aes(x = weight)) +
geom_histogram()
g_cit <- el_cit %>% as_tbl_graph(directed = FALSE) %>%
igraph::simplify() %>%
as_tbl_graph(directed = FALSE) %N>%
filter(!node_is_isolated()) %N>%
filter(percent_rank(centrality_degree(weights = weight)) >= 0.5) %N>%
mutate(com = group_louvain(weights = weight)) %N>%
morph(to_split, com) %>%
mutate(cent_dgr_int = centrality_degree(weights = weight)) %>%
mutate(com_center = cent_dgr_int == max(cent_dgr_int)) %>%
unmorph()
g_cit %N>%
as_tibble() %>%
count(com, sort = TRUE)
g_cit <- g_cit %N>%
add_count(com, name = 'com_n') %>%
mutate(com = ifelse(com_n >= 10, com, NA) ) %>%
select(-com_n)
Lets take a look at the network.
set.seed(1337)
g_cit %N>%
mutate(name = name %>% str_trunc(75)) %>%
ggraph(layout = 'graphopt') +
geom_edge_link(aes(width = weight,
color = .N()$com[from] %>% as.factor()), # Notice that
alpha = 0.5,
show.legend = FALSE) +
scale_edge_width(range = c(0.5, 2)) +
geom_node_point(aes(color = com %>% as.factor(),
size = centrality_degree(weight = weight),
alpha = cent_dgr_int)) +
geom_node_text(aes(label = name, filter = com_center == TRUE & percent_rank(cent_dgr_int) > 0.80 ), repel = TRUE) +
theme_graph() +
theme(legend.position = 'bottom') +
labs(title = 'Bibliographic Coupling Network',
subtitle = 'Network Analysis 2015-2020',
color = 'Community',
size = 'Degree',
alpha = 'Citations')
g_cit %N>%
as_tibble() %>%
group_by(com) %>%
arrange(desc(cent_dgr_int)) %>%
slice(1:10) %>%
ungroup() %>%
mutate(name = name %>% str_trunc(75)) %>%
select(com, name, cent_dgr_int)
el_joint <- el_2m %>%
inner_join(g_bib %N>% as_tibble() %>% select(name, com) %>% drop_na(), by = c('EID' = 'name')) %>%
inner_join(g_cit %N>% as_tibble() %>% select(name, com) %>% drop_na(), by = c('references' = 'name')) %>%
rename(from = com.x, to = com.y) %>%
count(from, to, name = 'weight')
el_joint
g_joint <- el_joint %>% as_tbl_graph(directed = TRUE)
g_joint <- g_joint %N>%
mutate(type = name %>% str_detect('bib'))
g_joint %>% ggraph("bipartite") +
geom_edge_link(alpha = 0.25) +
geom_node_point(aes(col = type, size = centrality_degree(mode = 'all'))) +
geom_node_text(aes(label = name), repel = TRUE) +
theme_graph() +
theme(legend.position = 'none')
Please do Exercise 1 in the corresponding section on Github
. This time you are about to do your own bibliographic analysis!
Paper mentioned in the text
Other own work dealing with 2-mode networks
sessionInfo()