# Copyright Saez Lab 2020
+# Authors: Denes Turei and Alberto Valdeolivas
# Contact: turei.denes@gmail.com
# License: MIT License https://opensource.org/licenses/MIT
if(!'OmnipathR' %in% installed.packages()[,"Package"]){
# URLs for supplementary tables S2 and S3 from Gordon et al. 2020
# https://www.biorxiv.org/content/10.1101/2020.03.22.002386v2
- 'https://www.biorxiv.org/content/biorxiv/early/',
- '2020/03/23/2020.03.22.002386/DC4/embed/media-4.xlsx?download=true'
- 'https://www.biorxiv.org/content/biorxiv/early/',
- '2020/03/23/2020.03.22.002386/DC5/embed/media-5.xlsx?download=true'
+ 'https://www.biorxiv.org/content/biorxiv/early/',
+ '2020/03/23/2020.03.22.002386/DC4/embed/media-4.xlsx?download=true'
+ 'https://www.biorxiv.org/content/biorxiv/early/',
+ '2020/03/23/2020.03.22.002386/DC5/embed/media-5.xlsx?download=true'
# the OmniPath PPI interaction network
ia_omnipath <- import_Omnipath_Interactions() %>% as_tibble()
human_sarscov2_compounds_raw <- read.xlsx(url_s3) %>% as_tibble()
# combining all the interactions to one data frame
-ia_human_sarscov2 <- human_sarscov2_raw %>%
target_genesymbol = 'BSG'
-interactions <- as_tibble(
- ia_omnipath %>% mutate(type = 'ppi'),
- ia_pwextra %>% mutate(type = 'ppi'),
- ia_kinaseextra %>% mutate(type = 'ppi'),
- ia_ligrec %>% mutate(type = 'ppi'),
- ia_transcriptional %>% mutate(type = 'transcriptional'),
- ia_human_sarscov2 %>% mutate(
- source_genesymbol = sub('SARS-CoV2 ', '', source_genesymbol),
- type = 'host_pathogen',
- # the direction or effect of these interactions is unknown
- consensus_direction = 0,
- consensus_stimulation = 0,
- consensus_inhibition = 0,
+ ia_omnipath %>% mutate(type = 'ppi'),
+ ia_pwextra %>% mutate(type = 'ppi'),
+ ia_kinaseextra %>% mutate(type = 'ppi'),
+ ia_ligrec %>% mutate(type = 'ppi'),
+ ia_transcriptional %>% mutate(type = 'transcriptional'),
+ ia_human_sarscov2 %>% mutate(
+ source_genesymbol = sub('SARS-CoV2 ', '', source_genesymbol),
+ type = 'host_pathogen',
+ # the direction or effect of these interactions is unknown
+ consensus_direction = 0,
+ consensus_stimulation = 0,
+ consensus_inhibition = 0
+ summarize_all(first) %>%
+ curation_effort = ifelse(
+ type == 'host_pathogen',
+ # for our purpose these interactions are of high importance
+ # hence we assign a high "weight"
+ is.na(curation_effort),
+ # no curation effort means no literature evidence available
+ # here we assign a small weight < 1
+ # remove any ambiguity between gene symbols and uniprot ids
+ mutate(source_genesymbol = first(source_genesymbol)) %>%
+ mutate(target_genesymbol = first(target_genesymbol)) %>%
+# Obtaining pneumocyte protein expression data from the Human Protein Atlas
+# using the annotations database of OmniPath
+# From the annotations database you can access a broad variety of data about
+# proteins: function, localization, structure, expression, etc, overall
+# from more than 40 resources. See a table of resources, properties and
+# values at http://omnipathdb.org/annotations_summary or use the
+# `get_annotation_databases` method from the OmnipathR package.
+ interactions %>% filter(type != 'host_pathogen') %>% pull(source),
+ interactions %>% pull(target)
+pneumocyte_expression <-
+ import_Omnipath_annotations(
+ filter_databases = c('HPA_tissue'),
+ select_genes = human_proteins
+ spread(label, value) %>%
+ filter(organ == 'lung', tissue == 'pneumocytes')
# creating an igraph network from the interactions data frame
-net_human_sarscov2 <- interaction_graph(interactions = interactions) %>%
- simplify(remove.multiple = FALSE, remove.loops = TRUE)
+ interaction_graph(interactions = interactions) %>%
+ igraph::simplify(remove.multiple = FALSE, remove.loops = TRUE)
# labeling the proteins by organism
sarscov2_proteins <- ia_human_sarscov2 %>% pull(source) %>% unique()
-V(net_human_sarscov2)$organism <- ifelse(
- V(net_human_sarscov2)$up_ids %in% sarscov2_proteins,
+V(net_human_sarscov2)$organism <-
+ V(net_human_sarscov2)$up_ids %in% sarscov2_proteins,
# labeling direct viral targets and transcription factors
-V(net_human_sarscov2)$viral_target <- (
+V(net_human_sarscov2)$viral_target <-
V(net_human_sarscov2)$up_ids %in% (
ia_human_sarscov2 %>% pull(target) %>% unique()
-V(net_human_sarscov2)$tf <- (
+V(net_human_sarscov2)$tf <-
V(net_human_sarscov2)$up_ids %in% (
ia_transcriptional %>% pull(source) %>% unique()
# node indices of the SARS-CoV2 proteins
-nodes_sarscov2 <- which(V(net_human_sarscov2)$organism == 'SARS-CoV2')
+nodes_sarscov2 <- (V(net_human_sarscov2)$organism == 'SARS-CoV2') %>% which()
+# adding pneumocyte expression levels as a vertex attribute
+expression_by_protein <-setNames(
+ pneumocyte_expression %>% pull(level),
+ pneumocyte_expression %>% pull(uniprot)
+V(net_human_sarscov2)$expression <-
+ V(net_human_sarscov2)$up_ids %>%
+ uniprot %in% names(expression_by_protein),
+ expression_by_protein[[uniprot]],
# creating subgraphs of the neighborhood of each virus protein
-neighborhoods_human_sarscov2_1 <- make_ego_graph(
- nodes = nodes_sarscov2,
+neighborhoods_human_sarscov2_1 <-
+ nodes = nodes_sarscov2,
-nodes_in_neighborhoods <- neighborhoods_human_sarscov2_1 %>%
+nodes_in_neighborhoods <-
+ neighborhoods_human_sarscov2_1 %>%
map(function(g){V(g)$name}) %>%
# reducing the network to the neighborhoods
-net_human_sarscov2_1 <- net_human_sarscov2 %>%
!V(net_human_sarscov2)$name %in% nodes_in_neighborhoods
-net_human_sarscov2_1 <- net_human_sarscov2_1 %>%
+# reducing the network to the viral targets having PPI among
+ net_human_sarscov2_1 %>%
V(net_human_sarscov2_1)$viral_target |
-nodes_having_ppi <- net_human_sarscov2_1 %>%
+ net_human_sarscov2_1 %>%
E(net_human_sarscov2_1)$type %in% c('ppi', 'transcriptional')
-sarscov2_nodes <- which(V(net_human_sarscov2_1)$organism == 'SARS-CoV2')
+ (V(net_human_sarscov2_1)$organism == 'SARS-CoV2') %>%
nodes_to_keep <- c(nodes_having_ppi, sarscov2_nodes) %>% unique()
-net_human_sarscov2_1 <- net_human_sarscov2_1 %>%
+ net_human_sarscov2_1 %>%
induced_subgraph(nodes_to_keep)
-net_human_sarscov2_1 <- net_human_sarscov2_1 %>%
+ net_human_sarscov2_1 %>%
delete_vertices(degree(.) == 0)
# creating a figure of the direct neighborhood network
-fr_layout <- qgraph.layout.fruchtermanreingold(
- get.edgelist(net_human_sarscov2_1, names = FALSE),
- vcount = vcount(net_human_sarscov2_1),
- area = vcount(net_human_sarscov2_1) ** 2.3,
- repulse.rad = vcount(net_human_sarscov2_1) ** 2.1,
+ net_human_sarscov2_1 %>%
+ get.edgelist(names = FALSE) %>%
+ qgraph.layout.fruchtermanreingold(
+ vcount = vcount(net_human_sarscov2_1),
+ area = vcount(net_human_sarscov2_1) ** 2.3,
+ repulse.rad = vcount(net_human_sarscov2_1) ** 2.1,
filename = 'SARS-CoV2_OmniPath_neighborhood.png',
+# igraph is not able to plot arrows with and without head at the same time
+# plotting first only the undirected edges, then the directed
+ E(net_human_sarscov2_1)$is_directed,
- vertex.label.cex = .33,
+ vertex.label.cex = .66,
+ vertex.label.degree = -pi * .75,
V(net_human_sarscov2_1)$organism == 'SARS-CoV2',
E(net_human_sarscov2_1)$is_stimulation,
+ E(this_community)$is_directed,
- edge.arrow.size = ifelse(
+# creating the same plot with showing the pneumocyte expression levels
+ filename = 'SARS-CoV2_OmniPath_neighborhood_pneumocytes.png',
E(net_human_sarscov2_1)$is_directed,
+ vertex.label.cex = .66,
+ vertex.label.degree = -pi * .75,
+ V(net_human_sarscov2_1)$organism == 'SARS-CoV2',
+ V(net_human_sarscov2_1)$expression == 'High',
+ V(net_human_sarscov2_1)$expression == 'Medium',
+ V(net_human_sarscov2_1)$expression == 'Low',
+ vertex.label.family = 'DINPro',
+ vertex.label.color = '#454447',
+ vertex.frame.width = 0,
+ vertex.frame.color = NA,
+ E(net_human_sarscov2_1)$type == 'host_pathogen',
+ E(net_human_sarscov2_1)$is_inhibition,
+ E(net_human_sarscov2_1)$is_stimulation,
+ E(this_community)$is_directed,
-# E(net_human_sarscov2_1)$type == 'transcriptional',
-# E(net_human_sarscov2_1)$type == 'host_pathogen',
+# creating a function for recursive clustering, i.e. to call the methods
+# repeatedly until also large clusters are chopped into pieces of the
+cluster_recursive <- function(
+ method = cluster_louvain,
+ if(is.null(membership)){
+ membership <- rep('1', vcount(graph))
+ cl_sizes <- table(membership)
+ for(cl_idx in names(cl_sizes)[which(cl_sizes > max_size)]){
+ this_cluster_idx <- which(membership == cl_idx)
+ 'Cluster %s has a size of %d, clustering further.',
+ length(this_cluster_idx)
+ induced_subgraph(this_cluster_idx)
+ subclusters <- method(this_cluster_graph, ...)
+ membership[this_cluster_idx] <-
+ subclusters$membership %>%
+ map(function(m){sprintf('%s.%d', cl_idx, m)}) %>%
+ if(max(table(membership)) <= max_size){
+# using curation effort as weight for a better clustering
+E(net_human_sarscov2)$weight <- E(net_human_sarscov2)$curation_effort
+# some clustering methods work only with undirected and simple graphs
+net_human_sarscov2_undir <-
+# clustering the network by the louvain algorithm
+human_sarscov2_louvain <-
+ net_human_sarscov2_undir %>%
+ cluster_recursive(method = cluster_louvain, max_size = 150)
+names(human_sarscov2_louvain) <- V(net_human_sarscov2_undir)$name
+# creating a vertex attribute from cluster memberships
+V(net_human_sarscov2)$cluster <-
+ V(net_human_sarscov2)$name %>%
+ name %in% names(human_sarscov2_louvain),
+ human_sarscov2_louvain[[name]],
+sarscov2_proteins <- interactions %>%
+ filter(type == 'host_pathogen') %>%
+human_target_proteins <- interactions %>%
+ filter(type == 'host_pathogen') %>%
+# shortcut for the vertex sequence of the large network
+vs <- V(net_human_sarscov2)
+# plotting each cluster containing viral targets
+dir.create('clusters', showWarnings = FALSE)
+list.files('clusters') %>%
+walk(function(f){file.remove(file.path('clusters', f))})
+ 'SARS-CoV2_OmniPath_clusters_pneumocytes.pdf',
+for(cl_id in unique(vs$cluster)){
+ vs$up_ids[which(vs$cluster == cl_id)] %>%
+ intersect(human_target_proteins) %>%
+ if(has_target_proteins){
+ # the members of the cluster and all virus proteins
+ vs$up_ids %in% sarscov2_proteins
+ # removing nodes with no connections
+ delete_vertices(which(degree(this_community) == 0))
+ if(vcount(this_community) == 0) next
+ get.edgelist(names = FALSE) %>%
+ qgraph.layout.fruchtermanreingold(
+ vcount = vcount(this_community),
+ area = vcount(this_community) ** 2.3,
+ repulse.rad = vcount(this_community) ** 2.1,
+ 'SARS-CoV2_OmniPath_cluster-%s_pneumocytes.png',
+ 'Community %s with %d members and %d connections',
+ vcount(this_community),
+ E(this_community)$is_directed,
+ vertex.label.cex = .66,
+ vertex.label.degree = -pi * .75,
+ V(this_community)$organism == 'SARS-CoV2',
+ V(this_community)$expression == 'High',
+ V(this_community)$expression == 'Medium',
+ V(this_community)$expression == 'Low',
+ vertex.label.family = 'DINPro',
+ vertex.label.color = '#454447',
+ vertex.frame.width = 0,
+ vertex.frame.color = NA,
+ E(this_community)$type == 'host_pathogen',
+ E(this_community)$is_inhibition,
+ E(this_community)$is_stimulation,
+ E(this_community)$is_directed,