Snippets

Dénes Türei Intercellular communication paths with OmnipathR

Created by Dénes Türei last modified
#!/usr/bin/env Rscript

# Copyright Saez Lab 2020
# Denes Turei
# turei.denes@gmail.com

require(dplyr)
require(tidyr)
require(tibble)
require(purrr)
require(readr)
require(igraph)

if(!'OmnipathR' %in% installed.packages()[,"Package"]){
    require(devtools)
    install_github('saezlab/OmnipathR')
}

require(OmnipathR)


interactions <- as_tibble(
    bind_rows(
        import_Omnipath_Interactions(),
        import_LigrecExtra_Interactions(),
        import_PathwayExtra_Interactions(),
        import_KinaseExtra_Interactions()
    )
)

g <- interaction_graph(interactions = interactions)
g <- simplify(g, remove.multiple = FALSE, remove.loops = TRUE)

intercell <- as_tibble(import_Omnipath_intercell())

intercell_sub <- intercell %>%
    filter(class_type == 'sub') %>%
    group_by(uniprot) %>%
    mutate(resources = paste(category, collapse = ','))

intercell <- intercell %>%
    filter(!(class_type %in% c('sub', 'above_main')))

intercell_classes <- intercell %>%
    pull(category) %>%
    unique()

intercell_resources <- intercell_resources %>%
    pull(resources) %>%
    unique

transmitters <- c(
    'ecm',
    'ligand',
    'surface_enzyme',
    'growth_factor_binder',
    'gap_junction',
    'interleukins_hgnc',
    'chemokine_ligands_hgnc',
    'endogenous_ligands_hgnc',
    'adhesion',
    'surface_ligand',
    'extracellular_peptidase',
    'growth_factor_regulator',
    'tight_junction'
)

receivers <- c(
    'receptor',
    'interleukin_receptors_hgnc',
    'transporter',
    'tight_junction',
    'gap_junction',
    'adhesion'
)

intercell_proteins <- intercell %>%
    group_by(category) %>%
    summarize(uniprot = list(unique(uniprot)))

intercell_proteins <- as.list(
    setNames(
        intercell_proteins$uniprot,
        intercell_proteins$category
    )
)

intercell_proteins_sub <- as.list(
    setNames(
        intercell_sub$resources,
        intercell_sub$uniprot
    )
)

intercell_proteins$transmitter <-intercell %>%
    filter(category %in% transmitters) %>%
    select(uniprot) %>%
    unlist(use.names = FALSE) %>%
    unique()

intercell_proteins$receiver <-intercell %>%
    filter(category %in% receivers) %>%
    select(uniprot) %>%
    unlist(use.names = FALSE) %>%
    unique()


for(cls in names(intercell_proteins)){
    
    g <- g %>%
        set_vertex_attr(
            name = cls,
            value = sapply(
                V(g)$up_ids,
                function(x){x %in% intercell_proteins[[cls]]}
            )
        )
    
}

g <- g %>%
    delete_vertices(
        which(!(
            V(g)$receiver |
            V(g)$transmitter
        ))
    )

V(g)$classes <- lapply(
    V(g),
    function(v){
        Filter(
            function(cls){
                vertex_attr(g, cls, v)
            },
            intercell_classes
        )
    }
)


V(g)$resources <- unlist(map(
    V(g)$up_ids,
    function(up){
        intercell_proteins_sub[[up]]
    }
))


trans_trans <- setNames(
    lapply(
        V(g)[V(g)$transmitter],
        function(v){
            Filter(
                function(n){
                    vertex_attr(g, 'transmitter', n) &
                    !vertex_attr(g, 'receiver', n)
                },
                neighbors(g, v, mode=  'out')
            )
        }
    ),
    which(V(g)$transmitter)
)


trans_rec <- setNames(
    lapply(
        V(g)[V(g)$transmitter],
        function(v){
            Filter(
                function(n){
                    vertex_attr(g, 'receiver', n)
                },
                neighbors(g, v, mode=  'out')
            )
        }
    ),
    which(V(g)$transmitter)
)


elist <- as_edgelist(g, names = FALSE) %>%
    as.data.frame() %>%
    `colnames<-`(c('source', 'target')) %>%
    as_tibble() %>%
    add_column(eid = 1:ecount(g)) %>%
    nest(data = c('target', 'eid')) %>%
    mutate(
        data = map(data, as.list),
        data = map(data, function(x){with(x, setNames(eid, target))})
    ) %>%
    as.list() %>%
    with(setNames(data, source))


vpath_to_epath <- function(vpath){
    
    map2_int(
        vpath[1:(length(vpath) - 1)],
        vpath[2:length(vpath)],
        function(vid0, vid1){
            as.integer(get.edge.ids(g, c(vid0, vid1)))
        }
    )
    
}


intercell_vpaths <- list()
i <- 1
prg <- progress_estimated(length(which(V(g)$transmitter)))

for(vid_tr in which(V(g)$transmitter)){
    
    prg$tick()$print()
    
    for(vid_rc in trans_rec[[sprintf('%d', vid_tr)]]){
        
        intercell_vpaths[[i]] <- c(vid_tr, vid_rc)
        i <- i + 1
        
    }
    
    for(vid_m in trans_trans[[sprintf('%d', vid_tr)]]){
        
        for(vid_rc in trans_rec[[sprintf('%d', vid_m)]]){
            
            intercell_vpaths[[i]] <- c(vid_tr, vid_m, vid_rc)
            i <- i + 1
            
        }
        
    }
    
}


prg <- progress_estimated(length(intercell_vpaths))
intercell_epaths <- intercell_vpaths %>%
    map(~{
        prg$tick()$print()
        vpath_to_epath(.x)
    })


paths_vattr <- function(idx, attr){
    
    vertex_attr(g, attr)[
        unlist(map(
            intercell_vpaths,
            function(p){`if`(idx <= length(p), p[idx], NA)}
        ))
    ]
    
}


paths_eattr <- function(idx, attr){
    
    edge_attr(g, attr)[
        unlist(map(
            intercell_epaths,
            function(p){`if`(idx <= length(p), p[idx], NA)}
        ))
    ]
    
}


result <- tibble(
    
    node0_uniprot = paths_vattr(1, 'up_ids'),
    node0_genesymbol = paths_vattr(1, 'name'),
    node0_classes = (
        unlist(map(
            paths_vattr(1, 'classes'),
            function(x){paste(x, collapse = ',')}
        ))
    ),
    node0_intercell_resources = paths_vattr(1, 'resources'),
    
    edge01_directed = paths_eattr(1, 'is_directed'),
    edge01_stimulation = paths_eattr(1, 'is_stimulation'),
    edge01_inhibition = paths_eattr(1, 'is_inhibition'),
    edge01_nsources = paths_eattr(1, 'nsources'),
    edge01_nrefs = paths_eattr(1, 'nrefs'),
    edge01_resources = unlist(
        map(
            paths_eattr(1, 'sources'), paste, collapse = ','
        )
    ),
    
    node1_uniprot = paths_vattr(2, 'up_ids'),
    node1_genesymbol = paths_vattr(2, 'name'),
    node1_classes = (
        unlist(map(
            paths_vattr(2, 'classes'),
            function(x){paste(x, collapse = ',')}
        ))
    ),
    node1_intercell_resources = paths_vattr(2, 'resources'),
    
    edge12_directed = paths_eattr(2, 'is_directed'),
    edge12_stimulation = paths_eattr(2, 'is_stimulation'),
    edge12_inhibition = paths_eattr(2, 'is_inhibition'),
    edge12_nsources = paths_eattr(2, 'nsources'),
    edge12_nrefs = paths_eattr(2, 'nrefs'),
    edge12_resources = unlist(
        map(
            paths_eattr(2, 'sources'), paste, collapse = ','
        )
    ),
    
    node2_uniprot = paths_vattr(3, 'up_ids'),
    node2_genesymbol = paths_vattr(3, 'name'),
    node2_classes = (
        unlist(map(
            paths_vattr(3, 'classes'),
            function(x){paste(x, collapse = ',')}
        ))
    ),
    node2_intercell_resources = paths_vattr(3, 'resources')
    
)

write_tsv(result, 'omnipath_intercell_paths__20200318.tsv')

Comments (1)

  1. Linda Melson
HTTPS SSH

You can clone a snippet to your computer for local editing. Learn more.