#!/usr/bin/env Rscript
# Denes Turei EMBL 2017
# turei.denes@gmail.com
require(dplyr)
require(tidyr)
require(readr)
require(tibble)
require(gplots)
# Input table in long format; the code below reads its `protein`,
# `membrane`, `concentration` and `norm_filt__binding` columns.
infile <- '171004_matrix_semisupervisedclustering.csv'
d <- suppressMessages(read_csv(infile))

# Membrane x protein table of mean binding values: one mean per
# protein/membrane pair, taken over all rows sharing that pair.
# Membrane names become the row names so that `dist()` sees numbers only.
d0 <- d %>%
  group_by(protein, membrane) %>%
  mutate(m = mean(norm_filt__binding)) %>%
  ungroup() %>%
  # `m` is constant within a pair, so this keeps exactly one row per pair.
  distinct(protein, membrane, m) %>%
  spread(protein, m) %>%
  remove_rownames() %>%
  as.data.frame() %>%
  column_to_rownames('membrane')

# Hierarchical clustering of the membranes (Ward linkage on the default
# Euclidean distances of `dist()`); `mem_ordr` holds the membrane names
# in the left-to-right order of the resulting dendrogram.
dst <- dist(d0)
cl <- hclust(dst, method = 'ward.D2')
mem_ordr <- cl[['labels']][cl[['order']]]
# Long table with ordered keys: membranes follow the dendrogram order in
# `mem_ordr`; concentrations are ordered NA < 2 < 5 < 10.
d_ord <- d %>%
  mutate(
    membrane = factor(membrane, levels = mem_ordr, ordered = TRUE),
    # `exclude = NULL` is needed for NA to survive as a level: with the
    # default `exclude = NA`, `factor()` silently drops it from `levels`
    # and rows without a concentration would sort last instead of first.
    concentration = factor(
      concentration,
      levels = c(NA, 2, 5, 10),
      ordered = TRUE,
      exclude = NULL
    )
  ) %>%
  arrange(membrane, protein, concentration)

# Wide table: one row per membrane/concentration pair, one column per
# protein, cells holding `norm_filt__binding`. The row label is built as
# "<membrane>_<concentration>".
d_ord_m <- d_ord %>%
  spread(protein, norm_filt__binding) %>%
  mutate(membrane = paste(membrane, concentration, sep = '_')) %>%
  remove_rownames() %>%
  as.data.frame()

# Move the labels into the row names and drop the two key columns by name
# (not by position), so the result does not depend on the column order of
# the input file. `drop = FALSE` keeps a data frame even with one protein.
rownames(d_ord_m) <- d_ord_m[['membrane']]
value_cols <- setdiff(names(d_ord_m), c('membrane', 'concentration'))
d_ord_m <- d_ord_m[, value_cols, drop = FALSE]
# Colour ramp from white to dark blue for the binding values.
pal <- colorRampPalette(c("white", "dark blue"), space="rgb")

# Matrix actually drawn: `d_ord_m` transposed, i.e. proteins in rows and
# membrane_concentration labels in columns.
heat_mat <- as.matrix(t(d_ord_m))

cairo_pdf('lima_clustering.pdf', width = 18, height = 12)
FDS_heatmap <- heatmap.2(
  heat_mat,
  # Rows (proteins) are clustered here; columns keep the order prepared
  # upstream, so no column dendrogram is computed.
  Rowv = TRUE,
  Colv = FALSE,
  cexRow = 0.6,
  cexCol = 0.6,
  symm = FALSE,
  distfun = function(FDS) dist(FDS, method = "euclidean"),
  hclustfun = function(FDS) hclust(FDS, method = "ward.D2"),
  dendrogram = "row",
  trace = "none",
  # Thin grid around every cell. The counts must come from the plotted
  # (transposed) matrix; taking them from `d_ord_m` swaps the two axes.
  colsep = 0:ncol(heat_mat),
  rowsep = 0:nrow(heat_mat),
  sepwidth = c(0.001, 0.001),
  sepcolor = "light grey",
  symkey = FALSE,
  keysize = 0.8,
  # NOTE(review): the gplots manual gives NA as the way to suppress the key
  # title; the literal "none" is kept as in the original -- confirm it is
  # not rendered as text in the colour key.
  key.title = "none",
  key.xlab = "log normalized NBI",
  key.ylab = "none",
  key.par = list(
    cex.lab = 1,
    cex.axis = 0.8
  ),
  density.info = "none",
  symbreaks = FALSE,
  # Fixed colour scale from 0 to 2.5: 60 break points delimit 59 bins,
  # matching the 59 colours requested from `pal`.
  breaks = seq(0, 2.5, length.out = 60),
  na.color = "grey",
  col = pal(59),
  scale = "none",
  margins = c(9, 9),
  main = "Recruitment of LTP on membrane surrogates",
  xlab = "membranes",
  ylab = "sfGFP fusion proteins"
)
dev.off()