Commits

Giovanni Marco Dall'Olio committed 865f886

REFACT: changing how the interesting-genes plots are generated (plots are now stored as named objects and printed explicitly to the PDF device)

Comments (0)

Files changed (2)

results/plots/n-glycan/figure2_fst.pdf

Binary file modified.

src/scripts/plot_interestinggenes_figure23_revision.R

 xlabels[3] <- 'gene center'
 
 # --- plot Fst ---
-pdf("../results/plots/n-glycan/figure2_fst.pdf", 10, 8, page='a4r', title='fig 2')
 all_fst <- read.table("../results/tables/n-glycan/fst_by_snp_n-glycan_400000.txt", header=T)
 testtype.label = expression(F[ST])
 scale_limits = c(0, 0.4)
 interesting.fst.raw <- subset(interesting.fst, select=c(1, 2, 3, 5, 7, 9, 11, 13, 15, 17:25))
 interesting.fst.raw.long <- melt(interesting.fst.raw, id.vars=c(1, 2, 10:18))
 interesting.fst.raw.long$pop <- factor(toupper(interesting.fst.raw.long$variable), levels=c('SSAFR', 'MENA', 'EUR', 'CSASIA', 'EASIA', 'AME', 'OCE'))
-ggplot(data=interesting.fst.raw.long, aes(x=distance_from_gene_center, y = value)) + geom_point(aes(colour=factor(position)), size=1) + facet_grid(pop~gene) + scale_x_continuous('Distance from gene center', limits=c(-window_size, window_size),breaks=xbreaks, labels=xlabels) + scale_y_continuous(testtype.label, limits=scale_limits) + opts(axis.title.y=theme_text(vjust=0.5, angle=90)) +  opts(axis.text.x=theme_text(angle=90, hjust=0.4 )) + stat_smooth(colour='blue', size=0.5, label='interpolation of data', method=loess, span=0.4) + scale_colour_manual(limits=c('upstream', 'inside_gene', 'downstream'), values=c('gray75', 'gray42', 'gray80')) +  opts(legend.position = 'none') 
+fst_fixedscales_nosignificant_plot <- ggplot(data=interesting.fst.raw.long, aes(x=distance_from_gene_center, y = value)) + geom_point(aes(colour=factor(position)), size=1) + facet_grid(pop~gene) + scale_x_continuous('Distance from gene center', limits=c(-window_size, window_size),breaks=xbreaks, labels=xlabels) + scale_y_continuous(testtype.label, limits=scale_limits) + opts(axis.title.y=theme_text(vjust=0.5, angle=90)) +  opts(axis.text.x=theme_text(angle=90, hjust=0.4 )) + stat_smooth(colour='blue', size=0.5, label='interpolation of data', method=loess, span=0.4) + scale_colour_manual(limits=c('upstream', 'inside_gene', 'downstream'), values=c('gray75', 'gray42', 'gray80')) +  opts(legend.position = 'none') 
 # scales = free
-ggplot(data=interesting.fst.raw.long, aes(x=distance_from_gene_center, y = value)) + geom_point(aes(colour=factor(position)), size=1) + facet_grid(pop~gene, scales='free') + scale_x_continuous('Distance from gene center', limits=c(-window_size, window_size),breaks=xbreaks, labels=xlabels) + scale_y_continuous(testtype.label) + opts(axis.title.y=theme_text(vjust=0.5, angle=90)) +  opts(axis.text.x=theme_text(angle=90, hjust=0.4 )) + stat_smooth(colour='blue', size=0.5, label='interpolation of data', method=loess, span=0.4) + scale_colour_manual(limits=c('upstream', 'inside_gene', 'downstream'), values=c('gray75', 'gray42', 'gray80')) +  opts(legend.position = 'none') 
+fst_freescales_nosignificant_plot <- ggplot(data=interesting.fst.raw.long, aes(x=distance_from_gene_center, y = value)) + geom_point(aes(colour=factor(position)), size=1) + facet_grid(pop~gene, scales='free') + scale_x_continuous('Distance from gene center', limits=c(-window_size, window_size),breaks=xbreaks, labels=xlabels) + scale_y_continuous(testtype.label) + opts(axis.title.y=theme_text(vjust=0.5, angle=90)) +  opts(axis.text.x=theme_text(angle=90, hjust=0.4 )) + stat_smooth(colour='blue', size=0.5, label='interpolation of data', method=loess, span=0.4) + scale_colour_manual(limits=c('upstream', 'inside_gene', 'downstream'), values=c('gray75', 'gray42', 'gray80')) +  opts(legend.position = 'none') 
 
 # --- plot Fst, highlight significant values ---
 fst.significant <- data.frame('gene' = c("MAN2A2", "MGAT3", "B4GALT2", "MGAT4A", "MGAT4A", "ST3GAL4", "ST8SIA3", "ST8SIA6", "DPM1", "DPM3", "PMM1"), 
     'pop' = c("CSASIA", "CSASIA", "MENA", "EUR", "EASIA", "SSAFR", "EUR", "CSASIA", "MENA", "CSASIA", "EASIA"), significant=T)
 fst.significant$significant = T
 interesting.fst.raw.long.significant <- merge(interesting.fst.raw.long, fst.significant, by=c("gene", "pop"), all.x=T)
-ggplot(data=interesting.fst.raw.long.significant, aes(x=distance_from_gene_center, y = value)) + geom_rect(data=subset(interesting.fst.raw.long.significant, significant == T), aes(fill=as.factor(significant)), xmin = -Inf, xmax = Inf, ymin = -Inf,ymax = Inf, alpha = 0.1) + geom_point(aes(colour=factor(position)), size=1) + facet_grid(pop~gene) + scale_x_continuous('Distance from gene center', limits=c(-window_size, window_size),breaks=xbreaks, labels=xlabels) + scale_y_continuous(testtype.label, limits=scale_limits) + opts(axis.title.y=theme_text(vjust=0.5, angle=90)) +  opts(axis.text.x=theme_text(angle=90, hjust=0.4 )) + stat_smooth(colour='blue', size=0.5, label='interpolation of data', method=loess, span=0.4) + scale_colour_manual(limits=c('upstream', 'inside_gene', 'downstream'), values=c('gray75', 'gray42', 'gray80')) +  opts(legend.position = 'none') 
+fst_fixedscales_significant_plot <- ggplot(data=interesting.fst.raw.long.significant, aes(x=distance_from_gene_center, y = value)) + geom_rect(data=subset(interesting.fst.raw.long.significant, significant == T), aes(fill=as.factor(significant)), xmin = -Inf, xmax = Inf, ymin = -Inf,ymax = Inf, alpha = 0.1) + geom_point(aes(colour=factor(position)), size=1) + facet_grid(pop~gene) + scale_x_continuous('Distance from gene center', limits=c(-window_size, window_size),breaks=xbreaks, labels=xlabels) + scale_y_continuous(testtype.label, limits=scale_limits) + opts(axis.title.y=theme_text(vjust=0.5, angle=90)) +  opts(axis.text.x=theme_text(angle=90, hjust=0.4 )) + stat_smooth(colour='blue', size=0.5, label='interpolation of data', method=loess, span=0.4) + scale_colour_manual(limits=c('upstream', 'inside_gene', 'downstream'), values=c('gray75', 'gray42', 'gray80')) +  opts(legend.position = 'none') 
 # scales = free
-ggplot(data=interesting.fst.raw.long.significant, aes(x=distance_from_gene_center, y = value)) + geom_rect(data=subset(interesting.fst.raw.long.significant, significant == T), aes(fill=as.factor(significant)), xmin = -Inf, xmax = Inf, ymin = -Inf,ymax = Inf, alpha = 0.1) + geom_point(aes(colour=factor(position)), size=1) + facet_grid(pop~gene, scales='free') + scale_x_continuous('Distance from gene center', limits=c(-window_size, window_size),breaks=xbreaks, labels=xlabels) + scale_y_continuous(testtype.label) + opts(axis.title.y=theme_text(vjust=0.5, angle=90)) +  opts(axis.text.x=theme_text(angle=90, hjust=0.4 )) + stat_smooth(colour='blue', size=0.5, label='interpolation of data', method=loess, span=0.4) + scale_colour_manual(limits=c('upstream', 'inside_gene', 'downstream'), values=c('gray75', 'gray42', 'gray80')) +  opts(legend.position = 'none') 
+fst_freescales_significant_plot <- ggplot(data=interesting.fst.raw.long.significant, aes(x=distance_from_gene_center, y = value)) + geom_rect(data=subset(interesting.fst.raw.long.significant, significant == T), aes(fill=as.factor(significant)), xmin = -Inf, xmax = Inf, ymin = -Inf,ymax = Inf, alpha = 0.1) + geom_point(aes(colour=factor(position)), size=1) + facet_grid(pop~gene, scales='free') + scale_x_continuous('Distance from gene center', limits=c(-window_size, window_size),breaks=xbreaks, labels=xlabels) + scale_y_continuous(testtype.label) + opts(axis.title.y=theme_text(vjust=0.5, angle=90)) +  opts(axis.text.x=theme_text(angle=90, hjust=0.4 )) + stat_smooth(colour='blue', size=0.5, label='interpolation of data', method=loess, span=0.4) + scale_colour_manual(limits=c('upstream', 'inside_gene', 'downstream'), values=c('gray75', 'gray42', 'gray80')) +  opts(legend.position = 'none') 
+
+pdf("../results/plots/n-glycan/figure2_fst.pdf", 10, 8, page='a4r', title='fig 2')
+print(fst_fixedscales_nosignificant_plot)
+print(fst_freescales_nosignificant_plot)
+print(fst_fixedscales_significant_plot)
+print(fst_freescales_significant_plot)
+
+dev.off()
 
 # --- plot Fst, removing oce and ame ---
 #interesting.fst.raw.long.no_oce_ame <- subset(interesting.fst.raw.long, !pop %in% c("oce", "ame"))
 #ggplot(data=interesting.fst.raw.long.no_oce_ame, aes(x=distance_from_gene_center, y = value)) + geom_point(aes(colour=factor(position)), size=1) + facet_grid(pop~gene) + scale_x_continuous('Distance from gene center', limits=c(-window_size, window_size),breaks=xbreaks, labels=xlabels) + scale_y_continuous(testtype.label, limits=scale_limits) + opts(axis.title.y=theme_text(vjust=0.5, angle=90)) +  opts(axis.text.x=theme_text(angle=90, hjust=0.4 )) + stat_smooth(colour='blue', size=0.5, label='interpolation of data', method=loess, span=0.4) + scale_colour_manual(limits=c('upstream', 'inside_gene', 'downstream'), values=c('gray75', 'gray42', 'gray80')) +  opts(legend.position = 'none') 
-dev.off()
 
 pdf("../results/plots/n-glycan/figure3_iHS.pdf", 18, 8, page='a4r', title='fig 3')
 all_ihs <- read.table("../results/tables/n-glycan/ihs_by_snp_n-glycan_400000.txt", header=T)