+boop.dir <- function(working.dir, set.dir = FALSE){
+ if( !file.exists(working.dir)){
+ if( !dir.create(working.dir, recursive = TRUE) ){
+ stop(paste("Unable to create directory: ", working.dir, "!\n", sep = ''))
+get_split <- function(x, sp = "-", val = 1){
+ out.split = unlist(strsplit(x, split = sp, fixed = TRUE))[val]
+split.n.paste <- function(str, sp = ":", start = 1, end = 3, non.sequential = NULL, co = ':'){
+ boop <- strsplit(x = str, split = sp)
+ if(is.null(non.sequential)){
+ boop <- paste(boop[start:end], collapse = co)
+ if(!is.null(non.sequential)){
+ boop <- paste(boop[non.sequential], collapse = co)
+classify_based_on_TILs <- function(df){
+ df <- df[!is.na(df$cd8.score.danaher),]
+ #df <- df[!is.na(df$pathology_TILs),]
+ df_summary <- aggregate(df$pathology_TILs, by = list(df$immune_cluster), FUN = median, na.rm=TRUE)
+ df_summary$diff <- abs(tmpTILs - df_summary$x)
+ return(df_summary$Group.1[which.min(df_summary$diff)])
+cor.mtest <- function(mat, ...) {
+ p.mat <- lowCI.mat <- uppCI.mat <- matrix(NA, n, n)
+ diag(lowCI.mat) <- diag(uppCI.mat) <- 1
+ tmp <- cor.test(x = mat[,i], y = mat[,j], ...)
+ p.mat[i,j] <- p.mat[j,i] <- tmp$p.value
+ # only "pearson" method provides confidence intervals
+ if (!is.null(tmp$conf.int)) {
+ lowCI.mat[i,j] <- lowCI.mat[j,i] <- tmp$conf.int[1]
+ uppCI.mat[i,j] <- uppCI.mat[j,i] <- tmp$conf.int[2]
+plot.with.confidence <- function(x, y, x2 = NULL, y2 = NULL, lab = NULL, ci = 0.95, ...){
+ mini_function <- function(x, y, lab = NULL, ci, ...){
+ z <- c(which(is.na(x)), which(is.na(y)))
+ mod <- lm(y ~ x, data = df)
+ new.x <- seq(min(df$x), max(df$x), length.out=length(x))
+ #new.y <- seq(min(y), max(y), length.out = length(y))
+ preds <- predict(mod, newdata = data.frame(x = new.x), interval = 'confidence', level = ci)
+ polygon(c(rev(new.x), new.x), c(rev(preds[ ,3]), preds[ ,2]), col = 'grey90', border = NA)
+ lines(new.x, preds[ ,3], lty = 'dashed', col = 'red')
+ lines(new.x, preds[ ,2], lty = 'dashed', col = 'red')
+ cor.p <- cor.test(x,y, method = 's')$p.value
+ cor.r <- cor.test(x,y, method = 's')$estimate
+ text(x = 0.7*xmax, y = 0.93*ymax, labels = paste('p: ', formatC(cor.p, format = 'e', digits = 1), '\n', 'rho: ', round(cor.r, digits = 2), sep= ''), pos = 4)
+ # predict the original values
+ real.mod <- predict(mod, level = 0.95, interval = "confidence")
+ # and are the data points inside the CI or outside of it
+ inside.interval <- real.mod[,'lwr'] < y & y < real.mod[,'upr']
+ inside.patients <- lab[inside.interval]
+ outside.patients <- lab[!inside.interval]
+ outside.patients <- c()
+ full.model <- list(x, y, lab, new.x, preds, mod, inside.patients, outside.patients)
+ names(full.model) <- c('x', 'y', 'lab', 'new.x', 'preds', 'mod', 'inside.patients', 'outside.patients')
+ plot(x = c(x, x2), y = c(y, y2), type = 'n', ...)
+ mini_function(x = x, y = y, lab = lab, ci = ci, ...)
+ if(!is.null(x2) & !is.null(y2)){
+ mini_function(x = x2, y = y2, lab = lab, ci = ci, ...)
+heatmap.3 <- function(x,
+ Rowv = TRUE, Colv = if (symm) "Rowv" else TRUE,
+ dendrogram = c("both","row", "column", "none"),
+ scale = c("none","row", "column"),
+ revC = identical(Colv,"Rowv"),
+ symbreaks = max(x < 0, na.rm = TRUE) || scale != "none",
+ sepwidth = c(0.05, 0.05),
+ trace = c("none", "column","row", "both"),
+ hline = median(breaks),
+ vline = median(breaks),
+ side.height.fraction=0.3,
+ cexRow = 0.2 + 1/log10(nr),
+ cexCol = 0.2 + 1/log10(nc),
+ density.info = c("none", "histogram", "density"),
+ symkey = max(x < 0, na.rm = TRUE) || symbreaks,
+ KeyValueName="Value",...){
+ invalid <- function (x) {
+ if (missing(x) || is.null(x) || length(x) == 0)
+ return(all(sapply(x, invalid)))
+ scale01 <- function(x, low = min(x), high = max(x)) {
+ x <- (x - low)/(high - low)
+ scale <- if (symm && missing(scale))
+ dendrogram <- match.arg(dendrogram)
+ trace <- match.arg(trace)
+ density.info <- match.arg(density.info)
+ if (length(col) == 1 && is.character(col))
+ col <- get(col, mode = "function")
+ if (!missing(breaks) && (scale != "none"))
+ warning("Using scale=\"row\" or scale=\"column\" when breaks are",
+ "specified can produce unpredictable results.", "Please consider using only one or the other.")
+ if (is.null(Rowv) || is.na(Rowv))
+ if (is.null(Colv) || is.na(Colv))
+ else if (Colv == "Rowv" && !isTRUE(Rowv))
+ if (length(di <- dim(x)) != 2 || !is.numeric(x))
+ stop("`x' must be a numeric matrix")
+ if (nr <= 1 || nc <= 1)
+ stop("`x' must have at least 2 rows and 2 columns")
+ if (!is.numeric(margins) || length(margins) != 2)
+ stop("`margins' must be a numeric vector of length 2")
+ cellnote <- matrix("", ncol = ncol(x), nrow = nrow(x))
+ if (!inherits(Rowv, "dendrogram")) {
+ if (((!isTRUE(Rowv)) || (is.null(Rowv))) && (dendrogram %in%
+ if (is.logical(Colv) && (Colv))
+ else dedrogram <- "none"
+ warning("Discrepancy: Rowv is FALSE, while dendrogram is `",
+ dendrogram, "'. Omitting row dendogram.")
+ if (!inherits(Colv, "dendrogram")) {
+ if (((!isTRUE(Colv)) || (is.null(Colv))) && (dendrogram %in%
+ c("both", "column"))) {
+ if (is.logical(Rowv) && (Rowv))
+ else dendrogram <- "none"
+ warning("Discrepancy: Colv is FALSE, while dendrogram is `",
+ dendrogram, "'. Omitting column dendogram.")
+ if (inherits(Rowv, "dendrogram")) {
+ rowInd <- order.dendrogram(ddr)
+ else if (is.integer(Rowv)) {
+ hcr <- hclustfun(distfun(x))
+ ddr <- as.dendrogram(hcr)
+ ddr <- reorder(ddr, Rowv)
+ rowInd <- order.dendrogram(ddr)
+ if (nr != length(rowInd))
+ stop("row dendrogram ordering gave index of wrong length")
+ else if (isTRUE(Rowv)) {
+ Rowv <- rowMeans(x, na.rm = na.rm)
+ hcr <- hclustfun(distfun(x))
+ ddr <- as.dendrogram(hcr)
+ ddr <- reorder(ddr, Rowv)
+ rowInd <- order.dendrogram(ddr)
+ if (nr != length(rowInd))
+ stop("row dendrogram ordering gave index of wrong length")
+ if (inherits(Colv, "dendrogram")) {
+ colInd <- order.dendrogram(ddc)
+ else if (identical(Colv, "Rowv")) {
+ stop("Colv = \"Rowv\" but nrow(x) != ncol(x)")
+ colInd <- order.dendrogram(ddc)
+ else if (is.integer(Colv)) {
+ hcc <- hclustfun(distfun(if (symm)
+ ddc <- as.dendrogram(hcc)
+ ddc <- reorder(ddc, Colv)
+ colInd <- order.dendrogram(ddc)
+ if (nc != length(colInd))
+ stop("column dendrogram ordering gave index of wrong length")
+ else if (isTRUE(Colv)) {
+ Colv <- colMeans(x, na.rm = na.rm)
+ hcc <- hclustfun(distfun(if (symm)
+ ddc <- as.dendrogram(hcc)
+ ddc <- reorder(ddc, Colv)
+ colInd <- order.dendrogram(ddc)
+ if (nc != length(colInd))
+ stop("column dendrogram ordering gave index of wrong length")
+ retval$rowInd <- rowInd
+ retval$colInd <- colInd
+ retval$call <- match.call()
+ cellnote <- cellnote[rowInd, colInd]
+ labRow <- if (is.null(rownames(x)))
+ else labRow <- labRow[rowInd]
+ labCol <- if (is.null(colnames(x)))
+ else labCol <- labCol[colInd]
+ retval$rowMeans <- rm <- rowMeans(x, na.rm = na.rm)
+ retval$rowSDs <- sx <- apply(x, 1, sd, na.rm = na.rm)
+ x <- sweep(x, 1, sx, "/")
+ else if (scale == "column") {
+ retval$colMeans <- rm <- colMeans(x, na.rm = na.rm)
+ retval$colSDs <- sx <- apply(x, 2, sd, na.rm = na.rm)
+ x <- sweep(x, 2, sx, "/")
+ if (missing(breaks) || is.null(breaks) || length(breaks) < 1) {
+ if (missing(col) || is.function(col))
+ else breaks <- length(col) + 1
+ if (length(breaks) == 1) {
+ breaks <- seq(min(x, na.rm = na.rm), max(x, na.rm = na.rm),
+ extreme <- max(abs(x), na.rm = TRUE)
+ breaks <- seq(-extreme, extreme, length = breaks)
+ ncol <- length(breaks) - 1
+ if (class(col) == "function")
+ min.breaks <- min(breaks)
+ max.breaks <- max(breaks)
+ x[x < min.breaks] <- min.breaks
+ x[x > max.breaks] <- max.breaks
+ if (missing(lhei) || is.null(lhei))
+ if (missing(lwid) || is.null(lwid))
+ if (missing(lmat) || is.null(lmat)) {
+ lmat <- rbind(4:3, 2:1)
+ if (!missing(ColSideColors)) {
+ #if (!is.matrix(ColSideColors))
+ #stop("'ColSideColors' must be a matrix")
+ if (!is.character(ColSideColors) || nrow(ColSideColors) != nc)
+ stop("'ColSideColors' must be a matrix of nrow(x) rows")
+ lmat <- rbind(lmat[1, ] + 1, c(NA, 1), lmat[2, ] + 1)
+ #lhei <- c(lhei[1], 0.2, lhei[2])
+ lhei=c(lhei[1], side.height.fraction*ColSideColorsSize/2, lhei[2])
+ if (!missing(RowSideColors)) {
+ #if (!is.matrix(RowSideColors))
+ #stop("'RowSideColors' must be a matrix")
+ if (!is.character(RowSideColors) || ncol(RowSideColors) != nr)
+ stop("'RowSideColors' must be a matrix of ncol(x) columns")
+ lmat <- cbind(lmat[, 1] + 1, c(rep(NA, nrow(lmat) - 1), 1), lmat[,2] + 1)
+ #lwid <- c(lwid[1], 0.2, lwid[2])
+ lwid <- c(lwid[1], side.height.fraction*RowSideColorsSize/2, lwid[2])
+ if (length(lhei) != nrow(lmat))
+ stop("lhei must have length = nrow(lmat) = ", nrow(lmat))
+ if (length(lwid) != ncol(lmat))
+ stop("lwid must have length = ncol(lmat) =", ncol(lmat))
+ op <- par(no.readonly = TRUE)
+ layout(lmat, widths = lwid, heights = lhei, respect = FALSE)
+ if (!missing(RowSideColors)) {
+ if (!is.matrix(RowSideColors)){
+ par(mar = c(margins[1], 0, 0, 0.5))
+ image(rbind(1:nr), col = RowSideColors[rowInd], axes = FALSE)
+ par(mar = c(margins[1], 0, 0, 0.5))
+ rsc = t(RowSideColors[,rowInd, drop=F])
+ rsc.names = names(table(rsc))
+ for (rsc.name in rsc.names) {
+ rsc.colors[rsc.i] = rsc.name
+ rsc[rsc == rsc.name] = rsc.i
+ rsc = matrix(as.numeric(rsc), nrow = dim(rsc)[1])
+ image(t(rsc), col = as.vector(rsc.colors), axes = FALSE)
+ if (length(rownames(RowSideColors)) > 0) {
+ axis(1, 0:(dim(rsc)[2] - 1)/max(1,(dim(rsc)[2] - 1)), rownames(RowSideColors), las = 2, tick = FALSE)
+ if (!missing(ColSideColors)) {
+ if (!is.matrix(ColSideColors)){
+ par(mar = c(0.5, 0, 0, margins[2]))
+ image(cbind(1:nc), col = ColSideColors[colInd], axes = FALSE)
+ par(mar = c(0.5, 0, 0, margins[2]))
+ csc = ColSideColors[colInd, , drop=F]
+ csc.names = names(table(csc))
+ for (csc.name in csc.names) {
+ csc.colors[csc.i] = csc.name
+ csc[csc == csc.name] = csc.i
+ csc = matrix(as.numeric(csc), nrow = dim(csc)[1])
+ image(csc, col = as.vector(csc.colors), axes = FALSE)
+ if (length(colnames(ColSideColors)) > 0) {
+ axis(2, 0:(dim(csc)[2] - 1)/max(1,(dim(csc)[2] - 1)), colnames(ColSideColors), las = 2, tick = FALSE)
+ par(mar = c(margins[1], 0, 0, margins[2]))
+ cellnote <- t(cellnote)
+ cellnote <- cellnote[, iy]
+ image(1:nc, 1:nr, x, xlim = 0.5 + c(0, nc), ylim = 0.5 + c(0, nr), axes = FALSE, xlab = "", ylab = "", col = col, breaks = breaks, ...)
+ retval$rowDendrogram <- ddr
+ retval$colDendrogram <- ddc
+ retval$breaks <- breaks
+ if (!invalid(na.color) & any(is.na(x))) { # load library(gplots)
+ mmat <- ifelse(is.na(x), 1, NA)
+ image(1:nc, 1:nr, mmat, axes = FALSE, xlab = "", ylab = "",
+ col = na.color, add = TRUE)
+ axis(1, 1:nc, labels = labCol, las = 2, line = -0.5, tick = 0,
+ mtext(xlab, side = 1, line = margins[1] - 1.25)
+ axis(4, iy, labels = labRow, las = 2, line = -0.5, tick = 0,
+ mtext(ylab, side = 4, line = margins[2] - 1.25)
+ if (!missing(add.expr))
+ eval(substitute(add.expr))
+ for (csep in colsep) rect(xleft = csep + 0.5, ybottom = rep(0, length(csep)), xright = csep + 0.5 + sepwidth[1], ytop = rep(ncol(x) + 1, csep), lty = 1, lwd = 1, col = sepcolor, border = sepcolor)
+ for (rsep in rowsep) rect(xleft = 0, ybottom = (ncol(x) + 1 - rsep) - 0.5, xright = nrow(x) + 1, ytop = (ncol(x) + 1 - rsep) - 0.5 - sepwidth[2], lty = 1, lwd = 1, col = sepcolor, border = sepcolor)
+ min.scale <- min(breaks)
+ max.scale <- max(breaks)
+ x.scaled <- scale01(t(x), min.scale, max.scale)
+ if (trace %in% c("both", "column")) {
+ vline.vals <- scale01(vline, min.scale, max.scale)
+ abline(v = i - 0.5 + vline.vals, col = linecol,
+ xv <- rep(i, nrow(x.scaled)) + x.scaled[, i] - 0.5
+ yv <- 1:length(xv) - 0.5
+ lines(x = xv, y = yv, lwd = 1, col = tracecol, type = "s")
+ if (trace %in% c("both", "row")) {
+ hline.vals <- scale01(hline, min.scale, max.scale)
+ abline(h = i + hline, col = linecol, lty = 2)
+ yv <- rep(i, ncol(x.scaled)) + x.scaled[i, ] - 0.5
+ yv <- rev(c(yv[1], yv))
+ xv <- length(yv):1 - 0.5
+ lines(x = xv, y = yv, lwd = 1, col = tracecol, type = "s")
+ if (!missing(cellnote))
+ text(x = c(row(cellnote)), y = c(col(cellnote)), labels = c(cellnote),
+ col = notecol, cex = notecex)
+ par(mar = c(margins[1], 0, 0, 0))
+ if (dendrogram %in% c("both", "row")) {
+ plot(ddr, horiz = TRUE, axes = FALSE, yaxs = "i", leaflab = "none")
+ par(mar = c(0, 0, if (!is.null(main)) 5 else 0, margins[2]))
+ if (dendrogram %in% c("both", "column")) {
+ plot(ddc, axes = FALSE, xaxs = "i", leaflab = "none")
+ title(main, cex.main = 1.5 * op[["cex.main"]])
+ par(mar = c(5, 4, 2, 1), cex = 0.75)
+ max.raw <- max(abs(c(x, breaks)), na.rm = TRUE)
+ tmpbreaks[1] <- -max(abs(x), na.rm = TRUE)
+ tmpbreaks[length(tmpbreaks)] <- max(abs(x), na.rm = TRUE)
+ min.raw <- min(x, na.rm = TRUE)
+ max.raw <- max(x, na.rm = TRUE)
+ z <- seq(min.raw, max.raw, length = length(col))
+ image(z = matrix(z, ncol = 1), col = col, breaks = tmpbreaks,
+ xaxt = "n", yaxt = "n")
+ par(usr = c(0, 1, 0, 1))
+ xv <- scale01(as.numeric(lv), min.raw, max.raw)
+ axis(1, at = xv, labels = lv)
+ mtext(side = 1, "Row Z-Score", line = 2)
+ else if (scale == "column")
+ mtext(side = 1, "Column Z-Score", line = 2)
+ else mtext(side = 1, KeyValueName, line = 2)
+ if (density.info == "density") {
+ dens <- density(x, adjust = densadj, na.rm = TRUE)
+ omit <- dens$x < min(breaks) | dens$x > max(breaks)
+ dens$x <- dens$x[-omit]
+ dens$y <- dens$y[-omit]
+ dens$x <- scale01(dens$x, min.raw, max.raw)
+ lines(dens$x, dens$y/max(dens$y) * 0.95, col = denscol,
+ axis(2, at = pretty(dens$y)/max(dens$y) * 0.95, pretty(dens$y))
+ title("Color Key\nand Density Plot")
+ mtext(side = 2, "Density", line = 2)
+ else if (density.info == "histogram") {
+ h <- hist(x, plot = FALSE, breaks = breaks)
+ hx <- scale01(breaks, min.raw, max.raw)
+ hy <- c(h$counts, h$counts[length(h$counts)])
+ lines(hx, hy/max(hy) * 0.95, lwd = 1, type = "s",
+ axis(2, at = pretty(hy)/max(hy) * 0.95, pretty(hy))
+ title("Color Key\nand Histogram")
+ mtext(side = 2, "Count", line = 2)
+ else title("Color Key")
+ retval$colorTable <- data.frame(low = retval$breaks[-length(retval$breaks)],
+ high = retval$breaks[-1], color = retval$col)
+output.dir <- '~/Google Drive/SwantonLab/Rimages/test/'
+figure.dir <- paste(output.dir, 'figures-jan15/', sep = '')
+# load some data -- to be saved in an .RData file later #
+load('~/Google Drive/SwantonLab/Rimages/2019-01-25-allDataForPlots-again.RData')
+all_danaher <- immune.subsets[,c(grep(pattern = 'danah', colnames(immune.subsets), value = TRUE))]
+all_danaher <- all_danaher[,c(grep(pattern = 'resid', colnames(all_danaher), value = TRUE, invert = TRUE))]
+colnames(all_danaher) <- sapply(colnames(all_danaher), FUN = get_split, sp = '.score', val = 1)
+all_danaher_accepted <- all_danaher
+luad_danaher <- all_danaher_accepted[which(substr(rownames(all_danaher_accepted), 1, 8) %in% patient.summary$CRUKid[which(patient.summary$simple.hist %in% c('LUAD'))]),]
+luad_danaher_scaled <- scale(luad_danaher)
+lusc_danaher <- all_danaher_accepted[which(substr(rownames(all_danaher_accepted), 1, 8) %in% patient.summary$CRUKid[which(patient.summary$simple.hist == 'LUSC')]),]
+lusc_danaher_scaled <- scale(lusc_danaher)
+other_danaher <- all_danaher_accepted[which(substr(rownames(all_danaher_accepted), 1, 8) %in% patient.summary$CRUKid[which(patient.summary$simple.hist == 'other')]),]
+other_danaher_scaled <- scale(other_danaher)
+all_classification <- c()
+all_immune_distances <- c()
+hmcols <- colorRampPalette(c('dark blue', 'white','dark red'))(100)
+hm <- heatmap.3(t(lusc_danaher_scaled), trace = 'none', hclustfun = function(x) hclust(x, method= 'ward.D2'), dendrogram = 'column', col = hmcols, na.rm=TRUE)
+hm_d <- lapply(unique(substr(rownames(lusc_danaher_scaled), 1, 8)), FUN = function(x) {dist(lusc_danaher_scaled[grep(pattern = x, rownames(lusc_danaher_scaled)),,drop=FALSE])} )
+names(hm_d) <- unique(substr(rownames(lusc_danaher_scaled), 1, 8))
+all_immune_distances <- c(all_immune_distances, hm_d)
+tmp_patients <- unique(substr(rownames(hm$carpet), 1, 8))
+tmp_clusters <- cutree(hm$colDendrogram, k = 2)
+bp <- boxplot(cd8 ~ tmp_clusters, data = lusc_danaher_scaled, plot=FALSE)
+tmp_clusters <- ifelse(tmp_clusters == which.min(bp$stats[3,]), yes = 1, no = 2)
+all_clusters <- c(all_clusters, tmp_clusters)
+tmp_df <- as.data.frame(matrix(data = 0, ncol = length(tmp_patients), nrow = length(tmp_clusters), dimnames = list(names(tmp_clusters), tmp_patients)))
+for(x in rownames(tmp_df)){
+ tmp_df[x,substr(x, 1, 8)] <- tmp_clusters[x]
+tmp_classify <- apply(tmp_df, MARGIN = 2, table)
+tmp_classify_col <- ifelse(lapply(tmp_classify, length) == 3, yes = '#df9b2f', no = ifelse(unlist(lapply(tmp_classify, FUN = function(x) any(grepl(pattern = 1, x = names(x))))), yes = '#30ABDF', no = '#BD2131'))
+all_classification <- c(all_classification, tmp_classify_col)
+tmp_classify_col <- data.frame(tmp_classify_col, stringsAsFactors = FALSE)
+tmp_classify_col$tmp <- sapply(rownames(tmp_classify_col), FUN = function(x){which(colnames(tmp_df) == x)})
+lusc_order <- rownames(tmp_classify_col)[with(tmp_classify_col, order(factor(tmp_classify_col, levels = c('#30ABDF', '#df9b2f', '#BD2131')), tmp))]
+tmp_df <- tmp_df[,lusc_order]
+# and remove the unnecessary column and re-order the colors
+tmp_classify_col$tmp <- NULL
+tmp_classify_col <- tmp_classify_col[lusc_order,,drop=FALSE]
+col_list <- lapply(colnames(tmp_df), FUN = function(x){return(c("1" = '#30ABDF', "0" = "#e9e9e9", "2" = '#BD2131'))})
+names(col_list) <- colnames(tmp_df)
+ha <- HeatmapAnnotation(tmp_df, show_legend = FALSE, gap = unit(0, 'mm'),show_annotation_name = TRUE, which = 'column', gp = gpar(col = 'white'), annotation_name_gp = gpar(cex = 0.6, col = tmp_classify_col$tmp_classify_col), na_col = 'brown',
+hm_lusc <- Heatmap(t(lusc_danaher_scaled),
+ col = circlize::colorRamp2(c(-3.5, 0, 3.5), c('dark blue', "white", 'dark red')),
+ clustering_method_columns = 'ward.D2', clustering_method_rows = 'ward.D2',
+ show_column_dend = TRUE, show_row_dend = FALSE,
+ show_column_names = FALSE,
+ show_heatmap_legend = FALSE,
+ bottom_annotation = ha,
+ bottom_annotation_height = unit(4, 'in'),
+ row_names_gp = gpar(fontsize = 10))
+lusc_cell_type_order <- rev(colnames(lusc_danaher_scaled)[hm$rowInd])
+hm <- heatmap.3(t(luad_danaher_scaled[,lusc_cell_type_order]), trace = 'none', hclustfun = function(x) hclust(x, method= 'ward.D2'), dendrogram = 'column', col = hmcols, na.rm=TRUE, Rowv = FALSE)
+#hm <- heatmap.3(t(luad_danaher_scaled), trace = 'none', hclustfun = function(x) hclust(x, method= 'ward.D2'), dendrogram = 'column', col = hmcols, na.rm=TRUE)
+hm_d <- lapply(unique(substr(rownames(luad_danaher_scaled), 1, 8)), FUN = function(x) {dist(luad_danaher_scaled[grep(pattern = x, rownames(luad_danaher_scaled)),,drop=FALSE])} )
+names(hm_d) <- unique(substr(rownames(luad_danaher_scaled), 1, 8))
+all_immune_distances <- c(all_immune_distances, hm_d)
+tmp_patients <- unique(substr(rownames(hm$carpet), 1, 8))
+tmp_clusters <- cutree(hm$colDendrogram, k = 2)
+bp <- boxplot(cd8 ~ tmp_clusters, data = luad_danaher_scaled, plot=FALSE)
+tmp_clusters <- ifelse(tmp_clusters == which.min(bp$stats[3,]), yes = 1, no = 2)
+all_clusters <- c(all_clusters, tmp_clusters)
+tmp_df <- as.data.frame(matrix(data = 0, ncol = length(tmp_patients), nrow = length(tmp_clusters), dimnames = list(names(tmp_clusters), tmp_patients)))
+for(x in rownames(tmp_df)){
+ tmp_df[x,substr(x, 1, 8)] <- tmp_clusters[x]
+tmp_classify <- apply(tmp_df, MARGIN = 2, table)
+tmp_classify_col <- ifelse(lapply(tmp_classify, length) == 3, yes = '#df9b2f', no = ifelse(unlist(lapply(tmp_classify, FUN = function(x) any(grepl(pattern = 1, x = names(x))))), yes = '#30ABDF', no = '#BD2131'))
+all_classification <- c(all_classification, tmp_classify_col)
+tmp_classify_col <- data.frame(tmp_classify_col, stringsAsFactors = FALSE)
+tmp_classify_col$tmp <- sapply(rownames(tmp_classify_col), FUN = function(x){which(colnames(tmp_df) == x)})
+luad_order <- rownames(tmp_classify_col)[with(tmp_classify_col, order(factor(tmp_classify_col, levels = c('#30ABDF', '#df9b2f', '#BD2131')), tmp))]
+tmp_df <- tmp_df[,luad_order]
+# and remove the unnecessary column and re-order the colors
+tmp_classify_col$tmp <- NULL
+tmp_classify_col <- tmp_classify_col[luad_order,,drop=FALSE]
+col_list <- lapply(colnames(tmp_df), FUN = function(x){return(c("1" = '#30ABDF', "0" = "#e9e9e9", "2" = '#BD2131'))})
+names(col_list) <- colnames(tmp_df)
+ha <- HeatmapAnnotation(tmp_df, show_legend = FALSE, gap = unit(0, 'mm'),show_annotation_name = TRUE, which = 'column', gp = gpar(col = 'white'), annotation_name_gp = gpar(cex = 0.6, col = tmp_classify_col$tmp_classify_col), na_col = 'brown',
+hm_luad <- Heatmap(t(luad_danaher_scaled[,lusc_cell_type_order]),
+ col = circlize::colorRamp2(c(-3.5, 0, 3.5), c('dark blue', "white", 'dark red')),
+ clustering_method_columns = 'ward.D2', cluster_rows = FALSE,
+ show_column_dend = TRUE, show_row_dend = FALSE,
+ show_column_names = FALSE,
+ show_heatmap_legend = FALSE,
+ bottom_annotation = ha,
+ bottom_annotation_height = unit(4, 'in'),
+ row_names_gp = gpar(fontsize = 10))
+hm <- heatmap.3(t(other_danaher_scaled), trace = 'none', hclustfun = function(x) hclust(x, method= 'ward.D2'), dendrogram = 'none', col = hmcols)
+hm_d <- lapply(unique(substr(rownames(other_danaher_scaled), 1, 8)), FUN = function(x) {dist(other_danaher_scaled[grep(pattern = x, rownames(other_danaher_scaled)),,drop=FALSE])} )
+names(hm_d) <- unique(substr(rownames(other_danaher_scaled), 1, 8))
+all_immune_distances <- c(all_immune_distances, hm_d)
+tmp_patients <- unique(substr(rownames(hm$carpet), 1, 8))
+tmp_clusters <- cutree(hm$colDendrogram, k = 2)
+bp <- boxplot(cd8 ~ tmp_clusters, data = other_danaher_scaled, plot=FALSE)
+tmp_clusters <- ifelse(tmp_clusters == which.min(bp$stats[3,]), yes = 1, no = 2)
+all_clusters <- c(all_clusters, tmp_clusters)
+tmp_df <- as.data.frame(matrix(data = 0, ncol = length(tmp_patients), nrow = length(tmp_clusters), dimnames = list(names(tmp_clusters), tmp_patients)))
+for(x in rownames(tmp_df)){
+ tmp_df[x,substr(x, 1, 8)] <- tmp_clusters[x]
+tmp_classify <- alply(.data = tmp_df, .margins = 2, .fun = table)
+names(tmp_classify) <- attr(tmp_classify, "split_labels")$X1
+tmp_classify_col <- ifelse(lapply(tmp_classify, length) == 3, yes = '#df9b2f', no = ifelse(unlist(lapply(tmp_classify, FUN = function(x) any(grepl(pattern = 1, x = names(x))))), yes = '#30ABDF', no = '#BD2131'))
+all_classification <- c(all_classification, tmp_classify_col)
+tmp_classify_col <- data.frame(tmp_classify_col, stringsAsFactors = FALSE)
+tmp_classify_col$tmp <- sapply(rownames(tmp_classify_col), FUN = function(x){which(colnames(tmp_df) == x)})
+other_order <- rownames(tmp_classify_col)[with(tmp_classify_col, order(factor(tmp_classify_col, levels = c('#30ABDF', '#df9b2f', '#BD2131')), tmp))]
+tmp_df <- tmp_df[,other_order]
+# and remove the unnecessary column and re-order the colors
+tmp_classify_col$tmp <- NULL
+tmp_classify_col <- tmp_classify_col[other_order,,drop=FALSE]
+col_list <- lapply(colnames(tmp_df), FUN = function(x){return(c("1" = '#30ABDF', "0" = "#e9e9e9", "2" = '#BD2131'))})
+names(col_list) <- colnames(tmp_df)
+ha <- HeatmapAnnotation(tmp_df, show_legend = FALSE, gap = unit(0, 'mm'),show_annotation_name = TRUE, which = 'column', gp = gpar(col = 'white'), annotation_name_gp = gpar(cex = 0.6, col = tmp_classify_col$tmp_classify_col), na_col = 'brown',
+hm_other <- Heatmap(t(other_danaher_scaled),
+ col = circlize::colorRamp2(c(-3.5, 0, 3.5), c('dark blue', "white", 'dark red')),
+ clustering_method_columns = 'ward.D2', clustering_method_rows = 'ward.D2',
+ show_column_dend = TRUE, show_row_dend = FALSE,
+ show_column_names = FALSE,
+ show_heatmap_legend = FALSE,
+ bottom_annotation = ha,
+ bottom_annotation_height = unit(4, 'in'),
+ row_names_gp = gpar(fontsize = 10))
+tx100_immune_cluster <- ifelse(is.na(all_clusters), yes = NA, no = ifelse(all_clusters == 1, yes = 'low', no = 'high'))
+names(tx100_immune_cluster) <- names(all_clusters)
+tx100_immune_classification <- ifelse(is.na(all_classification), yes = NA, no = ifelse(all_classification == '#BD2131', yes = 'high', no = ifelse(all_classification == '#df9b2f', yes = 'mixed', no = 'low')))
+names(tx100_immune_classification) <- names(all_classification)
+# add clusters to patient summary
+patient.summary$immune_classification <- tx100_immune_classification[match(patient.summary$CRUKid, names(tx100_immune_classification))]
+patient.summary.region$immune_cluster <- tx100_immune_cluster[match(patient.summary.region$CRUKidRegion, names(tx100_immune_cluster))]
+patient.summary.region$immune_classification <- patient.summary$immune_classification[match(patient.summary.region$CRUKid, patient.summary$CRUKid)]
+# salvage the samples without RNAseq...
+salvaged_clusters <- c()
+for(x in patient.summary.region$CRUKidRegion[is.na(patient.summary.region$cd8.score.danaher)]){
+ tmpTILs <- patient.summary.region[x,'pathology_TILs']
+ if(is.na(tmpTILs)){salvaged_clusters <- c(salvaged_clusters, NA)}
+ tmp_hist <- patient.summary.region[x,'simple.hist']
+ tmp_sub <- patient.summary.region[x,'subtype']
+ if(tmp_hist %in% c('LUSC', 'other', 'LUAD')){
+ df <- patient.summary.region[which(patient.summary.region$simple.hist == tmp_hist),]
+ out <- classify_based_on_TILs(df)
+ salvaged_clusters <- c(salvaged_clusters, out)
+names(salvaged_clusters) <- patient.summary.region$CRUKidRegion[is.na(patient.summary.region$cd8.score.danaher)]
+salvaged_classification <- sapply(unique(patient.summary.region$CRUKid[is.na(patient.summary.region$cd8.score.danaher)]), FUN = function(x){
+ new_clusters <- salvaged_clusters[grep(pattern = x, names(salvaged_clusters))]
+ old_clusters <- tx100_immune_cluster[grep(pattern = x, names(tx100_immune_cluster))]
+ all_clusters <- c(new_clusters, old_clusters)
+ all_clusters <- all_clusters[!is.na(all_clusters)]
+ if(length(all_clusters) == 0){return(NA)}
+ if(length(table(all_clusters)) == 1){return(names(table(all_clusters)))}
+ if(length(table(all_clusters)) > 1){return('mixed')}
+# combine the two approaches
+patient.summary.region$salvaged_clusters <- salvaged_clusters[match(patient.summary.region$CRUKidRegion, names(salvaged_clusters))]
+patient.summary.region$salvaged_classification <- salvaged_classification[match(patient.summary.region$CRUKid, names(salvaged_classification))]
+patient.summary$salvaged_classification <- salvaged_classification[match(patient.summary$CRUKid, names(salvaged_classification))]
+# save the original clustering
+patient.summary.region$orig_immune_cluster <- patient.summary.region$immune_cluster
+patient.summary.region$orig_immune_classification <- patient.summary.region$immune_classification
+patient.summary$orig_immune_classification <- patient.summary$immune_classification
+# and update immune clusters/immune classification
+patient.summary.region$immune_cluster[is.na(patient.summary.region$immune_cluster)] <- patient.summary.region$salvaged_clusters[is.na(patient.summary.region$immune_cluster)]
+patient.summary$immune_classification <- sapply(1:nrow(patient.summary), FUN = function(x){
+ old <- patient.summary$orig_immune_classification[x]
+ new <- patient.summary$salvaged_classification[x]
+ if(is.na(new)){return(old)}
+patient.summary.region$immune_classification <- patient.summary$immune_classification[match(patient.summary.region$CRUKid, patient.summary$CRUKid)]
+## if we want to check the results when we don't use TILs
+# patient.summary$immune_classification <- patient.summary$orig_immune_classification
+# patient.summary.region$immune_classification <- patient.summary.region$orig_immune_classification
+# patient.summary.region$immune_cluster <- patient.summary.region$orig_immune_cluster
+patient.summary$high_immune <- ifelse(patient.summary$immune_classification == 'high', yes = TRUE, no = FALSE)
+patient.summary$low_evasion <- ifelse(patient.summary$high_immune == TRUE | (patient.summary$immune_edited == FALSE & patient.summary$HLA_disruption == FALSE), yes = TRUE, no = FALSE)
+mutTableRegion$immune_classification <- patient.summary$immune_classification[match(mutTableRegion$CRUKid, patient.summary$CRUKid)]
+mutTableRegion$immune_cluster <- patient.summary.region$immune_cluster[match(mutTableRegion$CRUKRegionID, patient.summary.region$CRUKidRegion)]
+mutTableAll.together.filtered$immune_classification <- patient.summary$immune_classification[match(mutTableAll.together.filtered$CRUKid, patient.summary$CRUKid)]
+keep_to_add$immune_classification <- patient.summary$immune_classification[match(keep_to_add$CRUKid, patient.summary$CRUKid)]
+keep_to_add$immune_cluster <- patient.summary.region$immune_cluster[match(keep_to_add$CRUKRegionID, patient.summary.region$CRUKidRegion)]
+ plot(total_comparisons[which(total_comparisons$simple.hist == 'LUAD'),'genomic'], total_comparisons[which(total_comparisons$simple.hist == 'LUAD'),'immune'], xlab = 'Pairwise genomic distance', ylab = 'Pairwise immune distance', pch = 16, col = total_comparisons[which(total_comparisons$simple.hist == 'LUAD'),'color'], ylim = c(0,10), las = 1)
+ p.val <- cor.test(total_comparisons[which(total_comparisons$simple.hist == 'LUAD'),'genomic'], total_comparisons[which(total_comparisons$simple.hist == 'LUAD'),'immune'], method = 's')
+ legend(x = 'topright', legend = paste('p: ', formatC(p.val$p.value, digits = 1, format = 'e'), '\n', 'rho: ', formatC(p.val$estimate, digits = 1, format = 'e'), sep= ''), bty = 'n')
+ points(total_comparisons[which(total_comparisons$simple.hist == 'LUSC'),'genomic'], total_comparisons[which(total_comparisons$simple.hist == 'LUSC'),'immune'], xlab = 'Pairwise genomic distance', ylab = 'Pairwise immune distance', pch = 16, col = total_comparisons[which(total_comparisons$simple.hist == 'LUSC'),'color'])
+ p.val <- cor.test(total_comparisons[which(total_comparisons$simple.hist == 'LUSC'),'genomic'], total_comparisons[which(total_comparisons$simple.hist == 'LUSC'),'immune'], method = 's')
+ legend(x = 'bottomright', legend = paste('p: ', formatC(p.val$p.value, digits = 1, format = 'e'), '\n', 'rho: ', formatC(p.val$estimate, digits = 1, format = 'e'), sep= ''), bty = 'n')
+ tmp <- gsub(pattern = 'low', replacement = '#30ABDF', x = gsub(pattern = 'high', replacement = '#BD2131', x = patient.summary.region$immune_cluster[which(patient.summary.region$simple.hist == 'LUAD')]))
+ boxplot(shannon.ith ~ factor(immune_classification, levels = c('low', 'mixed', 'high')), data = patient.summary.region[which(patient.summary.region$simple.hist == 'LUAD'),], las = 1, ylim = c(0,2.4), main = 'Lung adeno.', ylab = 'Shannon entropy')
+ beeswarm(shannon.ith ~ factor(immune_classification, levels = c('low', 'mixed', 'high')), data = patient.summary.region[which(patient.summary.region$simple.hist == 'LUAD'),], add = TRUE, pch = 16, pwcol = tmp, cex = 1.6)
+ p.val <- wilcox.test(shannon.ith ~ immune_classification == 'low', data = patient.summary.region[which(patient.summary.region$simple.hist == 'LUAD'),])$p.value
+ legend('bottomright', paste('p -- low-mixed/high : ', formatC(p.val, format = 'e', digits = 1), sep = ''), bty = 'n')
+ tmp <- gsub(pattern = 'low', replacement = '#30ABDF', x = gsub(pattern = 'high', replacement = '#BD2131', x = patient.summary.region$immune_cluster[which(patient.summary.region$simple.hist == 'LUSC')]))
+ boxplot(shannon.ith ~ factor(immune_classification, levels = c('low', 'mixed', 'high')), data = patient.summary.region[which(patient.summary.region$simple.hist == 'LUSC'),], las = 1, ylim = c(0,2.4), main = 'Lung squam.', ylab = 'Shannon entropy')
+ beeswarm(shannon.ith ~ factor(immune_classification, levels = c('low', 'mixed', 'high')), data = patient.summary.region[which(patient.summary.region$simple.hist == 'LUSC'),], add = TRUE, pch = 16, pwcol = tmp, cex = 1.6)
+ p.val <- wilcox.test(shannon.ith ~ immune_classification == 'low', data = patient.summary.region[which(patient.summary.region$simple.hist == 'LUSC'),])$p.value
+ legend('bottomright', paste('p -- low-mixed/high : ', formatC(p.val, format = 'e', digits = 1), sep = ''), bty = 'n')
+ tmp <- patient.summary[,c('ie.all.updated', 'ie.clonal.updated', 'ie.subclonal.updated', 'simple.hist', 'immune_classification')]
+ tmp <- tmp[complete.cases(tmp),]
+ tmp <- tmp[which(tmp$simple.hist != 'other'),]
+ p.val <- t.test(tmp$ie.clonal.updated[which(tmp$immune_classification == 'high')], tmp$ie.subclonal.updated[which(tmp$immune_classification == 'high')], paired = TRUE)$p.value # less IE in subclones (because clonal IE - subclonal IE < 0)
+ stripchart(tmp[which(tmp$immune_classification == 'high'),c('ie.clonal.updated', 'ie.subclonal.updated')], vertical = TRUE, pch= 16, at = c(0.9, 1.2), xlim =c(0.7,1.4), ylim = c(0.7, 1.22), las = 1, group.names = c('Clonal', 'Subclonal'), cex = 1.5, ylab = 'Immunoediting')
+ segments(x0 = 0.9, y0 = tmp$ie.clonal.updated[which(tmp$immune_classification == 'high')], x1 = 1.2, y1 = tmp$ie.subclonal.updated[which(tmp$immune_classification == 'high')], col = '#BD2131', lwd = 3)
+ legend(legend = paste('paired t-test p: ', formatC(p.val, format = 'e', digits = 1), sep = ''), x = 'topleft', bty = 'n', cex = 0.9)
+ p.val <- t.test(tmp$ie.clonal.updated[which(tmp$immune_classification == 'mixed')], tmp$ie.subclonal.updated[which(tmp$immune_classification == 'mixed')], paired = TRUE)$p.value # less IE in subclones (because clonal IE - subclonal IE < 0)
+ stripchart(tmp[which(tmp$immune_classification == 'mixed'),c('ie.clonal.updated', 'ie.subclonal.updated')], vertical = TRUE, pch= 16, at = c(0.9, 1.2), xlim =c(0.7,1.4), ylim = c(0.7, 1.22), las = 1, group.names = c('Clonal', 'Subclonal'), cex = 1.5, ylab = 'Immunoediting')
+ segments(x0 = 0.9, y0 = tmp$ie.clonal.updated[which(tmp$immune_classification == 'mixed')], x1 = 1.2, y1 = tmp$ie.subclonal.updated[which(tmp$immune_classification == 'mixed')], col = '#df9b2f', lwd = 3)
+ legend(legend = paste('paired t-test p: ', formatC(p.val, format = 'e', digits = 1), sep = ''), x = 'topleft', bty = 'n', cex = 0.9)
+ p.val <- t.test(tmp$ie.clonal.updated[which(tmp$immune_classification == 'low')], tmp$ie.subclonal.updated[which(tmp$immune_classification == 'low')], paired = TRUE)$p.value # less IE in subclones (because clonal IE - subclonal IE < 0)
+ stripchart(tmp[which(tmp$immune_classification == 'low'),c('ie.clonal.updated', 'ie.subclonal.updated')], vertical = TRUE, pch= 16, at = c(0.9, 1.2), xlim =c(0.7,1.4), ylim = c(0.7, 1.22), las = 1, group.names = c('Clonal', 'Subclonal'), cex = 1.5, ylab = 'Immunoediting')
+ segments(x0 = 0.9, y0 = tmp$ie.clonal.updated[which(tmp$immune_classification == 'low')], x1 = 1.2, y1 = tmp$ie.subclonal.updated[which(tmp$immune_classification == 'low')], col = '#30ABDF', lwd = 3)
+ legend(legend = paste('paired t-test p: ', formatC(p.val, format = 'e', digits = 1), sep = ''), x = 'topleft', bty = 'n', cex = 0.9)
+ tmp <- mutTableRegion[which(mutTableRegion$mutation_id %in% mutTableAll.together.filtered$mutation_id[!is.na(mutTableAll.together.filtered$CPNremove)]),]
+ tmp <- tmp[which(tmp$nonsynonymous == TRUE),]
+ tmp <- tmp[which(tmp$DriverMut == FALSE),]
+ tmp <- tmp[,c('CRUKRegionID', 'mutation_id', 'count', 'strong.count', 'any.HLA.loss', 'immune_classification', 'immune_cluster', 'nonsynonymous', 'DriverMut', 'CPNremove')]
+ to_add <- keep_to_add[,c('CRUKRegionID', 'mutation_id', 'count', 'strong.count', 'any.HLA.loss', 'immune_classification', 'immune_cluster', 'nonsynonymous', 'DriverMut', 'CPNremove')]
+ tmp <- rbind(tmp, to_add)
+ tmp_nodup <- tmp[!duplicated(tmp$mutation_id),]
+ to_barplot <- sort(table(substr(tmp_nodup$CRUKRegionID, 1, 8)[which(tmp_nodup$count == 1 & tmp_nodup$CPNremove == TRUE)]), decreasing=TRUE)
+ barplot(to_barplot, las = 1, names = NA, ylim = c(0,40), col = 'black', ylab = 'Number CN loss neo', xlab = 'Tumor sample')
+ props <- to_barplot/(patient.summary$ClonalNeo[match(names(to_barplot), patient.summary$CRUKid)])
+ barplot(rbind(props, 0.5-props), las = 1, ylim = c(0,0.5), col = c('black', 'lightgrey'), ylab = '% CN loss neo', xlab = 'Tumor sample', border = NA)
+ tmp <- mutTableRegion[which(mutTableRegion$mutation_id %in% mutTableAll.together.filtered$mutation_id[!is.na(mutTableAll.together.filtered$CPNremove)]),]
+ tmp <- tmp[which(tmp$nonsynonymous == TRUE),]
+ tmp <- tmp[which(tmp$DriverMut == FALSE),]
+ tmp <- tmp[,c('regionMutID', 'mutation_id', 'count', 'strong.count', 'any.HLA.loss', 'immune_classification', 'immune_cluster', 'nonsynonymous', 'DriverMut', 'CPNremove')]
+ to_add <- keep_to_add[,c('regionMutID', 'mutation_id', 'count', 'strong.count', 'any.HLA.loss', 'immune_classification', 'immune_cluster', 'nonsynonymous', 'DriverMut', 'CPNremove')]
+ tmp <- rbind(tmp, to_add)
+ # tmp$RegionID <- sapply(tmp$regionMutID, split.n.paste, sp = ':', start=1, end=2, co=':')
+ # tmp$SampleID <- sapply(tmp$regionMutID, split.n.paste, sp = ':', start=1, end=1, co=':')
+ # # downsample to 1 region per patient
+ # all_patients <- unique(tmp$SampleID)
+ # for(x in all_patients){keep_regions <- c(keep_regions, sample(grep(pattern = x, unique(tmp$RegionID), value = TRUE), size = 1))}
+ # tmp <- tmp[which(tmp$RegionID %in% keep_regions),]
+ tmp2 <- table(tmp$count, tmp$CPNremove)
+ f.test <- fisher.test(tmp2)
+ ests <- c(ests, f.test$estimate)
+ p.vals <- c(p.vals, f.test$p.value)
+ ors1 <- c(ors1, f.test$conf.int[1])
+ ors2 <- c(ors2, f.test$conf.int[2])
+ neo_ratio <- c(neo_ratio, paste(as.character(tmp2['1','TRUE']), '/', as.character(tmp2['1', 'FALSE'] + tmp2['1', 'TRUE']), sep = ''))
+ non_neo_ratio <- c(non_neo_ratio, paste(as.character(tmp2['0','TRUE']), '/', as.character(tmp2['0', 'FALSE'] + tmp2['0', 'TRUE']), sep = ''))
+ tmp2 <- table(tmp$count[which(tmp$immune_cluster %in% c('high'))], tmp$CPNremove[which(tmp$immune_cluster %in% c('high'))])
+ f.test <- fisher.test(tmp2)
+ ests <- c(ests, f.test$estimate)
+ p.vals <- c(p.vals, f.test$p.value)
+ ors1 <- c(ors1, f.test$conf.int[1])
+ ors2 <- c(ors2, f.test$conf.int[2])
+ neo_ratio <- c(neo_ratio, paste(as.character(tmp2['1','TRUE']), '/', as.character(tmp2['1', 'FALSE'] + tmp2['1', 'TRUE']), sep = ''))
+ non_neo_ratio <- c(non_neo_ratio, paste(as.character(tmp2['0','TRUE']), '/', as.character(tmp2['0', 'FALSE'] + tmp2['0', 'TRUE']), sep = ''))
+ tmp2 <- table(tmp$count[which(tmp$immune_cluster %in% c('low'))], tmp$CPNremove[which(tmp$immune_cluster %in% c('low'))])
+ f.test <- fisher.test(tmp2)
+ ests <- c(ests, f.test$estimate)
+ p.vals <- c(p.vals, f.test$p.value)
+ ors1 <- c(ors1, f.test$conf.int[1])
+ ors2 <- c(ors2, f.test$conf.int[2])
+ neo_ratio <- c(neo_ratio, paste(as.character(tmp2['1','TRUE']), '/', as.character(tmp2['1', 'FALSE'] + tmp2['1', 'TRUE']), sep = ''))
+ non_neo_ratio <- c(non_neo_ratio, paste(as.character(tmp2['0','TRUE']), '/', as.character(tmp2['0', 'FALSE'] + tmp2['0', 'TRUE']), sep = ''))
+ bp <- barplot(ests, col = c('#00441b', '#BD2131', '#30ABDF'), border = FALSE, las = 1, cex.axis = 0.9, cex.names = 0.7, ylab = 'OR -- neo in region of CPN loss', ylim = c(0,3), names = c('All', 'High', 'Low'), axes = FALSE)
+ axis(side = 2, at = c(0,1,2,3), labels = c(0,1,2,3), las = 2)
+ mtext(text = paste('p = ', formatC(p.vals, digits = 1, format = 'e'), sep = ''), side = 1, line = 2, cex = 0.7, at = bp[,1])
+ segments(x0 = bp[1,1], x1 = bp[1,1], y0 = ors1[1], y1 = ors2[1])
+ segments(x0 = bp[2,1], x1 = bp[2,1], y0 = ors1[2], y1 = ors2[2])
+ segments(x0 = bp[3,1], x1 = bp[3,1], y0 = ors1[3], y1 = ors2[3])
+ abline(h = 1, lty = 2, col = 'black')
+ mtext(text = neo_ratio, side = 1, line = 3, cex = 0.7, at = bp[,1])
+ mtext(text = non_neo_ratio, side = 1, line = 4, cex = 0.7, at = bp[,1])
+ tmp <- patient.summary[,c('ie.all.updated', 'ie.clonal.updated', 'ie.subclonal.updated', 'simple.hist', 'immune_classification', 'anyCPN_remove')]
+ tmp <- tmp[complete.cases(tmp),]
+ tmp <- tmp[which(tmp$simple.hist != 'other'),]
+ p.val <- t.test(tmp$ie.clonal.updated[which(tmp$immune_classification == 'low'& tmp$anyCPN_remove==TRUE)], tmp$ie.subclonal.updated[which(tmp$immune_classification == 'low' & tmp$anyCPN_remove == TRUE)], paired = TRUE)$p.value # less IE in subclones (because clonal IE - subclonal IE < 0)
+ stripchart(tmp[which(tmp$immune_classification == 'low'& tmp$anyCPN_remove==TRUE),c('ie.clonal.updated', 'ie.subclonal.updated')], vertical = TRUE, pch= 16, at = c(0.9, 1.2), xlim =c(0.7,1.4), ylim = c(0.7, 1.22), las = 1, group.names = c('Clonal', 'Subclonal'), cex = 1.5, ylab = 'Immunoediting')
+ segments(x0 = 0.9, y0 = tmp$ie.clonal.updated[which(tmp$immune_classification == 'low'& tmp$anyCPN_remove==TRUE)], x1 = 1.2, y1 = tmp$ie.subclonal.updated[which(tmp$immune_classification == 'low'& tmp$anyCPN_remove==TRUE)], col = '#30ABDF', lwd = 3)
+ legend(legend = paste('paired t-test p: ', formatC(p.val, format = 'e', digits = 1), sep = ''), x = 'topleft', bty = 'n', cex = 0.9)
+ p.val <- t.test(tmp$ie.clonal.updated[which(tmp$immune_classification == 'low'& tmp$anyCPN_remove==FALSE)], tmp$ie.subclonal.updated[which(tmp$immune_classification == 'low' & tmp$anyCPN_remove == FALSE)], paired = TRUE)$p.value # less IE in subclones (because clonal IE - subclonal IE < 0)
+ stripchart(tmp[which(tmp$immune_classification == 'low'& tmp$anyCPN_remove==FALSE),c('ie.clonal.updated', 'ie.subclonal.updated')], vertical = TRUE, pch= 16, at = c(0.9, 1.2), xlim =c(0.7,1.4), ylim = c(0.7, 1.22), las = 1, group.names = c('Clonal', 'Subclonal'), cex = 1.5, ylab = 'Immunoediting')
+ segments(x0 = 0.9, y0 = tmp$ie.clonal.updated[which(tmp$immune_classification == 'low'& tmp$anyCPN_remove==FALSE)], x1 = 1.2, y1 = tmp$ie.subclonal.updated[which(tmp$immune_classification == 'low'& tmp$anyCPN_remove==FALSE)], col = '#30ABDF', lwd = 3)
+ legend(legend = paste('paired t-test p: ', formatC(p.val, format = 'e', digits = 1), sep = ''), x = 'topleft', bty = 'n', cex = 0.9)
+ layout(mat = matrix(c(1,2,3), nrow = 3), heights = c(0.7, 0.2, 0.1))
+ tmp <- patient.summary[,c('ClonalNeo', 'SubclonalNeo', 'TotalExpressedNeo', 'ClonalExpressedNeo', 'SubclonalExpressedNeo', 'immune_classification', 'simple.hist')]
+ tmp$simple.hist <- factor(tmp$simple.hist, levels = c('LUAD', 'LUSC', 'other'))
+ tmp <- tmp[which(tmp$simple.hist != 'other'),]
+ tmp <- tmp[!is.na(tmp$TotalExpressedNeo),]
+ tmp <- tmp[-which(tmp$ClonalNeo + tmp$SubclonalNeo == 0),]
+ tmp <- tmp[order(tmp$ClonalNeo, decreasing = TRUE),]
+ tmp <- tmp[order(tmp$simple.hist),]
+ tmp$ClonalExpressedNeoDif <- tmp$ClonalNeo-tmp$ClonalExpressedNeo
+ tmp$SubclonalExpressedNeoDif <- tmp$SubclonalNeo-tmp$SubclonalExpressedNeo
+ bp <- barplot(unname(t(tmp[,c('ClonalExpressedNeo', 'ClonalExpressedNeoDif')])), width = 0.5, space = 2, col = c(c('#3182bd', '#3182bd90')), border = FALSE, las = 2, cex.axis = 0.9, ylab = 'Number Neo', ylim = c(0,800))
+ barplot(t(tmp[,c('SubclonalExpressedNeo', 'SubclonalExpressedNeoDif')]), width = 0.5, space = c(3,rep(2, nrow(tmp)-1)), col = c(c('#de2d26', '#de2d2690')), border = FALSE, add = TRUE, axes = FALSE, axisnames = FALSE)
+ legend('topright', legend = c('Clonal Neo', 'Clonal Neo (exp)', 'Subclonal Neo', 'Subclonal Neo (exp)'), fill = c('#3182bd90', '#3182bd', '#de2d2690', '#de2d26'), border = FALSE, bty = 'n', ncol = 2, cex = 0.7)
+ tmp <- mutTableAll.together.filtered[which(mutTableAll.together.filtered$count == 1),]
+ tmp <- tmp[!is.na(tmp$ITHState_RNA),]
+ tmp <- tmp[which(tmp$PyCloneClonal == 'C'),]
+ to_plot <- as.data.frame.matrix(table(tmp$CRUKid, tmp$ITHState_RNA), stringsAsFactors = FALSE)
+ to_plot <- to_plot/(rowSums(to_plot))
+ to_plot <- to_plot[ord,]
+ barplot(rbind(to_plot[,1], 1-to_plot[,1]), col = c('black', 'lightgrey'), border = FALSE, width = 0.5, axisnames = FALSE, las = 2, cex.names = 0.7, ylab = 'Fraction of clonal neoantigens ubiquitously expressed')
+ tmp_col <- ifelse(patient.summary$simple.hist[match(rownames(to_plot), patient.summary$CRUKid)] == 'LUAD', yes = 'dark blue', no = ifelse(patient.summary$simple.hist[match(rownames(to_plot), patient.summary$CRUKid)] == 'LUSC', yes = 'dark red', no = 'grey'))
+ barplot(height = rep(1, nrow(to_plot)), width = 0.5, axisnames = FALSE, col = tmp_col, border = FALSE, axes = FALSE)
+ to_plot$immune_classification <- patient.summary$immune_classification[match(rownames(to_plot), patient.summary$CRUKid)]
+ tmp <- mutTableAll.together.filtered[which(mutTableAll.together.filtered$nonsynonymous == FALSE),]
+ tmp <- tmp[!is.na(tmp$ITHState_RNA),]
+ tmp <- tmp[which(tmp$PyCloneClonal == 'C'),]
+ tmp <- tmp[which(tmp$DriverMut == FALSE),]
+ not_to_plot <- as.data.frame.matrix(table(tmp$CRUKid, tmp$ITHState_RNA), stringsAsFactors = FALSE)
+ not_to_plot <- not_to_plot/(rowSums(not_to_plot))
+ not_to_plot <- not_to_plot[ord,]
+ patient.summary$fraction_clonal_neo_ubiquitously_expressed <- to_plot[,1][match(patient.summary$CRUKid, rownames(to_plot))]
+ patient.summary$immune_classification <- factor(patient.summary$immune_classification, levels = c('low', 'mixed', 'high'))
+ boxplot(fraction_clonal_neo_ubiquitously_expressed ~ immune_classification, data = patient.summary[which(patient.summary$simple.hist %in% c('LUAD', 'LUSC')),], ylab = 'Fraction of clonal neoantigens UBIQUITOUSLY expressed', las = 1, ylim = c(0,1), outpch = NA)
+ beeswarm(fraction_clonal_neo_ubiquitously_expressed ~ immune_classification, data = patient.summary[which(patient.summary$simple.hist %in% c('LUAD', 'LUSC')),], method = 'swarm', corral = 'wrap', add = TRUE, pch = 16, pwcol = ifelse(patient.summary$simple.hist[which(patient.summary$simple.hist %in% c('LUAD', 'LUSC'))] == 'LUAD', yes = 'darkblue', no = ifelse(patient.summary$simple.hist[which(patient.summary$simple.hist %in% c('LUAD', 'LUSC'))] == 'LUSC', yes = 'firebrick4', no = 'lightgrey')))
+ p.val <- wilcox.test(fraction_clonal_neo_ubiquitously_expressed ~ immune_classification == 'low', data = patient.summary[which(patient.summary$simple.hist %in% c('LUAD', 'LUSC')),])$p.value
+ legend(x = 'topright', legend = paste('p: ', formatC(p.val, format = 'e', digits = 1), sep = ''), bty = 'n', cex = 1.3)
+ tmp <- mutTableAll.together.filtered[which(mutTableAll.together.filtered$ITHState_RNA %in% c(1,2,3, 'FALSE')),]
+ tmp2 <- table(tmp[which(tmp$nonsynonymous == TRUE & tmp$any.HLA.loss == TRUE),'lost_in_rna'], tmp[which(tmp$nonsynonymous == TRUE & tmp$any.HLA.loss == TRUE),'count'])
+ f.test <- fisher.test(tmp2)
+ ests <- c(ests, f.test$estimate)
+ p.vals <- c(p.vals, f.test$p.value)
+ ors1 <- c(ors1, f.test$conf.int[1])
+ ors2 <- c(ors2, f.test$conf.int[2])
+ tmp2 <- table(tmp[which(tmp$nonsynonymous == TRUE & tmp$any.HLA.loss == FALSE),'lost_in_rna'], tmp[which(tmp$nonsynonymous == TRUE & tmp$any.HLA.loss == FALSE),'count'])
+ f.test <- fisher.test(tmp2)
+ ests <- c(ests, f.test$estimate)
+ p.vals <- c(p.vals, f.test$p.value)
+ ors1 <- c(ors1, f.test$conf.int[1])
+ ors2 <- c(ors2, f.test$conf.int[2])
+ tmp2 <- table(tmp[which(tmp$nonsynonymous == TRUE & tmp$immune_classification %in% c('high') & tmp$any.HLA.loss == TRUE),'lost_in_rna'], tmp[which(tmp$nonsynonymous == TRUE & tmp$immune_classification %in% c('high') & tmp$any.HLA.loss == TRUE),'count'])
+ f.test <- fisher.test(tmp2)
+ ests <- c(ests, f.test$estimate)
+ p.vals <- c(p.vals, f.test$p.value)
+ ors1 <- c(ors1, f.test$conf.int[1])
+ ors2 <- c(ors2, f.test$conf.int[2])
+ tmp2 <- table(tmp[which(tmp$nonsynonymous == TRUE & tmp$immune_classification %in% c('high') & tmp$any.HLA.loss == FALSE),'lost_in_rna'], tmp[which(tmp$nonsynonymous == TRUE & tmp$immune_classification %in% c('high') & tmp$any.HLA.loss == FALSE),'count'])
+ f.test <- fisher.test(tmp2)
+ ests <- c(ests, f.test$estimate)
+ p.vals <- c(p.vals, f.test$p.value)
+ ors1 <- c(ors1, f.test$conf.int[1])
+ ors2 <- c(ors2, f.test$conf.int[2])
+ tmp2 <- table(tmp[which(tmp$nonsynonymous == TRUE & tmp$immune_classification %in% c('mixed') & tmp$any.HLA.loss == TRUE),'lost_in_rna'], tmp[which(tmp$nonsynonymous == TRUE & tmp$immune_classification %in% c('mixed') & tmp$any.HLA.loss == TRUE),'count'])
+ f.test <- fisher.test(tmp2)
+ ests <- c(ests, f.test$estimate)
+ p.vals <- c(p.vals, f.test$p.value)
+ ors1 <- c(ors1, f.test$conf.int[1])
+ ors2 <- c(ors2, f.test$conf.int[2])
+ tmp2 <- table(tmp[which(tmp$nonsynonymous == TRUE & tmp$immune_classification %in% c('mixed') & tmp$any.HLA.loss == FALSE),'lost_in_rna'], tmp[which(tmp$nonsynonymous == TRUE & tmp$immune_classification %in% c('mixed') & tmp$any.HLA.loss == FALSE),'count'])
+ f.test <- fisher.test(tmp2)
+ ests <- c(ests, f.test$estimate)
+ p.vals <- c(p.vals, f.test$p.value)
+ ors1 <- c(ors1, f.test$conf.int[1])
+ ors2 <- c(ors2, f.test$conf.int[2])
+ tmp2 <- table(tmp[which(tmp$nonsynonymous == TRUE & tmp$immune_classification %in% c('low') & tmp$any.HLA.loss == TRUE),'lost_in_rna'], tmp[which(tmp$nonsynonymous == TRUE & tmp$immune_classification %in% c('low') & tmp$any.HLA.loss == TRUE),'count'])
+ f.test <- fisher.test(tmp2)
+ ests <- c(ests, f.test$estimate)
+ p.vals <- c(p.vals, f.test$p.value)
+ ors1 <- c(ors1, f.test$conf.int[1])
+ ors2 <- c(ors2, f.test$conf.int[2])
+ tmp2 <- table(tmp[which(tmp$nonsynonymous == TRUE & tmp$immune_classification %in% c('low') & tmp$any.HLA.loss == FALSE),'lost_in_rna'], tmp[which(tmp$nonsynonymous == TRUE & tmp$immune_classification %in% c('low') & tmp$any.HLA.loss == FALSE),'count'])
+ f.test <- fisher.test(tmp2)
+ ests <- c(ests, f.test$estimate)
+ p.vals <- c(p.vals, f.test$p.value)
+ ors1 <- c(ors1, f.test$conf.int[1])
+ ors2 <- c(ors2, f.test$conf.int[2])
+ bp <- barplot(ests, col = c(sapply(X = c('#00441b', '#BD2131', '#df9b2f', '#30ABDF'), FUN = function(x) rep(x, 2))), border = FALSE, las = 1, cex.axis = 0.9, cex.names = 0.7, ylab = 'OR -- neoantigen expression lost in RNA', ylim = c(0,1.5), names = '', axes = FALSE, space = rep(c(0.6,0.2),4))
+ axis(side = 2, at = c(0,0.5, 1.0, 1.5), labels = c(0, 0.5, 1.0, 1.5), las = 1, cex.axis = 0.8)
+ segments(x0 = bp[1,1], x1 = bp[1,1], y0 = ors1[1], y1 = ors2[1])
+ segments(x0 = bp[2,1], x1 = bp[2,1], y0 = ors1[2], y1 = ors2[2])
+ segments(x0 = bp[3,1], x1 = bp[3,1], y0 = ors1[3], y1 = ors2[3])
+ segments(x0 = bp[4,1], x1 = bp[4,1], y0 = ors1[4], y1 = ors2[4])
+ segments(x0 = bp[5,1], x1 = bp[5,1], y0 = ors1[5], y1 = ors2[5])
+ segments(x0 = bp[6,1], x1 = bp[6,1], y0 = ors1[6], y1 = ors2[6])
+ segments(x0 = bp[7,1], x1 = bp[7,1], y0 = ors1[7], y1 = ors2[7])
+ segments(x0 = bp[8,1], x1 = bp[8,1], y0 = ors1[8], y1 = ors2[8])
+ mtext(text = paste('p = ', round(p.vals, digits = 3), sep = ''), side = 1, line = 2, cex = 0.7, at = bp[,1])
+ mtext(text = c('+HLA LOH', '-HLA LOH'), side = 1, line = 1, cex = 0.7, at = bp[,1])
+ tmp_names <- factor(rep(c('All', 'High', 'Hetero.', 'Low'), each = 2), levels = c('All', 'High', 'Hetero.', 'Low'))
+ mtext(text = c('All', 'High', 'Hetero.', 'Low'), side = 1, line = 3, cex = 0.7, at = aggregate(bp, by = list(tmp_names), mean)$V1)
+ abline(h = 1, lty = 2, col = 'black')
+ tmp2 <- table(mutTableAll.together.filtered$tcga_gene_minimally_expressed_95[which(mutTableAll.together.filtered$nonsynonymous == TRUE)], mutTableAll.together.filtered$count[which(mutTableAll.together.filtered$nonsynonymous == TRUE)])
+ f.test <- fisher.test(tmp2)
+ ests <- c(ests, f.test$estimate)
+ p.vals <- c(p.vals, f.test$p.value)
+ ors1 <- c(ors1, f.test$conf.int[1])
+ ors2 <- c(ors2, f.test$conf.int[2])
+ tmp2 <- table(mutTableAll.together.filtered$tcga_gene_minimally_expressed_95[which(mutTableAll.together.filtered$nonsynonymous == TRUE & mutTableAll.together.filtered$immune_classification == 'high')], mutTableAll.together.filtered$count[which(mutTableAll.together.filtered$nonsynonymous == TRUE & mutTableAll.together.filtered$immune_classification == 'high')])
+ f.test <- fisher.test(tmp2)
+ ests <- c(ests, f.test$estimate)
+ p.vals <- c(p.vals, f.test$p.value)
+ ors1 <- c(ors1, f.test$conf.int[1])
+ ors2 <- c(ors2, f.test$conf.int[2])
+ tmp2 <- table(mutTableAll.together.filtered$tcga_gene_minimally_expressed_95[which(mutTableAll.together.filtered$nonsynonymous == TRUE & mutTableAll.together.filtered$immune_classification == 'mixed')], mutTableAll.together.filtered$count[which(mutTableAll.together.filtered$nonsynonymous == TRUE & mutTableAll.together.filtered$immune_classification == 'mixed')])
+ f.test <- fisher.test(tmp2)
+ ests <- c(ests, f.test$estimate)
+ p.vals <- c(p.vals, f.test$p.value)
+ ors1 <- c(ors1, f.test$conf.int[1])
+ ors2 <- c(ors2, f.test$conf.int[2])
+ tmp2 <- table(mutTableAll.together.filtered$tcga_gene_minimally_expressed_95[which(mutTableAll.together.filtered$nonsynonymous == TRUE & mutTableAll.together.filtered$immune_classification == 'low')], mutTableAll.together.filtered$count[which(mutTableAll.together.filtered$nonsynonymous == TRUE & mutTableAll.together.filtered$immune_classification == 'low')])
+ f.test <- fisher.test(tmp2)
+ ests <- c(ests, f.test$estimate)
+ p.vals <- c(p.vals, f.test$p.value)
+ ors1 <- c(ors1, f.test$conf.int[1])
+ ors2 <- c(ors2, f.test$conf.int[2])
+ bp <- barplot(ests, col = c('#00441b', '#BD2131', '#df9b2f', '#30ABDF'), border = FALSE, las = 1, cex.axis = 0.9, cex.names = 0.7, ylab = 'OR -- neoantigens depleted in (TCGA) expressed genes', ylim = c(0,1), names = c('All', 'High', 'Hetero.', 'Low'), axes = FALSE)
+ axis(side = 2, at = c(0,0.5, 1.0), labels = c(0, 0.5, 1.0), las = 1, cex.axis = 0.8)
+ segments(x0 = bp[1,1], x1 = bp[1,1], y0 = ors1[1], y1 = ors2[1])
+ segments(x0 = bp[2,1], x1 = bp[2,1], y0 = ors1[2], y1 = ors2[2])
+ segments(x0 = bp[3,1], x1 = bp[3,1], y0 = ors1[3], y1 = ors2[3])
+ segments(x0 = bp[4,1], x1 = bp[4,1], y0 = ors1[4], y1 = ors2[4])
+ mtext(text = paste('p = ', formatC(p.vals, digits = 1, format = 'e'), sep = ''), side = 1, line = 2, cex = 0.7, at = bp[,1])
+ abline(h = 1, lty = 2, col = 'black')
+ for(cancer in c('LUAD', 'LUSC')){
+ colOrder <- c("CRUK0020", "CRUK0016", "CRUK0039", "CRUK0051", "CRUK0027", "CRUK0060", "CRUK0024", "CRUK0035", "CRUK0047", "CRUK0052", "CRUK0026", "CRUK0008", "CRUK0038", "CRUK0045", "CRUK0031", "CRUK0034", "CRUK0006", "CRUK0033", "CRUK0030", "CRUK0015", "CRUK0014", "CRUK0019", "CRUK0007", "CRUK0040", "CRUK0017", "CRUK0001", "CRUK0009", "CRUK0003", "CRUK0029", "CRUK0061", "CRUK0044", "CRUK0004", "CRUK0012", "CRUK0005", "CRUK0041", "CRUK0018", "CRUK0046", "CRUK0048", "CRUK0032", "CRUK0010", "CRUK0013", "CRUK0028", "CRUK0002", "CRUK0022", "CRUK0037", "CRUK0055", "CRUK0023", "CRUK0036", "CRUK0042", "CRUK0053", "CRUK0057", "CRUK0025", "CRUK0050", "CRUK0021", "CRUK0058", "CRUK0011", "CRUK0043", "CRUK0049", "CRUK0054", "CRUK0056", "CRUK0059")
+ colOrder <- c(c("CRUK0086","CRUK0074","CRUK0068","CRUK0079","CRUK0069","CRUK0064","CRUK0083","CRUK0072","CRUK0073","CRUK0081","CRUK0065","CRUK0078","CRUK0067","CRUK0070","CRUK0062","CRUK0082","CRUK0076","CRUK0071","CRUK0066","CRUK0084","CRUK0090","CRUK0075","CRUK0063","CRUK0089","CRUK0085","CRUK0088","CRUK0087","CRUK0092","CRUK0077","CRUK0080","CRUK0093","CRUK0091"))
+ tmp_df <- data.frame(colnames(all_condensed_alterations)[which(substr(colnames(all_condensed_alterations), 1, 8) %in% colOrder)], stringsAsFactors = FALSE)
+ tmp_df$SampleID <- substr(tmp_df[,1], 1, 8)
+ tmp_df$SampleID <- factor(tmp_df$SampleID, levels = colOrder)
+ tmp_df <- tmp_df[order(tmp_df$SampleID),]
+ condensed_alterations <- all_condensed_alterations[c('HLALOH', 'APC'),tmp_df[,1]]
+ # add in some clinical things
+ tmp_clinical <- matrix(NA, nrow=7, ncol=ncol(condensed_alterations), dimnames = list(c('ClonalNeo', 'SubclonalNeo', 'immune', 'PFS', 'packyears', 'IE', 'low_evasion'), colnames(condensed_alterations)))
+ tmp_clinical['ClonalNeo',] <- patient.summary.region$ClonalNeo[match(colnames(tmp_clinical), patient.summary.region$CRUKidRegion)]
+ tmp_clinical['SubclonalNeo',] <- patient.summary.region$SubclonalNeo[match(colnames(tmp_clinical), patient.summary.region$CRUKidRegion)]
+ tmp_clinical['immune',] <- patient.summary.region$immune_cluster[match(colnames(tmp_clinical), patient.summary.region$CRUKidRegion)]
+ tmp_clinical['IE',] <- patient.summary$immune_edited[match(substr(colnames(tmp_clinical), 1, 8), patient.summary$CRUKid)]
+ tmp_clinical['PFS',] <- patient.summary$allan_final_dsf_time[match(substr(colnames(tmp_clinical), 1, 8), patient.summary$CRUKid)]
+ cens <- patient.summary$allan_final_dsf[match(substr(colnames(tmp_clinical), 1, 8), patient.summary$CRUKid)]
+ tmp_clinical['packyears',] <- patient.summary$packyears[match(substr(colnames(tmp_clinical), 1, 8), patient.summary$CRUKid)]
+ tmp_clinical['low_evasion',] <- patient.summary$low_evasion[match(substr(colnames(tmp_clinical), 1, 8), patient.summary$CRUKid)]
+ color.gradient <- function(x, colors=c("dark blue","white","dark red"), colsteps=81) {
+ return( colorRampPalette(colors) (colsteps) [ findInterval(x, seq(min(x),max(x), length.out=colsteps)) ] )
+ pfs_times <- seq(from = 34, to = 1275, by = 1)
+ pfs_times_col_1 <- color.gradient(pfs_times, colors = c('#980043', '#d4b9da'), colsteps = 1242)
+ pfs_times_col_0 <- color.gradient(pfs_times, colors = c('#ccece6', '#006d2c'), colsteps = 1242)
+ tmp_smoke <- as.numeric(sort(unique(patient.summary[,'packyears'])))*100
+ tmp_smoke <- tmp_smoke[!is.na(tmp_smoke)]
+ packyearcols <- color.gradient(tmp_smoke, colors = c('lightblue', 'black'))
+ tmp_smoke <- tmp_smoke/100
+ layout(matrix(1:(nrow(condensed_alterations) + nrow(tmp_clinical)), ncol = 1))
+ par(mar = c(2,2,1,2), oma = c(4,4,4,4))
+ tmp_acceptedRegions_df <- acceptedRegions_df[match(colnames(condensed_alterations), acceptedRegions_df$CRUKidRegion),]
+ rbPal_nsMut <- colorRampPalette(c('#f0f9e8','#bae4bc','#7bccc4','#43a2ca','#0868ac'))
+ tmp_clinical_colors <- tmp_clinical
+ tmp_clinical_colors['ClonalNeo',] <- rbPal_nsMut(10)[as.numeric(cut(as.numeric(tmp_clinical['ClonalNeo',]),breaks=10))]
+ tmp_clinical_colors['SubclonalNeo',] <- rbPal_nsMut(10)[as.numeric(cut(as.numeric(tmp_clinical['SubclonalNeo',]),breaks=10))]
+ tmp_clinical_colors['immune',] <- ifelse(is.na(tmp_clinical_colors['immune',]), yes = 'darkgrey', no = ifelse(tmp_clinical_colors['immune',] == 'high', yes = '#BD2131', no = '#30ABDF'))
+ tmp_clinical_colors['PFS',which(cens == 0)] <- pfs_times_col_0[match(tmp_clinical['PFS',which(cens == 0)], pfs_times)]
+ tmp_clinical_colors['PFS',which(cens == 1)] <- pfs_times_col_1[match(tmp_clinical['PFS',which(cens == 1)], pfs_times)]
+ tmp_clinical_colors['packyears',] <- packyearcols[match(as.numeric(tmp_clinical_colors['packyears',]), tmp_smoke)]
+ tmp_clinical_colors['IE',] <- ifelse(tmp_clinical['IE',] == 'TRUE', yes = '#6a3d9a', no = '#cab2d6')
+ tmp_clinical_colors['low_evasion',] <- ifelse(tmp_clinical['low_evasion',] == 'TRUE', yes = '#2356A7', no = '#BE1E2D')
+ tmp_clinical_colors['low_evasion',] <- ifelse(is.na(tmp_clinical_colors['low_evasion',]), yes = 'darkgrey', no = tmp_clinical_colors['low_evasion',])
+ bp <- barplot(tmp_clinical[1:2,],
+ border = NA,axes = FALSE, axisnames = FALSE,
+ col = c('#377eb8', '#e41a1c'), ylim = c(0,1100),
+ space = unlist(sapply(tmp_acceptedRegions_df$nregions[!duplicated(tmp_acceptedRegions_df$CRUKid)], FUN = function(x){c(2,rep(0.2,(x-1)))}))
+ axis(side = 2, at = c(0,500,1000), labels = c(0,500, 1000), line = 1, las = 1)
+ mtext(text = 'Neo', side=2,line = -25, cex = 5, las = 2, outer = FALSE)
+ sapply(3:7, FUN = function(x){
+ bp <- barplot(rep(1,ncol(tmp_clinical_colors)),
+ border = NA,axes = FALSE,
+ col = tmp_clinical_colors[x,],
+ space = unlist(sapply(tmp_acceptedRegions_df$nregions[!duplicated(tmp_acceptedRegions_df$CRUKid)], FUN = function(x){c(2,rep(0.2,(x-1)))}))
+ sapply(1:nrow(condensed_alterations), FUN = function(x){
+ bp <- barplot(rep(1,ncol(condensed_alterations)),border = NA,axes = FALSE,
+ col = ifelse(as.numeric(condensed_alterations[x,]) == 1, yes = '#008000', no = ifelse(as.numeric(condensed_alterations[x,]) == -1, yes = 'blue', no = 'lightgrey')),
+ space = unlist(sapply(tmp_acceptedRegions_df$nregions[!duplicated(tmp_acceptedRegions_df$CRUKid)], FUN = function(x){c(2,rep(0.2,(x-1)))}))
+ mtext(text = rownames(condensed_alterations)[x], side=2,line = -25, cex = 5, las = 2, outer = FALSE, col = 'black')
+ bp <- barplot(rep(1,ncol(condensed_alterations)),border = NA,axes = FALSE,space = unlist(sapply(tmp_acceptedRegions_df$nregions[!duplicated(tmp_acceptedRegions_df$CRUKid)], FUN = function(x){c(2,rep(0.2,(x-1)))})), plot = FALSE)
+ nr <- tmp_acceptedRegions_df$nregions[!duplicated(tmp_acceptedRegions_df$CRUKid)]
+ names(nr) <- unique(tmp_acceptedRegions_df$CRUKid)
+ text_start <- sapply(names(nr), FUN = function(x) {bp[(sum(nr[1:which(names(nr) == x)]) - nr[which(names(nr) == x)] + 1),1]} )
+ mtext(text = patient.summary$CRUKid[match(substr(names(nr), 3, nchar(names(nr))), patient.summary$CRUKid)], side=1,line = -1, cex = 5, las = 2, outer = FALSE, at = text_start, col = 'black')
+ tmp <- patient.summary[which(patient.summary$simple.hist %in% c('LUAD', 'LUSC')),]
+ tmp_low_clonal <- tmp[which(tmp$high_clonal == FALSE),]
+ survplot(Surv(allan_final_dsf_time, allan_final_dsf == "1") ~ low_evasion,
+ main = "LUAD&LUSC\nlow immune evasion",
+ col = c('darkred', 'darkblue'),
+ lwd = 1, las = 1, cex.axis = 0.8)
+ survplot(Surv(allan_final_dsf_time, allan_final_dsf == "1") ~ low_evasion,
+ main = "LUAD&LUSC\nlow immune evasion, low clonal neo",
+ col = c('darkred', 'darkblue'),
+ lwd = 1, las = 1, cex.axis = 0.8)
+ tmp$low_evasion_clonal <- ifelse(tmp$low_evasion == TRUE | tmp$high_clonal == TRUE, yes =TRUE, no = FALSE)
+ survplot(Surv(allan_final_dsf_time, allan_final_dsf == "1") ~ low_evasion_clonal,
+ main = "LUAD&LUSC\nlow immune evasion, high clonal neo",
+ col = c('darkred', 'darkblue'),
+ lwd = 1, las = 1, cex.axis = 0.8)
+ boxplot(cd8.score.danaher ~ as.character(simple.hist), data = patient.summary.region[which(patient.summary.region$simple.hist %in% c('LUAD', 'LUSC')),], las = 1, ylab = 'Danaher CD8+ Score')
+ beeswarm(cd8.score.danaher ~ as.character(simple.hist), data = patient.summary.region[which(patient.summary.region$simple.hist %in% c('LUAD', 'LUSC')),], add = TRUE, pch = 16)
+ p.val <- wilcox.test(cd8.score.danaher ~ simple.hist, patient.summary.region[which(patient.summary.region$simple.hist %in% c('LUAD', 'LUSC')),])$p.value
+ legend(x = 'topright', legend = paste('p: ', formatC(p.val, format = 'e', digits = 1), sep = ''), bty = 'n', cex = 1.3)
+ tmp <- patient.summary.region
+ boxplot(pathology_TILs ~ factor(orig_immune_cluster, levels = c('low', 'high')), data = tmp, las = 1, ylab = 'pathology estimated TILs', xlab = 'immune cluster')
+ beeswarm(pathology_TILs ~ factor(orig_immune_cluster, levels = c('low', 'high')), data = tmp, pch = 16, add = TRUE, cex = 1.5, pwcol = ifelse(is.na(tmp$orig_immune_cluster), yes = 'light grey', no = ifelse(tmp$orig_immune_cluster == 'high', yes = '#BD2131', no = '#30ABDF')))
+ p.val <- wilcox.test(pathology_TILs ~ orig_immune_cluster, data = tmp)$p.value
+ legend(x = 'topright', legend = paste('p: ', formatC(p.val, format = 'e', digits = 1), sep = ''), bty = 'n', cex = 1.3)
+ tmp_all_danaher_accepted <- all_danaher_accepted[,c('cd8', 'cd4', 'bcell', 'cd45', 'cyto' ,'dend', 'mast', 'nkcd56dim', 'nk', 'tcells', 'th1')]
+ tmp_all_danaher_accepted$MDSC_jiang <- patient.summary.region$MDSC_jiang[match(rownames(tmp_all_danaher_accepted), patient.summary.region$CRUKidRegion)]
+ tmp_all_danaher_accepted$CAF_jiang <- patient.summary.region$CAF_jiang[match(rownames(tmp_all_danaher_accepted), patient.summary.region$CRUKidRegion)]
+ tmp_all_danaher_accepted$TAM.M2_jiang <- patient.summary.region$TAM.M2_jiang[match(rownames(tmp_all_danaher_accepted), patient.summary.region$CRUKidRegion)]
+ corr <- cor.mtest(tmp_all_danaher_accepted, method = 's')
+ M <- cor(tmp_all_danaher_accepted, method = 's')
+ corrplot(M, method = 'square', p.mat = corr$p, diag = TRUE, lowCI.mat = corr$lowCI, uppCI.mat = corr$uppCI, type = 'upper')
+ boxplot(TAM.M2_jiang ~ immune_cluster, data = patient.summary.region, las = 1, ylab = 'TAM M2 (Jiang)', xlab = 'Immune classification', out.pch = NA)
+ beeswarm(TAM.M2_jiang ~ immune_cluster, data = patient.summary.region, pch = 16, add = TRUE, col = 'black', method = 'swarm', corral = 'wrap')
+ p.val <- wilcox.test(TAM.M2_jiang ~ immune_cluster, data = patient.summary.region)$p.value
+ legend(x = 'bottomright', legend = paste('p: ', formatC(p.val, format = 'e', digits = 1), sep = ''), bty = 'n', cex = 1.3)
+ boxplot(MDSC_jiang ~ immune_cluster, data = patient.summary.region, las = 1, ylab = 'MDSC (Jiang)', xlab = 'Immune classification', out.pch = NA)
+ beeswarm(MDSC_jiang ~ immune_cluster, data = patient.summary.region, pch = 16, add = TRUE, col = 'black', method = 'swarm', corral = 'wrap')
+ p.val <- wilcox.test(MDSC_jiang ~ immune_cluster, data = patient.summary.region)$p.value
+ legend(x = 'bottomright', legend = paste('p: ', formatC(p.val, format = 'e', digits = 1), sep = ''), bty = 'n', cex = 1.3)
+ patient.summary.region$tmp_class <- ifelse(patient.summary.region$orig_immune_cluster == 'high' & patient.summary.region$pathology_TILs > 70, yes = 'high_high', no = ifelse(patient.summary.region$orig_immune_cluster == 'high' & patient.summary.region$pathology_TILs <= 20, yes = 'high_low', no = ifelse(patient.summary.region$orig_immune_cluster == 'low' & patient.summary.region$pathology_TILs <= 15, yes = 'low_low', no = ifelse(patient.summary.region$orig_immune_cluster == 'low' & patient.summary.region$pathology_TILs >= 48, yes = 'low_high', no = NA))))
+ patient.summary$low_high_region <- ifelse(patient.summary$CRUKid %in% patient.summary.region$CRUKid[which(patient.summary.region$tmp_class == 'low_high')], yes = TRUE, no = FALSE)
+ boxplot(cd8.score.danaher ~ tmp_class, data = patient.summary.region[which(patient.summary.region$tmp_class %in% c('low_low', 'low_high') & patient.summary.region$simple.hist == 'LUAD'),], las = 1, ylab = 'CD8+ score (Danaher)', ylim = c(0,4.5))
+ beeswarm(cd8.score.danaher ~ tmp_class, data = patient.summary.region[which(patient.summary.region$tmp_class %in% c('low_low', 'low_high') & patient.summary.region$simple.hist == 'LUAD'),], add=TRUE, pch = 16, cex = 1.4)
+ p.val <- wilcox.test(cd8.score.danaher ~ tmp_class, data = patient.summary.region[which(patient.summary.region$tmp_class %in% c('low_low', 'low_high') & patient.summary.region$simple.hist == 'LUAD'),])$p.value
+ legend(x = 'topleft', legend = paste('p: ', formatC(p.val, format = 'e', digits = 1), sep = ''), bty = 'n', cex = 1.3)
+ boxplot(treg.score.danaher ~ tmp_class, data = patient.summary.region[which(patient.summary.region$tmp_class %in% c('low_low', 'low_high') & patient.summary.region$simple.hist == 'LUAD'),], las = 1, ylab = 'Treg score (Danaher)', ylim = c(0,2.5))
+ beeswarm(treg.score.danaher ~ tmp_class, data = patient.summary.region[which(patient.summary.region$tmp_class %in% c('low_low', 'low_high') & patient.summary.region$simple.hist == 'LUAD'),], add=TRUE, pch = 16, cex = 1.4)
+ p.val <- wilcox.test(treg.score.danaher ~ tmp_class, data = patient.summary.region[which(patient.summary.region$tmp_class %in% c('low_low', 'low_high') & patient.summary.region$simple.hist == 'LUAD'),])$p.value
+ legend(x = 'topleft', legend = paste('p: ', formatC(p.val, format = 'e', digits = 1), sep = ''), bty = 'n', cex = 1.3)
+ layout(mat = matrix(c(1,2,0,0,3,4), nrow = 2), heights = c(0.9,0.1), widths = c(1,0.2,1))
+ par(oma = c(3.5,2,0,1)+0.1, mar = c(0,2.5,0,0)+0.5)
+ tmp <- patient.summary.region[which(patient.summary.region$simple.hist == 'LUAD'),]
+ tmp <- tmp[!is.na(tmp$pathology_TILs),]
+ boop <- with(tmp, reorder(CRUKid, pathology_TILs, median, na.rm = TRUE))
+ bp <- boxplot(pathology_TILs ~ boop, ylim = c(0,100), data = tmp, las = 2, space = 0.3, cex.axis = 1, cex.lab = 0.8, names = FALSE, frame.plot = FALSE, at = 1:length(levels(boop)), outpch = NA)
+ beeswarm(pathology_TILs ~ boop, data = tmp, pch = 16, add = TRUE, cex = 1.5, pwcol = ifelse(is.na(tmp$orig_immune_cluster), yes = 'light grey', no = ifelse(tmp$orig_immune_cluster == 'high', yes = '#BD2131', no = '#30ABDF')))
+ mtext(text = 'pathology TILs', side = 2, line = 2.5, cex = 0.8)
+ legend('topleft', legend = c('immune high cluster', 'immune low cluster'), pch = 16, col = c('#BD2131','#30ABDF'), bty = 'n', cex = 1)
+ legend('bottomright', legend = 'LUAD', bty = 'n')
+ beep <- patient.summary$orig_immune_classification[match(levels(boop), patient.summary$CRUKid)]
+ bp <- barplot(rep(1, length(beep)), col = ifelse(is.na(beep), yes = 'darkgrey', no = ifelse(beep == 'low', yes = '#30ABDF', no = ifelse(beep == 'mixed', yes = '#df9b2f', no = '#BD2131'))), space = 0.3, axes = FALSE, border = NA)
+ mtext('immune\nclass', side = 2, line = 1, cex = 0.8, las = 1)
+ mtext(text = levels(boop), side = 1, line = 1, las = 2, at = bp[,1], cex = 0.7)
+ tmp <- patient.summary.region[which(patient.summary.region$simple.hist == 'LUSC'),]
+ tmp <- tmp[!is.na(tmp$pathology_TILs),]
+ boop <- with(tmp, reorder(CRUKid, pathology_TILs, median, na.rm = TRUE))
+ bp <- boxplot(pathology_TILs ~ boop, data = tmp, ylim = c(0,100), las = 2, space = 0.3, cex.axis = 1, cex.lab = 0.8, names = FALSE, frame.plot = FALSE, at = 1:length(levels(boop)), outpch = NA)
+ beeswarm(pathology_TILs ~ boop, data = tmp, pch = 16, add = TRUE, cex = 1.5, pwcol = ifelse(is.na(tmp$immune_cluster), yes = 'light grey', no = ifelse(tmp$immune_cluster == 'high', yes = '#BD2131', no = '#30ABDF')))
+ mtext(text = 'pathology TILs', side = 2, line = 2.5, cex = 0.8)
+ legend('topleft', legend = c('immune high cluster', 'immune low cluster'), pch = 16, col = c('#BD2131','#30ABDF'), bty = 'n', cex = 1)
+ legend('bottomright', legend = 'LUSC', bty = 'n')
+ beep <- patient.summary$orig_immune_classification[match(levels(boop), patient.summary$CRUKid)]
+ bp <- barplot(rep(1, length(beep)), col = ifelse(is.na(beep), yes = 'darkgrey', no = ifelse(beep == 'low', yes = '#30ABDF', no = ifelse(beep == 'mixed', yes = '#df9b2f', no = '#BD2131'))), space = 0.3, axes = FALSE, border = NA)
+ mtext('immune\nclass', side = 2, line = 1, cex = 0.8, las = 1)
+ mtext(text = levels(boop), side = 1, line = 1, las = 2, at = bp[,1], cex = 0.7)
+ tmp <- patient.summary.region[which(patient.summary.region$simple.hist == 'LUAD'),]
+ tmp$regionalPattern[which(tmp$regionalPattern %in% c('MP', 'scar only', 'no tumour'))] <- 'other'
+ tmp$regionalPattern <- factor(tmp$regionalPattern, levels = c('cribriform', 'solid', 'lepidic', 'other', 'papillary', 'acinar'))
+ het_luad_patients <- unique(tmp$CRUKid[which(tmp$orig_immune_classification == 'mixed')])
+ het_tmp <- tmp[which(tmp$CRUKid %in% het_luad_patients),c('CRUKid', 'immune_cluster', 'regionalPattern')]
+ het_tmp$immune_cluster <- as.character(het_tmp$immune_cluster)
+ het_tmp$regionalPattern <- as.character(het_tmp$regionalPattern)
+ het_tmp$regionalPattern[is.na(het_tmp$regionalPattern)] <- 'NA'
+ het_tmp <- het_tmp[!is.na(het_tmp$immune_cluster),]
+ patient.summary$pathologic_heterogeneity <- sapply(patient.summary$CRUKid, FUN = function(x){if(patient.summary$simple.hist[which(patient.summary$CRUKid == x)] %in% c('LUSC', 'other')){return(NA)}; tmp<-patient.summary.region[which(patient.summary.region$CRUKid == x),]; tmp <- tmp[!is.na(tmp$regionalPattern),]; tmp$regionalPattern[which(tmp$regionalPattern %in% c('MP', 'scar only', 'no tumour'))] <- 'other'; if(all(is.na(tmp$regionalPattern))){return(NA)}; ifelse(length(unique(tmp$regionalPattern)) == 1, yes = return('homo'), no = return('het'))})
+ p.val <- round(fisher.test(table(patient.summary$pathologic_heterogeneity, patient.summary$orig_immune_classification == 'mixed'))$p.value, digits = 3)
+ layout(mat = matrix(c(1,2), nrow = 1), widths = c(0.4,0.6))
+ barplot(table(patient.summary$pathologic_heterogeneity, patient.summary$immune_classification == 'mixed'), names.arg = c('Homogeneous immune', 'Heterogeneous immune'), col = rev(c('#00441b', '#a1d99b')), las = 1, xlab = 'Lung adeno. patients', ylab = 'Number patients', cex.axis = 1, cex.names = 1, cex.lab = 1)
+ legend('topright',legend = c(paste('p: ', p.val, sep = ''), 'Heterogeneous pathology', 'Homogeneous pathology'), fill = rev(c('#00441b', '#a1d99b', NA)), bty = 'n', border = NA, cex = 0.8)
+ plot(x = 0, axes = FALSE, pch = NA, xlim = c(1,30), ylim = c(0,8), ylab = 'Tumor Regions', xlab = '', main = 'Histologies of regions from heterogeneous patients', cex.main = 0.9, frame.plot = FALSE, las =1, cex.lab = 0.9)
+ histcols <- setNames(c('#a6cee3','#33a02c', '#ff7f00', '#6a3d9a', '#fb9a99', 'lightgrey', '#a65628'), c('acinar','solid', 'lepidic', 'cribriform', 'papillary', 'NA', 'other'))
+ for(pat in sort(unique(het_tmp$CRUKid))){
+ x = which(sort(unique(het_tmp$CRUKid)) == pat)
+ tmp_plot <- het_tmp[which(het_tmp$CRUKid == pat),]
+ tmp_plot <- tmp_plot[order(tmp_plot$regionalPattern),]
+ tmp_plot <- tmp_plot[order(tmp_plot$immune_cluster, decreasing = TRUE),]
+ tmp_plot$immune_color <- ifelse(tmp_plot$immune_cluster == 'low', yes = '#30ABDF', no = '#BD2131')
+ tmp_plot$hist_color <- histcols[match(tmp_plot$regionalPattern, names(histcols))]
+ for(i in 1:nrow(tmp_plot)){
+ rect(xleft = 2*x-0.8, xright = 2*x, ybottom = i-1, ytop = i, col = tmp_plot$immune_color[i], border = NA)
+ rect(xleft = 2*x, xright = 2*x+0.8, ybottom = i-1, ytop = i, col = tmp_plot$hist_color[i], border = NA)
+ axis(side = 1, at = seq(from = 2, to = 16, by = 2), labels = sort(unique(het_tmp$CRUKid)), las = 2, cex.axis = 0.8)
+ axis(side = 2, at = c(0,4,8), labels = c(0,4,8), las = 1, cex.axis = 0.8)
+ legend(x = 'topleft', legend = c('low', 'high', names(histcols)), fill = c('#30ABDF', '#BD2131', histcols), border = NA, bty = 'n', ncol = 5, cex = 0.8)
+ barplot(table(patient.summary$orig_immune_classification)[c('low', 'mixed', 'high')], col = c('#30ABDF', '#df9b2f', '#BD2131'), border = NA, las = 1, ylab = 'Number patients', ylim = c(0,40))
+ barplot(table(patient.summary$immune_classification)[c('low', 'mixed', 'high')], col = c('#30ABDF', '#df9b2f', '#BD2131'), border = NA, las = 1, ylab = 'Number patients', ylim = c(0,40))
+ het_tmb_patients <- patient.summary$CRUKid[which(patient.summary$TMB_het == TRUE)]
+ het_tmb_dataframe <- data.frame(row.names = het_tmb_patients)
+ het_tmb_dataframe$low_tmb_purity <- sapply(rownames(het_tmb_dataframe), FUN = function(x){
+ regions_low_tmb <- patient.summary.region$CRUKidRegion[which(patient.summary.region$CRUKid == x & patient.summary.region$TotalMut_Varscan/30 <= 10)]
+ summary(patient.summary.region$purity[which(patient.summary.region$CRUKidRegion %in% regions_low_tmb)])['Median']
+ het_tmb_dataframe$high_tmb_purity <- sapply(rownames(het_tmb_dataframe), FUN = function(x){
+ regions_high_tmb <- patient.summary.region$CRUKidRegion[which(patient.summary.region$CRUKid == x & patient.summary.region$TotalMut_Varscan/30 > 10)]
+ summary(patient.summary.region$purity[which(patient.summary.region$CRUKidRegion %in% regions_high_tmb)])['Median']
+ p.val <- t.test(het_tmb_dataframe$low_tmb_purity, het_tmb_dataframe$high_tmb_purity, paired = TRUE)$p.value
+ stripchart(het_tmb_dataframe, vertical = TRUE, pch= 16, at = c(0.9, 1.2), xlim =c(0.7,1.4), las = 1, group.names = c('Low TMB regions', 'High TMB regions'), cex = 1.5, ylab = 'Tumor region purity')
+ segments(x0 = 0.9, y0 = het_tmb_dataframe$low_tmb_purity, x1 = 1.2, y1 = het_tmb_dataframe$high_tmb_purity, col = '#df9b2f', lwd = 3)
+ legend(legend = paste('paired t-test p: ', round(p.val, digits = 2), sep = ''), x = 'topleft', bty = 'n', cex = 0.9)
+ layout(mat = matrix(c(1,2,3,0), nrow = 4), heights = c(0.9,0.1,0.1, 0.1), widths = c(1))
+ par(oma = c(3.5,2,0,1)+0.1, mar = c(0,2.5,0,0)+0.5)
+ tmp <- patient.summary.region[which(patient.summary.region$simple.hist %in% c('LUAD', 'LUSC')),]
+ tmp <- tmp[!is.na(tmp$TIDE_score),]
+ tmp <- tmp[which(tmp$number.regions.rna.accepted > 1),]
+ boop <- with(tmp, reorder(CRUKid, TIDE_score, FUN = function(x) -min(x, na.rm = TRUE)))
+ bp <- boxplot(TIDE_score ~ boop, data = tmp, ylim = c(-2,3), las = 2, space = 0.3, cex.axis = 1, cex.lab = 0.8, names = FALSE, frame.plot = FALSE, at = 1:length(levels(boop)), outpch = NA)
+ beeswarm(TIDE_score ~ boop, data = tmp, pch = 16, add = TRUE, cex = 1, pwcol = ifelse(is.na(tmp$immune_cluster), yes = 'light grey', no = ifelse(tmp$immune_cluster == 'high', yes = '#BD2131', no = '#30ABDF')))
+ abline(h = 0, col = 'black', lty = 2)
+ text(x = 0.1, y = 0.1, labels = 'TIDE threshold', pos = 4, cex = 0.7)
+ mtext(text = 'TIDE Score', side = 2, line = 2.5, cex = 1)
+ legend('topleft', legend = c('immune high cluster', 'immune low cluster'), pch = 16, col = c('#BD2131','#30ABDF'), bty = 'n', cex = 1)
+ #legend('bottomright', legend = cancer, bty = 'n')
+ beep <- patient.summary$immune_classification[match(levels(boop), patient.summary$CRUKid)]
+ bp <- barplot(rep(1, length(beep)), col = ifelse(beep == 'low', yes = '#30ABDF', no = ifelse(beep == 'mixed', yes = '#df9b2f', no = '#BD2131')), space = 0.3, axes = FALSE, border = NA)
+ mtext('immune\nclass', side = 2, line = 1, cex = 0.8, las = 1)
+ beep <- patient.summary$TIDE_enrichment_group[match(levels(boop), patient.summary$CRUKid)]
+ beep_color <- ifelse(is.na(beep), yes = 'lightgrey', no = ifelse(beep == 'False', yes = '#e0f3f8', no = ifelse(beep == 'True', yes = '#4575b4', no = '#df9b2f')))
+ bp <- barplot(rep(1, length(beep)), col = beep_color, space = 0.3, axes = FALSE, border = NA)
+ mtext('TIDE', side = 2, line = 1, cex = 0.8, las = 1)
+ layout(mat = matrix(c(1,2,3,0), nrow = 4), heights = c(0.9,0.1,0.1, 0.1, 0.1), widths = c(1))
+ par(oma = c(3.5,2,0,1)+0.1, mar = c(0,2.5,0,0)+0.5)
+ tmp <- patient.summary.region[which(patient.summary.region$simple.hist %in% c('LUAD', 'LUSC')),]
+ tmp <- tmp[!is.na(tmp$ipres_enrichment),]
+ tmp <- tmp[which(tmp$number.regions.rna.accepted > 1),]
+ boop <- with(tmp, reorder(CRUKid, ipres_enrichment, max, na.rm = TRUE))
+ bp <- boxplot(ipres_enrichment ~ boop, data = tmp, ylim = c(-2,2), las = 2, space = 0.3, cex.axis = 1, cex.lab = 0.8, names = FALSE, frame.plot = FALSE, at = 1:length(levels(boop)), outpch = NA)
+ beeswarm(ipres_enrichment ~ boop, data = tmp, pch = 16, add = TRUE, cex = 1.2, pwcol = ifelse(is.na(tmp$immune_cluster), yes = 'light grey', no = ifelse(tmp$immune_cluster == 'high', yes = '#BD2131', no = '#30ABDF')))
+ abline(h = 0.35, col = 'black', lty = 2)
+ text(x = 0.1, y = 0.4, labels = 'IPRES threshold', pos = 4, cex = 0.7)
+ mtext(text = 'IPRES Score', side = 2, line = 2.5, cex = 1)
+ legend('topleft', legend = c('immune high cluster', 'immune low cluster'), pch = 16, col = c('#BD2131','#30ABDF'), bty = 'n', cex = 1)
+ beep <- patient.summary$ipres_enrichment_group[match(levels(boop), patient.summary$CRUKid)]
+ beep_color <- ifelse(is.na(beep), yes = 'lightgrey', no = ifelse(beep == 'FALSE', yes = '#e0f3f8', no = ifelse(beep == 'TRUE', yes = '#4575b4', no = '#df9b2f')))
+ bp <- barplot(rep(1, length(beep)), col = beep_color, space = 0.3, axes = FALSE, border = NA)
+ mtext('IPRES', side = 2, line = 1, cex = 0.8, las = 1)
+ beep <- patient.summary$immune_classification[match(levels(boop), patient.summary$CRUKid)]
+ bp <- barplot(rep(1, length(beep)), col = ifelse(beep == 'low', yes = '#30ABDF', no = ifelse(beep == 'mixed', yes = '#df9b2f', no = '#BD2131')), space = 0.3, axes = FALSE, border = NA)
+ mtext('immune\nclass', side = 2, line = 1, cex = 0.8, las = 1)
+ layout(mat = matrix(c(1,2,3,3), nrow = 2), heights = c(0.9, 0.1), widths = c(0.7, 0.3))
+ par(oma = c(3.5,2,0,1)+0.1, mar = c(0,2.5,0,0)+0.5)
+ tmp <- patient.summary.region[which(patient.summary.region$simple.hist %in% c('LUAD', 'LUSC')),]
+ tmp <- tmp[!is.na(tmp$ifng_signature_expanded),]
+ tmp <- tmp[which(tmp$number.regions.rna.accepted > 1),]
+ boop <- with(tmp, reorder(CRUKid, ifng_signature_expanded, max, na.rm = TRUE))
+ bp <- boxplot(ifng_signature_expanded ~ boop, data = tmp, ylim = c(4,10), las = 2, space = 0.3, cex.axis = 1, cex.lab = 0.8, names = FALSE, frame.plot = FALSE, at = 1:length(levels(boop)), outpch = NA)
+ beeswarm(ifng_signature_expanded ~ boop, data = tmp, pch = 16, add = TRUE, cex = 1.2, pwcol = ifelse(is.na(tmp$immune_cluster), yes = 'light grey', no = ifelse(tmp$immune_cluster == 'high', yes = '#BD2131', no = '#30ABDF')))
+ mtext(text = 'IFNg signature, expanded', side = 2, line = 2.5, cex = 1)
+ legend('topleft', legend = c('immune high cluster', 'immune low cluster'), pch = 16, col = c('#BD2131','#30ABDF'), bty = 'n', cex = 1)
+ beep <- patient.summary$immune_classification[match(levels(boop), patient.summary$CRUKid)]
+ bp <- barplot(rep(1, length(beep)), col = ifelse(beep == 'low', yes = '#30ABDF', no = ifelse(beep == 'mixed', yes = '#df9b2f', no = '#BD2131')), space = 0.3, axes = FALSE, border = NA)
+ mtext('immune\nclass', side = 2, line = 1, cex = 0.8, las = 1)
+ boxplot(Ayers_diff ~ immune_classification == 'mixed', patient.summary[which(patient.summary$simple.hist %in% c('LUAD', 'LUSC')),], las = 1, xlab = 'Immune heterogeneous', ylab = 'IFNg signature, expanded (Ayers)')
+ beeswarm(Ayers_diff ~ immune_classification == 'mixed', patient.summary[which(patient.summary$simple.hist %in% c('LUAD', 'LUSC')),], pch = 16, add=TRUE)
+ p.val <- wilcox.test(Ayers_diff ~ immune_classification == 'mixed', patient.summary[which(patient.summary$simple.hist %in% c('LUAD', 'LUSC')),])
+ legend('topleft', paste('p: ', formatC(p.val$p.value, digits = 1, format = 'e'), sep = ''), bty = 'n', cex = 1)
+ layout(mat = matrix(c(1,2), nrow = 2), heights = c(0.9,0.1), widths = c(1))
+ par(oma = c(3.5,2,0,1)+0.1, mar = c(0,2.5,0,0)+0.5)
+ tmp <- patient.summary.region[which(patient.summary.region$simple.hist != 'other'),]
+ boop <- with(tmp, reorder(CRUKid, TotalMut_Varscan, max, na.rm = TRUE))
+ bp <- boxplot(TotalMut_Varscan/30 ~ boop, data = tmp, ylim = c(0.5,100), las = 2, space = 0.3, cex.axis = 1, cex.lab = 0.8, names = FALSE, frame.plot = FALSE, at = 1:length(levels(boop)), outpch = NA, log = 'y', col = 'white')
+ stripchart(TotalMut_Varscan/30 ~ boop, data = tmp, pch = 16, add = TRUE, cex = 1, vertical=TRUE)
+ abline(h = 10, col = 'black', lty = 2)
+ text(x = 0.1, y = 11, labels = 'TMB threshold', pos = 4, cex = 0.7)
+ mtext(text = 'TMB (mut/Mb) [VarScan]', side = 2, line = 2.5, cex = 1)
+ beep <- patient.summary$TMB_het_col[match(levels(boop), patient.summary$CRUKid)]
+ bp <- barplot(rep(1, length(beep)), col = beep, space = 0.3, axes = FALSE, border = NA)
+ mtext('TMB +', side = 2, line = 1, cex = 0.8, las = 1)
+ par(mfrow = c(5,1), mar = c(1,4,1,2))
+ tmp <- patient.summary[,c('simple.hist', 'immune_classification', 'ipres_enrichment_group', 'TIDE_enrichment_group', 'TMB_any', 'TMB_het')]
+ tmp$TMB_group <- ifelse(tmp$TMB_any & tmp$TMB_het, yes = 'mixed', no = ifelse(tmp$TMB_any, yes = 'TRUE', no ='FALSE'))
+ tmp$TIDE_enrichment_group <- gsub(pattern = 'True', replacement = 'TRUE', x = tmp$TIDE_enrichment_group)
+ tmp$TIDE_enrichment_group <- gsub(pattern = 'False', replacement = 'FALSE', x = tmp$TIDE_enrichment_group)
+ tmp <- tmp[which(tmp$simple.hist != 'other'),]
+ tmp$num_pos <- rowSums(tmp[,c('TMB_group', 'TIDE_enrichment_group', 'ipres_enrichment_group')]=='TRUE', na.rm = TRUE)
+ tmp <- tmp[with(tmp, order(immune_classification, num_pos, TMB_group, TIDE_enrichment_group, ipres_enrichment_group, decreasing = TRUE)),]
+ beep <- tmp$simple.hist
+ beep_color <- ifelse(is.na(beep), yes = 'lightgrey', no = ifelse(beep == 'LUAD', yes = 'dark blue', no = ifelse(beep == 'LUSC', yes = 'dark red', no = 'lightgrey')))
+ bp <- barplot(rep(1, length(beep)), col = beep_color, space = 0.3, axes = FALSE, border = NA)
+ mtext('Hist.', side = 2, line = 1, cex = 0.8, las = 1)
+ beep <- tmp$immune_classification
+ bp <- barplot(rep(1, length(beep)), col = ifelse(is.na(beep), yes = 'lightgrey', no = ifelse(beep == 'low', yes = '#30ABDF', no = ifelse(beep == 'mixed', yes = '#df9b2f', no = '#BD2131'))), space = 0.3, axes = FALSE, border = NA)
+ mtext('immune\nclass', side = 2, line = 1, cex = 0.8, las = 1)
+ beep_color <- ifelse(is.na(beep), yes = 'lightgrey', no = ifelse(beep == 'FALSE', yes = '#e0f3f8', no = ifelse(beep == 'TRUE', yes = '#4575b4', no = '#df9b2f')))
+ bp <- barplot(rep(1, length(beep)), col = beep_color, space = 0.3, axes = FALSE, border = NA)
+ mtext('TMB+', side = 2, line = 1, cex = 0.8, las = 1)
+ beep <- tmp$TIDE_enrichment_group
+ beep_color <- ifelse(is.na(beep), yes = 'lightgrey', no = ifelse(beep == 'FALSE', yes = '#e0f3f8', no = ifelse(beep == 'TRUE', yes = '#4575b4', no = '#df9b2f')))
+ bp <- barplot(rep(1, length(beep)), col = beep_color, space = 0.3, axes = FALSE, border = NA)
+ mtext('TIDE', side = 2, line = 1, cex = 0.8, las = 1)
+ beep <- tmp$ipres_enrichment_group
+ beep_color <- ifelse(is.na(beep), yes = 'lightgrey', no = ifelse(beep == 'FALSE', yes = '#e0f3f8', no = ifelse(beep == 'TRUE', yes = '#4575b4', no = '#df9b2f')))
+ bp <- barplot(rep(1, length(beep)), col = beep_color, space = 0.3, axes = FALSE, border = NA)
+ mtext('IPRES', side = 2, line = 1, cex = 0.8, las = 1)
+ plot(total_comparisons[which(total_comparisons$simple.hist == 'LUAD'),'cn'], total_comparisons[which(total_comparisons$simple.hist == 'LUAD'),'immune'], xlab = 'Pairwise cn distance', ylab = 'Pairwise immune distance', pch = 16, col = total_comparisons[which(total_comparisons$simple.hist == 'LUAD'),'color'], las = 1)
+ p.val <- cor.test(total_comparisons[which(total_comparisons$simple.hist == 'LUAD'),'cn'], total_comparisons[which(total_comparisons$simple.hist == 'LUAD'),'immune'], method = 's')
+ legend(x = 'topleft', legend = paste('p: ', formatC(p.val$p.value, digits = 1, format = 'e'), '\n', 'rho: ', formatC(p.val$estimate, digits = 1, format = 'e'), sep= ''), bty = 'n')
+ points(total_comparisons[which(total_comparisons$simple.hist == 'LUSC'),'cn'], total_comparisons[which(total_comparisons$simple.hist == 'LUSC'),'immune'], xlab = 'Pairwise cn distance', ylab = 'Pairwise immune distance', pch = 16, col = total_comparisons[which(total_comparisons$simple.hist == 'LUSC'),'color'])
+ p.val <- cor.test(total_comparisons[which(total_comparisons$simple.hist == 'LUSC'),'cn'], total_comparisons[which(total_comparisons$simple.hist == 'LUSC'),'immune'], method = 's')
+ legend(x = 'topright', legend = paste('p: ', formatC(p.val$p.value, digits = 1, format = 'e'), '\n', 'rho: ', formatC(p.val$estimate, digits = 1, format = 'e'), sep= ''), bty = 'n')
+ boxplot((nk.score.danaher)/total.til.score.danahaer ~ !homo_c, data = patient.summary.region[which(patient.summary.region$hla_c_lost == TRUE),], las = 1, ylab = 'NK cell', xlab = 'C1/C2 heterozygosity', main = 'Patients with HLA-C LOH', ylim = c(0,1))
+ beeswarm((nk.score.danaher)/total.til.score.danahaer ~ !homo_c, data = patient.summary.region[which(patient.summary.region$hla_c_lost == TRUE),], add=TRUE, pch = 16, col = 'darkgreen')
+ p.val <- wilcox.test((nk.score.danaher/total.til.score.danahaer) ~ homo_c, data = patient.summary.region[which(patient.summary.region$hla_c_lost == TRUE),])$p.value
+ legend('topleft', paste('p: ', formatC(p.val, digits = 1, format = 'e'), sep = ''), bty = 'n', cex = 0.8)
+ legend('topright', 'tracerx', cex = 0.8, bty = 'n')
+ boxplot((nk.score.danaher)/total.til.score.danahaer ~ !homo_c, data = patient.summary.region[which(patient.summary.region$hla_c_lost == FALSE),], las = 1, ylab = 'NK cell', xlab = 'C1/C2 heterozygosity', main = 'Patients without HLA-C LOH', ylim = c(0,1))
+ beeswarm((nk.score.danaher)/total.til.score.danahaer ~ !homo_c, data = patient.summary.region[which(patient.summary.region$hla_c_lost == FALSE),], add=TRUE, pch = 16, col = 'darkgreen')
+ p.val <- wilcox.test((nk.score.danaher)/total.til.score.danahaer ~ homo_c, data = patient.summary.region[which(patient.summary.region$hla_c_lost == FALSE),])$p.value
+ legend('topleft', paste('p: ', formatC(p.val, digits = 1, format = 'e'), sep = ''), bty = 'n', cex = 0.8)
+ plot.with.confidence(patient.summary.region$shannon.ith[which(patient.summary.region$simple.hist == 'LUAD')], patient.summary.region$cd8.score.danaher[which(patient.summary.region$simple.hist == 'LUAD')], pch = 16, xlab = 'Shannon ITH', ylab = 'CD8+ Danaher', main = 'Lung adeno.', col = 'dark blue', cex = 1.6)
+ plot.with.confidence(patient.summary.region$shannon.ith[which(patient.summary.region$simple.hist == 'LUSC')], patient.summary.region$cd8.score.danaher[which(patient.summary.region$simple.hist == 'LUSC')], pch = 16, xlab = 'Shannon ITH', ylab = 'CD8+ Danaher', main = 'Lung squam.', col = 'dark red', cex = 1.6)
+ plot(patient.summary.region$purity[which(patient.summary.region$simple.hist == 'LUAD')], patient.summary.region$pathology_TILs[which(patient.summary.region$simple.hist == 'LUAD')], pch = 16, col = 'dark blue', ylab = 'pathology estimated TILs', xlab = 'tumor purity', las = 1, main = '', cex.main = 1, font.main = 1, xlim = c(0,1))
+ p.val <- cor.test(patient.summary.region$purity[which(patient.summary.region$simple.hist == 'LUAD')], patient.summary.region$pathology_TILs[which(patient.summary.region$simple.hist == 'LUAD')], method = 's')
+ legend(x = 'topright', legend = paste('p: ', formatC(p.val$p.value, digits = 1, format = 'e'), '\n', 'rho: ', formatC(p.val$estimate, digits = 1, format = 'e'), sep= ''), bty = 'n', cex = 0.7)
+ points(patient.summary.region$purity[which(patient.summary.region$simple.hist == 'LUSC')], patient.summary.region$pathology_TILs[which(patient.summary.region$simple.hist == 'LUSC')], pch = 16, col = 'dark red', ylab = 'pathology estimated TILs', xlab = 'tumor purity')
+ p.val <- cor.test(patient.summary.region$purity[which(patient.summary.region$simple.hist == 'LUSC')], patient.summary.region$pathology_TILs[which(patient.summary.region$simple.hist == 'LUSC')], method = 's')
+ legend(x = 'bottomright', legend = paste('p: ', formatC(p.val$p.value, digits = 1, format = 'e'), '\n', 'rho: ', formatC(p.val$estimate, digits = 1, format = 'e'), sep= ''), bty = 'n', cex = 0.7)
+ tmp <- gsub(pattern = '0%', replacement = '#30ABDF', x = gsub(pattern = '50%', replacement = '#BD2131', x = patient.summary.region$TIL_quart[which(patient.summary.region$simple.hist == 'LUAD')]))
+ boxplot(shannon.ith ~ TIL_cat, data = patient.summary.region[which(patient.summary.region$simple.hist == 'LUAD'),], las = 1, main = 'Lung adeno.')
+ beeswarm(shannon.ith ~ TIL_cat, data = patient.summary.region[which(patient.summary.region$simple.hist == 'LUAD'),], add = TRUE, pch = 16, pwcol = tmp, cex = 1.6)
+ p.val <- wilcox.test(shannon.ith ~ TIL_cat == 'low', data = patient.summary.region[which(patient.summary.region$simple.hist == 'LUAD'),])$p.value
+ legend('bottomright', paste('p -- low-mixed/high : ', round(p.val, digits = 2), sep = ''), bty = 'n')
+ tmp <- patient.summary[,c('ie.all.updated', 'ie.clonal.updated', 'ie.subclonal.updated', 'simple.hist', 'immune_classification', 'anyCPN_remove')]
+ tmp <- tmp[complete.cases(tmp),]
+ boxplot(ie.all.updated ~ as.character(simple.hist), data = tmp[which(tmp$simple.hist %in% c('LUAD', 'LUSC')),], las = 1, ylab = 'Immunoediting score')
+ beeswarm(ie.all.updated ~ as.character(simple.hist), data = tmp[which(tmp$simple.hist %in% c('LUAD', 'LUSC')),], pwcol = ifelse(tmp$simple.hist[which(tmp$simple.hist %in% c('LUAD', 'LUSC'))] == 'LUAD', yes = 'dark blue', no = 'dark red'), pch = 16, add =TRUE)
+ p.val <- wilcox.test(ie.all.updated ~ as.character(simple.hist), data = tmp[which(tmp$simple.hist %in% c('LUAD', 'LUSC')),])$p.value
+ legend('bottomright', paste('p: ', formatC(p.val, format = 'e', digits = 1), sep = ''), bty = 'n', cex = 1.3)
+ p.val <- cor.test(as.numeric(patient.summary$ie.all.updated[which(patient.summary$simple.hist != 'other')]), as.numeric(patient.summary$number.unique.hlas[which(patient.summary$simple.hist != 'other')]), method= 's')
+ boxplot(ie.all.updated ~ number.unique.hlas, data = patient.summary[which(patient.summary$simple.hist != 'other'),], las = 1, ylab = 'Immunoediting score', xlab = 'Number unique HLAs')
+ beeswarm(ie.all.updated ~ number.unique.hlas, data = patient.summary[which(patient.summary$simple.hist != 'other'),], pch = 16, add =TRUE)
+ legend(x = 'topleft', legend = paste('p: ', formatC(p.val$p.value, digits = 1, format = 'e'), '\nrho: ', round(p.val$estimate, digits = 2), sep = ''), bty = 'n', cex = 1.3)
+ tmp <- mutTableAll.together.filtered[which(mutTableAll.together.filtered$ITHState_RNA %in% c(1,2,3, 'FALSE')),]
+ tmp2 <- table(tmp[which(tmp$nonsynonymous == TRUE & tmp$any.HLA.loss == TRUE),'lost_in_rna'], tmp[which(tmp$nonsynonymous == TRUE & tmp$any.HLA.loss == TRUE),'strong.count'])
+ f.test <- fisher.test(tmp2)
+ ests <- c(ests, f.test$estimate)
+ p.vals <- c(p.vals, f.test$p.value)
+ ors1 <- c(ors1, f.test$conf.int[1])
+ ors2 <- c(ors2, f.test$conf.int[2])
+ tmp2 <- table(tmp[which(tmp$nonsynonymous == TRUE & tmp$any.HLA.loss == FALSE),'lost_in_rna'], tmp[which(tmp$nonsynonymous == TRUE & tmp$any.HLA.loss == FALSE),'strong.count'])
+ f.test <- fisher.test(tmp2)
+ ests <- c(ests, f.test$estimate)
+ p.vals <- c(p.vals, f.test$p.value)
+ ors1 <- c(ors1, f.test$conf.int[1])
+ ors2 <- c(ors2, f.test$conf.int[2])
+ tmp2 <- table(tmp[which(tmp$nonsynonymous == TRUE & tmp$immune_classification %in% c('high') & tmp$any.HLA.loss == TRUE),'lost_in_rna'], tmp[which(tmp$nonsynonymous == TRUE & tmp$immune_classification %in% c('high') & tmp$any.HLA.loss == TRUE),'strong.count'])
+ f.test <- fisher.test(tmp2)
+ ests <- c(ests, f.test$estimate)
+ p.vals <- c(p.vals, f.test$p.value)
+ ors1 <- c(ors1, f.test$conf.int[1])
+ ors2 <- c(ors2, f.test$conf.int[2])
+ tmp2 <- table(tmp[which(tmp$nonsynonymous == TRUE & tmp$immune_classification %in% c('high') & tmp$any.HLA.loss == FALSE),'lost_in_rna'], tmp[which(tmp$nonsynonymous == TRUE & tmp$immune_classification %in% c('high') & tmp$any.HLA.loss == FALSE),'strong.count'])
+ f.test <- fisher.test(tmp2)
+ ests <- c(ests, f.test$estimate)
+ p.vals <- c(p.vals, f.test$p.value)
+ ors1 <- c(ors1, f.test$conf.int[1])
+ ors2 <- c(ors2, f.test$conf.int[2])
+ tmp2 <- table(tmp[which(tmp$nonsynonymous == TRUE & tmp$immune_classification %in% c('mixed') & tmp$any.HLA.loss == TRUE),'lost_in_rna'], tmp[which(tmp$nonsynonymous == TRUE & tmp$immune_classification %in% c('mixed') & tmp$any.HLA.loss == TRUE),'strong.count'])
+ f.test <- fisher.test(tmp2)
+ ests <- c(ests, f.test$estimate)
+ p.vals <- c(p.vals, f.test$p.value)
+ ors1 <- c(ors1, f.test$conf.int[1])
+ ors2 <- c(ors2, f.test$conf.int[2])
+ tmp2 <- table(tmp[which(tmp$nonsynonymous == TRUE & tmp$immune_classification %in% c('mixed') & tmp$any.HLA.loss == FALSE),'lost_in_rna'], tmp[which(tmp$nonsynonymous == TRUE & tmp$immune_classification %in% c('mixed') & tmp$any.HLA.loss == FALSE),'strong.count'])
+ f.test <- fisher.test(tmp2)
+ ests <- c(ests, f.test$estimate)
+ p.vals <- c(p.vals, f.test$p.value)
+ ors1 <- c(ors1, f.test$conf.int[1])
+ ors2 <- c(ors2, f.test$conf.int[2])
+ tmp2 <- table(tmp[which(tmp$nonsynonymous == TRUE & tmp$immune_classification %in% c('low') & tmp$any.HLA.loss == TRUE),'lost_in_rna'], tmp[which(tmp$nonsynonymous == TRUE & tmp$immune_classification %in% c('low') & tmp$any.HLA.loss == TRUE),'strong.count'])
+ f.test <- fisher.test(tmp2)
+ ests <- c(ests, f.test$estimate)
+ p.vals <- c(p.vals, f.test$p.value)
+ ors1 <- c(ors1, f.test$conf.int[1])
+ ors2 <- c(ors2, f.test$conf.int[2])
+ tmp2 <- table(tmp[which(tmp$nonsynonymous == TRUE & tmp$immune_classification %in% c('low') & tmp$any.HLA.loss == FALSE),'lost_in_rna'], tmp[which(tmp$nonsynonymous == TRUE & tmp$immune_classification %in% c('low') & tmp$any.HLA.loss == FALSE),'strong.count'])
+ f.test <- fisher.test(tmp2)
+ ests <- c(ests, f.test$estimate)
+ p.vals <- c(p.vals, f.test$p.value)
+ ors1 <- c(ors1, f.test$conf.int[1])
+ ors2 <- c(ors2, f.test$conf.int[2])
+ bp <- barplot(ests, col = c(sapply(X = c('#00441b', '#BD2131', '#df9b2f', '#30ABDF'), FUN = function(x) rep(x, 2))), border = FALSE, las = 1, cex.axis = 0.9, cex.names = 0.7, ylab = 'OR -- neoantigen expression lost in RNA', ylim = c(0,1.5), names = '', axes = FALSE, space = rep(c(0.6,0.2),4))
+ axis(side = 2, at = c(0,0.5, 1.0, 1.5), labels = c(0, 0.5, 1.0, 1.5), las = 1, cex.axis = 0.8)
+ segments(x0 = bp[1,1], x1 = bp[1,1], y0 = ors1[1], y1 = ors2[1])
+ segments(x0 = bp[2,1], x1 = bp[2,1], y0 = ors1[2], y1 = ors2[2])
+ segments(x0 = bp[3,1], x1 = bp[3,1], y0 = ors1[3], y1 = ors2[3])
+ segments(x0 = bp[4,1], x1 = bp[4,1], y0 = ors1[4], y1 = ors2[4])
+ segments(x0 = bp[5,1], x1 = bp[5,1], y0 = ors1[5], y1 = ors2[5])
+ segments(x0 = bp[6,1], x1 = bp[6,1], y0 = ors1[6], y1 = ors2[6])
+ segments(x0 = bp[7,1], x1 = bp[7,1], y0 = ors1[7], y1 = ors2[7])
+ segments(x0 = bp[8,1], x1 = bp[8,1], y0 = ors1[8], y1 = ors2[8])
+ mtext(text = paste('p = ', round(p.vals, digits = 3), sep = ''), side = 1, line = 2, cex = 0.7, at = bp[,1])
+ mtext(text = c('+HLA LOH', '-HLA LOH'), side = 1, line = 1, cex = 0.7, at = bp[,1])
+ tmp_names <- factor(rep(c('All', 'High', 'Hetero.', 'Low'), each = 2), levels = c('All', 'High', 'Hetero.', 'Low'))
+ mtext(text = c('All', 'High', 'Hetero.', 'Low'), side = 1, line = 3, cex = 0.7, at = aggregate(bp, by = list(tmp_names), mean)$V1)
+ abline(h = 1, lty = 2, col = 'black')
+ neo <- table(mutTableRegion$mutant.expressed.binary[which(mutTableRegion$nonsynonymous == TRUE & mutTableRegion$count == 1)], mutTableRegion$expression.tpm[which(mutTableRegion$nonsynonymous == TRUE & mutTableRegion$count == 1)] > 1)
+ non_neo <- table(mutTableRegion$mutant.expressed.binary[which(mutTableRegion$nonsynonymous == TRUE & mutTableRegion$count == 0)], mutTableRegion$expression.tpm[which(mutTableRegion$nonsynonymous == TRUE & mutTableRegion$count == 0)] > 1)
+ k <- rbind(rev(non_neo[1,]), rev(neo[1,]))
+ rownames(k) <- c('non-neo', 'neo')
+ f.test <- fisher.test(rbind(rev(non_neo[1,]), rev(neo[1,])))
+ barplot(t(t(k)/rowSums(t(k))), names.arg = c('Gene expressed', 'Gene not expressed'), las = 1, col = c('#a6bddb', '#2b8cbe'), main = 'Neo')
+ legend(x = 'bottomleft', fill =c('#a6bddb', '#2b8cbe'), border = NA, bty = 'n', legend = c('Non-neo', 'Neo'), xpd = TRUE, inset = c(0,-0.35))
+ legend(x = 'bottomleft', border = NA, bty = 'n', legend = paste('p: ', formatC(f.test$p.value, digits = 1, format = 'e'), '\nOR: ', round(f.test$estimate, digits = 1), sep = ''), xpd = FALSE)
+ neo <- table(mutTableRegion$mutant.expressed.binary[which(mutTableRegion$nonsynonymous == TRUE & mutTableRegion$strong.count == 1)], mutTableRegion$expression.tpm[which(mutTableRegion$nonsynonymous == TRUE & mutTableRegion$strong.count == 1)] > 1)
+ non_neo <- table(mutTableRegion$mutant.expressed.binary[which(mutTableRegion$nonsynonymous == TRUE & mutTableRegion$strong.count == 0)], mutTableRegion$expression.tpm[which(mutTableRegion$nonsynonymous == TRUE & mutTableRegion$strong.count == 0)] > 1)
+ k <- rbind(rev(non_neo[1,]), rev(neo[1,]))
+ rownames(k) <- c('non-neo', 'neo')
+ f.test <- fisher.test(rbind(rev(non_neo[1,]), rev(neo[1,])))
+ barplot(t(t(k)/rowSums(t(k))), names.arg = c('Gene expressed', 'Gene not expressed'), las = 1, col = c('#a6bddb', '#2b8cbe'), main = 'Strong neo')
+ legend(x = 'bottomleft', fill =c('#a6bddb', '#2b8cbe'), border = NA, bty = 'n', legend = c('Non-neo', 'Neo'), xpd = TRUE, inset = c(0,-0.35))
+ legend(x = 'bottomleft', border = NA, bty = 'n', legend = paste('p: ', formatC(f.test$p.value, digits = 1, format = 'e'), '\nOR: ', round(f.test$estimate, digits = 1), sep = ''), xpd = FALSE)
+ for(cancer in c('LUAD', 'LUSC')){
+ colOrder <- c("CRUK0020", "CRUK0016", "CRUK0039", "CRUK0051", "CRUK0027", "CRUK0060", "CRUK0024", "CRUK0035", "CRUK0047", "CRUK0052", "CRUK0026", "CRUK0008", "CRUK0038", "CRUK0045", "CRUK0031", "CRUK0034", "CRUK0006", "CRUK0033", "CRUK0030", "CRUK0015", "CRUK0014", "CRUK0019", "CRUK0007", "CRUK0040", "CRUK0017", "CRUK0001", "CRUK0009", "CRUK0003", "CRUK0029", "CRUK0061", "CRUK0044", "CRUK0004", "CRUK0012", "CRUK0005", "CRUK0041", "CRUK0018", "CRUK0046", "CRUK0048", "CRUK0032", "CRUK0010", "CRUK0013", "CRUK0028", "CRUK0002", "CRUK0022", "CRUK0037", "CRUK0055", "CRUK0023", "CRUK0036", "CRUK0042", "CRUK0053", "CRUK0057", "CRUK0025", "CRUK0050", "CRUK0021", "CRUK0058", "CRUK0011", "CRUK0043", "CRUK0049", "CRUK0054", "CRUK0056", "CRUK0059")
+ colOrder <- c(c("CRUK0086","CRUK0074","CRUK0068","CRUK0079","CRUK0069","CRUK0064","CRUK0083","CRUK0072","CRUK0073","CRUK0081","CRUK0065","CRUK0078","CRUK0067","CRUK0070","CRUK0062","CRUK0082","CRUK0076","CRUK0071","CRUK0066","CRUK0084","CRUK0090","CRUK0075","CRUK0063","CRUK0089","CRUK0085","CRUK0088","CRUK0087","CRUK0092","CRUK0077","CRUK0080","CRUK0093","CRUK0091"))
+ tmp_df <- data.frame(colnames(all_uncondensed_alterations)[which(substr(colnames(all_uncondensed_alterations), 1, 8) %in% colOrder)], stringsAsFactors = FALSE)
+ tmp_df$SampleID <- substr(tmp_df[,1], 1, 8)
+ tmp_df$SampleID <- factor(tmp_df$SampleID, levels = colOrder)
+ tmp_df <- tmp_df[order(tmp_df$SampleID),]
+ uncondensed_alterations <- all_uncondensed_alterations[,tmp_df[,1]]
+ # mutations annotated as 0.5
+ uncondensed_beep <- uncondensed_alterations
+ uncondensed_beep[which(uncondensed_beep == -1)] <- 0
+ uncondensed_beep[which(uncondensed_beep == 1)] <- 0
+ layout(matrix(1:(nrow(uncondensed_alterations)), ncol = 1))
+ par(mar = c(2,2,1,2), oma = c(4,4,4,4))
+ tmp_acceptedRegions_df <- acceptedRegions_df[match(colnames(uncondensed_alterations), acceptedRegions_df$CRUKidRegion),]
+ sapply(1:nrow(uncondensed_alterations), FUN = function(x){
+ bp <- barplot(rep(1,ncol(uncondensed_alterations)),border = NA,axes = FALSE,
+ col = ifelse(as.numeric(uncondensed_alterations[x,]) == 1, yes = 'red', no = ifelse(as.numeric(uncondensed_alterations[x,]) < 0, yes = 'darkblue', no = 'lightgrey')),
+ space = unlist(sapply(tmp_acceptedRegions_df$nregions[!duplicated(tmp_acceptedRegions_df$CRUKid)], FUN = function(x){c(2,rep(0.2,(x-1)))}))
+ rect(xleft = bp[,1]-0.15, xright = bp[,1]+0.15, ybottom = 0.2, ytop = uncondensed_beep[x,]*1 + 0.2,
+ col = '#008000', border = NA)
+ mtext(text = rownames(uncondensed_alterations)[x], side=2,line = -25, cex = 5, las = 2, outer = FALSE, col = 'black')
+ bp <- barplot(rep(1,ncol(uncondensed_alterations)),border = NA,axes = FALSE,space = unlist(sapply(tmp_acceptedRegions_df$nregions[!duplicated(tmp_acceptedRegions_df$CRUKid)], FUN = function(x){c(2,rep(0.2,(x-1)))})), plot = FALSE)
+ nr <- tmp_acceptedRegions_df$nregions[!duplicated(tmp_acceptedRegions_df$CRUKid)]
+ names(nr) <- unique(tmp_acceptedRegions_df$CRUKid)
+ tmp <- patient.summary[which(patient.summary$simple.hist == 'LUAD'),]
+ tmp$tmp <- ifelse(tmp$ClonalNeo > summary(tmp$ClonalNeo)['3rd Qu.'], yes = 'high', no = 'low')
+ survplot(Surv(allan_final_dsf_time, allan_final_dsf == "1") ~ tmp,
+ main = "LUAD -- survival by upper quartile clonal neo",
+ col = c('darkblue', 'darkred'),
+ tmp$tmp <- ifelse(tmp$SubclonalNeo > summary(tmp$SubclonalNeo)['3rd Qu.'], yes = 'high', no = 'low')
+ survplot(Surv(allan_final_dsf_time, allan_final_dsf == "1") ~ tmp,
+ main = "LUAD -- survival by upper quartile subclonal neo",
+ col = c('darkblue', 'darkred'),
+ tmp$tmp <- ifelse(tmp$TotalNeo > summary(tmp$TotalNeo)['3rd Qu.'], yes = 'high', no = 'low')
+ survplot(Surv(allan_final_dsf_time, allan_final_dsf == "1") ~ tmp,
+ main = "LUAD -- survival by upper quartile total neo",
+ col = c('darkblue', 'darkred'),
+ tmp <- patient.summary[which(patient.summary$simple.hist == 'LUSC'),]
+ tmp$tmp <- ifelse(tmp$ClonalNeo > summary(tmp$ClonalNeo)['3rd Qu.'], yes = 'high', no = 'low')
+ survplot(Surv(allan_final_dsf_time, allan_final_dsf == "1") ~ tmp,
+ main = "LUSC -- survival by upper quartile clonal neo",
+ col = c('darkblue', 'darkred'),
+ tmp$tmp <- ifelse(tmp$SubclonalNeo > summary(tmp$SubclonalNeo)['3rd Qu.'], yes = 'high', no = 'low')
+ survplot(Surv(allan_final_dsf_time, allan_final_dsf == "1") ~ tmp,
+ main = "LUSC -- survival by upper quartile subclonal neo",
+ col = c('darkblue', 'darkred'),
+ tmp$tmp <- ifelse(tmp$TotalNeo > summary(tmp$TotalNeo)['3rd Qu.'], yes = 'high', no = 'low')
+ survplot(Surv(allan_final_dsf_time, allan_final_dsf == "1") ~ tmp,
+ main = "LUAD -- survival by upper quartile total neo",
+ col = c('darkblue', 'darkred'),
+ par(mfrow = c(2,2), xpd = TRUE)
+ tmp <- patient.summary[which(patient.summary$simple.hist == 'LUAD'),]
+ luad_pvals <- data.frame(row.names = 1:max(tmp$ClonalNeo))
+ for(n in 1:max(tmp$ClonalNeo)){
+ test <- summary(coxph(Surv(allan_final_dsf_time, allan_final_dsf == "1") ~ ClonalNeo > n, data = tmp))
+ cil <- test$conf.int[3]
+ cih <- test$conf.int[4]
+ p <- test$sctest['pvalue']
+ nu <- survfit(Surv(allan_final_dsf_time, allan_final_dsf == "1") ~ ClonalNeo > n, data = tmp)$n[2]
+ test <- summary(coxph(Surv(allan_final_dsf_time, allan_final_dsf == "1") ~ SubclonalNeo > n, data = tmp))
+ hr_s <- test$conf.int[1]
+ cil_s <- test$conf.int[3]
+ cih_s <- test$conf.int[4]
+ p_s <- test$sctest['pvalue']
+ nu_s <- survfit(Surv(allan_final_dsf_time, allan_final_dsf == "1") ~ SubclonalNeo > n, data = tmp)$n[2]
+ luad_pvals[as.character(n),'hr'] <- hr
+ luad_pvals[as.character(n),'p'] <- p
+ luad_pvals[as.character(n),'cil'] <- cil
+ luad_pvals[as.character(n),'cih'] <- cih
+ luad_pvals[as.character(n),'nu'] <- nu
+ luad_pvals[as.character(n),'hr_s'] <- hr_s
+ luad_pvals[as.character(n),'p_s'] <- p_s
+ luad_pvals[as.character(n),'cil_s'] <- cil_s
+ luad_pvals[as.character(n),'cih_s'] <- cih_s
+ luad_pvals[as.character(n),'nu_s'] <- nu_s
+ keep_luad <- luad_pvals
+ luad_pvals <- luad_pvals[20:500,]
+ rbPal <- colorRampPalette(c('white','darkblue'))
+ luad_pvals$ncol <- rbPal(30)[as.numeric(cut(luad_pvals$nu,breaks = 30))]
+ luad_pvals$col <- ifelse(luad_pvals$p < 0.05, yes = 'red', no = 'black')
+ luad_pvals$cih[which(luad_pvals$cih > 4)] <- 4
+ plot(x = rownames(luad_pvals)[seq(1, length(rownames(luad_pvals)), 3)], y = luad_pvals$hr[seq(1, length(luad_pvals$hr), 3)], ylim = c(0, 5), xlim = c(20, 500), pch = NA, ylab = 'HR', xlab = '#ClonalNeo used as threshold', las = 1)
+ #segments(x0=as.numeric(rownames(luad_pvals)[seq(1, length(rownames(luad_pvals)), 3)]), y0=luad_pvals$cil[seq(1, length(rownames(luad_pvals)), 3)],
+ # x1=as.numeric(rownames(luad_pvals)[seq(1, length(rownames(luad_pvals)), 3)]), y1=luad_pvals$cih[seq(1, length(rownames(luad_pvals)),3)])
+ points(x = rownames(luad_pvals)[seq(1, length(rownames(luad_pvals)), 3)], y = luad_pvals$hr[seq(1, length(luad_pvals$hr), 3)], col = luad_pvals$col[seq(1, length(luad_pvals$col), 3)], pch = 16)
+ points(x = rownames(luad_pvals)[seq(1, length(rownames(luad_pvals)), 3)], y = rep(5, length(seq(1, length(rownames(luad_pvals)), 3))) , col = luad_pvals$ncol, pch =15)
+ points(x = as.numeric(summary(tmp$ClonalNeo))[c(2,3,5)], y = c(5,5,5), pch = 15, col = 'gold')
+ abline(h = 1, col = 'red', lty = 2)
+ text(x = 100, y = 4.9, labels = 'number patients in high neo group', cex = 0.7)
+ luad_pvals$ncol_s <- rbPal(30)[as.numeric(cut(luad_pvals$nu_s,breaks = 30))]
+ luad_pvals$col_s <- ifelse(luad_pvals$p_s < 0.05, yes = 'red', no = 'black')
+ luad_pvals$cih_s[which(luad_pvals$cih_s > 4)] <- 4
+ plot(x = rownames(luad_pvals)[seq(1, length(rownames(luad_pvals)), 3)], y = luad_pvals$hr_s[seq(1, length(luad_pvals$hr_s), 3)], ylim = c(0, 4), xlim = c(20, 500), pch = NA, ylab = 'HR', xlab = '#SubclonalNeo used as threshold', las = 1)
+ #segments(x0=as.numeric(rownames(luad_pvals)[seq(1, length(rownames(luad_pvals)), 3)]), y0=luad_pvals$cil_s[seq(1, length(rownames(luad_pvals)), 3)],
+ # x1=as.numeric(rownames(luad_pvals)[seq(1, length(rownames(luad_pvals)), 3)]), y1=luad_pvals$cih_s[seq(1, length(rownames(luad_pvals)),3)])
+ points(x = rownames(luad_pvals)[seq(1, length(rownames(luad_pvals)), 3)], y = luad_pvals$hr_s[seq(1, length(luad_pvals$hr_s), 3)], col = luad_pvals$col_s[seq(1, length(luad_pvals$col_s), 3)], pch = 16)
+ points(x = rownames(luad_pvals)[seq(1, length(rownames(luad_pvals)), 3)], y = rep(5, length(seq(1, length(rownames(luad_pvals)), 3))) , col = luad_pvals$ncol_s, pch =15)
+ points(x = as.numeric(summary(tmp$SubclonalNeo))[c(2,3,5)], y = c(5,5,5), pch = 15, col = 'gold')
+ abline(h = 1, col = 'red', lty = 2)
+ text(x = 100, y = 4.9, labels = 'number patients in high neo group', cex = 0.7)
+ luad_pvals <- keep_luad
+ tmp <- patient.summary[which(patient.summary$simple.hist == 'LUSC'),]
+ lusc_pvals <- data.frame(row.names = 1:max(tmp$ClonalNeo))
+ for(n in 1:max(tmp$ClonalNeo)){
+ test <- summary(coxph(Surv(allan_final_dsf_time, allan_final_dsf == "1") ~ ClonalNeo > n, data = tmp))
+ cil <- test$conf.int[3]
+ cih <- test$conf.int[4]
+ p <- test$sctest['pvalue']
+ nu <- survfit(Surv(allan_final_dsf_time, allan_final_dsf == "1") ~ ClonalNeo > n, data = tmp)$n[2]
+ test <- summary(coxph(Surv(allan_final_dsf_time, allan_final_dsf == "1") ~ SubclonalNeo > n, data = tmp))
+ hr_s <- test$conf.int[1]
+ cil_s <- test$conf.int[3]
+ cih_s <- test$conf.int[4]
+ p_s <- test$sctest['pvalue']
+ nu_s <- survfit(Surv(allan_final_dsf_time, allan_final_dsf == "1") ~ SubclonalNeo > n, data = tmp)$n[2]
+ lusc_pvals[as.character(n),'hr'] <- hr
+ lusc_pvals[as.character(n),'p'] <- p
+ lusc_pvals[as.character(n),'cil'] <- cil
+ lusc_pvals[as.character(n),'cih'] <- cih
+ lusc_pvals[as.character(n),'nu'] <- nu
+ lusc_pvals[as.character(n),'hr_s'] <- hr_s
+ lusc_pvals[as.character(n),'p_s'] <- p_s
+ lusc_pvals[as.character(n),'cil_s'] <- cil_s
+ lusc_pvals[as.character(n),'cih_s'] <- cih_s
+ lusc_pvals[as.character(n),'nu_s'] <- nu_s
+ keep_lusc <- lusc_pvals
+ lusc_pvals <- lusc_pvals[20:500,]
+ lusc_pvals <- keep_lusc[!is.na(keep_lusc$nu),]
+ lusc_pvals$ncol <- rbPal(30)[as.numeric(cut(lusc_pvals$nu,breaks = 30))]
+ lusc_pvals$col <- ifelse(lusc_pvals$p < 0.05, yes = 'red', no = 'black')
+ lusc_pvals$cih[which(lusc_pvals$cih > 2)] <- 2
+ plot(x = rownames(lusc_pvals)[seq(1, length(rownames(lusc_pvals)), 3)], y = lusc_pvals$hr[seq(1, length(lusc_pvals$hr), 3)], ylim = c(0, 2), xlim = c(33, 474), pch = NA, ylab = 'HR', xlab = '#ClonalNeo used as threshold', las = 1)
+ #segments(x0=as.numeric(rownames(lusc_pvals)[seq(1, length(rownames(lusc_pvals)), 3)]), y0=lusc_pvals$cil[seq(1, length(rownames(lusc_pvals)), 3)],
+ # x1=as.numeric(rownames(lusc_pvals)[seq(1, length(rownames(lusc_pvals)), 3)]), y1=lusc_pvals$cih[seq(1, length(rownames(lusc_pvals)),3)])
+ points(x = rownames(lusc_pvals)[seq(1, length(rownames(lusc_pvals)), 3)], y = lusc_pvals$hr[seq(1, length(lusc_pvals$hr), 3)], col = lusc_pvals$col[seq(1, length(lusc_pvals$col), 3)], pch = 16)
+ points(x = rownames(lusc_pvals)[seq(1, length(rownames(lusc_pvals)), 3)], y = rep(2.5, length(seq(1, length(rownames(lusc_pvals)), 3))) , col = lusc_pvals$ncol, pch =15)
+ points(x = as.numeric(summary(tmp$ClonalNeo))[c(2,3,5)], y = c(2.5,2.5,2.5), pch = 15, col = 'gold')
+ abline(h = 1, col = 'red', lty = 2)
+ text(x = 120, y = 2.4, labels = 'number patients in high neo group', cex = 0.7)
+ lusc_pvals <- keep_lusc[!is.na(keep_lusc$nu_s),]
+ lusc_pvals$ncol_s <- rbPal(30)[as.numeric(cut(lusc_pvals$nu_s,breaks = 30))]
+ lusc_pvals$col_s <- ifelse(lusc_pvals$p_s < 0.05, yes = 'red', no = 'black')
+ lusc_pvals$cih_s[which(lusc_pvals$cih_s > 5)] <- 5
+ plot(x = rownames(lusc_pvals)[seq(1, length(rownames(lusc_pvals)), 3)], y = lusc_pvals$hr_s[seq(1, length(lusc_pvals$hr_s), 3)], ylim = c(0, 8), xlim = c(0, 355), col = lusc_pvals$col_s[seq(1, length(lusc_pvals$col_s), 3)], pch = 16, ylab = 'HR', xlab = '#SubclonalNeo used as threshold', las = 1)
+ #segments(x0=as.numeric(rownames(lusc_pvals)[seq(1, length(rownames(lusc_pvals)), 3)]), y0=lusc_pvals$cil_s[seq(1, length(rownames(lusc_pvals)), 3)],
+ # x1=as.numeric(rownames(lusc_pvals)[seq(1, length(rownames(lusc_pvals)), 3)]), y1=lusc_pvals$cih_s[seq(1, length(rownames(lusc_pvals)),3)])
+ points(x = rownames(lusc_pvals)[seq(1, length(rownames(lusc_pvals)), 3)], y = lusc_pvals$hr_s[seq(1, length(lusc_pvals$hr_s), 3)], col = lusc_pvals$col_s[seq(1, length(lusc_pvals$col_s), 3)], pch = 16)
+ points(x = rownames(lusc_pvals)[seq(1, length(rownames(lusc_pvals)), 3)], y = rep(6, length(seq(1, length(rownames(lusc_pvals)), 3))) , col = lusc_pvals$ncol_s, pch =15)
+ points(x = as.numeric(summary(tmp$SubclonalNeo))[c(2,3,5)], y = c(6,6,6), pch = 15, col = 'gold')
+ abline(h = 1, col = 'red', lty = 2)
+ text(x = 100, y = 5.9, labels = 'number patients in high neo group', cex = 0.7)
+ tmp <- patient.summary[which(patient.summary$simple.hist %in% c('LUAD', 'LUSC')),]
+ tmp$low_evasion_clonal <- ifelse(tmp$low_evasion == TRUE | tmp$high_clonal == TRUE, yes =TRUE, no = FALSE)
+ tmp$histology <- tmp$simple.hist
+ histology <- factor(histology)
+ sex <- factor(sex, labels = c("Female", "Male"))
+ stage <- factor(allan_stage_no_3b, levels = c('1a', '1b', '2a', '2b', '3a'), labels = c('1a', '1b', '2a', '2b', '3'))
+ Adjuvant.therapy <- factor(Adjuvant.therapy, levels = c('No adjuvant treatment', 'Adjuvant'))
+ low_evasion <- factor(low_evasion, levels = c('FALSE', 'TRUE'))
+ low_evasion_clonal <- factor(low_evasion_clonal, levels = c('FALSE', 'TRUE'))
+ number.unique.hlas <- as.numeric(number.unique.hlas)
+ tmp_model <- coxph(Surv(allan_final_dsf_time, allan_final_dsf == '1') ~ histology + age + sex + stage + packyears + Adjuvant.therapy + low_evasion_clonal, data=tmp)
+ ggforest(tmp_model, data = NULL, main = "Hazard ratio", fontsize = 0.7, refLabel = "reference", noDigits = 2)
+tmp_model <- coxph(Surv(allan_final_dsf_time, allan_final_dsf == '1') ~ histology + age + sex + stage + packyears + Adjuvant.therapy + low_evasion + number.unique.hlas, data=tmp)
+ggforest(tmp_model, data = NULL, main = "Hazard ratio", fontsize = 0.7, refLabel = "reference", noDigits = 2)