Snippet revisions — Bitbucket

+library(dendextend)

+library(ComplexHeatmap)

+library(beeswarm)

+library(survplot)

+library(survminer)

+library(corrplot)

+library(plyr)

+# scripts

+boop.dir <- function(working.dir, set.dir = FALSE){

+  if( !file.exists(working.dir)){

+    if( !dir.create(working.dir, recursive = TRUE) ){

+      stop(paste("Unable to create directory: ", working.dir, "!\n", sep = ''))

+    }

+  }

+  if(set.dir == TRUE){

+    setwd(working.dir)

+  }

+}

+get_split <- function(x, sp = "-", val = 1){

+  out.split = unlist(strsplit(x, split = sp, fixed = TRUE))[val]

+  return(out.split)

+}

+split.n.paste <- function(str, sp = ":", start = 1, end = 3, non.sequential = NULL, co = ':'){

+  boop <- strsplit(x = str, split = sp)

+  boop <- unlist(boop)

+  if(is.null(non.sequential)){

+    boop <- paste(boop[start:end], collapse = co)

+  }

+  if(!is.null(non.sequential)){

+    boop <- paste(boop[non.sequential], collapse = co)

+  }

+  return(boop)

+}

+classify_based_on_TILs <- function(df){

+  df              <- df[!is.na(df$cd8.score.danaher),]

+  #df              <- df[!is.na(df$pathology_TILs),]

+  df_summary      <- aggregate(df$pathology_TILs, by = list(df$immune_cluster), FUN = median, na.rm=TRUE)

+  df_summary$diff <- abs(tmpTILs - df_summary$x)

+  return(df_summary$Group.1[which.min(df_summary$diff)])

+}

+cor.mtest <- function(mat, ...) {

+  mat <- as.matrix(mat)

+  n <- ncol(mat)

+  p.mat <- lowCI.mat <- uppCI.mat <- matrix(NA, n, n)

+  diag(p.mat) <- 0

+  diag(lowCI.mat) <- diag(uppCI.mat) <- 1

+  for (i in 1:(n - 1)) {

+    for (j in (i + 1):n) {

+      tmp <- cor.test(x = mat[,i], y = mat[,j], ...)

+      p.mat[i,j] <- p.mat[j,i] <- tmp$p.value

+      # only "pearson" method provides confidence intervals

+      if (!is.null(tmp$conf.int)) {

+        lowCI.mat[i,j] <- lowCI.mat[j,i] <- tmp$conf.int[1]

+        uppCI.mat[i,j] <- uppCI.mat[j,i] <- tmp$conf.int[2]

+      }

+    }

+  }

+  list(

+    p = p.mat,

+    lowCI = lowCI.mat,

+    uppCI = uppCI.mat

+  )

+}

+plot.with.confidence <- function(x, y, x2 = NULL, y2 = NULL, lab = NULL, ci = 0.95, ...){

+  mini_function <- function(x, y, lab = NULL, ci, ...){

+    # NA's break it

+    z <- c(which(is.na(x)), which(is.na(y)))

+    z <- unique(z)

+    if(length(z) != 0){

+      x <- x[-z]

+      y <- y[-z]

+      lab <- lab[-z]

+    }

+    df <- data.frame(x, y)

+    mod <- lm(y ~ x, data = df)

+    # get predictions

+    new.x <- seq(min(df$x), max(df$x), length.out=length(x))

+    #new.y <- seq(min(y), max(y), length.out = length(y))

+    preds <- predict(mod, newdata = data.frame(x = new.x), interval = 'confidence', level = ci)

+    polygon(c(rev(new.x), new.x), c(rev(preds[ ,3]), preds[ ,2]), col = 'grey90', border = NA)

+    # line

+    abline(mod)

+    # intervals

+    lines(new.x, preds[ ,3], lty = 'dashed', col = 'red')

+    lines(new.x, preds[ ,2], lty = 'dashed', col = 'red')

+    points(x, y, ...)

+    cor.p <- cor.test(x,y, method = 's')$p.value

+    cor.r <- cor.test(x,y, method = 's')$estimate

+    xmin <- par("usr")[1]

+    xmax <- par("usr")[2]

+    ymin <- par("usr")[3]

+    ymax <- par("usr")[4]

+    text(x = 0.7*xmax, y = 0.93*ymax, labels = paste('p: ', formatC(cor.p, format = 'e', digits = 1), '\n', 'rho: ', round(cor.r, digits = 2), sep= ''), pos = 4)

+    # predict the original values

+    real.mod <- predict(mod, level = 0.95, interval = "confidence")

+    # and are the data points inside the CI or outside of it

+    inside.interval <- real.mod[,'lwr'] < y & y < real.mod[,'upr']

+    if(!is.null(lab)){

+      inside.patients  <- lab[inside.interval]

+      outside.patients <- lab[!inside.interval]

+    }

+    if(is.null(lab)){

+      inside.patients  <- c()

+      outside.patients <- c()

+    }

+    full.model <- list(x, y, lab, new.x, preds, mod, inside.patients, outside.patients)

+    names(full.model) <- c('x', 'y', 'lab', 'new.x', 'preds', 'mod', 'inside.patients', 'outside.patients')

+    return(full.model)

+  }

+  # plot empty

+  plot(x = c(x, x2), y = c(y, y2), type = 'n', ...)

+  mini_function(x = x, y = y, lab = lab, ci = ci, ...)

+  if(!is.null(x2) & !is.null(y2)){

+    mini_function(x = x2, y = y2, lab = lab, ci = ci, ...)

+  }

+}

+heatmap.3 <- function(x,

+                      Rowv = TRUE, Colv = if (symm) "Rowv" else TRUE,

+                      distfun = dist,

+                      hclustfun = hclust,

+                      dendrogram = c("both","row", "column", "none"),

+                      symm = FALSE,

+                      scale = c("none","row", "column"),

+                      na.rm = TRUE,

+                      revC = identical(Colv,"Rowv"),

+                      add.expr,

+                      breaks,

+                      symbreaks = max(x < 0, na.rm = TRUE) || scale != "none",

+                      col = "heat.colors",

+                      colsep,

+                      rowsep,

+                      sepcolor = "white",

+                      sepwidth = c(0.05, 0.05),

+                      cellnote,

+                      notecex = 1,

+                      notecol = "cyan",

+                      na.color = par("bg"),

+                      trace = c("none", "column","row", "both"),

+                      tracecol = "cyan",

+                      hline = median(breaks),

+                      vline = median(breaks),

+                      linecol = tracecol,

+                      margins = c(5,5),

+                      ColSideColors,

+                      RowSideColors,

+                      side.height.fraction=0.3,

+                      cexRow = 0.2 + 1/log10(nr),

+                      cexCol = 0.2 + 1/log10(nc),

+                      labRow = NULL,

+                      labCol = NULL,

+                      key = TRUE,

+                      keysize = 1.5,

+                      density.info = c("none", "histogram", "density"),

+                      denscol = tracecol,

+                      symkey = max(x < 0, na.rm = TRUE) || symbreaks,

+                      densadj = 0.25,

+                      main = NULL,

+                      xlab = NULL,

+                      ylab = NULL,

+                      lmat = NULL,

+                      lhei = NULL,

+                      lwid = NULL,

+                      ColSideColorsSize = 1,

+                      RowSideColorsSize = 1,

+                      KeyValueName="Value",...){

+  invalid <- function (x) {

+    if (missing(x) || is.null(x) || length(x) == 0)

+      return(TRUE)

+    if (is.list(x))

+      return(all(sapply(x, invalid)))

+    else if (is.vector(x))

+      return(all(is.na(x)))

+    else return(FALSE)

+  }

+  x <- as.matrix(x)

+  scale01 <- function(x, low = min(x), high = max(x)) {

+    x <- (x - low)/(high - low)

+    x

+  }

+  retval <- list()

+  scale <- if (symm && missing(scale))

+    "none"

+  else match.arg(scale)

+  dendrogram <- match.arg(dendrogram)

+  trace <- match.arg(trace)

+  density.info <- match.arg(density.info)

+  if (length(col) == 1 && is.character(col))

+    col <- get(col, mode = "function")

+  if (!missing(breaks) && (scale != "none"))

+    warning("Using scale=\"row\" or scale=\"column\" when breaks are",

+            "specified can produce unpredictable results.", "Please consider using only one or the other.")

+  if (is.null(Rowv) || is.na(Rowv))

+    Rowv <- FALSE

+  if (is.null(Colv) || is.na(Colv))

+    Colv <- FALSE

+  else if (Colv == "Rowv" && !isTRUE(Rowv))

+    Colv <- FALSE

+  if (length(di <- dim(x)) != 2 || !is.numeric(x))

+    stop("`x' must be a numeric matrix")

+  nr <- di[1]

+  nc <- di[2]

+  if (nr <= 1 || nc <= 1)

+    stop("`x' must have at least 2 rows and 2 columns")

+  if (!is.numeric(margins) || length(margins) != 2)

+    stop("`margins' must be a numeric vector of length 2")

+  if (missing(cellnote))

+    cellnote <- matrix("", ncol = ncol(x), nrow = nrow(x))

+  if (!inherits(Rowv, "dendrogram")) {

+    if (((!isTRUE(Rowv)) || (is.null(Rowv))) && (dendrogram %in%

+                                                 c("both", "row"))) {

+      if (is.logical(Colv) && (Colv))

+        dendrogram <- "column"

+      else dedrogram <- "none"

+      warning("Discrepancy: Rowv is FALSE, while dendrogram is `",

+              dendrogram, "'. Omitting row dendogram.")

+    }

+  }

+  if (!inherits(Colv, "dendrogram")) {

+    if (((!isTRUE(Colv)) || (is.null(Colv))) && (dendrogram %in%

+                                                 c("both", "column"))) {

+      if (is.logical(Rowv) && (Rowv))

+        dendrogram <- "row"

+      else dendrogram <- "none"

+      warning("Discrepancy: Colv is FALSE, while dendrogram is `",

+              dendrogram, "'. Omitting column dendogram.")

+    }

+  }

+  if (inherits(Rowv, "dendrogram")) {

+    ddr <- Rowv

+    rowInd <- order.dendrogram(ddr)

+  }

+  else if (is.integer(Rowv)) {

+    hcr <- hclustfun(distfun(x))

+    ddr <- as.dendrogram(hcr)

+    ddr <- reorder(ddr, Rowv)

+    rowInd <- order.dendrogram(ddr)

+    if (nr != length(rowInd))

+      stop("row dendrogram ordering gave index of wrong length")

+  }

+  else if (isTRUE(Rowv)) {

+    Rowv <- rowMeans(x, na.rm = na.rm)

+    hcr <- hclustfun(distfun(x))

+    ddr <- as.dendrogram(hcr)

+    ddr <- reorder(ddr, Rowv)

+    rowInd <- order.dendrogram(ddr)

+    if (nr != length(rowInd))

+      stop("row dendrogram ordering gave index of wrong length")

+  }

+  else {

+    rowInd <- nr:1

+  }

+  if (inherits(Colv, "dendrogram")) {

+    ddc <- Colv

+    colInd <- order.dendrogram(ddc)

+  }

+  else if (identical(Colv, "Rowv")) {

+    if (nr != nc)

+      stop("Colv = \"Rowv\" but nrow(x) != ncol(x)")

+    if (exists("ddr")) {

+      ddc <- ddr

+      colInd <- order.dendrogram(ddc)

+    }

+    else colInd <- rowInd

+  }

+  else if (is.integer(Colv)) {

+    hcc <- hclustfun(distfun(if (symm)

+      x

+      else t(x)))

+    ddc <- as.dendrogram(hcc)

+    ddc <- reorder(ddc, Colv)

+    colInd <- order.dendrogram(ddc)

+    if (nc != length(colInd))

+      stop("column dendrogram ordering gave index of wrong length")

+  }

+  else if (isTRUE(Colv)) {

+    Colv <- colMeans(x, na.rm = na.rm)

+    hcc <- hclustfun(distfun(if (symm)

+      x

+      else t(x)))

+    ddc <- as.dendrogram(hcc)

+    ddc <- reorder(ddc, Colv)

+    colInd <- order.dendrogram(ddc)

+    if (nc != length(colInd))

+      stop("column dendrogram ordering gave index of wrong length")

+  }

+  else {

+    colInd <- 1:nc

+  }

+  retval$rowInd <- rowInd

+  retval$colInd <- colInd

+  retval$call <- match.call()

+  x <- x[rowInd, colInd]

+  x.unscaled <- x

+  cellnote <- cellnote[rowInd, colInd]

+  if (is.null(labRow))

+    labRow <- if (is.null(rownames(x)))

+      (1:nr)[rowInd]

+  else rownames(x)

+  else labRow <- labRow[rowInd]

+  if (is.null(labCol))

+    labCol <- if (is.null(colnames(x)))

+      (1:nc)[colInd]

+  else colnames(x)

+  else labCol <- labCol[colInd]

+  if (scale == "row") {

+    retval$rowMeans <- rm <- rowMeans(x, na.rm = na.rm)

+    x <- sweep(x, 1, rm)

+    retval$rowSDs <- sx <- apply(x, 1, sd, na.rm = na.rm)

+    x <- sweep(x, 1, sx, "/")

+  }

+  else if (scale == "column") {

+    retval$colMeans <- rm <- colMeans(x, na.rm = na.rm)

+    x <- sweep(x, 2, rm)

+    retval$colSDs <- sx <- apply(x, 2, sd, na.rm = na.rm)

+    x <- sweep(x, 2, sx, "/")

+  }

+  if (missing(breaks) || is.null(breaks) || length(breaks) < 1) {

+    if (missing(col) || is.function(col))

+      breaks <- 16

+    else breaks <- length(col) + 1

+  }

+  if (length(breaks) == 1) {

+    if (!symbreaks)

+      breaks <- seq(min(x, na.rm = na.rm), max(x, na.rm = na.rm),

+                    length = breaks)

+    else {

+      extreme <- max(abs(x), na.rm = TRUE)

+      breaks <- seq(-extreme, extreme, length = breaks)

+    }

+  }

+  nbr <- length(breaks)

+  ncol <- length(breaks) - 1

+  if (class(col) == "function")

+    col <- col(ncol)

+  min.breaks <- min(breaks)

+  max.breaks <- max(breaks)

+  x[x < min.breaks] <- min.breaks

+  x[x > max.breaks] <- max.breaks

+  if (missing(lhei) || is.null(lhei))

+    lhei <- c(keysize, 4)

+  if (missing(lwid) || is.null(lwid))

+    lwid <- c(keysize, 4)

+  if (missing(lmat) || is.null(lmat)) {

+    lmat <- rbind(4:3, 2:1)

+    if (!missing(ColSideColors)) {

+      #if (!is.matrix(ColSideColors))

+      #stop("'ColSideColors' must be a matrix")

+      if (!is.character(ColSideColors) || nrow(ColSideColors) != nc)

+        stop("'ColSideColors' must be a matrix of nrow(x) rows")

+      lmat <- rbind(lmat[1, ] + 1, c(NA, 1), lmat[2, ] + 1)

+      #lhei <- c(lhei[1], 0.2, lhei[2])

+      lhei=c(lhei[1], side.height.fraction*ColSideColorsSize/2, lhei[2])

+    }

+    if (!missing(RowSideColors)) {

+      #if (!is.matrix(RowSideColors))

+      #stop("'RowSideColors' must be a matrix")

+      if (!is.character(RowSideColors) || ncol(RowSideColors) != nr)

+        stop("'RowSideColors' must be a matrix of ncol(x) columns")

+      lmat <- cbind(lmat[, 1] + 1, c(rep(NA, nrow(lmat) - 1), 1), lmat[,2] + 1)

+      #lwid <- c(lwid[1], 0.2, lwid[2])

+      lwid <- c(lwid[1], side.height.fraction*RowSideColorsSize/2, lwid[2])

+    }

+    lmat[is.na(lmat)] <- 0

+  }

+  if (length(lhei) != nrow(lmat))

+    stop("lhei must have length = nrow(lmat) = ", nrow(lmat))

+  if (length(lwid) != ncol(lmat))

+    stop("lwid must have length = ncol(lmat) =", ncol(lmat))

+  op <- par(no.readonly = TRUE)

+  on.exit(par(op))

+  layout(lmat, widths = lwid, heights = lhei, respect = FALSE)

+  if (!missing(RowSideColors)) {

+    if (!is.matrix(RowSideColors)){

+      par(mar = c(margins[1], 0, 0, 0.5))

+      image(rbind(1:nr), col = RowSideColors[rowInd], axes = FALSE)

+    } else {

+      par(mar = c(margins[1], 0, 0, 0.5))

+      rsc = t(RowSideColors[,rowInd, drop=F])

+      rsc.colors = matrix()

+      rsc.names = names(table(rsc))

+      rsc.i = 1

+      for (rsc.name in rsc.names) {

+        rsc.colors[rsc.i] = rsc.name

+        rsc[rsc == rsc.name] = rsc.i

+        rsc.i = rsc.i + 1

+      }

+      rsc = matrix(as.numeric(rsc), nrow = dim(rsc)[1])

+      image(t(rsc), col = as.vector(rsc.colors), axes = FALSE)

+      if (length(rownames(RowSideColors)) > 0) {

+        axis(1, 0:(dim(rsc)[2] - 1)/max(1,(dim(rsc)[2] - 1)), rownames(RowSideColors), las = 2, tick = FALSE)

+      }

+    }

+  }

+  if (!missing(ColSideColors)) {

+    if (!is.matrix(ColSideColors)){

+      par(mar = c(0.5, 0, 0, margins[2]))

+      image(cbind(1:nc), col = ColSideColors[colInd], axes = FALSE)

+    } else {

+      par(mar = c(0.5, 0, 0, margins[2]))

+      csc = ColSideColors[colInd, , drop=F]

+      csc.colors = matrix()

+      csc.names = names(table(csc))

+      csc.i = 1

+      for (csc.name in csc.names) {

+        csc.colors[csc.i] = csc.name

+        csc[csc == csc.name] = csc.i

+        csc.i = csc.i + 1

+      }

+      csc = matrix(as.numeric(csc), nrow = dim(csc)[1])

+      image(csc, col = as.vector(csc.colors), axes = FALSE)

+      if (length(colnames(ColSideColors)) > 0) {

+        axis(2, 0:(dim(csc)[2] - 1)/max(1,(dim(csc)[2] - 1)), colnames(ColSideColors), las = 2, tick = FALSE)

+      }

+    }

+  }

+  par(mar = c(margins[1], 0, 0, margins[2]))

+  x <- t(x)

+  cellnote <- t(cellnote)

+  if (revC) {

+    iy <- nr:1

+    if (exists("ddr"))

+      ddr <- rev(ddr)

+    x <- x[, iy]

+    cellnote <- cellnote[, iy]

+  }

+  else iy <- 1:nr

+  image(1:nc, 1:nr, x, xlim = 0.5 + c(0, nc), ylim = 0.5 + c(0, nr), axes = FALSE, xlab = "", ylab = "", col = col, breaks = breaks, ...)

+  retval$carpet <- x

+  if (exists("ddr"))

+    retval$rowDendrogram <- ddr

+  if (exists("ddc"))

+    retval$colDendrogram <- ddc

+  retval$breaks <- breaks

+  retval$col <- col

+  if (!invalid(na.color) & any(is.na(x))) { # load library(gplots)

+    mmat <- ifelse(is.na(x), 1, NA)

+    image(1:nc, 1:nr, mmat, axes = FALSE, xlab = "", ylab = "",

+          col = na.color, add = TRUE)

+  }

+  axis(1, 1:nc, labels = labCol, las = 2, line = -0.5, tick = 0,

+       cex.axis = cexCol)

+  if (!is.null(xlab))

+    mtext(xlab, side = 1, line = margins[1] - 1.25)

+  axis(4, iy, labels = labRow, las = 2, line = -0.5, tick = 0,

+       cex.axis = cexRow)

+  if (!is.null(ylab))

+    mtext(ylab, side = 4, line = margins[2] - 1.25)

+  if (!missing(add.expr))

+    eval(substitute(add.expr))

+  if (!missing(colsep))

+    for (csep in colsep) rect(xleft = csep + 0.5, ybottom = rep(0, length(csep)), xright = csep + 0.5 + sepwidth[1], ytop = rep(ncol(x) + 1, csep), lty = 1, lwd = 1, col = sepcolor, border = sepcolor)

+  if (!missing(rowsep))

+    for (rsep in rowsep) rect(xleft = 0, ybottom = (ncol(x) + 1 - rsep) - 0.5, xright = nrow(x) + 1, ytop = (ncol(x) + 1 - rsep) - 0.5 - sepwidth[2], lty = 1, lwd = 1, col = sepcolor, border = sepcolor)

+  min.scale <- min(breaks)

+  max.scale <- max(breaks)

+  x.scaled <- scale01(t(x), min.scale, max.scale)

+  if (trace %in% c("both", "column")) {

+    retval$vline <- vline

+    vline.vals <- scale01(vline, min.scale, max.scale)

+    for (i in colInd) {

+      if (!is.null(vline)) {

+        abline(v = i - 0.5 + vline.vals, col = linecol,

+               lty = 2)

+      }

+      xv <- rep(i, nrow(x.scaled)) + x.scaled[, i] - 0.5

+      xv <- c(xv[1], xv)

+      yv <- 1:length(xv) - 0.5

+      lines(x = xv, y = yv, lwd = 1, col = tracecol, type = "s")

+    }

+  }

+  if (trace %in% c("both", "row")) {

+    retval$hline <- hline

+    hline.vals <- scale01(hline, min.scale, max.scale)

+    for (i in rowInd) {

+      if (!is.null(hline)) {

+        abline(h = i + hline, col = linecol, lty = 2)

+      }

+      yv <- rep(i, ncol(x.scaled)) + x.scaled[i, ] - 0.5

+      yv <- rev(c(yv[1], yv))

+      xv <- length(yv):1 - 0.5

+      lines(x = xv, y = yv, lwd = 1, col = tracecol, type = "s")

+    }

+  }

+  if (!missing(cellnote))

+    text(x = c(row(cellnote)), y = c(col(cellnote)), labels = c(cellnote),

+         col = notecol, cex = notecex)

+  par(mar = c(margins[1], 0, 0, 0))

+  if (dendrogram %in% c("both", "row")) {

+    plot(ddr, horiz = TRUE, axes = FALSE, yaxs = "i", leaflab = "none")

+  }

+  else plot.new()

+  par(mar = c(0, 0, if (!is.null(main)) 5 else 0, margins[2]))

+  if (dendrogram %in% c("both", "column")) {

+    plot(ddc, axes = FALSE, xaxs = "i", leaflab = "none")

+  }

+  else plot.new()

+  if (!is.null(main))

+    title(main, cex.main = 1.5 * op[["cex.main"]])

+  if (key) {

+    par(mar = c(5, 4, 2, 1), cex = 0.75)

+    tmpbreaks <- breaks

+    if (symkey) {

+      max.raw <- max(abs(c(x, breaks)), na.rm = TRUE)

+      min.raw <- -max.raw

+      tmpbreaks[1] <- -max(abs(x), na.rm = TRUE)

+      tmpbreaks[length(tmpbreaks)] <- max(abs(x), na.rm = TRUE)

+    }

+    else {

+      min.raw <- min(x, na.rm = TRUE)

+      max.raw <- max(x, na.rm = TRUE)

+    }

+    z <- seq(min.raw, max.raw, length = length(col))

+    image(z = matrix(z, ncol = 1), col = col, breaks = tmpbreaks,

+          xaxt = "n", yaxt = "n")

+    par(usr = c(0, 1, 0, 1))

+    lv <- pretty(breaks)

+    xv <- scale01(as.numeric(lv), min.raw, max.raw)

+    axis(1, at = xv, labels = lv)

+    if (scale == "row")

+      mtext(side = 1, "Row Z-Score", line = 2)

+    else if (scale == "column")

+      mtext(side = 1, "Column Z-Score", line = 2)

+    else mtext(side = 1, KeyValueName, line = 2)

+    if (density.info == "density") {

+      dens <- density(x, adjust = densadj, na.rm = TRUE)

+      omit <- dens$x < min(breaks) | dens$x > max(breaks)

+      dens$x <- dens$x[-omit]

+      dens$y <- dens$y[-omit]

+      dens$x <- scale01(dens$x, min.raw, max.raw)

+      lines(dens$x, dens$y/max(dens$y) * 0.95, col = denscol,

+            lwd = 1)

+      axis(2, at = pretty(dens$y)/max(dens$y) * 0.95, pretty(dens$y))

+      title("Color Key\nand Density Plot")

+      par(cex = 0.5)

+      mtext(side = 2, "Density", line = 2)

+    }

+    else if (density.info == "histogram") {

+      h <- hist(x, plot = FALSE, breaks = breaks)

+      hx <- scale01(breaks, min.raw, max.raw)

+      hy <- c(h$counts, h$counts[length(h$counts)])

+      lines(hx, hy/max(hy) * 0.95, lwd = 1, type = "s",

+            col = denscol)

+      axis(2, at = pretty(hy)/max(hy) * 0.95, pretty(hy))

+      title("Color Key\nand Histogram")

+      par(cex = 0.5)

+      mtext(side = 2, "Count", line = 2)

+    }

+    else title("Color Key")

+  }

+  else plot.new()

+  retval$colorTable <- data.frame(low = retval$breaks[-length(retval$breaks)],

+                                  high = retval$breaks[-1], color = retval$col)

+  invisible(retval)

+}

+output.dir           <- '~/Google Drive/SwantonLab/Rimages/test/'

+boop.dir(output.dir)

+figure.dir           <- paste(output.dir, 'figures-jan15/', sep = '')

+boop.dir(figure.dir)

+# load some data -- to be saved in an .RData file later #

+load('~/Google Drive/SwantonLab/Rimages/2019-01-25-allDataForPlots-again.RData')

+orig_par <- par()

+# ok doing things

+all_danaher <- immune.subsets[,c(grep(pattern = 'danah', colnames(immune.subsets), value = TRUE))]

+all_danaher <- all_danaher[,c(grep(pattern = 'resid', colnames(all_danaher), value = TRUE, invert = TRUE))]

+colnames(all_danaher) <- sapply(colnames(all_danaher), FUN = get_split, sp = '.score', val = 1)

+all_danaher_accepted <- all_danaher

+# by subtypes

+luad_danaher <- all_danaher_accepted[which(substr(rownames(all_danaher_accepted), 1, 8) %in% patient.summary$CRUKid[which(patient.summary$simple.hist %in%  c('LUAD'))]),]

+luad_danaher_scaled <- scale(luad_danaher)

+lusc_danaher <- all_danaher_accepted[which(substr(rownames(all_danaher_accepted), 1, 8) %in% patient.summary$CRUKid[which(patient.summary$simple.hist == 'LUSC')]),]

+lusc_danaher_scaled <- scale(lusc_danaher)

+other_danaher <- all_danaher_accepted[which(substr(rownames(all_danaher_accepted), 1, 8) %in% patient.summary$CRUKid[which(patient.summary$simple.hist == 'other')]),]

+other_danaher_scaled <- scale(other_danaher)

+# empty variables

+all_clusters         <- c()

+all_classification   <- c()

+all_immune_distances <- c()

+hmcols         <- colorRampPalette(c('dark blue', 'white','dark red'))(100)

+#lusc

+hm   <- heatmap.3(t(lusc_danaher_scaled), trace = 'none', hclustfun = function(x) hclust(x, method= 'ward.D2'), dendrogram = 'column', col = hmcols, na.rm=TRUE)

+hm_d <- lapply(unique(substr(rownames(lusc_danaher_scaled), 1, 8)), FUN = function(x) {dist(lusc_danaher_scaled[grep(pattern = x, rownames(lusc_danaher_scaled)),,drop=FALSE])} )

+names(hm_d)   <- unique(substr(rownames(lusc_danaher_scaled), 1, 8))

+all_immune_distances <- c(all_immune_distances, hm_d)

+tmp_patients <- unique(substr(rownames(hm$carpet), 1, 8))

+tmp_clusters <- cutree(hm$colDendrogram, k = 2)

+bp <- boxplot(cd8 ~ tmp_clusters, data = lusc_danaher_scaled, plot=FALSE)

+tmp_clusters <- ifelse(tmp_clusters == which.min(bp$stats[3,]), yes = 1, no = 2)

+all_clusters <- c(all_clusters, tmp_clusters)

+tmp_df       <- as.data.frame(matrix(data = 0, ncol = length(tmp_patients), nrow = length(tmp_clusters), dimnames = list(names(tmp_clusters), tmp_patients)))

+for(x in rownames(tmp_df)){

+  tmp_df[x,substr(x, 1, 8)] <- tmp_clusters[x]

+}

+# classify groups

+tmp_classify <- apply(tmp_df, MARGIN = 2, table)

+tmp_classify_col <- ifelse(lapply(tmp_classify, length) == 3, yes = '#df9b2f', no = ifelse(unlist(lapply(tmp_classify, FUN = function(x) any(grepl(pattern = 1, x = names(x))))), yes = '#30ABDF', no = '#BD2131'))

+all_classification <- c(all_classification, tmp_classify_col)

+tmp_classify_col <- data.frame(tmp_classify_col, stringsAsFactors = FALSE)

+tmp_classify_col$tmp <- sapply(rownames(tmp_classify_col), FUN = function(x){which(colnames(tmp_df) == x)})

+lusc_order <- rownames(tmp_classify_col)[with(tmp_classify_col, order(factor(tmp_classify_col, levels = c('#30ABDF', '#df9b2f', '#BD2131')), tmp))]

+tmp_df     <- tmp_df[,lusc_order]

+# and remove the unnecessary column and re-order the colors

+tmp_classify_col$tmp <- NULL

+tmp_classify_col <- tmp_classify_col[lusc_order,,drop=FALSE]

+# annotate

+col_list <- lapply(colnames(tmp_df), FUN = function(x){return(c("1" = '#30ABDF', "0" = "#e9e9e9", "2" = '#BD2131'))})

+names(col_list) <- colnames(tmp_df)

+ha <- HeatmapAnnotation(tmp_df, show_legend = FALSE, gap = unit(0, 'mm'),show_annotation_name = TRUE, which = 'column', gp = gpar(col = 'white'), annotation_name_gp = gpar(cex = 0.6, col = tmp_classify_col$tmp_classify_col), na_col = 'brown',

+                        col = col_list)

+hm_lusc <- Heatmap(t(lusc_danaher_scaled),

+                   col = circlize::colorRamp2(c(-3.5, 0, 3.5), c('dark blue', "white", 'dark red')),

+                   clustering_method_columns = 'ward.D2', clustering_method_rows = 'ward.D2',

+                   km = 1,

+                   show_column_dend = TRUE, show_row_dend = FALSE,

+                   show_column_names = FALSE,

+                   show_heatmap_legend = FALSE,

+                   bottom_annotation = ha,

+                   bottom_annotation_height = unit(4, 'in'),

+                   row_names_gp = gpar(fontsize = 10))

+lusc_cell_type_order <- rev(colnames(lusc_danaher_scaled)[hm$rowInd])

+#luad -- all

+hm <- heatmap.3(t(luad_danaher_scaled[,lusc_cell_type_order]), trace = 'none', hclustfun = function(x) hclust(x, method= 'ward.D2'), dendrogram = 'column', col = hmcols, na.rm=TRUE, Rowv = FALSE)

+#hm <- heatmap.3(t(luad_danaher_scaled), trace = 'none', hclustfun = function(x) hclust(x, method= 'ward.D2'), dendrogram = 'column', col = hmcols, na.rm=TRUE)

+hm_d <- lapply(unique(substr(rownames(luad_danaher_scaled), 1, 8)), FUN = function(x) {dist(luad_danaher_scaled[grep(pattern = x, rownames(luad_danaher_scaled)),,drop=FALSE])} )

+names(hm_d)   <- unique(substr(rownames(luad_danaher_scaled), 1, 8))

+all_immune_distances <- c(all_immune_distances, hm_d)

+tmp_patients <- unique(substr(rownames(hm$carpet), 1, 8))

+tmp_clusters <- cutree(hm$colDendrogram, k = 2)

+bp <- boxplot(cd8 ~ tmp_clusters, data = luad_danaher_scaled, plot=FALSE)

+tmp_clusters <- ifelse(tmp_clusters == which.min(bp$stats[3,]), yes = 1, no = 2)

+all_clusters <- c(all_clusters, tmp_clusters)

+tmp_df       <- as.data.frame(matrix(data = 0, ncol = length(tmp_patients), nrow = length(tmp_clusters), dimnames = list(names(tmp_clusters), tmp_patients)))

+for(x in rownames(tmp_df)){

+  tmp_df[x,substr(x, 1, 8)] <- tmp_clusters[x]

+}

+# classify groups

+tmp_classify <- apply(tmp_df, MARGIN = 2, table)

+tmp_classify_col <- ifelse(lapply(tmp_classify, length) == 3, yes = '#df9b2f', no = ifelse(unlist(lapply(tmp_classify, FUN = function(x) any(grepl(pattern = 1, x = names(x))))), yes = '#30ABDF', no = '#BD2131'))

+all_classification <- c(all_classification, tmp_classify_col)

+tmp_classify_col <- data.frame(tmp_classify_col, stringsAsFactors = FALSE)

+tmp_classify_col$tmp <- sapply(rownames(tmp_classify_col), FUN = function(x){which(colnames(tmp_df) == x)})

+luad_order <- rownames(tmp_classify_col)[with(tmp_classify_col, order(factor(tmp_classify_col, levels = c('#30ABDF', '#df9b2f', '#BD2131')), tmp))]

+tmp_df     <- tmp_df[,luad_order]

+# and remove the unnecessary column and re-order the colors

+tmp_classify_col$tmp <- NULL

+tmp_classify_col <- tmp_classify_col[luad_order,,drop=FALSE]

+# annotate

+col_list <- lapply(colnames(tmp_df), FUN = function(x){return(c("1" = '#30ABDF', "0" = "#e9e9e9", "2" = '#BD2131'))})

+names(col_list) <- colnames(tmp_df)

+ha <- HeatmapAnnotation(tmp_df, show_legend = FALSE, gap = unit(0, 'mm'),show_annotation_name = TRUE, which = 'column', gp = gpar(col = 'white'), annotation_name_gp = gpar(cex = 0.6, col = tmp_classify_col$tmp_classify_col), na_col = 'brown',

+                        col = col_list)

+hm_luad <- Heatmap(t(luad_danaher_scaled[,lusc_cell_type_order]),

+                   col = circlize::colorRamp2(c(-3.5, 0, 3.5), c('dark blue', "white", 'dark red')),

+                   clustering_method_columns = 'ward.D2', cluster_rows = FALSE,

+                   show_column_dend = TRUE, show_row_dend = FALSE,

+                   show_column_names = FALSE,

+                   show_heatmap_legend = FALSE,

+                   bottom_annotation = ha,

+                   bottom_annotation_height = unit(4, 'in'),

+                   row_names_gp = gpar(fontsize = 10))

+#other

+hm <- heatmap.3(t(other_danaher_scaled), trace = 'none', hclustfun = function(x) hclust(x, method= 'ward.D2'), dendrogram = 'none', col = hmcols)

+hm_d <- lapply(unique(substr(rownames(other_danaher_scaled), 1, 8)), FUN = function(x) {dist(other_danaher_scaled[grep(pattern = x, rownames(other_danaher_scaled)),,drop=FALSE])} )

+names(hm_d)   <- unique(substr(rownames(other_danaher_scaled), 1, 8))

+all_immune_distances <- c(all_immune_distances, hm_d)

+tmp_patients <- unique(substr(rownames(hm$carpet), 1, 8))

+tmp_clusters <- cutree(hm$colDendrogram, k = 2)

+bp <- boxplot(cd8 ~ tmp_clusters, data = other_danaher_scaled, plot=FALSE)

+tmp_clusters <- ifelse(tmp_clusters == which.min(bp$stats[3,]), yes = 1, no = 2)

+all_clusters <- c(all_clusters, tmp_clusters)

+tmp_df       <- as.data.frame(matrix(data = 0, ncol = length(tmp_patients), nrow = length(tmp_clusters), dimnames = list(names(tmp_clusters), tmp_patients)))

+for(x in rownames(tmp_df)){

+  tmp_df[x,substr(x, 1, 8)] <- tmp_clusters[x]

+}

+# classify groups

+tmp_classify <- alply(.data = tmp_df, .margins = 2, .fun = table)

+names(tmp_classify) <- attr(tmp_classify, "split_labels")$X1

+tmp_classify_col <- ifelse(lapply(tmp_classify, length) == 3, yes = '#df9b2f', no = ifelse(unlist(lapply(tmp_classify, FUN = function(x) any(grepl(pattern = 1, x = names(x))))), yes = '#30ABDF', no = '#BD2131'))

+all_classification <- c(all_classification, tmp_classify_col)

+tmp_classify_col <- data.frame(tmp_classify_col, stringsAsFactors = FALSE)

+tmp_classify_col$tmp <- sapply(rownames(tmp_classify_col), FUN = function(x){which(colnames(tmp_df) == x)})

+other_order <- rownames(tmp_classify_col)[with(tmp_classify_col, order(factor(tmp_classify_col, levels = c('#30ABDF', '#df9b2f', '#BD2131')), tmp))]

+tmp_df     <- tmp_df[,other_order]

+# and remove the unnecessary column and re-order the colors

+tmp_classify_col$tmp <- NULL

+tmp_classify_col <- tmp_classify_col[other_order,,drop=FALSE]

+# annotate

+col_list <- lapply(colnames(tmp_df), FUN = function(x){return(c("1" = '#30ABDF', "0" = "#e9e9e9", "2" = '#BD2131'))})

+names(col_list) <- colnames(tmp_df)

+ha <- HeatmapAnnotation(tmp_df, show_legend = FALSE, gap = unit(0, 'mm'),show_annotation_name = TRUE, which = 'column', gp = gpar(col = 'white'), annotation_name_gp = gpar(cex = 0.6, col = tmp_classify_col$tmp_classify_col), na_col = 'brown',

+                        col = col_list)

+hm_other <- Heatmap(t(other_danaher_scaled),

+                    col = circlize::colorRamp2(c(-3.5, 0, 3.5), c('dark blue', "white", 'dark red')),

+                    clustering_method_columns = 'ward.D2', clustering_method_rows = 'ward.D2',

+                    show_column_dend = TRUE, show_row_dend = FALSE,

+                    show_column_names = FALSE,

+                    show_row_names = TRUE,

+                    show_heatmap_legend = FALSE,

+                    bottom_annotation = ha,

+                    bottom_annotation_height = unit(4, 'in'),

+                    row_names_gp = gpar(fontsize = 10))

+tx100_immune_cluster        <- ifelse(is.na(all_clusters), yes = NA, no = ifelse(all_clusters == 1, yes = 'low', no = 'high'))

+names(tx100_immune_cluster) <- names(all_clusters)

+tx100_immune_classification <- ifelse(is.na(all_classification), yes = NA, no = ifelse(all_classification == '#BD2131', yes = 'high', no = ifelse(all_classification == '#df9b2f', yes = 'mixed', no = 'low')))

+names(tx100_immune_classification) <- names(all_classification)

+# add clusters to patient summary

+patient.summary$immune_classification <- tx100_immune_classification[match(patient.summary$CRUKid, names(tx100_immune_classification))]

+patient.summary.region$immune_cluster <- tx100_immune_cluster[match(patient.summary.region$CRUKidRegion, names(tx100_immune_cluster))]

+patient.summary.region$immune_classification <- patient.summary$immune_classification[match(patient.summary.region$CRUKid, patient.summary$CRUKid)]

+# salvage the samples without RNAseq...

+salvaged_clusters <- c()

+for(x in patient.summary.region$CRUKidRegion[is.na(patient.summary.region$cd8.score.danaher)]){

+  print(x)

+  tmpTILs  <- patient.summary.region[x,'pathology_TILs']

+  if(is.na(tmpTILs)){salvaged_clusters <- c(salvaged_clusters, NA)}

+  tmp_hist <- patient.summary.region[x,'simple.hist']

+  tmp_sub  <- patient.summary.region[x,'subtype']

+  if(tmp_hist %in% c('LUSC', 'other', 'LUAD')){

+    df     <- patient.summary.region[which(patient.summary.region$simple.hist == tmp_hist),]

+    out <- classify_based_on_TILs(df)

+  }

+  salvaged_clusters <- c(salvaged_clusters, out)

+}

+names(salvaged_clusters) <- patient.summary.region$CRUKidRegion[is.na(patient.summary.region$cd8.score.danaher)]

+salvaged_classification <- sapply(unique(patient.summary.region$CRUKid[is.na(patient.summary.region$cd8.score.danaher)]), FUN = function(x){

+  new_clusters <- salvaged_clusters[grep(pattern = x, names(salvaged_clusters))]

+  old_clusters <- tx100_immune_cluster[grep(pattern = x, names(tx100_immune_cluster))]

+  all_clusters <- c(new_clusters, old_clusters)

+  all_clusters <- all_clusters[!is.na(all_clusters)]

+  if(length(all_clusters) == 0){return(NA)}

+  if(length(table(all_clusters)) == 1){return(names(table(all_clusters)))}

+  if(length(table(all_clusters)) > 1){return('mixed')}

+})

+# combine the two approaches

+patient.summary.region$salvaged_clusters       <- salvaged_clusters[match(patient.summary.region$CRUKidRegion, names(salvaged_clusters))]

+patient.summary.region$salvaged_classification <- salvaged_classification[match(patient.summary.region$CRUKid, names(salvaged_classification))]

+patient.summary$salvaged_classification        <- salvaged_classification[match(patient.summary$CRUKid, names(salvaged_classification))]

+# save the original clustering

+patient.summary.region$orig_immune_cluster        <- patient.summary.region$immune_cluster

+patient.summary.region$orig_immune_classification <- patient.summary.region$immune_classification

+patient.summary$orig_immune_classification        <- patient.summary$immune_classification

+# and update immune clusters/immune classification

+patient.summary.region$immune_cluster[is.na(patient.summary.region$immune_cluster)] <- patient.summary.region$salvaged_clusters[is.na(patient.summary.region$immune_cluster)]

+patient.summary$immune_classification <- sapply(1:nrow(patient.summary), FUN = function(x){

+  old <- patient.summary$orig_immune_classification[x]

+  new <- patient.summary$salvaged_classification[x]

+  if(is.na(new)){return(old)}

+  else{return(new)}

+})

+patient.summary.region$immune_classification <- patient.summary$immune_classification[match(patient.summary.region$CRUKid, patient.summary$CRUKid)]

+## if we want to check the results when we don't use TILs

+# patient.summary$immune_classification <- patient.summary$orig_immune_classification

+# patient.summary.region$immune_classification <- patient.summary.region$orig_immune_classification

+# patient.summary.region$immune_cluster <- patient.summary.region$orig_immune_cluster

+patient.summary$high_immune <- ifelse(patient.summary$immune_classification == 'high', yes = TRUE, no = FALSE)

+patient.summary$low_evasion <- ifelse(patient.summary$high_immune == TRUE | (patient.summary$immune_edited == FALSE & patient.summary$HLA_disruption == FALSE), yes = TRUE, no = FALSE)

+mutTableRegion$immune_classification <- patient.summary$immune_classification[match(mutTableRegion$CRUKid, patient.summary$CRUKid)]

+mutTableRegion$immune_cluster <- patient.summary.region$immune_cluster[match(mutTableRegion$CRUKRegionID, patient.summary.region$CRUKidRegion)]

+mutTableAll.together.filtered$immune_classification <- patient.summary$immune_classification[match(mutTableAll.together.filtered$CRUKid, patient.summary$CRUKid)]

+keep_to_add$immune_classification <- patient.summary$immune_classification[match(keep_to_add$CRUKid, patient.summary$CRUKid)]

+keep_to_add$immune_cluster        <- patient.summary.region$immune_cluster[match(keep_to_add$CRUKRegionID, patient.summary.region$CRUKidRegion)]

+# Figure 1A-B

+{

+  hm_luad

+  hm_lusc

+  par(orig_par)

+}

+# Figure 2A

+{

+  par(mfrow = c(1,1))

+  plot(total_comparisons[which(total_comparisons$simple.hist == 'LUAD'),'genomic'], total_comparisons[which(total_comparisons$simple.hist == 'LUAD'),'immune'], xlab = 'Pairwise genomic distance', ylab = 'Pairwise immune distance', pch = 16, col = total_comparisons[which(total_comparisons$simple.hist == 'LUAD'),'color'], ylim = c(0,10), las = 1)

+  p.val <- cor.test(total_comparisons[which(total_comparisons$simple.hist == 'LUAD'),'genomic'], total_comparisons[which(total_comparisons$simple.hist == 'LUAD'),'immune'], method = 's')

+  legend(x = 'topright', legend = paste('p: ', formatC(p.val$p.value, digits = 1, format = 'e'), '\n', 'rho: ', formatC(p.val$estimate, digits = 1, format = 'e'), sep= ''), bty = 'n')

+  points(total_comparisons[which(total_comparisons$simple.hist == 'LUSC'),'genomic'], total_comparisons[which(total_comparisons$simple.hist == 'LUSC'),'immune'], xlab = 'Pairwise genomic distance', ylab = 'Pairwise immune distance', pch = 16, col = total_comparisons[which(total_comparisons$simple.hist == 'LUSC'),'color'])

+  p.val <- cor.test(total_comparisons[which(total_comparisons$simple.hist == 'LUSC'),'genomic'], total_comparisons[which(total_comparisons$simple.hist == 'LUSC'),'immune'], method = 's')

+  legend(x = 'bottomright', legend = paste('p: ', formatC(p.val$p.value, digits = 1, format = 'e'), '\n', 'rho: ', formatC(p.val$estimate, digits = 1, format = 'e'), sep= ''), bty = 'n')

+}

+# Figure 2B-C

+{

+  par(mfrow = c(1,2))

+  tmp <- gsub(pattern = 'low', replacement = '#30ABDF', x = gsub(pattern = 'high', replacement = '#BD2131', x = patient.summary.region$immune_cluster[which(patient.summary.region$simple.hist == 'LUAD')]))

+  boxplot(shannon.ith ~ factor(immune_classification, levels = c('low', 'mixed', 'high')), data = patient.summary.region[which(patient.summary.region$simple.hist == 'LUAD'),], las = 1, ylim = c(0,2.4), main = 'Lung adeno.', ylab = 'Shannon entropy')

+  beeswarm(shannon.ith ~ factor(immune_classification, levels = c('low', 'mixed', 'high')), data = patient.summary.region[which(patient.summary.region$simple.hist == 'LUAD'),], add = TRUE, pch = 16, pwcol = tmp, cex = 1.6)

+  p.val <- wilcox.test(shannon.ith ~ immune_classification == 'low', data = patient.summary.region[which(patient.summary.region$simple.hist == 'LUAD'),])$p.value

+  legend('bottomright', paste('p -- low-mixed/high : ', formatC(p.val, format = 'e', digits = 1), sep = ''), bty = 'n')

+  tmp <- gsub(pattern = 'low', replacement = '#30ABDF', x = gsub(pattern = 'high', replacement = '#BD2131', x = patient.summary.region$immune_cluster[which(patient.summary.region$simple.hist == 'LUSC')]))

+  boxplot(shannon.ith ~ factor(immune_classification, levels = c('low', 'mixed', 'high')), data = patient.summary.region[which(patient.summary.region$simple.hist == 'LUSC'),], las = 1, ylim = c(0,2.4), main = 'Lung squam.', ylab = 'Shannon entropy')

+  beeswarm(shannon.ith ~ factor(immune_classification, levels = c('low', 'mixed', 'high')), data = patient.summary.region[which(patient.summary.region$simple.hist == 'LUSC'),], add = TRUE, pch = 16, pwcol = tmp, cex = 1.6)

+  p.val <- wilcox.test(shannon.ith ~ immune_classification == 'low', data = patient.summary.region[which(patient.summary.region$simple.hist == 'LUSC'),])$p.value

+  legend('bottomright', paste('p -- low-mixed/high : ', formatC(p.val, format = 'e', digits = 1), sep = ''), bty = 'n')

+}

+# Figure 2D

+{

+  tmp <- patient.summary[,c('ie.all.updated', 'ie.clonal.updated', 'ie.subclonal.updated', 'simple.hist', 'immune_classification')]

+  tmp <- tmp[complete.cases(tmp),]

+  tmp <- tmp[which(tmp$simple.hist != 'other'),]

+  par(mfrow = c(1,3))

+  p.val <- t.test(tmp$ie.clonal.updated[which(tmp$immune_classification == 'high')], tmp$ie.subclonal.updated[which(tmp$immune_classification == 'high')], paired = TRUE)$p.value # less IE in subclones (because clonal IE - subclonal IE < 0)

+  stripchart(tmp[which(tmp$immune_classification == 'high'),c('ie.clonal.updated', 'ie.subclonal.updated')], vertical = TRUE, pch= 16, at = c(0.9, 1.2), xlim =c(0.7,1.4), ylim = c(0.7, 1.22), las = 1, group.names = c('Clonal', 'Subclonal'), cex = 1.5, ylab = 'Immunoediting')

+  segments(x0 = 0.9, y0 = tmp$ie.clonal.updated[which(tmp$immune_classification == 'high')], x1 = 1.2, y1 = tmp$ie.subclonal.updated[which(tmp$immune_classification == 'high')], col = '#BD2131', lwd = 3)

+  legend(legend = paste('paired t-test p: ', formatC(p.val, format = 'e', digits = 1), sep = ''), x = 'topleft', bty = 'n', cex = 0.9)

+  p.val <- t.test(tmp$ie.clonal.updated[which(tmp$immune_classification == 'mixed')], tmp$ie.subclonal.updated[which(tmp$immune_classification == 'mixed')], paired = TRUE)$p.value # less IE in subclones (because clonal IE - subclonal IE < 0)

+  stripchart(tmp[which(tmp$immune_classification == 'mixed'),c('ie.clonal.updated', 'ie.subclonal.updated')], vertical = TRUE, pch= 16, at = c(0.9, 1.2), xlim =c(0.7,1.4), ylim = c(0.7, 1.22), las = 1, group.names = c('Clonal', 'Subclonal'), cex = 1.5, ylab = 'Immunoediting')

+  segments(x0 = 0.9, y0 = tmp$ie.clonal.updated[which(tmp$immune_classification == 'mixed')], x1 = 1.2, y1 = tmp$ie.subclonal.updated[which(tmp$immune_classification == 'mixed')], col = '#df9b2f', lwd = 3)

+  legend(legend = paste('paired t-test p: ', formatC(p.val, format = 'e', digits = 1), sep = ''), x = 'topleft', bty = 'n', cex = 0.9)

+  p.val <- t.test(tmp$ie.clonal.updated[which(tmp$immune_classification == 'low')], tmp$ie.subclonal.updated[which(tmp$immune_classification == 'low')], paired = TRUE)$p.value # less IE in subclones (because clonal IE - subclonal IE < 0)

+  stripchart(tmp[which(tmp$immune_classification == 'low'),c('ie.clonal.updated', 'ie.subclonal.updated')], vertical = TRUE, pch= 16, at = c(0.9, 1.2), xlim =c(0.7,1.4), ylim = c(0.7, 1.22), las = 1, group.names = c('Clonal', 'Subclonal'), cex = 1.5, ylab = 'Immunoediting')

+  segments(x0 = 0.9, y0 = tmp$ie.clonal.updated[which(tmp$immune_classification == 'low')], x1 = 1.2, y1 = tmp$ie.subclonal.updated[which(tmp$immune_classification == 'low')], col = '#30ABDF', lwd = 3)

+  legend(legend = paste('paired t-test p: ', formatC(p.val, format = 'e', digits = 1), sep = ''), x = 'topleft', bty = 'n', cex = 0.9)

+}

+# Figure 2F

+{

+  tmp <- mutTableRegion[which(mutTableRegion$mutation_id %in% mutTableAll.together.filtered$mutation_id[!is.na(mutTableAll.together.filtered$CPNremove)]),]

+  tmp <- tmp[which(tmp$nonsynonymous == TRUE),]

+  tmp <- tmp[which(tmp$DriverMut == FALSE),]

+  tmp <- tmp[,c('CRUKRegionID', 'mutation_id', 'count', 'strong.count', 'any.HLA.loss', 'immune_classification', 'immune_cluster', 'nonsynonymous', 'DriverMut', 'CPNremove')]

+  to_add <- keep_to_add[,c('CRUKRegionID', 'mutation_id', 'count', 'strong.count', 'any.HLA.loss', 'immune_classification', 'immune_cluster', 'nonsynonymous', 'DriverMut', 'CPNremove')]

+  tmp <- rbind(tmp, to_add)

+  par(mfrow = c(2,1))

+  tmp_nodup  <- tmp[!duplicated(tmp$mutation_id),]

+  to_barplot <- sort(table(substr(tmp_nodup$CRUKRegionID, 1, 8)[which(tmp_nodup$count == 1 & tmp_nodup$CPNremove == TRUE)]), decreasing=TRUE)

+  barplot(to_barplot, las = 1, names = NA, ylim = c(0,40), col = 'black', ylab = 'Number CN loss neo', xlab = 'Tumor sample')

+  props <- to_barplot/(patient.summary$ClonalNeo[match(names(to_barplot), patient.summary$CRUKid)])

+  names(props) <- NA

+  barplot(rbind(props, 0.5-props), las = 1, ylim = c(0,0.5), col = c('black', 'lightgrey'), ylab = '% CN loss neo', xlab = 'Tumor sample', border = NA)

+}

+# Figure 2G

+{

+  tmp <- mutTableRegion[which(mutTableRegion$mutation_id %in% mutTableAll.together.filtered$mutation_id[!is.na(mutTableAll.together.filtered$CPNremove)]),]

+  tmp <- tmp[which(tmp$nonsynonymous == TRUE),]

+  tmp <- tmp[which(tmp$DriverMut == FALSE),]

+  tmp <- tmp[,c('regionMutID', 'mutation_id', 'count', 'strong.count', 'any.HLA.loss', 'immune_classification', 'immune_cluster', 'nonsynonymous', 'DriverMut', 'CPNremove')]

+  to_add <- keep_to_add[,c('regionMutID', 'mutation_id', 'count', 'strong.count', 'any.HLA.loss', 'immune_classification', 'immune_cluster', 'nonsynonymous', 'DriverMut', 'CPNremove')]

+  tmp <- rbind(tmp, to_add)

+  # tmp$RegionID <- sapply(tmp$regionMutID, split.n.paste, sp = ':', start=1, end=2, co=':')

+  # tmp$SampleID <- sapply(tmp$regionMutID, split.n.paste, sp = ':', start=1, end=1, co=':')

+  # # downsample to 1 region per patient

+  # all_patients <- unique(tmp$SampleID)

+  # keep_regions <- c()

+  # for(x in all_patients){keep_regions <- c(keep_regions, sample(grep(pattern = x, unique(tmp$RegionID), value = TRUE), size = 1))}

+  # tmp <- tmp[which(tmp$RegionID %in% keep_regions),]

+  {par(mfrow = c(1,1))

+    ests   <- c()

+    p.vals <- c()

+    ors1   <- c()

+    ors2   <- c()

+    neo_ratio <- c()

+    non_neo_ratio <- c()

+    tmp2 <- table(tmp$count, tmp$CPNremove)

+    f.test  <- fisher.test(tmp2)

+    ests    <- c(ests, f.test$estimate)

+    p.vals  <- c(p.vals, f.test$p.value)

+    ors1    <- c(ors1, f.test$conf.int[1])

+    ors2    <- c(ors2, f.test$conf.int[2])

+    neo_ratio <- c(neo_ratio, paste(as.character(tmp2['1','TRUE']), '/', as.character(tmp2['1', 'FALSE'] + tmp2['1', 'TRUE']), sep = ''))

+    non_neo_ratio <- c(non_neo_ratio, paste(as.character(tmp2['0','TRUE']), '/', as.character(tmp2['0', 'FALSE'] + tmp2['0', 'TRUE']), sep = ''))

+    tmp2 <- table(tmp$count[which(tmp$immune_cluster %in% c('high'))], tmp$CPNremove[which(tmp$immune_cluster %in% c('high'))])

+    f.test  <- fisher.test(tmp2)

+    ests    <- c(ests, f.test$estimate)

+    p.vals  <- c(p.vals, f.test$p.value)

+    ors1    <- c(ors1, f.test$conf.int[1])

+    ors2    <- c(ors2, f.test$conf.int[2])

+    neo_ratio <- c(neo_ratio, paste(as.character(tmp2['1','TRUE']), '/', as.character(tmp2['1', 'FALSE'] + tmp2['1', 'TRUE']), sep = ''))

+    non_neo_ratio <- c(non_neo_ratio, paste(as.character(tmp2['0','TRUE']), '/', as.character(tmp2['0', 'FALSE'] + tmp2['0', 'TRUE']), sep = ''))

+    tmp2 <- table(tmp$count[which(tmp$immune_cluster %in% c('low'))], tmp$CPNremove[which(tmp$immune_cluster %in% c('low'))])

+    f.test  <- fisher.test(tmp2)

+    ests    <- c(ests, f.test$estimate)

+    p.vals  <- c(p.vals, f.test$p.value)

+    ors1    <- c(ors1, f.test$conf.int[1])

+    ors2    <- c(ors2, f.test$conf.int[2])

+    neo_ratio <- c(neo_ratio, paste(as.character(tmp2['1','TRUE']), '/', as.character(tmp2['1', 'FALSE'] + tmp2['1', 'TRUE']), sep = ''))

+    non_neo_ratio <- c(non_neo_ratio, paste(as.character(tmp2['0','TRUE']), '/', as.character(tmp2['0', 'FALSE'] + tmp2['0', 'TRUE']), sep = ''))

+    ests <- ests

+    ors1 <- ors1

+    ors2 <- ors2

+    bp <- barplot(ests, col = c('#00441b', '#BD2131', '#30ABDF'), border = FALSE, las = 1, cex.axis = 0.9, cex.names = 0.7, ylab = 'OR -- neo in region of CPN loss', ylim = c(0,3), names = c('All', 'High', 'Low'), axes = FALSE)

+    axis(side = 2, at = c(0,1,2,3), labels = c(0,1,2,3), las = 2)

+    mtext(text = paste('p = ', formatC(p.vals, digits = 1, format = 'e'), sep = ''), side = 1, line = 2, cex = 0.7, at = bp[,1])

+    segments(x0 = bp[1,1], x1 = bp[1,1], y0 = ors1[1], y1 = ors2[1])

+    segments(x0 = bp[2,1], x1 = bp[2,1], y0 = ors1[2], y1 = ors2[2])

+    segments(x0 = bp[3,1], x1 = bp[3,1], y0 = ors1[3], y1 = ors2[3])

+    abline(h = 1, lty = 2, col = 'black')

+    mtext(text = neo_ratio, side = 1, line = 3, cex = 0.7, at = bp[,1])

+    mtext(text = non_neo_ratio, side = 1, line = 4, cex = 0.7, at = bp[,1])

+  }

+}

+# Figure 2H

+{

+  tmp <- patient.summary[,c('ie.all.updated', 'ie.clonal.updated', 'ie.subclonal.updated', 'simple.hist', 'immune_classification', 'anyCPN_remove')]

+  tmp <- tmp[complete.cases(tmp),]

+  tmp <- tmp[which(tmp$simple.hist != 'other'),]

+  par(mfrow = c(1,2))

+  p.val <- t.test(tmp$ie.clonal.updated[which(tmp$immune_classification == 'low'& tmp$anyCPN_remove==TRUE)], tmp$ie.subclonal.updated[which(tmp$immune_classification == 'low' & tmp$anyCPN_remove == TRUE)], paired = TRUE)$p.value # less IE in subclones (because clonal IE - subclonal IE < 0)

+  stripchart(tmp[which(tmp$immune_classification == 'low'& tmp$anyCPN_remove==TRUE),c('ie.clonal.updated', 'ie.subclonal.updated')], vertical = TRUE, pch= 16, at = c(0.9, 1.2), xlim =c(0.7,1.4), ylim = c(0.7, 1.22), las = 1, group.names = c('Clonal', 'Subclonal'), cex = 1.5, ylab = 'Immunoediting')

+  segments(x0 = 0.9, y0 = tmp$ie.clonal.updated[which(tmp$immune_classification == 'low'& tmp$anyCPN_remove==TRUE)], x1 = 1.2, y1 = tmp$ie.subclonal.updated[which(tmp$immune_classification == 'low'& tmp$anyCPN_remove==TRUE)], col = '#30ABDF', lwd = 3)

+  legend(legend = paste('paired t-test p: ', formatC(p.val, format = 'e', digits = 1), sep = ''), x = 'topleft', bty = 'n', cex = 0.9)

+  p.val <- t.test(tmp$ie.clonal.updated[which(tmp$immune_classification == 'low'& tmp$anyCPN_remove==FALSE)], tmp$ie.subclonal.updated[which(tmp$immune_classification == 'low' & tmp$anyCPN_remove == FALSE)], paired = TRUE)$p.value # less IE in subclones (because clonal IE - subclonal IE < 0)

+  stripchart(tmp[which(tmp$immune_classification == 'low'& tmp$anyCPN_remove==FALSE),c('ie.clonal.updated', 'ie.subclonal.updated')], vertical = TRUE, pch= 16, at = c(0.9, 1.2), xlim =c(0.7,1.4), ylim = c(0.7, 1.22), las = 1, group.names = c('Clonal', 'Subclonal'), cex = 1.5, ylab = 'Immunoediting')

+  segments(x0 = 0.9, y0 = tmp$ie.clonal.updated[which(tmp$immune_classification == 'low'& tmp$anyCPN_remove==FALSE)], x1 = 1.2, y1 = tmp$ie.subclonal.updated[which(tmp$immune_classification == 'low'& tmp$anyCPN_remove==FALSE)], col = '#30ABDF', lwd = 3)

+  legend(legend = paste('paired t-test p: ', formatC(p.val, format = 'e', digits = 1), sep = ''), x = 'topleft', bty = 'n', cex = 0.9)

+}

+# Figure 3A

+{

+  layout(mat = matrix(c(1,2,3), nrow = 3), heights = c(0.7, 0.2, 0.1))

+  par(mar = c(2,4,2,2))

+  tmp <- patient.summary[,c('ClonalNeo', 'SubclonalNeo', 'TotalExpressedNeo', 'ClonalExpressedNeo', 'SubclonalExpressedNeo', 'immune_classification', 'simple.hist')]

+  tmp$simple.hist <- factor(tmp$simple.hist, levels = c('LUAD', 'LUSC', 'other'))

+  tmp <- tmp[which(tmp$simple.hist != 'other'),]

+  tmp <- tmp[!is.na(tmp$TotalExpressedNeo),]

+  tmp <- tmp[-which(tmp$ClonalNeo + tmp$SubclonalNeo == 0),]

+  tmp <- tmp[order(tmp$ClonalNeo, decreasing = TRUE),]

+  tmp <- tmp[order(tmp$simple.hist),]

+  tmp$ClonalExpressedNeoDif <- tmp$ClonalNeo-tmp$ClonalExpressedNeo

+  tmp$SubclonalExpressedNeoDif <- tmp$SubclonalNeo-tmp$SubclonalExpressedNeo

+  ord <- rownames(tmp)

+  bp <- barplot(unname(t(tmp[,c('ClonalExpressedNeo', 'ClonalExpressedNeoDif')])), width = 0.5, space = 2, col = c(c('#3182bd', '#3182bd90')), border = FALSE, las = 2, cex.axis = 0.9, ylab = 'Number Neo', ylim = c(0,800))

+  barplot(t(tmp[,c('SubclonalExpressedNeo', 'SubclonalExpressedNeoDif')]), width = 0.5, space = c(3,rep(2, nrow(tmp)-1)), col = c(c('#de2d26', '#de2d2690')), border = FALSE, add = TRUE, axes = FALSE, axisnames = FALSE)

+  legend('topright', legend = c('Clonal Neo', 'Clonal Neo (exp)', 'Subclonal Neo', 'Subclonal Neo (exp)'), fill = c('#3182bd90', '#3182bd', '#de2d2690', '#de2d26'), border = FALSE, bty = 'n', ncol = 2, cex = 0.7)

+  tmp <- mutTableAll.together.filtered[which(mutTableAll.together.filtered$count == 1),]

+  tmp <- tmp[!is.na(tmp$ITHState_RNA),]

+  tmp <- tmp[which(tmp$PyCloneClonal == 'C'),]

+  to_plot <- as.data.frame.matrix(table(tmp$CRUKid, tmp$ITHState_RNA), stringsAsFactors = FALSE)

+  to_plot <- to_plot/(rowSums(to_plot))

+  to_plot <- to_plot[ord,]

+  barplot(rbind(to_plot[,1], 1-to_plot[,1]), col = c('black', 'lightgrey'), border = FALSE, width = 0.5, axisnames = FALSE, las = 2, cex.names = 0.7, ylab = 'Fraction of clonal neoantigens ubiquitously expressed')

+  tmp_col <- ifelse(patient.summary$simple.hist[match(rownames(to_plot), patient.summary$CRUKid)] == 'LUAD', yes = 'dark blue', no = ifelse(patient.summary$simple.hist[match(rownames(to_plot), patient.summary$CRUKid)] == 'LUSC', yes = 'dark red', no = 'grey'))

+  barplot(height = rep(1, nrow(to_plot)), width = 0.5, axisnames = FALSE, col = tmp_col, border = FALSE, axes = FALSE)

+  to_plot$immune_classification <- patient.summary$immune_classification[match(rownames(to_plot), patient.summary$CRUKid)]

+  par(orig_par)

+}

+# Figure 3B

+{

+  tmp <- mutTableAll.together.filtered[which(mutTableAll.together.filtered$nonsynonymous == FALSE),]

+  tmp <- tmp[!is.na(tmp$ITHState_RNA),]

+  tmp <- tmp[which(tmp$PyCloneClonal == 'C'),]

+  tmp <- tmp[which(tmp$DriverMut == FALSE),]

+  not_to_plot <- as.data.frame.matrix(table(tmp$CRUKid, tmp$ITHState_RNA), stringsAsFactors = FALSE)

+  not_to_plot <- not_to_plot/(rowSums(not_to_plot))

+  not_to_plot <- not_to_plot[ord,]

+  patient.summary$fraction_clonal_neo_ubiquitously_expressed <- to_plot[,1][match(patient.summary$CRUKid, rownames(to_plot))]

+  patient.summary$immune_classification <- factor(patient.summary$immune_classification, levels = c('low', 'mixed', 'high'))

+  par(mfrow = c(1,1))

+  boxplot(fraction_clonal_neo_ubiquitously_expressed ~ immune_classification, data = patient.summary[which(patient.summary$simple.hist %in% c('LUAD', 'LUSC')),], ylab = 'Fraction of clonal neoantigens UBIQUITOUSLY expressed', las = 1, ylim = c(0,1), outpch = NA)

+  beeswarm(fraction_clonal_neo_ubiquitously_expressed ~ immune_classification, data = patient.summary[which(patient.summary$simple.hist %in% c('LUAD', 'LUSC')),], method = 'swarm', corral = 'wrap', add = TRUE, pch = 16, pwcol = ifelse(patient.summary$simple.hist[which(patient.summary$simple.hist %in% c('LUAD', 'LUSC'))] == 'LUAD', yes = 'darkblue', no = ifelse(patient.summary$simple.hist[which(patient.summary$simple.hist %in% c('LUAD', 'LUSC'))] == 'LUSC', yes = 'firebrick4', no = 'lightgrey')))

+  p.val <- wilcox.test(fraction_clonal_neo_ubiquitously_expressed ~ immune_classification == 'low', data = patient.summary[which(patient.summary$simple.hist %in% c('LUAD', 'LUSC')),])$p.value

+  legend(x = 'topright', legend = paste('p: ', formatC(p.val, format = 'e', digits = 1), sep = ''), bty = 'n', cex = 1.3)

+}

+# Figure 3C

+{

+  par(mfrow = c(1,1))

+  tmp <- mutTableAll.together.filtered[which(mutTableAll.together.filtered$ITHState_RNA %in% c(1,2,3, 'FALSE')),]

+  {

+    par(mfrow = c(1,1))

+    ests   <- c()

+    p.vals <- c()

+    ors1    <- c()

+    ors2    <- c()

+    tmp2 <- table(tmp[which(tmp$nonsynonymous == TRUE & tmp$any.HLA.loss == TRUE),'lost_in_rna'], tmp[which(tmp$nonsynonymous == TRUE & tmp$any.HLA.loss == TRUE),'count'])

+    f.test  <- fisher.test(tmp2)

+    ests    <- c(ests, f.test$estimate)

+    p.vals  <- c(p.vals, f.test$p.value)

+    ors1    <- c(ors1, f.test$conf.int[1])

+    ors2    <- c(ors2, f.test$conf.int[2])

+    tmp2 <- table(tmp[which(tmp$nonsynonymous == TRUE & tmp$any.HLA.loss == FALSE),'lost_in_rna'], tmp[which(tmp$nonsynonymous == TRUE & tmp$any.HLA.loss == FALSE),'count'])

+    f.test  <- fisher.test(tmp2)

+    ests    <- c(ests, f.test$estimate)

+    p.vals  <- c(p.vals, f.test$p.value)

+    ors1    <- c(ors1, f.test$conf.int[1])

+    ors2    <- c(ors2, f.test$conf.int[2])

+    tmp2 <- table(tmp[which(tmp$nonsynonymous == TRUE & tmp$immune_classification %in% c('high') & tmp$any.HLA.loss == TRUE),'lost_in_rna'], tmp[which(tmp$nonsynonymous == TRUE & tmp$immune_classification %in% c('high') & tmp$any.HLA.loss == TRUE),'count'])

+    f.test  <- fisher.test(tmp2)

+    ests    <- c(ests, f.test$estimate)

+    p.vals  <- c(p.vals, f.test$p.value)

+    ors1    <- c(ors1, f.test$conf.int[1])

+    ors2    <- c(ors2, f.test$conf.int[2])

+    tmp2 <- table(tmp[which(tmp$nonsynonymous == TRUE & tmp$immune_classification %in% c('high') & tmp$any.HLA.loss == FALSE),'lost_in_rna'], tmp[which(tmp$nonsynonymous == TRUE & tmp$immune_classification %in% c('high') & tmp$any.HLA.loss == FALSE),'count'])

+    f.test  <- fisher.test(tmp2)

+    ests    <- c(ests, f.test$estimate)

+    p.vals  <- c(p.vals, f.test$p.value)

+    ors1    <- c(ors1, f.test$conf.int[1])

+    ors2    <- c(ors2, f.test$conf.int[2])

+    tmp2 <- table(tmp[which(tmp$nonsynonymous == TRUE & tmp$immune_classification %in% c('mixed') & tmp$any.HLA.loss == TRUE),'lost_in_rna'], tmp[which(tmp$nonsynonymous == TRUE & tmp$immune_classification %in% c('mixed') & tmp$any.HLA.loss == TRUE),'count'])

+    f.test  <- fisher.test(tmp2)

+    ests    <- c(ests, f.test$estimate)

+    p.vals  <- c(p.vals, f.test$p.value)

+    ors1    <- c(ors1, f.test$conf.int[1])

+    ors2    <- c(ors2, f.test$conf.int[2])

+    tmp2 <- table(tmp[which(tmp$nonsynonymous == TRUE & tmp$immune_classification %in% c('mixed') & tmp$any.HLA.loss == FALSE),'lost_in_rna'], tmp[which(tmp$nonsynonymous == TRUE & tmp$immune_classification %in% c('mixed') & tmp$any.HLA.loss == FALSE),'count'])

+    f.test  <- fisher.test(tmp2)

+    ests    <- c(ests, f.test$estimate)

+    p.vals  <- c(p.vals, f.test$p.value)

+    ors1    <- c(ors1, f.test$conf.int[1])

+    ors2    <- c(ors2, f.test$conf.int[2])

+    tmp2 <- table(tmp[which(tmp$nonsynonymous == TRUE & tmp$immune_classification %in% c('low') & tmp$any.HLA.loss == TRUE),'lost_in_rna'], tmp[which(tmp$nonsynonymous == TRUE & tmp$immune_classification %in% c('low') & tmp$any.HLA.loss == TRUE),'count'])

+    f.test  <- fisher.test(tmp2)

+    ests    <- c(ests, f.test$estimate)

+    p.vals  <- c(p.vals, f.test$p.value)

+    ors1    <- c(ors1, f.test$conf.int[1])

+    ors2    <- c(ors2, f.test$conf.int[2])

+    tmp2 <- table(tmp[which(tmp$nonsynonymous == TRUE & tmp$immune_classification %in% c('low') & tmp$any.HLA.loss == FALSE),'lost_in_rna'], tmp[which(tmp$nonsynonymous == TRUE & tmp$immune_classification %in% c('low') & tmp$any.HLA.loss == FALSE),'count'])

+    f.test  <- fisher.test(tmp2)

+    ests    <- c(ests, f.test$estimate)

+    p.vals  <- c(p.vals, f.test$p.value)

+    ors1    <- c(ors1, f.test$conf.int[1])

+    ors2    <- c(ors2, f.test$conf.int[2])

+    ests <- 1/ests

+    ors1 <- 1/ors1

+    ors2 <- 1/ors2

+    bp <- barplot(ests, col = c(sapply(X = c('#00441b', '#BD2131', '#df9b2f', '#30ABDF'), FUN = function(x) rep(x, 2))), border = FALSE, las = 1, cex.axis = 0.9, cex.names = 0.7, ylab = 'OR -- neoantigen expression lost in RNA', ylim = c(0,1.5), names = '', axes = FALSE, space = rep(c(0.6,0.2),4))

+    axis(side = 2, at = c(0,0.5, 1.0, 1.5), labels = c(0, 0.5, 1.0, 1.5), las = 1, cex.axis = 0.8)

+    segments(x0 = bp[1,1], x1 = bp[1,1], y0 = ors1[1], y1 = ors2[1])

+    segments(x0 = bp[2,1], x1 = bp[2,1], y0 = ors1[2], y1 = ors2[2])

+    segments(x0 = bp[3,1], x1 = bp[3,1], y0 = ors1[3], y1 = ors2[3])

+    segments(x0 = bp[4,1], x1 = bp[4,1], y0 = ors1[4], y1 = ors2[4])

+    segments(x0 = bp[5,1], x1 = bp[5,1], y0 = ors1[5], y1 = ors2[5])

+    segments(x0 = bp[6,1], x1 = bp[6,1], y0 = ors1[6], y1 = ors2[6])

+    segments(x0 = bp[7,1], x1 = bp[7,1], y0 = ors1[7], y1 = ors2[7])

+    segments(x0 = bp[8,1], x1 = bp[8,1], y0 = ors1[8], y1 = ors2[8])

+    mtext(text = paste('p = ', round(p.vals, digits = 3), sep = ''), side = 1, line = 2, cex = 0.7, at = bp[,1])

+    mtext(text = c('+HLA LOH', '-HLA LOH'), side = 1, line = 1, cex = 0.7, at = bp[,1])

+    tmp_names <- factor(rep(c('All', 'High', 'Hetero.', 'Low'), each = 2), levels = c('All', 'High', 'Hetero.', 'Low'))

+    mtext(text = c('All', 'High', 'Hetero.', 'Low'), side = 1, line = 3, cex = 0.7, at = aggregate(bp, by = list(tmp_names), mean)$V1)

+    abline(h = 1, lty = 2, col = 'black')

+  }

+}

+# Figure 3D

+{

+  {par(mfrow = c(1,1))

+    ests   <- c()

+    p.vals <- c()

+    ors1    <- c()

+    ors2    <- c()

+    tmp2 <- table(mutTableAll.together.filtered$tcga_gene_minimally_expressed_95[which(mutTableAll.together.filtered$nonsynonymous == TRUE)], mutTableAll.together.filtered$count[which(mutTableAll.together.filtered$nonsynonymous == TRUE)])

+    f.test  <- fisher.test(tmp2)

+    ests    <- c(ests, f.test$estimate)

+    p.vals  <- c(p.vals, f.test$p.value)

+    ors1    <- c(ors1, f.test$conf.int[1])

+    ors2    <- c(ors2, f.test$conf.int[2])

+    tmp2 <- table(mutTableAll.together.filtered$tcga_gene_minimally_expressed_95[which(mutTableAll.together.filtered$nonsynonymous == TRUE & mutTableAll.together.filtered$immune_classification == 'high')], mutTableAll.together.filtered$count[which(mutTableAll.together.filtered$nonsynonymous == TRUE & mutTableAll.together.filtered$immune_classification == 'high')])

+    f.test  <- fisher.test(tmp2)

+    ests    <- c(ests, f.test$estimate)

+    p.vals  <- c(p.vals, f.test$p.value)

+    ors1    <- c(ors1, f.test$conf.int[1])

+    ors2    <- c(ors2, f.test$conf.int[2])

+    tmp2 <- table(mutTableAll.together.filtered$tcga_gene_minimally_expressed_95[which(mutTableAll.together.filtered$nonsynonymous == TRUE & mutTableAll.together.filtered$immune_classification == 'mixed')], mutTableAll.together.filtered$count[which(mutTableAll.together.filtered$nonsynonymous == TRUE & mutTableAll.together.filtered$immune_classification == 'mixed')])

+    f.test  <- fisher.test(tmp2)

+    ests    <- c(ests, f.test$estimate)

+    p.vals  <- c(p.vals, f.test$p.value)

+    ors1    <- c(ors1, f.test$conf.int[1])

+    ors2    <- c(ors2, f.test$conf.int[2])

+    tmp2 <- table(mutTableAll.together.filtered$tcga_gene_minimally_expressed_95[which(mutTableAll.together.filtered$nonsynonymous == TRUE & mutTableAll.together.filtered$immune_classification == 'low')], mutTableAll.together.filtered$count[which(mutTableAll.together.filtered$nonsynonymous == TRUE & mutTableAll.together.filtered$immune_classification == 'low')])

+    f.test  <- fisher.test(tmp2)

+    ests    <- c(ests, f.test$estimate)

+    p.vals  <- c(p.vals, f.test$p.value)

+    ors1    <- c(ors1, f.test$conf.int[1])

+    ors2    <- c(ors2, f.test$conf.int[2])

+    bp <- barplot(ests, col = c('#00441b', '#BD2131', '#df9b2f', '#30ABDF'), border = FALSE, las = 1, cex.axis = 0.9, cex.names = 0.7, ylab = 'OR -- neoantigens depleted in (TCGA) expressed genes', ylim = c(0,1), names = c('All', 'High', 'Hetero.', 'Low'), axes = FALSE)

+    axis(side = 2, at = c(0,0.5, 1.0), labels = c(0, 0.5, 1.0), las = 1, cex.axis = 0.8)

+    segments(x0 = bp[1,1], x1 = bp[1,1], y0 = ors1[1], y1 = ors2[1])

+    segments(x0 = bp[2,1], x1 = bp[2,1], y0 = ors1[2], y1 = ors2[2])

+    segments(x0 = bp[3,1], x1 = bp[3,1], y0 = ors1[3], y1 = ors2[3])

+    segments(x0 = bp[4,1], x1 = bp[4,1], y0 = ors1[4], y1 = ors2[4])

+    mtext(text = paste('p = ', formatC(p.vals, digits = 1, format = 'e'), sep = ''), side = 1, line = 2, cex = 0.7, at = bp[,1])

+    abline(h = 1, lty = 2, col = 'black')

+  }

+}

+# Figure 4A-B

+{

+  for(cancer in c('LUAD', 'LUSC')){

+    if(cancer == 'LUAD'){

+      colOrder <- c("CRUK0020", "CRUK0016", "CRUK0039", "CRUK0051", "CRUK0027", "CRUK0060", "CRUK0024", "CRUK0035", "CRUK0047", "CRUK0052", "CRUK0026", "CRUK0008", "CRUK0038", "CRUK0045", "CRUK0031", "CRUK0034", "CRUK0006", "CRUK0033", "CRUK0030", "CRUK0015", "CRUK0014", "CRUK0019", "CRUK0007", "CRUK0040", "CRUK0017", "CRUK0001", "CRUK0009", "CRUK0003", "CRUK0029", "CRUK0061", "CRUK0044", "CRUK0004", "CRUK0012", "CRUK0005", "CRUK0041", "CRUK0018", "CRUK0046", "CRUK0048", "CRUK0032", "CRUK0010", "CRUK0013", "CRUK0028", "CRUK0002", "CRUK0022", "CRUK0037", "CRUK0055", "CRUK0023", "CRUK0036", "CRUK0042", "CRUK0053", "CRUK0057", "CRUK0025", "CRUK0050", "CRUK0021", "CRUK0058", "CRUK0011", "CRUK0043", "CRUK0049", "CRUK0054", "CRUK0056", "CRUK0059")

+    }

+    if(cancer == 'LUSC'){

+      colOrder <- c(c("CRUK0086","CRUK0074","CRUK0068","CRUK0079","CRUK0069","CRUK0064","CRUK0083","CRUK0072","CRUK0073","CRUK0081","CRUK0065","CRUK0078","CRUK0067","CRUK0070","CRUK0062","CRUK0082","CRUK0076","CRUK0071","CRUK0066","CRUK0084","CRUK0090","CRUK0075","CRUK0063","CRUK0089","CRUK0085","CRUK0088","CRUK0087","CRUK0092","CRUK0077","CRUK0080","CRUK0093","CRUK0091"))

+    }

+    tmp_df <- data.frame(colnames(all_condensed_alterations)[which(substr(colnames(all_condensed_alterations), 1, 8) %in% colOrder)], stringsAsFactors = FALSE)

+    tmp_df$SampleID <- substr(tmp_df[,1], 1, 8)

+    tmp_df$SampleID <- factor(tmp_df$SampleID, levels = colOrder)

+    tmp_df <- tmp_df[order(tmp_df$SampleID),]

+    condensed_alterations <- all_condensed_alterations[c('HLALOH', 'APC'),tmp_df[,1]]

+    # add in some clinical things

+    tmp_clinical <- matrix(NA, nrow=7, ncol=ncol(condensed_alterations), dimnames = list(c('ClonalNeo', 'SubclonalNeo', 'immune', 'PFS', 'packyears', 'IE', 'low_evasion'), colnames(condensed_alterations)))

+    tmp_clinical['ClonalNeo',]     <- patient.summary.region$ClonalNeo[match(colnames(tmp_clinical), patient.summary.region$CRUKidRegion)]

+    tmp_clinical['SubclonalNeo',]     <- patient.summary.region$SubclonalNeo[match(colnames(tmp_clinical), patient.summary.region$CRUKidRegion)]

+    tmp_clinical['immune',]    <- patient.summary.region$immune_cluster[match(colnames(tmp_clinical), patient.summary.region$CRUKidRegion)]

+    tmp_clinical['IE',]        <- patient.summary$immune_edited[match(substr(colnames(tmp_clinical), 1, 8), patient.summary$CRUKid)]

+    tmp_clinical['PFS',]          <- patient.summary$allan_final_dsf_time[match(substr(colnames(tmp_clinical), 1, 8), patient.summary$CRUKid)]

+    cens                          <- patient.summary$allan_final_dsf[match(substr(colnames(tmp_clinical), 1, 8), patient.summary$CRUKid)]

+    tmp_clinical['packyears',]    <- patient.summary$packyears[match(substr(colnames(tmp_clinical), 1, 8), patient.summary$CRUKid)]

+    tmp_clinical['low_evasion',]  <- patient.summary$low_evasion[match(substr(colnames(tmp_clinical), 1, 8), patient.summary$CRUKid)]

+    color.gradient <- function(x, colors=c("dark blue","white","dark red"), colsteps=81) {

+      return( colorRampPalette(colors) (colsteps) [ findInterval(x, seq(min(x),max(x), length.out=colsteps)) ] )

+    }

+    pfs_times <- seq(from = 34, to = 1275, by = 1)

+    pfs_times_col_1  <- color.gradient(pfs_times, colors = c('#980043', '#d4b9da'), colsteps = 1242)

+    pfs_times_col_0  <- color.gradient(pfs_times, colors = c('#ccece6', '#006d2c'), colsteps = 1242)

+    # packyears

+    tmp_smoke        <- as.numeric(sort(unique(patient.summary[,'packyears'])))*100

+    tmp_smoke        <- tmp_smoke[!is.na(tmp_smoke)]

+    packyearcols <- color.gradient(tmp_smoke, colors = c('lightblue', 'black'))

+    tmp_smoke        <- tmp_smoke/100

+    # plot

+    layout(matrix(1:(nrow(condensed_alterations) + nrow(tmp_clinical)), ncol = 1))

+    par(mar = c(2,2,1,2), oma = c(4,4,4,4))

+    tmp_acceptedRegions_df <- acceptedRegions_df[match(colnames(condensed_alterations), acceptedRegions_df$CRUKidRegion),]

+    rbPal_nsMut     <- colorRampPalette(c('#f0f9e8','#bae4bc','#7bccc4','#43a2ca','#0868ac'))

+    tmp_clinical_colors <- tmp_clinical

+    tmp_clinical_colors['ClonalNeo',]    <- rbPal_nsMut(10)[as.numeric(cut(as.numeric(tmp_clinical['ClonalNeo',]),breaks=10))]

+    tmp_clinical_colors['SubclonalNeo',] <- rbPal_nsMut(10)[as.numeric(cut(as.numeric(tmp_clinical['SubclonalNeo',]),breaks=10))]

+    tmp_clinical_colors['immune',] <- ifelse(is.na(tmp_clinical_colors['immune',]), yes = 'darkgrey', no = ifelse(tmp_clinical_colors['immune',] == 'high', yes = '#BD2131', no = '#30ABDF'))

+    tmp_clinical_colors['PFS',which(cens == 0)] <- pfs_times_col_0[match(tmp_clinical['PFS',which(cens == 0)], pfs_times)]

+    tmp_clinical_colors['PFS',which(cens == 1)] <- pfs_times_col_1[match(tmp_clinical['PFS',which(cens == 1)], pfs_times)]

+    tmp_clinical_colors['packyears',]            <- packyearcols[match(as.numeric(tmp_clinical_colors['packyears',]), tmp_smoke)]

+    tmp_clinical_colors['IE',] <- ifelse(tmp_clinical['IE',] == 'TRUE', yes = '#6a3d9a', no = '#cab2d6')

+    tmp_clinical_colors['low_evasion',] <- ifelse(tmp_clinical['low_evasion',] == 'TRUE', yes = '#2356A7', no = '#BE1E2D')

+    tmp_clinical_colors['low_evasion',] <- ifelse(is.na(tmp_clinical_colors['low_evasion',]), yes = 'darkgrey', no = tmp_clinical_colors['low_evasion',])

+    bp <-  barplot(tmp_clinical[1:2,],

+                   border = NA,axes = FALSE, axisnames = FALSE,

+                   col = c('#377eb8', '#e41a1c'), ylim = c(0,1100),

+                   space = unlist(sapply(tmp_acceptedRegions_df$nregions[!duplicated(tmp_acceptedRegions_df$CRUKid)], FUN = function(x){c(2,rep(0.2,(x-1)))}))

+    )

+    axis(side = 2, at = c(0,500,1000), labels = c(0,500, 1000), line = 1, las = 1)

+    mtext(text = 'Neo', side=2,line = -25, cex = 5, las = 2, outer = FALSE)

+    sapply(3:7, FUN = function(x){

+      bp <-  barplot(rep(1,ncol(tmp_clinical_colors)),

+                     border = NA,axes = FALSE,

+                     col = tmp_clinical_colors[x,],

+                     space = unlist(sapply(tmp_acceptedRegions_df$nregions[!duplicated(tmp_acceptedRegions_df$CRUKid)], FUN = function(x){c(2,rep(0.2,(x-1)))}))

+      )

+    })

+    sapply(1:nrow(condensed_alterations), FUN = function(x){

+      bp <-  barplot(rep(1,ncol(condensed_alterations)),border = NA,axes = FALSE,

+                     col = ifelse(as.numeric(condensed_alterations[x,]) == 1, yes = '#008000', no = ifelse(as.numeric(condensed_alterations[x,]) == -1, yes = 'blue', no = 'lightgrey')),

+                     space = unlist(sapply(tmp_acceptedRegions_df$nregions[!duplicated(tmp_acceptedRegions_df$CRUKid)], FUN = function(x){c(2,rep(0.2,(x-1)))}))

+      )

+      mtext(text = rownames(condensed_alterations)[x], side=2,line = -25, cex = 5, las = 2, outer = FALSE, col = 'black')

+    })

+    bp <-  barplot(rep(1,ncol(condensed_alterations)),border = NA,axes = FALSE,space = unlist(sapply(tmp_acceptedRegions_df$nregions[!duplicated(tmp_acceptedRegions_df$CRUKid)], FUN = function(x){c(2,rep(0.2,(x-1)))})), plot = FALSE)

+    nr          <- tmp_acceptedRegions_df$nregions[!duplicated(tmp_acceptedRegions_df$CRUKid)]

+    names(nr)   <- unique(tmp_acceptedRegions_df$CRUKid)

+    text_start  <- sapply(names(nr), FUN = function(x) {bp[(sum(nr[1:which(names(nr) == x)]) - nr[which(names(nr) == x)] + 1),1]} )

+    mtext(text = patient.summary$CRUKid[match(substr(names(nr), 3, nchar(names(nr))), patient.summary$CRUKid)], side=1,line = -1, cex = 5, las = 2, outer = FALSE, at = text_start, col = 'black')

+  }

+  par(orig_par)

+}

+# Figure 4C

+{

+  tmp <- patient.summary[which(patient.summary$simple.hist %in% c('LUAD', 'LUSC')),]

+  tmp_low_clonal <- tmp[which(tmp$high_clonal == FALSE),]

+  par(mfrow = c(1,3))

+  survplot(Surv(allan_final_dsf_time, allan_final_dsf == "1") ~ low_evasion,

+           data = tmp,

+           main = "LUAD&LUSC\nlow immune evasion",

+           xlab = 'Time (days)',

+           ylab = "PFS",

+           col = c('darkred', 'darkblue'),

+           lwd = 1, las = 1, cex.axis = 0.8)

+  survplot(Surv(allan_final_dsf_time, allan_final_dsf == "1") ~ low_evasion,

+           data = tmp_low_clonal,

+           main = "LUAD&LUSC\nlow immune evasion, low clonal neo",

+           xlab = 'Time (days)',

+           ylab = "PFS",

+           col = c('darkred', 'darkblue'),

+           lwd = 1, las = 1, cex.axis = 0.8)

+  tmp$low_evasion_clonal <- ifelse(tmp$low_evasion == TRUE | tmp$high_clonal == TRUE, yes =TRUE, no = FALSE)

+  survplot(Surv(allan_final_dsf_time, allan_final_dsf == "1") ~ low_evasion_clonal,

+           data = tmp,

+           main = "LUAD&LUSC\nlow immune evasion, high clonal neo",

+           xlab = 'Time (days)',

+           ylab = "PFS",

+           col = c('darkred', 'darkblue'),

+           lwd = 1, las = 1, cex.axis = 0.8)

+}

+# Figure S3

+{

+  par(mfrow = c(1,1))

+  boxplot(cd8.score.danaher ~ as.character(simple.hist), data = patient.summary.region[which(patient.summary.region$simple.hist %in% c('LUAD', 'LUSC')),], las = 1, ylab = 'Danaher CD8+ Score')

+  beeswarm(cd8.score.danaher ~ as.character(simple.hist), data = patient.summary.region[which(patient.summary.region$simple.hist %in% c('LUAD', 'LUSC')),], add = TRUE, pch = 16)

+  p.val <- wilcox.test(cd8.score.danaher ~ simple.hist, patient.summary.region[which(patient.summary.region$simple.hist %in% c('LUAD', 'LUSC')),])$p.value

+  legend(x = 'topright', legend = paste('p: ', formatC(p.val, format = 'e', digits = 1), sep = ''), bty = 'n', cex = 1.3)

+}

+# Figure S5A

+{

+  par(mfrow = c(1,1))

+  tmp <- patient.summary.region

+  boxplot(pathology_TILs ~ factor(orig_immune_cluster, levels = c('low', 'high')), data = tmp, las = 1, ylab = 'pathology estimated TILs', xlab = 'immune cluster')

+  beeswarm(pathology_TILs ~ factor(orig_immune_cluster, levels = c('low', 'high')), data = tmp, pch = 16, add = TRUE, cex = 1.5, pwcol = ifelse(is.na(tmp$orig_immune_cluster), yes = 'light grey', no = ifelse(tmp$orig_immune_cluster == 'high', yes = '#BD2131', no = '#30ABDF')))

+  p.val <- wilcox.test(pathology_TILs ~ orig_immune_cluster, data = tmp)$p.value

+  legend(x = 'topright', legend = paste('p: ', formatC(p.val, format = 'e', digits = 1), sep = ''), bty = 'n', cex = 1.3)

+}

+# Figure S5B

+{

+  par(mfrow = c(1,1))

+  tmp_all_danaher_accepted <- all_danaher_accepted[,c('cd8', 'cd4', 'bcell', 'cd45', 'cyto' ,'dend', 'mast', 'nkcd56dim', 'nk', 'tcells', 'th1')]

+  tmp_all_danaher_accepted$MDSC_jiang   <- patient.summary.region$MDSC_jiang[match(rownames(tmp_all_danaher_accepted), patient.summary.region$CRUKidRegion)]

+  tmp_all_danaher_accepted$CAF_jiang    <- patient.summary.region$CAF_jiang[match(rownames(tmp_all_danaher_accepted), patient.summary.region$CRUKidRegion)]

+  tmp_all_danaher_accepted$TAM.M2_jiang <- patient.summary.region$TAM.M2_jiang[match(rownames(tmp_all_danaher_accepted), patient.summary.region$CRUKidRegion)]

+  corr <- cor.mtest(tmp_all_danaher_accepted, method = 's')

+  M    <- cor(tmp_all_danaher_accepted, method = 's')

+  corrplot(M, method = 'square', p.mat = corr$p, diag = TRUE, lowCI.mat = corr$lowCI, uppCI.mat = corr$uppCI, type = 'upper')

+}

+# Figure S5C

+{

+  par(mfrow = c(1,2))

+  boxplot(TAM.M2_jiang ~ immune_cluster, data = patient.summary.region, las = 1, ylab = 'TAM M2 (Jiang)', xlab = 'Immune classification', out.pch = NA)

+  beeswarm(TAM.M2_jiang ~ immune_cluster, data = patient.summary.region, pch = 16, add = TRUE, col = 'black', method = 'swarm', corral = 'wrap')

+  p.val <- wilcox.test(TAM.M2_jiang ~ immune_cluster, data = patient.summary.region)$p.value

+  legend(x = 'bottomright', legend = paste('p: ', formatC(p.val, format = 'e', digits = 1), sep = ''), bty = 'n', cex = 1.3)

+  boxplot(MDSC_jiang ~ immune_cluster, data = patient.summary.region, las = 1, ylab = 'MDSC (Jiang)', xlab = 'Immune classification', out.pch = NA)

+  beeswarm(MDSC_jiang ~ immune_cluster, data = patient.summary.region, pch = 16, add = TRUE, col = 'black', method = 'swarm', corral = 'wrap')

+  p.val <- wilcox.test(MDSC_jiang ~ immune_cluster, data = patient.summary.region)$p.value

+  legend(x = 'bottomright', legend = paste('p: ', formatC(p.val, format = 'e', digits = 1), sep = ''), bty = 'n', cex = 1.3)

+}

+# Figure S5D ###

+{

+  patient.summary.region$tmp_class <- ifelse(patient.summary.region$orig_immune_cluster == 'high' & patient.summary.region$pathology_TILs > 70, yes = 'high_high', no = ifelse(patient.summary.region$orig_immune_cluster == 'high' & patient.summary.region$pathology_TILs <= 20, yes = 'high_low', no = ifelse(patient.summary.region$orig_immune_cluster == 'low' & patient.summary.region$pathology_TILs <= 15, yes = 'low_low', no = ifelse(patient.summary.region$orig_immune_cluster == 'low' & patient.summary.region$pathology_TILs >= 48, yes = 'low_high', no = NA))))

+  patient.summary$low_high_region <- ifelse(patient.summary$CRUKid %in% patient.summary.region$CRUKid[which(patient.summary.region$tmp_class == 'low_high')], yes = TRUE, no = FALSE)

+  par(mfrow = c(1,2))

+  boxplot(cd8.score.danaher ~ tmp_class, data = patient.summary.region[which(patient.summary.region$tmp_class %in% c('low_low', 'low_high') & patient.summary.region$simple.hist == 'LUAD'),], las = 1, ylab = 'CD8+ score (Danaher)', ylim = c(0,4.5))

+  beeswarm(cd8.score.danaher ~ tmp_class, data = patient.summary.region[which(patient.summary.region$tmp_class %in% c('low_low', 'low_high') & patient.summary.region$simple.hist == 'LUAD'),], add=TRUE, pch = 16, cex = 1.4)

+  p.val <- wilcox.test(cd8.score.danaher ~ tmp_class, data = patient.summary.region[which(patient.summary.region$tmp_class %in% c('low_low', 'low_high') & patient.summary.region$simple.hist == 'LUAD'),])$p.value

+  legend(x = 'topleft', legend = paste('p: ', formatC(p.val, format = 'e', digits = 1), sep = ''), bty = 'n', cex = 1.3)

+  boxplot(treg.score.danaher ~ tmp_class, data = patient.summary.region[which(patient.summary.region$tmp_class %in% c('low_low', 'low_high') & patient.summary.region$simple.hist == 'LUAD'),], las = 1, ylab = 'Treg score (Danaher)', ylim = c(0,2.5))

+  beeswarm(treg.score.danaher ~ tmp_class, data = patient.summary.region[which(patient.summary.region$tmp_class %in% c('low_low', 'low_high') & patient.summary.region$simple.hist == 'LUAD'),], add=TRUE, pch = 16, cex = 1.4)

+  p.val <- wilcox.test(treg.score.danaher ~ tmp_class, data = patient.summary.region[which(patient.summary.region$tmp_class %in% c('low_low', 'low_high') & patient.summary.region$simple.hist == 'LUAD'),])$p.value

+  legend(x = 'topleft', legend = paste('p: ', formatC(p.val, format = 'e', digits = 1), sep = ''), bty = 'n', cex = 1.3)

+}

+# Figure S5E

+{

+  layout(mat = matrix(c(1,2,0,0,3,4), nrow = 2), heights = c(0.9,0.1), widths = c(1,0.2,1))

+  par(oma = c(3.5,2,0,1)+0.1, mar = c(0,2.5,0,0)+0.5)

+  tmp  <- patient.summary.region[which(patient.summary.region$simple.hist == 'LUAD'),]

+  tmp  <- tmp[!is.na(tmp$pathology_TILs),]

+  boop <- with(tmp, reorder(CRUKid, pathology_TILs, median, na.rm = TRUE))

+  bp   <- boxplot(pathology_TILs ~ boop, ylim = c(0,100), data = tmp, las = 2, space = 0.3, cex.axis = 1, cex.lab = 0.8, names = FALSE, frame.plot = FALSE, at = 1:length(levels(boop)), outpch = NA)

+  beeswarm(pathology_TILs ~ boop, data = tmp, pch = 16, add = TRUE, cex = 1.5, pwcol = ifelse(is.na(tmp$orig_immune_cluster), yes = 'light grey', no = ifelse(tmp$orig_immune_cluster == 'high', yes = '#BD2131', no = '#30ABDF')))

+  mtext(text = 'pathology TILs', side = 2, line = 2.5, cex = 0.8)

+  legend('topleft', legend = c('immune high cluster', 'immune low cluster'), pch = 16, col = c('#BD2131','#30ABDF'), bty = 'n', cex = 1)

+  legend('bottomright', legend = 'LUAD', bty = 'n')

+  beep <- patient.summary$orig_immune_classification[match(levels(boop), patient.summary$CRUKid)]

+  bp   <- barplot(rep(1, length(beep)), col = ifelse(is.na(beep), yes = 'darkgrey', no = ifelse(beep == 'low', yes = '#30ABDF', no = ifelse(beep == 'mixed', yes = '#df9b2f', no = '#BD2131'))), space = 0.3, axes = FALSE, border = NA)

+  mtext('immune\nclass', side = 2, line = 1, cex = 0.8, las = 1)

+  mtext(text = levels(boop), side = 1, line = 1, las = 2, at = bp[,1], cex = 0.7)

+  tmp  <- patient.summary.region[which(patient.summary.region$simple.hist == 'LUSC'),]

+  tmp  <- tmp[!is.na(tmp$pathology_TILs),]

+  boop <- with(tmp, reorder(CRUKid, pathology_TILs, median, na.rm = TRUE))

+  bp   <- boxplot(pathology_TILs ~ boop, data = tmp, ylim = c(0,100), las = 2, space = 0.3, cex.axis = 1, cex.lab = 0.8, names = FALSE, frame.plot = FALSE, at = 1:length(levels(boop)), outpch = NA)

+  beeswarm(pathology_TILs ~ boop, data = tmp, pch = 16, add = TRUE, cex = 1.5, pwcol = ifelse(is.na(tmp$immune_cluster), yes = 'light grey', no = ifelse(tmp$immune_cluster == 'high', yes = '#BD2131', no = '#30ABDF')))

+  mtext(text = 'pathology TILs', side = 2, line = 2.5, cex = 0.8)

+  legend('topleft', legend = c('immune high cluster', 'immune low cluster'), pch = 16, col = c('#BD2131','#30ABDF'), bty = 'n', cex = 1)

+  legend('bottomright', legend = 'LUSC', bty = 'n')

+  beep <- patient.summary$orig_immune_classification[match(levels(boop), patient.summary$CRUKid)]

+  bp   <- barplot(rep(1, length(beep)), col = ifelse(is.na(beep), yes = 'darkgrey', no = ifelse(beep == 'low', yes = '#30ABDF', no = ifelse(beep == 'mixed', yes = '#df9b2f', no = '#BD2131'))), space = 0.3, axes = FALSE, border = NA)

+  mtext('immune\nclass', side = 2, line = 1, cex = 0.8, las = 1)

+  mtext(text = levels(boop), side = 1, line = 1, las = 2, at = bp[,1], cex = 0.7)

+  par(orig_par)

+}

+# Figure S5F

+{

+  par(mfrow = c(1,2))

+  tmp <- patient.summary.region[which(patient.summary.region$simple.hist == 'LUAD'),]

+  tmp$regionalPattern[which(tmp$regionalPattern %in% c('MP', 'scar only', 'no tumour'))] <- 'other'

+  tmp$regionalPattern <- factor(tmp$regionalPattern, levels = c('cribriform', 'solid', 'lepidic', 'other', 'papillary', 'acinar'))

+  het_luad_patients <- unique(tmp$CRUKid[which(tmp$orig_immune_classification == 'mixed')])

+  het_tmp <- tmp[which(tmp$CRUKid %in% het_luad_patients),c('CRUKid', 'immune_cluster', 'regionalPattern')]

+  het_tmp$immune_cluster  <- as.character(het_tmp$immune_cluster)

+  het_tmp$regionalPattern <- as.character(het_tmp$regionalPattern)

+  het_tmp$regionalPattern[is.na(het_tmp$regionalPattern)] <- 'NA'

+  het_tmp <- het_tmp[!is.na(het_tmp$immune_cluster),]

+  patient.summary$pathologic_heterogeneity <- sapply(patient.summary$CRUKid, FUN = function(x){if(patient.summary$simple.hist[which(patient.summary$CRUKid == x)] %in% c('LUSC', 'other')){return(NA)}; tmp<-patient.summary.region[which(patient.summary.region$CRUKid == x),]; tmp <- tmp[!is.na(tmp$regionalPattern),]; tmp$regionalPattern[which(tmp$regionalPattern %in% c('MP', 'scar only', 'no tumour'))] <- 'other';  if(all(is.na(tmp$regionalPattern))){return(NA)}; ifelse(length(unique(tmp$regionalPattern)) == 1, yes = return('homo'), no = return('het'))})

+  p.val <- round(fisher.test(table(patient.summary$pathologic_heterogeneity, patient.summary$orig_immune_classification == 'mixed'))$p.value, digits = 3)

+  layout(mat = matrix(c(1,2), nrow = 1), widths = c(0.4,0.6))

+  barplot(table(patient.summary$pathologic_heterogeneity, patient.summary$immune_classification == 'mixed'), names.arg = c('Homogeneous immune', 'Heterogeneous immune'), col = rev(c('#00441b', '#a1d99b')), las = 1, xlab = 'Lung adeno. patients', ylab = 'Number patients', cex.axis = 1, cex.names = 1, cex.lab = 1)

+  legend('topright',legend = c(paste('p: ', p.val, sep = ''), 'Heterogeneous pathology', 'Homogeneous pathology'), fill = rev(c('#00441b', '#a1d99b', NA)), bty = 'n', border = NA, cex = 0.8)

+  plot(x = 0, axes = FALSE, pch = NA, xlim = c(1,30), ylim = c(0,8), ylab = 'Tumor Regions', xlab = '', main = 'Histologies of regions from heterogeneous patients', cex.main = 0.9, frame.plot = FALSE, las =1, cex.lab = 0.9)

+  histcols <- setNames(c('#a6cee3','#33a02c', '#ff7f00', '#6a3d9a', '#fb9a99', 'lightgrey', '#a65628'), c('acinar','solid', 'lepidic', 'cribriform', 'papillary', 'NA', 'other'))

+  for(pat in sort(unique(het_tmp$CRUKid))){

+    x = which(sort(unique(het_tmp$CRUKid)) == pat)

+    tmp_plot <- het_tmp[which(het_tmp$CRUKid == pat),]

+    tmp_plot <- tmp_plot[order(tmp_plot$regionalPattern),]

+    tmp_plot <- tmp_plot[order(tmp_plot$immune_cluster, decreasing = TRUE),]

+    tmp_plot$immune_color <- ifelse(tmp_plot$immune_cluster == 'low', yes = '#30ABDF', no = '#BD2131')

+    tmp_plot$hist_color   <- histcols[match(tmp_plot$regionalPattern, names(histcols))]

+    for(i in 1:nrow(tmp_plot)){

+      rect(xleft = 2*x-0.8, xright = 2*x, ybottom = i-1, ytop = i, col = tmp_plot$immune_color[i], border = NA)

+      rect(xleft = 2*x, xright = 2*x+0.8, ybottom = i-1, ytop = i, col = tmp_plot$hist_color[i], border = NA)

+    }

+  }

+  axis(side = 1, at = seq(from = 2, to = 16, by = 2), labels = sort(unique(het_tmp$CRUKid)), las = 2, cex.axis = 0.8)

+  axis(side = 2, at = c(0,4,8), labels = c(0,4,8), las = 1, cex.axis = 0.8)

+  legend(x = 'topleft', legend = c('low', 'high', names(histcols)), fill = c('#30ABDF', '#BD2131', histcols), border = NA, bty = 'n', ncol = 5, cex = 0.8)

+}

+# Figure S5G

+{

+  par(mfrow = c(1,2))

+  barplot(table(patient.summary$orig_immune_classification)[c('low', 'mixed', 'high')], col = c('#30ABDF', '#df9b2f', '#BD2131'), border = NA, las = 1, ylab = 'Number patients', ylim = c(0,40))

+  barplot(table(patient.summary$immune_classification)[c('low', 'mixed', 'high')], col = c('#30ABDF', '#df9b2f', '#BD2131'), border = NA, las = 1, ylab = 'Number patients', ylim = c(0,40))

+}

+# Figure S5H

+{

+  par(mfrow = c(1,1))

+  het_tmb_patients <- patient.summary$CRUKid[which(patient.summary$TMB_het == TRUE)]

+  het_tmb_dataframe <- data.frame(row.names = het_tmb_patients)

+  het_tmb_dataframe$low_tmb_purity <- sapply(rownames(het_tmb_dataframe), FUN = function(x){

+    regions_low_tmb <- patient.summary.region$CRUKidRegion[which(patient.summary.region$CRUKid == x & patient.summary.region$TotalMut_Varscan/30 <= 10)]

+    summary(patient.summary.region$purity[which(patient.summary.region$CRUKidRegion  %in% regions_low_tmb)])['Median']

+  })

+  het_tmb_dataframe$high_tmb_purity <- sapply(rownames(het_tmb_dataframe), FUN = function(x){

+    regions_high_tmb <- patient.summary.region$CRUKidRegion[which(patient.summary.region$CRUKid == x & patient.summary.region$TotalMut_Varscan/30 > 10)]

+    summary(patient.summary.region$purity[which(patient.summary.region$CRUKidRegion  %in% regions_high_tmb)])['Median']

+  })

+  p.val <- t.test(het_tmb_dataframe$low_tmb_purity, het_tmb_dataframe$high_tmb_purity, paired = TRUE)$p.value

+  stripchart(het_tmb_dataframe, vertical = TRUE, pch= 16, at = c(0.9, 1.2), xlim =c(0.7,1.4), las = 1, group.names = c('Low TMB regions', 'High TMB regions'), cex = 1.5, ylab = 'Tumor region purity')

+  segments(x0 = 0.9, y0 = het_tmb_dataframe$low_tmb_purity, x1 = 1.2, y1 = het_tmb_dataframe$high_tmb_purity, col = '#df9b2f', lwd = 3)

+  legend(legend = paste('paired t-test p: ', round(p.val, digits = 2), sep = ''), x = 'topleft', bty = 'n', cex = 0.9)

+}

+# Figure S6A

+{

+  layout(mat = matrix(c(1,2,3,0), nrow = 4), heights = c(0.9,0.1,0.1, 0.1), widths = c(1))

+  par(oma = c(3.5,2,0,1)+0.1, mar = c(0,2.5,0,0)+0.5)

+  tmp  <- patient.summary.region[which(patient.summary.region$simple.hist %in% c('LUAD', 'LUSC')),]

+  tmp  <- tmp[!is.na(tmp$TIDE_score),]

+  tmp  <- tmp[which(tmp$number.regions.rna.accepted > 1),]

+  boop <- with(tmp, reorder(CRUKid, TIDE_score, FUN = function(x) -min(x, na.rm = TRUE)))

+  bp   <- boxplot(TIDE_score ~ boop, data = tmp, ylim = c(-2,3), las = 2, space = 0.3, cex.axis = 1, cex.lab = 0.8, names = FALSE, frame.plot = FALSE, at = 1:length(levels(boop)), outpch = NA)

+  beeswarm(TIDE_score ~ boop, data = tmp, pch = 16, add = TRUE, cex = 1, pwcol = ifelse(is.na(tmp$immune_cluster), yes = 'light grey', no = ifelse(tmp$immune_cluster == 'high', yes = '#BD2131', no = '#30ABDF')))

+  abline(h = 0, col = 'black', lty = 2)

+  text(x = 0.1, y = 0.1, labels = 'TIDE threshold', pos = 4, cex = 0.7)

+  mtext(text = 'TIDE Score', side = 2, line = 2.5, cex = 1)

+  legend('topleft', legend = c('immune high cluster', 'immune low cluster'), pch = 16, col = c('#BD2131','#30ABDF'), bty = 'n', cex = 1)

+  #legend('bottomright', legend = cancer, bty = 'n')

+  beep <- patient.summary$immune_classification[match(levels(boop), patient.summary$CRUKid)]

+  bp   <- barplot(rep(1, length(beep)), col = ifelse(beep == 'low', yes = '#30ABDF', no = ifelse(beep == 'mixed', yes = '#df9b2f', no = '#BD2131')), space = 0.3, axes = FALSE, border = NA)

+  mtext('immune\nclass', side = 2, line = 1, cex = 0.8, las = 1)

+  beep       <- patient.summary$TIDE_enrichment_group[match(levels(boop), patient.summary$CRUKid)]

+  beep_color <- ifelse(is.na(beep), yes = 'lightgrey', no = ifelse(beep == 'False', yes = '#e0f3f8', no = ifelse(beep == 'True', yes = '#4575b4', no = '#df9b2f')))

+  bp         <- barplot(rep(1, length(beep)), col = beep_color, space = 0.3, axes = FALSE, border = NA)

+  mtext('TIDE', side = 2, line = 1, cex = 0.8, las = 1)

+  par(orig_par)

+}

+# Figure S6B

+{

+  layout(mat = matrix(c(1,2,3,0), nrow = 4), heights = c(0.9,0.1,0.1, 0.1, 0.1), widths = c(1))

+  par(oma = c(3.5,2,0,1)+0.1, mar = c(0,2.5,0,0)+0.5)

+  tmp  <- patient.summary.region[which(patient.summary.region$simple.hist %in% c('LUAD', 'LUSC')),]

+  tmp  <- tmp[!is.na(tmp$ipres_enrichment),]

+  tmp  <- tmp[which(tmp$number.regions.rna.accepted > 1),]

+  boop <- with(tmp, reorder(CRUKid, ipres_enrichment, max, na.rm = TRUE))

+  bp   <- boxplot(ipres_enrichment ~ boop, data = tmp, ylim = c(-2,2), las = 2, space = 0.3, cex.axis = 1, cex.lab = 0.8, names = FALSE, frame.plot = FALSE, at = 1:length(levels(boop)), outpch = NA)

+  beeswarm(ipres_enrichment ~ boop, data = tmp, pch = 16, add = TRUE, cex = 1.2, pwcol = ifelse(is.na(tmp$immune_cluster), yes = 'light grey', no = ifelse(tmp$immune_cluster == 'high', yes = '#BD2131', no = '#30ABDF')))

+  abline(h = 0.35, col = 'black', lty = 2)

+  text(x = 0.1, y = 0.4, labels = 'IPRES threshold', pos = 4, cex = 0.7)

+  mtext(text = 'IPRES Score', side = 2, line = 2.5, cex = 1)

+  legend('topleft', legend = c('immune high cluster', 'immune low cluster'), pch = 16, col = c('#BD2131','#30ABDF'), bty = 'n', cex = 1)

+  beep       <- patient.summary$ipres_enrichment_group[match(levels(boop), patient.summary$CRUKid)]

+  beep_color <- ifelse(is.na(beep), yes = 'lightgrey', no = ifelse(beep == 'FALSE', yes = '#e0f3f8', no = ifelse(beep == 'TRUE', yes = '#4575b4', no = '#df9b2f')))

+  bp         <- barplot(rep(1, length(beep)), col = beep_color, space = 0.3, axes = FALSE, border = NA)

+  mtext('IPRES', side = 2, line = 1, cex = 0.8, las = 1)

+  beep <- patient.summary$immune_classification[match(levels(boop), patient.summary$CRUKid)]

+  bp   <- barplot(rep(1, length(beep)), col = ifelse(beep == 'low', yes = '#30ABDF', no = ifelse(beep == 'mixed', yes = '#df9b2f', no = '#BD2131')), space = 0.3, axes = FALSE, border = NA)

+  mtext('immune\nclass', side = 2, line = 1, cex = 0.8, las = 1)

+  par(orig_par)

+}

+# Figure S6C-D

+{

+  layout(mat = matrix(c(1,2,3,3), nrow = 2), heights = c(0.9, 0.1), widths = c(0.7, 0.3))

+  par(oma = c(3.5,2,0,1)+0.1, mar = c(0,2.5,0,0)+0.5)

+  tmp  <- patient.summary.region[which(patient.summary.region$simple.hist %in% c('LUAD', 'LUSC')),]

+  tmp  <- tmp[!is.na(tmp$ifng_signature_expanded),]

+  tmp  <- tmp[which(tmp$number.regions.rna.accepted > 1),]

+  boop <- with(tmp, reorder(CRUKid, ifng_signature_expanded, max, na.rm = TRUE))

+  bp   <- boxplot(ifng_signature_expanded ~ boop, data = tmp, ylim = c(4,10), las = 2, space = 0.3, cex.axis = 1, cex.lab = 0.8, names = FALSE, frame.plot = FALSE, at = 1:length(levels(boop)), outpch = NA)

+  beeswarm(ifng_signature_expanded ~ boop, data = tmp, pch = 16, add = TRUE, cex = 1.2, pwcol = ifelse(is.na(tmp$immune_cluster), yes = 'light grey', no = ifelse(tmp$immune_cluster == 'high', yes = '#BD2131', no = '#30ABDF')))

+  mtext(text = 'IFNg signature, expanded', side = 2, line = 2.5, cex = 1)

+  legend('topleft', legend = c('immune high cluster', 'immune low cluster'), pch = 16, col = c('#BD2131','#30ABDF'), bty = 'n', cex = 1)

+  beep <- patient.summary$immune_classification[match(levels(boop), patient.summary$CRUKid)]

+  bp   <- barplot(rep(1, length(beep)), col = ifelse(beep == 'low', yes = '#30ABDF', no = ifelse(beep == 'mixed', yes = '#df9b2f', no = '#BD2131')), space = 0.3, axes = FALSE, border = NA)

+  mtext('immune\nclass', side = 2, line = 1, cex = 0.8, las = 1)

+  boxplot(Ayers_diff ~ immune_classification == 'mixed', patient.summary[which(patient.summary$simple.hist %in% c('LUAD', 'LUSC')),], las = 1, xlab = 'Immune heterogeneous', ylab = 'IFNg signature, expanded (Ayers)')

+  beeswarm(Ayers_diff ~ immune_classification == 'mixed', patient.summary[which(patient.summary$simple.hist %in% c('LUAD', 'LUSC')),], pch = 16, add=TRUE)

+  p.val <- wilcox.test(Ayers_diff ~ immune_classification == 'mixed', patient.summary[which(patient.summary$simple.hist %in% c('LUAD', 'LUSC')),])

+  legend('topleft', paste('p: ', formatC(p.val$p.value, digits = 1, format = 'e'), sep = ''), bty = 'n', cex = 1)

+  par(orig_par)

+}

+# Figure S6E

+{

+  layout(mat = matrix(c(1,2), nrow = 2), heights = c(0.9,0.1), widths = c(1))

+  par(oma = c(3.5,2,0,1)+0.1, mar = c(0,2.5,0,0)+0.5)

+  tmp  <- patient.summary.region[which(patient.summary.region$simple.hist != 'other'),]

+  boop <- with(tmp, reorder(CRUKid, TotalMut_Varscan, max, na.rm = TRUE))

+  bp   <- boxplot(TotalMut_Varscan/30 ~ boop, data = tmp, ylim = c(0.5,100), las = 2, space = 0.3, cex.axis = 1, cex.lab = 0.8, names = FALSE, frame.plot = FALSE, at = 1:length(levels(boop)), outpch = NA, log = 'y', col = 'white')

+  stripchart(TotalMut_Varscan/30 ~ boop, data = tmp, pch = 16, add = TRUE, cex = 1, vertical=TRUE)

+  abline(h = 10, col = 'black', lty = 2)

+  text(x = 0.1, y = 11, labels = 'TMB threshold', pos = 4, cex = 0.7)

+  mtext(text = 'TMB (mut/Mb) [VarScan]', side = 2, line = 2.5, cex = 1)

+  beep <- patient.summary$TMB_het_col[match(levels(boop), patient.summary$CRUKid)]

+  bp   <- barplot(rep(1, length(beep)), col = beep, space = 0.3, axes = FALSE, border = NA)

+  mtext('TMB +', side = 2, line = 1, cex = 0.8, las = 1)

+  par(orig_par)

+}

+# Figure S6F

+{

+  par(mfrow = c(5,1), mar = c(1,4,1,2))

+  tmp <- patient.summary[,c('simple.hist', 'immune_classification', 'ipres_enrichment_group', 'TIDE_enrichment_group', 'TMB_any', 'TMB_het')]

+  tmp$TMB_group <- ifelse(tmp$TMB_any & tmp$TMB_het, yes = 'mixed', no = ifelse(tmp$TMB_any, yes = 'TRUE', no ='FALSE'))

+  tmp$TIDE_enrichment_group <- gsub(pattern = 'True', replacement = 'TRUE', x = tmp$TIDE_enrichment_group)

+  tmp$TIDE_enrichment_group <- gsub(pattern = 'False', replacement = 'FALSE', x = tmp$TIDE_enrichment_group)

+  tmp <- tmp[which(tmp$simple.hist != 'other'),]

+  tmp$num_pos <- rowSums(tmp[,c('TMB_group', 'TIDE_enrichment_group', 'ipres_enrichment_group')]=='TRUE', na.rm = TRUE)

+  tmp <- tmp[with(tmp, order(immune_classification, num_pos, TMB_group, TIDE_enrichment_group, ipres_enrichment_group, decreasing = TRUE)),]

+  beep       <- tmp$simple.hist

+  beep_color <- ifelse(is.na(beep), yes = 'lightgrey', no = ifelse(beep == 'LUAD', yes = 'dark blue', no = ifelse(beep == 'LUSC', yes = 'dark red', no = 'lightgrey')))

+  bp         <- barplot(rep(1, length(beep)), col = beep_color, space = 0.3, axes = FALSE, border = NA)

+  mtext('Hist.', side = 2, line = 1, cex = 0.8, las = 1)

+  beep <- tmp$immune_classification

+  bp   <- barplot(rep(1, length(beep)), col = ifelse(is.na(beep), yes = 'lightgrey', no = ifelse(beep == 'low', yes = '#30ABDF', no = ifelse(beep == 'mixed', yes = '#df9b2f', no = '#BD2131'))), space = 0.3, axes = FALSE, border = NA)

+  mtext('immune\nclass', side = 2, line = 1, cex = 0.8, las = 1)

+  beep       <- tmp$TMB_group

+  beep_color <- ifelse(is.na(beep), yes = 'lightgrey', no = ifelse(beep == 'FALSE', yes = '#e0f3f8', no = ifelse(beep == 'TRUE', yes = '#4575b4', no = '#df9b2f')))

+  bp         <- barplot(rep(1, length(beep)), col = beep_color, space = 0.3, axes = FALSE, border = NA)

+  mtext('TMB+', side = 2, line = 1, cex = 0.8, las = 1)

+  beep       <- tmp$TIDE_enrichment_group

+  beep_color <- ifelse(is.na(beep), yes = 'lightgrey', no = ifelse(beep == 'FALSE', yes = '#e0f3f8', no = ifelse(beep == 'TRUE', yes = '#4575b4', no = '#df9b2f')))

+  bp         <- barplot(rep(1, length(beep)), col = beep_color, space = 0.3, axes = FALSE, border = NA)

+  mtext('TIDE', side = 2, line = 1, cex = 0.8, las = 1)

+  beep       <- tmp$ipres_enrichment_group

+  beep_color <- ifelse(is.na(beep), yes = 'lightgrey', no = ifelse(beep == 'FALSE', yes = '#e0f3f8', no = ifelse(beep == 'TRUE', yes = '#4575b4', no = '#df9b2f')))

+  bp         <- barplot(rep(1, length(beep)), col = beep_color, space = 0.3, axes = FALSE, border = NA)

+  mtext('IPRES', side = 2, line = 1, cex = 0.8, las = 1)

+  par(orig_par)

+}

+# Figure S7A ###

+{

+  par(mfrow = c(1,1))

+  plot(total_comparisons[which(total_comparisons$simple.hist == 'LUAD'),'cn'], total_comparisons[which(total_comparisons$simple.hist == 'LUAD'),'immune'], xlab = 'Pairwise cn distance', ylab = 'Pairwise immune distance', pch = 16, col = total_comparisons[which(total_comparisons$simple.hist == 'LUAD'),'color'], las = 1)

+  p.val <- cor.test(total_comparisons[which(total_comparisons$simple.hist == 'LUAD'),'cn'], total_comparisons[which(total_comparisons$simple.hist == 'LUAD'),'immune'], method = 's')

+  legend(x = 'topleft', legend = paste('p: ', formatC(p.val$p.value, digits = 1, format = 'e'), '\n', 'rho: ', formatC(p.val$estimate, digits = 1, format = 'e'), sep= ''), bty = 'n')

+  points(total_comparisons[which(total_comparisons$simple.hist == 'LUSC'),'cn'], total_comparisons[which(total_comparisons$simple.hist == 'LUSC'),'immune'], xlab = 'Pairwise cn distance', ylab = 'Pairwise immune distance', pch = 16, col = total_comparisons[which(total_comparisons$simple.hist == 'LUSC'),'color'])

+  p.val <- cor.test(total_comparisons[which(total_comparisons$simple.hist == 'LUSC'),'cn'], total_comparisons[which(total_comparisons$simple.hist == 'LUSC'),'immune'], method = 's')

+  legend(x = 'topright', legend = paste('p: ', formatC(p.val$p.value, digits = 1, format = 'e'), '\n', 'rho: ', formatC(p.val$estimate, digits = 1, format = 'e'), sep= ''), bty = 'n')

+}

+# Figure S7C

+{

+  par(mfrow = c(1,2))

+  boxplot((nk.score.danaher)/total.til.score.danahaer ~ !homo_c, data = patient.summary.region[which(patient.summary.region$hla_c_lost == TRUE),], las = 1, ylab = 'NK cell', xlab = 'C1/C2 heterozygosity', main = 'Patients with HLA-C LOH', ylim = c(0,1))

+  beeswarm((nk.score.danaher)/total.til.score.danahaer ~ !homo_c, data = patient.summary.region[which(patient.summary.region$hla_c_lost == TRUE),], add=TRUE, pch = 16, col = 'darkgreen')

+  p.val <- wilcox.test((nk.score.danaher/total.til.score.danahaer) ~ homo_c, data = patient.summary.region[which(patient.summary.region$hla_c_lost == TRUE),])$p.value

+  legend('topleft', paste('p: ', formatC(p.val, digits = 1, format = 'e'), sep = ''), bty = 'n', cex = 0.8)

+  legend('topright', 'tracerx', cex = 0.8, bty = 'n')

+  boxplot((nk.score.danaher)/total.til.score.danahaer ~ !homo_c, data = patient.summary.region[which(patient.summary.region$hla_c_lost == FALSE),], las = 1, ylab = 'NK cell', xlab = 'C1/C2 heterozygosity', main = 'Patients without HLA-C LOH', ylim = c(0,1))

+  beeswarm((nk.score.danaher)/total.til.score.danahaer ~ !homo_c, data = patient.summary.region[which(patient.summary.region$hla_c_lost == FALSE),], add=TRUE, pch = 16, col = 'darkgreen')

+  p.val <- wilcox.test((nk.score.danaher)/total.til.score.danahaer ~ homo_c, data = patient.summary.region[which(patient.summary.region$hla_c_lost == FALSE),])$p.value

+  legend('topleft', paste('p: ', formatC(p.val, digits = 1, format = 'e'), sep = ''), bty = 'n', cex = 0.8)

+}

+# Figure S7D-E

+{

+  par(mfrow = c(1,2))

+  plot.with.confidence(patient.summary.region$shannon.ith[which(patient.summary.region$simple.hist == 'LUAD')], patient.summary.region$cd8.score.danaher[which(patient.summary.region$simple.hist == 'LUAD')], pch = 16, xlab = 'Shannon ITH', ylab = 'CD8+ Danaher', main = 'Lung adeno.', col = 'dark blue', cex = 1.6)

+  plot.with.confidence(patient.summary.region$shannon.ith[which(patient.summary.region$simple.hist == 'LUSC')], patient.summary.region$cd8.score.danaher[which(patient.summary.region$simple.hist == 'LUSC')], pch = 16, xlab = 'Shannon ITH', ylab = 'CD8+ Danaher', main = 'Lung squam.', col = 'dark red', cex = 1.6)

+}

+# Figure S7F

+{

+  par(mfrow = c(1,1))

+  plot(patient.summary.region$purity[which(patient.summary.region$simple.hist == 'LUAD')], patient.summary.region$pathology_TILs[which(patient.summary.region$simple.hist == 'LUAD')], pch = 16, col = 'dark blue', ylab = 'pathology estimated TILs', xlab = 'tumor purity', las = 1, main = '', cex.main = 1, font.main = 1, xlim = c(0,1))

+  p.val <- cor.test(patient.summary.region$purity[which(patient.summary.region$simple.hist == 'LUAD')], patient.summary.region$pathology_TILs[which(patient.summary.region$simple.hist == 'LUAD')], method = 's')

+  legend(x = 'topright', legend = paste('p: ', formatC(p.val$p.value, digits = 1, format = 'e'), '\n', 'rho: ', formatC(p.val$estimate, digits = 1, format = 'e'), sep= ''), bty = 'n', cex = 0.7)

+  points(patient.summary.region$purity[which(patient.summary.region$simple.hist == 'LUSC')], patient.summary.region$pathology_TILs[which(patient.summary.region$simple.hist == 'LUSC')], pch = 16, col = 'dark red', ylab = 'pathology estimated TILs', xlab = 'tumor purity')

+  p.val <- cor.test(patient.summary.region$purity[which(patient.summary.region$simple.hist == 'LUSC')], patient.summary.region$pathology_TILs[which(patient.summary.region$simple.hist == 'LUSC')], method = 's')

+  legend(x = 'bottomright', legend = paste('p: ', formatC(p.val$p.value, digits = 1, format = 'e'), '\n', 'rho: ', formatC(p.val$estimate, digits = 1, format = 'e'), sep= ''), bty = 'n', cex = 0.7)

+}

+# Figure S7G

+{

+  par(mfrow = c(1,1))

+  tmp <- gsub(pattern = '0%', replacement = '#30ABDF', x = gsub(pattern = '50%', replacement = '#BD2131', x = patient.summary.region$TIL_quart[which(patient.summary.region$simple.hist == 'LUAD')]))

+  boxplot(shannon.ith ~ TIL_cat, data = patient.summary.region[which(patient.summary.region$simple.hist == 'LUAD'),], las = 1, main = 'Lung adeno.')

+  beeswarm(shannon.ith ~ TIL_cat, data = patient.summary.region[which(patient.summary.region$simple.hist == 'LUAD'),], add = TRUE, pch = 16, pwcol = tmp, cex = 1.6)

+  p.val <- wilcox.test(shannon.ith ~ TIL_cat == 'low', data = patient.summary.region[which(patient.summary.region$simple.hist == 'LUAD'),])$p.value

+  legend('bottomright', paste('p -- low-mixed/high : ', round(p.val, digits = 2), sep = ''), bty = 'n')

+}

+# Figure S7H-I

+{

+  tmp <- patient.summary[,c('ie.all.updated', 'ie.clonal.updated', 'ie.subclonal.updated', 'simple.hist', 'immune_classification', 'anyCPN_remove')]

+  tmp <- tmp[complete.cases(tmp),]

+  par(mfrow = c(1,2))

+  boxplot(ie.all.updated ~ as.character(simple.hist), data = tmp[which(tmp$simple.hist %in% c('LUAD', 'LUSC')),], las = 1, ylab = 'Immunoediting score')

+  beeswarm(ie.all.updated ~ as.character(simple.hist), data = tmp[which(tmp$simple.hist %in% c('LUAD', 'LUSC')),], pwcol = ifelse(tmp$simple.hist[which(tmp$simple.hist %in% c('LUAD', 'LUSC'))] == 'LUAD', yes = 'dark blue', no = 'dark red'), pch = 16, add =TRUE)

+  p.val <- wilcox.test(ie.all.updated ~ as.character(simple.hist), data = tmp[which(tmp$simple.hist %in% c('LUAD', 'LUSC')),])$p.value

+  legend('bottomright', paste('p: ', formatC(p.val, format = 'e', digits = 1), sep = ''), bty = 'n', cex = 1.3)

+  p.val <- cor.test(as.numeric(patient.summary$ie.all.updated[which(patient.summary$simple.hist != 'other')]),  as.numeric(patient.summary$number.unique.hlas[which(patient.summary$simple.hist != 'other')]), method= 's')

+  boxplot(ie.all.updated ~ number.unique.hlas, data = patient.summary[which(patient.summary$simple.hist != 'other'),], las = 1, ylab = 'Immunoediting score', xlab = 'Number unique HLAs')

+  beeswarm(ie.all.updated ~ number.unique.hlas, data = patient.summary[which(patient.summary$simple.hist != 'other'),], pch = 16, add =TRUE)

+  legend(x = 'topleft', legend = paste('p: ', formatC(p.val$p.value, digits = 1, format = 'e'), '\nrho: ', round(p.val$estimate, digits = 2), sep = ''), bty = 'n', cex = 1.3)

+}

+# Figure S7J

+{

+  tmp <- mutTableAll.together.filtered[which(mutTableAll.together.filtered$ITHState_RNA %in% c(1,2,3, 'FALSE')),]

+  {

+    par(mfrow = c(1,1))

+    ests   <- c()

+    p.vals <- c()

+    ors1    <- c()

+    ors2    <- c()

+    tmp2 <- table(tmp[which(tmp$nonsynonymous == TRUE & tmp$any.HLA.loss == TRUE),'lost_in_rna'], tmp[which(tmp$nonsynonymous == TRUE & tmp$any.HLA.loss == TRUE),'strong.count'])

+    f.test  <- fisher.test(tmp2)

+    ests    <- c(ests, f.test$estimate)

+    p.vals  <- c(p.vals, f.test$p.value)

+    ors1    <- c(ors1, f.test$conf.int[1])

+    ors2    <- c(ors2, f.test$conf.int[2])

+    tmp2 <- table(tmp[which(tmp$nonsynonymous == TRUE & tmp$any.HLA.loss == FALSE),'lost_in_rna'], tmp[which(tmp$nonsynonymous == TRUE & tmp$any.HLA.loss == FALSE),'strong.count'])

+    f.test  <- fisher.test(tmp2)

+    ests    <- c(ests, f.test$estimate)

+    p.vals  <- c(p.vals, f.test$p.value)

+    ors1    <- c(ors1, f.test$conf.int[1])

+    ors2    <- c(ors2, f.test$conf.int[2])

+    tmp2 <- table(tmp[which(tmp$nonsynonymous == TRUE & tmp$immune_classification %in% c('high') & tmp$any.HLA.loss == TRUE),'lost_in_rna'], tmp[which(tmp$nonsynonymous == TRUE & tmp$immune_classification %in% c('high') & tmp$any.HLA.loss == TRUE),'strong.count'])

+    f.test  <- fisher.test(tmp2)

+    ests    <- c(ests, f.test$estimate)

+    p.vals  <- c(p.vals, f.test$p.value)

+    ors1    <- c(ors1, f.test$conf.int[1])

+    ors2    <- c(ors2, f.test$conf.int[2])

+    tmp2 <- table(tmp[which(tmp$nonsynonymous == TRUE & tmp$immune_classification %in% c('high') & tmp$any.HLA.loss == FALSE),'lost_in_rna'], tmp[which(tmp$nonsynonymous == TRUE & tmp$immune_classification %in% c('high') & tmp$any.HLA.loss == FALSE),'strong.count'])

+    f.test  <- fisher.test(tmp2)

+    ests    <- c(ests, f.test$estimate)

+    p.vals  <- c(p.vals, f.test$p.value)

+    ors1    <- c(ors1, f.test$conf.int[1])

+    ors2    <- c(ors2, f.test$conf.int[2])

+    tmp2 <- table(tmp[which(tmp$nonsynonymous == TRUE & tmp$immune_classification %in% c('mixed') & tmp$any.HLA.loss == TRUE),'lost_in_rna'], tmp[which(tmp$nonsynonymous == TRUE & tmp$immune_classification %in% c('mixed') & tmp$any.HLA.loss == TRUE),'strong.count'])

+    f.test  <- fisher.test(tmp2)

+    ests    <- c(ests, f.test$estimate)

+    p.vals  <- c(p.vals, f.test$p.value)

+    ors1    <- c(ors1, f.test$conf.int[1])

+    ors2    <- c(ors2, f.test$conf.int[2])

+    tmp2 <- table(tmp[which(tmp$nonsynonymous == TRUE & tmp$immune_classification %in% c('mixed') & tmp$any.HLA.loss == FALSE),'lost_in_rna'], tmp[which(tmp$nonsynonymous == TRUE & tmp$immune_classification %in% c('mixed') & tmp$any.HLA.loss == FALSE),'strong.count'])

+    f.test  <- fisher.test(tmp2)

+    ests    <- c(ests, f.test$estimate)

+    p.vals  <- c(p.vals, f.test$p.value)

+    ors1    <- c(ors1, f.test$conf.int[1])

+    ors2    <- c(ors2, f.test$conf.int[2])

+    tmp2 <- table(tmp[which(tmp$nonsynonymous == TRUE & tmp$immune_classification %in% c('low') & tmp$any.HLA.loss == TRUE),'lost_in_rna'], tmp[which(tmp$nonsynonymous == TRUE & tmp$immune_classification %in% c('low') & tmp$any.HLA.loss == TRUE),'strong.count'])

+    f.test  <- fisher.test(tmp2)

+    ests    <- c(ests, f.test$estimate)

+    p.vals  <- c(p.vals, f.test$p.value)

+    ors1    <- c(ors1, f.test$conf.int[1])

+    ors2    <- c(ors2, f.test$conf.int[2])

+    tmp2 <- table(tmp[which(tmp$nonsynonymous == TRUE & tmp$immune_classification %in% c('low') & tmp$any.HLA.loss == FALSE),'lost_in_rna'], tmp[which(tmp$nonsynonymous == TRUE & tmp$immune_classification %in% c('low') & tmp$any.HLA.loss == FALSE),'strong.count'])

+    f.test  <- fisher.test(tmp2)

+    ests    <- c(ests, f.test$estimate)

+    p.vals  <- c(p.vals, f.test$p.value)

+    ors1    <- c(ors1, f.test$conf.int[1])

+    ors2    <- c(ors2, f.test$conf.int[2])

+    ests <- 1/ests

+    ors1 <- 1/ors1

+    ors2 <- 1/ors2

+    bp <- barplot(ests, col = c(sapply(X = c('#00441b', '#BD2131', '#df9b2f', '#30ABDF'), FUN = function(x) rep(x, 2))), border = FALSE, las = 1, cex.axis = 0.9, cex.names = 0.7, ylab = 'OR -- neoantigen expression lost in RNA', ylim = c(0,1.5), names = '', axes = FALSE, space = rep(c(0.6,0.2),4))

+    axis(side = 2, at = c(0,0.5, 1.0, 1.5), labels = c(0, 0.5, 1.0, 1.5), las = 1, cex.axis = 0.8)

+    segments(x0 = bp[1,1], x1 = bp[1,1], y0 = ors1[1], y1 = ors2[1])

+    segments(x0 = bp[2,1], x1 = bp[2,1], y0 = ors1[2], y1 = ors2[2])

+    segments(x0 = bp[3,1], x1 = bp[3,1], y0 = ors1[3], y1 = ors2[3])

+    segments(x0 = bp[4,1], x1 = bp[4,1], y0 = ors1[4], y1 = ors2[4])

+    segments(x0 = bp[5,1], x1 = bp[5,1], y0 = ors1[5], y1 = ors2[5])

+    segments(x0 = bp[6,1], x1 = bp[6,1], y0 = ors1[6], y1 = ors2[6])

+    segments(x0 = bp[7,1], x1 = bp[7,1], y0 = ors1[7], y1 = ors2[7])

+    segments(x0 = bp[8,1], x1 = bp[8,1], y0 = ors1[8], y1 = ors2[8])

+    mtext(text = paste('p = ', round(p.vals, digits = 3), sep = ''), side = 1, line = 2, cex = 0.7, at = bp[,1])

+    mtext(text = c('+HLA LOH', '-HLA LOH'), side = 1, line = 1, cex = 0.7, at = bp[,1])

+    tmp_names <- factor(rep(c('All', 'High', 'Hetero.', 'Low'), each = 2), levels = c('All', 'High', 'Hetero.', 'Low'))

+    mtext(text = c('All', 'High', 'Hetero.', 'Low'), side = 1, line = 3, cex = 0.7, at = aggregate(bp, by = list(tmp_names), mean)$V1)

+    abline(h = 1, lty = 2, col = 'black')

+  }

+}

+# Figure S7K

+{

+  par(mfrow = c(1,2))

+  neo <- table(mutTableRegion$mutant.expressed.binary[which(mutTableRegion$nonsynonymous == TRUE & mutTableRegion$count == 1)], mutTableRegion$expression.tpm[which(mutTableRegion$nonsynonymous == TRUE & mutTableRegion$count == 1)] > 1)

+  non_neo <- table(mutTableRegion$mutant.expressed.binary[which(mutTableRegion$nonsynonymous == TRUE & mutTableRegion$count == 0)], mutTableRegion$expression.tpm[which(mutTableRegion$nonsynonymous == TRUE & mutTableRegion$count == 0)] > 1)

+  k <- rbind(rev(non_neo[1,]), rev(neo[1,]))

+  rownames(k) <- c('non-neo', 'neo')

+  f.test <- fisher.test(rbind(rev(non_neo[1,]), rev(neo[1,])))

+  barplot(t(t(k)/rowSums(t(k))), names.arg = c('Gene expressed', 'Gene not expressed'), las = 1, col = c('#a6bddb', '#2b8cbe'), main = 'Neo')

+  legend(x = 'bottomleft', fill =c('#a6bddb', '#2b8cbe'), border = NA, bty = 'n', legend = c('Non-neo', 'Neo'), xpd = TRUE, inset = c(0,-0.35))

+  legend(x = 'bottomleft', border = NA, bty = 'n', legend = paste('p: ', formatC(f.test$p.value, digits = 1, format = 'e'), '\nOR: ', round(f.test$estimate, digits = 1), sep = ''), xpd = FALSE)

+  # strong neo

+  neo <- table(mutTableRegion$mutant.expressed.binary[which(mutTableRegion$nonsynonymous == TRUE & mutTableRegion$strong.count == 1)], mutTableRegion$expression.tpm[which(mutTableRegion$nonsynonymous == TRUE & mutTableRegion$strong.count == 1)] > 1)

+  non_neo <- table(mutTableRegion$mutant.expressed.binary[which(mutTableRegion$nonsynonymous == TRUE & mutTableRegion$strong.count == 0)], mutTableRegion$expression.tpm[which(mutTableRegion$nonsynonymous == TRUE & mutTableRegion$strong.count == 0)] > 1)

+  k <- rbind(rev(non_neo[1,]), rev(neo[1,]))

+  rownames(k) <- c('non-neo', 'neo')

+  f.test <- fisher.test(rbind(rev(non_neo[1,]), rev(neo[1,])))

+  barplot(t(t(k)/rowSums(t(k))), names.arg = c('Gene expressed', 'Gene not expressed'), las = 1, col = c('#a6bddb', '#2b8cbe'), main = 'Strong neo')

+  legend(x = 'bottomleft', fill =c('#a6bddb', '#2b8cbe'), border = NA, bty = 'n', legend = c('Non-neo', 'Neo'), xpd = TRUE, inset = c(0,-0.35))

+  legend(x = 'bottomleft', border = NA, bty = 'n', legend = paste('p: ', formatC(f.test$p.value, digits = 1, format = 'e'), '\nOR: ', round(f.test$estimate, digits = 1), sep = ''), xpd = FALSE)

+}

+# Figure S8

+{

+  for(cancer in c('LUAD', 'LUSC')){

+    if(cancer == 'LUAD'){

+      colOrder <- c("CRUK0020", "CRUK0016", "CRUK0039", "CRUK0051", "CRUK0027", "CRUK0060", "CRUK0024", "CRUK0035", "CRUK0047", "CRUK0052", "CRUK0026", "CRUK0008", "CRUK0038", "CRUK0045", "CRUK0031", "CRUK0034", "CRUK0006", "CRUK0033", "CRUK0030", "CRUK0015", "CRUK0014", "CRUK0019", "CRUK0007", "CRUK0040", "CRUK0017", "CRUK0001", "CRUK0009", "CRUK0003", "CRUK0029", "CRUK0061", "CRUK0044", "CRUK0004", "CRUK0012", "CRUK0005", "CRUK0041", "CRUK0018", "CRUK0046", "CRUK0048", "CRUK0032", "CRUK0010", "CRUK0013", "CRUK0028", "CRUK0002", "CRUK0022", "CRUK0037", "CRUK0055", "CRUK0023", "CRUK0036", "CRUK0042", "CRUK0053", "CRUK0057", "CRUK0025", "CRUK0050", "CRUK0021", "CRUK0058", "CRUK0011", "CRUK0043", "CRUK0049", "CRUK0054", "CRUK0056", "CRUK0059")

+    }

+    if(cancer == 'LUSC'){

+      colOrder <- c(c("CRUK0086","CRUK0074","CRUK0068","CRUK0079","CRUK0069","CRUK0064","CRUK0083","CRUK0072","CRUK0073","CRUK0081","CRUK0065","CRUK0078","CRUK0067","CRUK0070","CRUK0062","CRUK0082","CRUK0076","CRUK0071","CRUK0066","CRUK0084","CRUK0090","CRUK0075","CRUK0063","CRUK0089","CRUK0085","CRUK0088","CRUK0087","CRUK0092","CRUK0077","CRUK0080","CRUK0093","CRUK0091"))

+    }

+    tmp_df <- data.frame(colnames(all_uncondensed_alterations)[which(substr(colnames(all_uncondensed_alterations), 1, 8) %in% colOrder)], stringsAsFactors = FALSE)

+    tmp_df$SampleID <- substr(tmp_df[,1], 1, 8)

+    tmp_df$SampleID <- factor(tmp_df$SampleID, levels = colOrder)

+    tmp_df <- tmp_df[order(tmp_df$SampleID),]

+    uncondensed_alterations <- all_uncondensed_alterations[,tmp_df[,1]]

+    # mutations annotated as 0.5

+    uncondensed_beep <- uncondensed_alterations

+    uncondensed_beep[which(uncondensed_beep == -1)] <- 0

+    uncondensed_beep[which(uncondensed_beep == 1)] <- 0

+    # plot

+    layout(matrix(1:(nrow(uncondensed_alterations)), ncol = 1))

+    par(mar = c(2,2,1,2), oma = c(4,4,4,4))

+    tmp_acceptedRegions_df <- acceptedRegions_df[match(colnames(uncondensed_alterations), acceptedRegions_df$CRUKidRegion),]

+    sapply(1:nrow(uncondensed_alterations), FUN = function(x){

+      bp <-  barplot(rep(1,ncol(uncondensed_alterations)),border = NA,axes = FALSE,

+                     col = ifelse(as.numeric(uncondensed_alterations[x,]) == 1, yes = 'red', no = ifelse(as.numeric(uncondensed_alterations[x,]) < 0, yes = 'darkblue', no = 'lightgrey')),

+                     space = unlist(sapply(tmp_acceptedRegions_df$nregions[!duplicated(tmp_acceptedRegions_df$CRUKid)], FUN = function(x){c(2,rep(0.2,(x-1)))}))

+      )

+      rect(xleft = bp[,1]-0.15, xright = bp[,1]+0.15, ybottom = 0.2, ytop = uncondensed_beep[x,]*1 + 0.2,

+           col = '#008000', border = NA)

+      mtext(text = rownames(uncondensed_alterations)[x], side=2,line = -25, cex = 5, las = 2, outer = FALSE, col = 'black')

+    })

+    bp <-  barplot(rep(1,ncol(uncondensed_alterations)),border = NA,axes = FALSE,space = unlist(sapply(tmp_acceptedRegions_df$nregions[!duplicated(tmp_acceptedRegions_df$CRUKid)], FUN = function(x){c(2,rep(0.2,(x-1)))})), plot = FALSE)

+    nr          <- tmp_acceptedRegions_df$nregions[!duplicated(tmp_acceptedRegions_df$CRUKid)]

+    names(nr)   <- unique(tmp_acceptedRegions_df$CRUKid)

+  }

+  par(orig_par)

+}

+# Figure S9A,C,E

+{

+  par(mfrow = c(3,2))

+  tmp     <- patient.summary[which(patient.summary$simple.hist == 'LUAD'),]

+  tmp$tmp <- ifelse(tmp$ClonalNeo > summary(tmp$ClonalNeo)['3rd Qu.'], yes = 'high', no = 'low')

+  survplot(Surv(allan_final_dsf_time, allan_final_dsf == "1") ~ tmp,

+           data = tmp,

+           main = "LUAD -- survival by upper quartile clonal neo",

+           xlab = 'Time (days)',

+           ylab = "PFS",

+           col = c('darkblue', 'darkred'),

+           las = 1,

+           legend.pos = NA,

+           lwd = 1)

+  tmp$tmp <- ifelse(tmp$SubclonalNeo > summary(tmp$SubclonalNeo)['3rd Qu.'], yes = 'high', no = 'low')

+  survplot(Surv(allan_final_dsf_time, allan_final_dsf == "1") ~ tmp,

+           data = tmp,

+           main = "LUAD -- survival by upper quartile subclonal neo",

+           xlab = 'Time (days)',

+           ylab = "PFS",

+           col = c('darkblue', 'darkred'),

+           las = 1,

+           legend.pos = NA,

+           lwd = 1)

+  tmp$tmp <- ifelse(tmp$TotalNeo > summary(tmp$TotalNeo)['3rd Qu.'], yes = 'high', no = 'low')

+  survplot(Surv(allan_final_dsf_time, allan_final_dsf == "1") ~ tmp,

+           data = tmp,

+           main = "LUAD -- survival by upper quartile total neo",

+           xlab = 'Time (days)',

+           ylab = "PFS",

+           col = c('darkblue', 'darkred'),

+           las = 1,

+           legend.pos = NA,

+           lwd = 1)

+  tmp     <- patient.summary[which(patient.summary$simple.hist == 'LUSC'),]

+  tmp$tmp <- ifelse(tmp$ClonalNeo > summary(tmp$ClonalNeo)['3rd Qu.'], yes = 'high', no = 'low')

+  survplot(Surv(allan_final_dsf_time, allan_final_dsf == "1") ~ tmp,

+           data = tmp,

+           main = "LUSC -- survival by upper quartile clonal neo",

+           xlab = 'Time (days)',

+           ylab = "PFS",

+           col = c('darkblue', 'darkred'),

+           las = 1,

+           legend.pos = NA,

+           lwd = 1)

+  tmp$tmp <- ifelse(tmp$SubclonalNeo > summary(tmp$SubclonalNeo)['3rd Qu.'], yes = 'high', no = 'low')

+  survplot(Surv(allan_final_dsf_time, allan_final_dsf == "1") ~ tmp,

+           data = tmp,

+           main = "LUSC -- survival by upper quartile subclonal neo",

+           xlab = 'Time (days)',

+           ylab = "PFS",

+           col = c('darkblue', 'darkred'),

+           las = 1,

+           legend.pos = NA,

+           lwd = 1)

+  tmp$tmp <- ifelse(tmp$TotalNeo > summary(tmp$TotalNeo)['3rd Qu.'], yes = 'high', no = 'low')

+  survplot(Surv(allan_final_dsf_time, allan_final_dsf == "1") ~ tmp,

+           data = tmp,

+           main = "LUAD -- survival by upper quartile total neo",

+           xlab = 'Time (days)',

+           ylab = "PFS",

+           col = c('darkblue', 'darkred'),

+           las = 1,

+           legend.pos = NA,

+           lwd = 1)

+}

+# Figure S9B,D ###

+{

+  par(mfrow = c(2,2), xpd = TRUE)

+  # # luad

+  tmp        <- patient.summary[which(patient.summary$simple.hist == 'LUAD'),]

+  luad_pvals <- data.frame(row.names = 1:max(tmp$ClonalNeo))

+  luad_pvals$hr <- NA

+  luad_pvals$p  <- NA

+  luad_pvals$cil <- NA

+  luad_pvals$cih <- NA

+  luad_pvals$nu <- NA

+  luad_pvals$hr_s <- NA

+  luad_pvals$p_s  <- NA

+  luad_pvals$cil_s <- NA

+  luad_pvals$cih_s <- NA

+  luad_pvals$nu_s <- NA

+  for(n in 1:max(tmp$ClonalNeo)){

+    test <- summary(coxph(Surv(allan_final_dsf_time, allan_final_dsf == "1") ~ ClonalNeo > n, data = tmp))

+    hr  <- test$conf.int[1]

+    cil <- test$conf.int[3]

+    cih <- test$conf.int[4]

+    p   <- test$sctest['pvalue']

+    nu  <- survfit(Surv(allan_final_dsf_time, allan_final_dsf == "1") ~ ClonalNeo > n, data = tmp)$n[2]

+    test <- summary(coxph(Surv(allan_final_dsf_time, allan_final_dsf == "1") ~ SubclonalNeo > n, data = tmp))

+    hr_s <- test$conf.int[1]

+    cil_s <- test$conf.int[3]

+    cih_s <- test$conf.int[4]

+    p_s  <- test$sctest['pvalue']

+    nu_s <- survfit(Surv(allan_final_dsf_time, allan_final_dsf == "1") ~ SubclonalNeo > n, data = tmp)$n[2]

+    luad_pvals[as.character(n),'hr'] <- hr

+    luad_pvals[as.character(n),'p']  <- p

+    luad_pvals[as.character(n),'cil']  <- cil

+    luad_pvals[as.character(n),'cih']  <- cih

+    luad_pvals[as.character(n),'nu'] <- nu

+    luad_pvals[as.character(n),'hr_s'] <- hr_s

+    luad_pvals[as.character(n),'p_s']  <- p_s

+    luad_pvals[as.character(n),'cil_s']  <- cil_s

+    luad_pvals[as.character(n),'cih_s']  <- cih_s

+    luad_pvals[as.character(n),'nu_s'] <- nu_s

+  }

+  keep_luad <- luad_pvals

+  luad_pvals <- luad_pvals[20:500,]

+  rbPal           <- colorRampPalette(c('white','darkblue'))

+  luad_pvals$ncol <- rbPal(30)[as.numeric(cut(luad_pvals$nu,breaks = 30))]

+  luad_pvals$col <- ifelse(luad_pvals$p < 0.05, yes = 'red', no = 'black')

+  luad_pvals$cih[which(luad_pvals$cih > 4)] <- 4

+  plot(x = rownames(luad_pvals)[seq(1, length(rownames(luad_pvals)), 3)], y = luad_pvals$hr[seq(1, length(luad_pvals$hr), 3)], ylim = c(0, 5), xlim = c(20, 500), pch = NA, ylab = 'HR', xlab = '#ClonalNeo used as threshold', las = 1)

+  #segments(x0=as.numeric(rownames(luad_pvals)[seq(1, length(rownames(luad_pvals)), 3)]), y0=luad_pvals$cil[seq(1, length(rownames(luad_pvals)), 3)],

+  #         x1=as.numeric(rownames(luad_pvals)[seq(1, length(rownames(luad_pvals)), 3)]), y1=luad_pvals$cih[seq(1, length(rownames(luad_pvals)),3)])

+  points(x = rownames(luad_pvals)[seq(1, length(rownames(luad_pvals)), 3)], y = luad_pvals$hr[seq(1, length(luad_pvals$hr), 3)], col = luad_pvals$col[seq(1, length(luad_pvals$col), 3)], pch = 16)

+  points(x = rownames(luad_pvals)[seq(1, length(rownames(luad_pvals)), 3)], y = rep(5, length(seq(1, length(rownames(luad_pvals)), 3))) , col = luad_pvals$ncol, pch =15)

+  points(x = as.numeric(summary(tmp$ClonalNeo))[c(2,3,5)], y = c(5,5,5), pch = 15, col = 'gold')

+  abline(h = 1, col = 'red', lty = 2)

+  text(x = 100, y = 4.9, labels = 'number patients in high neo group', cex = 0.7)

+  luad_pvals$ncol_s <- rbPal(30)[as.numeric(cut(luad_pvals$nu_s,breaks = 30))]

+  luad_pvals$col_s <- ifelse(luad_pvals$p_s < 0.05, yes = 'red', no = 'black')

+  luad_pvals$cih_s[which(luad_pvals$cih_s > 4)] <- 4

+  plot(x = rownames(luad_pvals)[seq(1, length(rownames(luad_pvals)), 3)], y = luad_pvals$hr_s[seq(1, length(luad_pvals$hr_s), 3)], ylim = c(0, 4), xlim = c(20, 500), pch = NA, ylab = 'HR', xlab = '#SubclonalNeo used as threshold', las = 1)

+  #segments(x0=as.numeric(rownames(luad_pvals)[seq(1, length(rownames(luad_pvals)), 3)]), y0=luad_pvals$cil_s[seq(1, length(rownames(luad_pvals)), 3)],

+  #         x1=as.numeric(rownames(luad_pvals)[seq(1, length(rownames(luad_pvals)), 3)]), y1=luad_pvals$cih_s[seq(1, length(rownames(luad_pvals)),3)])

+  points(x = rownames(luad_pvals)[seq(1, length(rownames(luad_pvals)), 3)], y = luad_pvals$hr_s[seq(1, length(luad_pvals$hr_s), 3)], col = luad_pvals$col_s[seq(1, length(luad_pvals$col_s), 3)], pch = 16)

+  points(x = rownames(luad_pvals)[seq(1, length(rownames(luad_pvals)), 3)], y = rep(5, length(seq(1, length(rownames(luad_pvals)), 3))) , col = luad_pvals$ncol_s, pch =15)

+  points(x = as.numeric(summary(tmp$SubclonalNeo))[c(2,3,5)], y = c(5,5,5), pch = 15, col = 'gold')

+  abline(h = 1, col = 'red', lty = 2)

+  text(x = 100, y = 4.9, labels = 'number patients in high neo group', cex = 0.7)

+  luad_pvals <- keep_luad

+  # # lusc

+  tmp        <- patient.summary[which(patient.summary$simple.hist == 'LUSC'),]

+  lusc_pvals <- data.frame(row.names = 1:max(tmp$ClonalNeo))

+  lusc_pvals$hr <- NA

+  lusc_pvals$p  <- NA

+  lusc_pvals$cil <- NA

+  lusc_pvals$cih <- NA

+  lusc_pvals$nu <- NA

+  lusc_pvals$hr_s <- NA

+  lusc_pvals$p_s  <- NA

+  lusc_pvals$cil_s <- NA

+  lusc_pvals$cih_s <- NA

+  lusc_pvals$nu_s <- NA

+  for(n in 1:max(tmp$ClonalNeo)){

+    test <- summary(coxph(Surv(allan_final_dsf_time, allan_final_dsf == "1") ~ ClonalNeo > n, data = tmp))

+    hr  <- test$conf.int[1]

+    cil <- test$conf.int[3]

+    cih <- test$conf.int[4]

+    p   <- test$sctest['pvalue']

+    nu  <- survfit(Surv(allan_final_dsf_time, allan_final_dsf == "1") ~ ClonalNeo > n, data = tmp)$n[2]

+    test <- summary(coxph(Surv(allan_final_dsf_time, allan_final_dsf == "1") ~ SubclonalNeo > n, data = tmp))

+    hr_s <- test$conf.int[1]

+    cil_s <- test$conf.int[3]

+    cih_s <- test$conf.int[4]

+    p_s  <- test$sctest['pvalue']

+    nu_s <- survfit(Surv(allan_final_dsf_time, allan_final_dsf == "1") ~ SubclonalNeo > n, data = tmp)$n[2]

+    lusc_pvals[as.character(n),'hr'] <- hr

+    lusc_pvals[as.character(n),'p']  <- p

+    lusc_pvals[as.character(n),'cil']  <- cil

+    lusc_pvals[as.character(n),'cih']  <- cih

+    lusc_pvals[as.character(n),'nu'] <- nu

+    lusc_pvals[as.character(n),'hr_s'] <- hr_s

+    lusc_pvals[as.character(n),'p_s']  <- p_s

+    lusc_pvals[as.character(n),'cil_s']  <- cil_s

+    lusc_pvals[as.character(n),'cih_s']  <- cih_s

+    lusc_pvals[as.character(n),'nu_s'] <- nu_s

+  }

+  keep_lusc <- lusc_pvals

+  lusc_pvals <- lusc_pvals[20:500,]

+  lusc_pvals      <- keep_lusc[!is.na(keep_lusc$nu),]

+  lusc_pvals$ncol <- rbPal(30)[as.numeric(cut(lusc_pvals$nu,breaks = 30))]

+  lusc_pvals$col <- ifelse(lusc_pvals$p < 0.05, yes = 'red', no = 'black')

+  lusc_pvals$cih[which(lusc_pvals$cih > 2)] <- 2

+  plot(x = rownames(lusc_pvals)[seq(1, length(rownames(lusc_pvals)), 3)], y = lusc_pvals$hr[seq(1, length(lusc_pvals$hr), 3)], ylim = c(0, 2), xlim = c(33, 474), pch = NA, ylab = 'HR', xlab = '#ClonalNeo used as threshold', las = 1)

+  #segments(x0=as.numeric(rownames(lusc_pvals)[seq(1, length(rownames(lusc_pvals)), 3)]), y0=lusc_pvals$cil[seq(1, length(rownames(lusc_pvals)), 3)],

+  #         x1=as.numeric(rownames(lusc_pvals)[seq(1, length(rownames(lusc_pvals)), 3)]), y1=lusc_pvals$cih[seq(1, length(rownames(lusc_pvals)),3)])

+  points(x = rownames(lusc_pvals)[seq(1, length(rownames(lusc_pvals)), 3)], y = lusc_pvals$hr[seq(1, length(lusc_pvals$hr), 3)], col = lusc_pvals$col[seq(1, length(lusc_pvals$col), 3)], pch = 16)

+  points(x = rownames(lusc_pvals)[seq(1, length(rownames(lusc_pvals)), 3)], y = rep(2.5, length(seq(1, length(rownames(lusc_pvals)), 3))) , col = lusc_pvals$ncol, pch =15)

+  points(x = as.numeric(summary(tmp$ClonalNeo))[c(2,3,5)], y = c(2.5,2.5,2.5), pch = 15, col = 'gold')

+  abline(h = 1, col = 'red', lty = 2)

+  text(x = 120, y = 2.4, labels = 'number patients in high neo group', cex = 0.7)

+  lusc_pvals      <- keep_lusc[!is.na(keep_lusc$nu_s),]

+  lusc_pvals$ncol_s <- rbPal(30)[as.numeric(cut(lusc_pvals$nu_s,breaks = 30))]

+  lusc_pvals$col_s <- ifelse(lusc_pvals$p_s < 0.05, yes = 'red', no = 'black')

+  lusc_pvals$cih_s[which(lusc_pvals$cih_s > 5)] <- 5

+  plot(x = rownames(lusc_pvals)[seq(1, length(rownames(lusc_pvals)), 3)], y = lusc_pvals$hr_s[seq(1, length(lusc_pvals$hr_s), 3)], ylim = c(0, 8), xlim = c(0, 355), col = lusc_pvals$col_s[seq(1, length(lusc_pvals$col_s), 3)], pch = 16, ylab = 'HR', xlab = '#SubclonalNeo used as threshold', las = 1)

+  #segments(x0=as.numeric(rownames(lusc_pvals)[seq(1, length(rownames(lusc_pvals)), 3)]), y0=lusc_pvals$cil_s[seq(1, length(rownames(lusc_pvals)), 3)],

+  #         x1=as.numeric(rownames(lusc_pvals)[seq(1, length(rownames(lusc_pvals)), 3)]), y1=lusc_pvals$cih_s[seq(1, length(rownames(lusc_pvals)),3)])

+  points(x = rownames(lusc_pvals)[seq(1, length(rownames(lusc_pvals)), 3)], y = lusc_pvals$hr_s[seq(1, length(lusc_pvals$hr_s), 3)], col = lusc_pvals$col_s[seq(1, length(lusc_pvals$col_s), 3)], pch = 16)

+  points(x = rownames(lusc_pvals)[seq(1, length(rownames(lusc_pvals)), 3)], y = rep(6, length(seq(1, length(rownames(lusc_pvals)), 3))) , col = lusc_pvals$ncol_s, pch =15)

+  points(x = as.numeric(summary(tmp$SubclonalNeo))[c(2,3,5)], y = c(6,6,6), pch = 15, col = 'gold')

+  abline(h = 1, col = 'red', lty = 2)

+  text(x = 100, y = 5.9, labels = 'number patients in high neo group', cex = 0.7)

+}

+# Figure S9F

+{

+  tmp <- patient.summary[which(patient.summary$simple.hist %in% c('LUAD', 'LUSC')),]

+  tmp$low_evasion_clonal <- ifelse(tmp$low_evasion == TRUE | tmp$high_clonal == TRUE, yes =TRUE, no = FALSE)

+  tmp$histology <- tmp$simple.hist

+  tmp <- within(tmp, {

+    histology <- factor(histology)

+    sex <- factor(sex, labels = c("Female", "Male"))

+    stage <- factor(allan_stage_no_3b, levels = c('1a', '1b', '2a', '2b', '3a'), labels = c('1a', '1b', '2a', '2b', '3'))

+    Adjuvant.therapy <- factor(Adjuvant.therapy, levels = c('No adjuvant treatment', 'Adjuvant'))

+    low_evasion <- factor(low_evasion, levels = c('FALSE', 'TRUE'))

+    low_evasion_clonal <- factor(low_evasion_clonal, levels = c('FALSE', 'TRUE'))

+    number.unique.hlas <- as.numeric(number.unique.hlas)

+  })

+  tmp_model <- coxph(Surv(allan_final_dsf_time, allan_final_dsf == '1') ~ histology + age + sex + stage + packyears + Adjuvant.therapy + low_evasion_clonal, data=tmp)

+  ggforest(tmp_model, data = NULL, main = "Hazard ratio", fontsize = 0.7, refLabel = "reference", noDigits = 2)

+}

+tmp_model <- coxph(Surv(allan_final_dsf_time, allan_final_dsf == '1') ~ histology + age + sex + stage + packyears + Adjuvant.therapy + low_evasion + number.unique.hlas, data=tmp)

+ggforest(tmp_model, data = NULL, main = "Hazard ratio", fontsize = 0.7, refLabel = "reference", noDigits = 2)

Snippets

Rachel Rosenthal Generate manuscript figures

File generateFigures.R Modified

File generateFigures.R Added