# This document showcases how we compute the biomarker # The required input consists of two pieces of data # 1. tcr_freq: a dataframe with at least two columns # One of the columns would be an identifier for TCR clones or TCRs # Another column is the corresponding clonal size or productive frequency # 2. tcr: a matrix (names) where each row is a pMHC and each column is # is a TCR clone or TCR. The values are the affinity binding # scores predicted by pMTnet-omni # NOTE we assume that the order of the TCRs in tcr_freq corresponds to # that in the matrix tcr cal_enrich <- function(tcr, tcr_freq, top) { # First rank tcrs in descending order rank_freq = rank(-tcr_freq$size) # Retain only top tcrs tcr = tcr[, rank_freq <= top] return(mean(tcr<0.03)/0.03) } biomarker <- function(tcr, tcr_freq, top_list=c(3, 5, 10, 25)) { val = 0 for(top in top_list) { val = val + cal_enrich(tcr, tcr_freq, top) } val = val/length(top_list) return(val) }