# This document showcases how we compute the biomarker
# The required input consists of two pieces of data
# 1. tcr_freq: a dataframe with at least two columns
# One of the columns would be an identifier for TCR clones or TCRs
# Another column is the corresponding clonal size or productive frequency
# 2. tcr: a matrix (names) where each row is a pMHC and each column is
# is a TCR clone or TCR. The values are the affinity binding
# scores predicted by pMTnet-omni
# NOTE we assume that the order of the TCRs in tcr_freq corresponds to
# that in the matrix tcr
cal_enrich <- function(tcr, tcr_freq, top)
{
# First rank tcrs in descending order
rank_freq = rank(-tcr_freq$size)
# Retain only top tcrs
tcr = tcr[, rank_freq <= top]
return(mean(tcr<0.03)/0.03)
}
biomarker <- function(tcr, tcr_freq,
top_list=c(3, 5, 10, 25))
{
val = 0
for(top in top_list)
{
val = val + cal_enrich(tcr, tcr_freq, top)
}
val = val/length(top_list)
return(val)
}