Error when subsampling while calculating distToNearest between groups

Issue #123 resolved
Julian Zhou created an issue

Below is a toy example that reproduces the error:

# Toy reproduction: shazam:::nearestDist() fails with
# "subscript out of bounds" when `crossGroups` and `subsample` are both set.

library(shazam) # v0.1.11

data(ExampleDb, package = "alakazam")
db <- subset(ExampleDb)

# Group sequences by V and J call; the VJ_GROUP column is used below.
db <- alakazam::groupGenes(db, "V_CALL", "J_CALL", first = FALSE) # v0.2.11

# Rows belonging to the 95th distinct V-J group (computed once, reused below).
in_group <- db$VJ_GROUP == unique(db$VJ_GROUP)[95]

table(db[["ISOTYPE"]][in_group])
# IgA IgD IgG IgM
#  10   3   7  17

# 37 sequences
sequences <- db[["JUNCTION"]][in_group]
isotypes <- db[["ISOTYPE"]][in_group]

model <- "ham"
normalize <- "len"
symmetry <- "avg"
mst <- FALSE

### WITHOUT subsampling

crossGroups <- NULL
subsample <- NULL

# This runs.
shazam:::nearestDist(sequences, model, normalize, symmetry, crossGroups, mst, subsample)

### WITH subsampling

crossGroups <- isotypes
subsample <- 2

# This fails with the following message:
# Error in dist_mat[this_idx, other_idx] : subscript out of bounds

shazam:::nearestDist(sequences, model, normalize, symmetry, crossGroups, mst, subsample)

# Traced the error back to the following:

# 1) nonsquareDist calculates a non-square matrix (2 by 34)

# dist_mat <- nonsquareDist(seq_uniq, indx, dist_mat=getDNAMatrix(gap=0))

# 2) .dcross tries to index dist_mat with row indices greater than 2 via this_idx

# setNames(sapply(1:length(sequences), .dcross), sequences)

# r <- dist_mat[this_idx, other_idx]

Comments (6)

  1. Log in to comment