- edited description
Error when subsampling while calculating distToNearest between groups
Issue #123
resolved
Below is a toy example for reproducing the error
# Minimal reproduction: shazam:::nearestDist() fails with
# "subscript out of bounds" when `crossGroups` and `subsample` are both set.
library(shazam)  # v0.1.11

# Load the example repertoire and assign V-J gene groups (alakazam v0.2.11).
data(ExampleDb, package = "alakazam")
db <- ExampleDb
db <- alakazam::groupGenes(db, "V_CALL", "J_CALL", first = FALSE)

# Pick the 95th V-J group once and reuse the mask for every lookup below.
in_group <- db$VJ_GROUP == unique(db$VJ_GROUP)[95]

table(db[["ISOTYPE"]][in_group])
# IgA IgD IgG IgM
#  10   3   7  17
# 37 sequences

sequences <- db[["JUNCTION"]][in_group]
isotypes <- db[["ISOTYPE"]][in_group]

# Distance settings shared by both calls.
model <- "ham"
normalize <- "len"
symmetry <- "avg"
mst <- FALSE

### WITHOUT subsampling
crossGroups <- NULL
subsample <- NULL
# this runs
shazam:::nearestDist(sequences, model, normalize, symmetry, crossGroups, mst,
                     subsample)

### WITH subsampling
crossGroups <- isotypes
subsample <- 2
# this fails with the following msg:
# Error in dist_mat[this_idx, other_idx] : subscript out of bounds
shazam:::nearestDist(sequences, model, normalize, symmetry, crossGroups, mst,
                     subsample)

# Traced the error back to the following (quoted from the shazam source):
# 1) nonsquareDist returns a non-square matrix (2 by 34):
#      dist_mat <- nonsquareDist(seq_uniq, indx, dist_mat=getDNAMatrix(gap=0))
# 2) .dcross then indexes dist_mat with row indices greater than 2 via
#    this_idx, which is out of bounds for a 2-row matrix:
#      setNames(sapply(1:length(sequences), .dcross), sequences)
#      r <- dist_mat[this_idx, other_idx]
Comments (6)
-
reporter -
reporter - edited description
-
reporter - edited description
-
reporter - edited description
-
-
assigned issue to
-
assigned issue to
-
reporter - changed status to resolved
- Log in to comment