calculateTargeting fails due to gaps in subSeq

Issue #103 resolved
ssnn created an issue

The call below fails with the error "Error in targetingModel@targeting[, subSeq] : subscript out of bounds":

germlineSeq <-"..........................................................................CTCTGGTGGCTCCATCAGC......AGTGGTGATTACTACTGGAGTTGGATCCGCCAGCNCCCAGGGAAGGGCCTGGAGTGGATTGGGTACATCTATTACAGT.........GGGAGCACCTACTACAACCCGTCCCTCAAG...AGTCGAGTCACCATATCAGTAGACACGTCCAAGAACCAGTTCTCCCTGAAGCTGAGCTCTGTGACTGCCGCAGACACGGCCGTGTATTACTGTGCCAGAGNNNNNNNNNNNNNNNNNNNNGATGCTTTTGATGTCTGGGGCCAAGGGACAATGGTCACCGTCTCTTCAG"
inputSeq <- "............................................................................................C......AGACGTGTGCTCTTCCGATCTATGAAAGACGCTCAGGTGATCTACGGCCTGGAGTGGATTGGCTACATCTTTCAAAGT.........GGGAGCACCTACAACAGCCCGTCCCTCAAG...AGTCGAGTCACCATATCAAGAGACACGTCCAAGAACCACTTCTCCCTGAAACTGAGCTCTGTGACCGCCGCGGACACGGCCACTTATTACTGT"

shazam:::calculateTargeting(germlineSeq,
                               inputSeq,
                               targetingModel=HH_S5F,
                               regionDefinition=NULL) 

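The problem comes from some 5-mers in subSeq containing gap characters ("."), e.g. "NN..C" and "..CTC" below; these are presumably not valid column names of targetingModel@targeting, hence the out-of-bounds subscript: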

> subSeq
  [1] "NN..C" "N..CT" "..CTC" ".CTCT" "CTCTG" "TCTGG" "CTGGT" "TGGTG" "GGTGG"
 [10] "GTGGC" "TGGCT" "GGCTC" "GCTCC" "CTCCA" "TCCAT" "CCATC" "CATCA" "ATCAG"
 [19] "TCAGC" "CAGCA" "AGCAG" "GCAGT" "CAGTG" "AGTGG" "GTGGT" "TGGTG" "GGTGA"
 [28] "GTGAT" "TGATT" "GATTA" "ATTAC" "TTACT" "TACTA" "ACTAC" "CTACT" "TACTG"
 [37] "ACTGG" "CTGGA" "TGGAG" "GGAGT" "GAGTT" "AGTTG" "GTTGG" "TTGGA" "TGGAT"
 [46] "GGATC" "GATCC" "ATCCG" "TCCGC" "CCGCC" "CGCCA" "GCCAG" "CCAGC" "CAGCN"
 [55] "AGCNC" "GCNCC" "CNCCC" "NCCCA" "CCCAG" "CCAGG" "CAGGG" "AGGGA" "GGGAA"
 [64] "GGAAG" "GAAGG" "AAGGG" "AGGGC" "GGGCC" "GGCCT" "GCCTG" "CCTGG" "CTGGA"
 [73] "TGGAG" "GGAGT" "GAGTG" "AGTGG" "GTGGA" "TGGAT" "GGATT" "GATTG" "ATTGG"
 [82] "TTGGG" "TGGGT" "GGGTA" "GGTAC" "GTACA" "TACAT" "ACATC" "CATCT" "ATCTA"
 [91] "TCTAT" "CTATT" "TATTA" "ATTAC" "TTACA" "TACAG" "ACAGT" "CAGTG" "AGTGG"
[100] "GTGGG" "TGGGA" "GGGAG" "GGAGC" "GAGCA" "AGCAC" "GCACC" "CACCT" "ACCTA"
[109] "CCTAC" "CTACT" "TACTA" "ACTAC" "CTACA" "TACAA" "ACAAC" "CAACC" "AACCC"
[118] "ACCCG" "CCCGT" "CCGTC" "CGTCC" "GTCCC" "TCCCT" "CCCTC" "CCTCA" "CTCAA"
[127] "TCAAG" "CAAGA" "AAGAG" "AGAGT" "GAGTC" "AGTCG" "GTCGA" "TCGAG" "CGAGT"
[136] "GAGTC" "AGTCA" "GTCAC" "TCACC" "CACCA" "ACCAT" "CCATA" "CATAT" "ATATC"
[145] "TATCA" "ATCAG" "TCAGT" "CAGTA" "AGTAG" "GTAGA" "TAGAC" "AGACA" "GACAC"
[154] "ACACG" "CACGT" "ACGTC" "CGTCC" "GTCCA" "TCCAA" "CCAAG" "CAAGA" "AAGAA"
[163] "AGAAC" "GAACC" "AACCA" "ACCAG" "CCAGT" "CAGTT" "AGTTC" "GTTCT" "TTCTC"
[172] "TCTCC" "CTCCC" "TCCCT" "CCCTG" "CCTGA" "CTGAA" "TGAAG" "GAAGC" "AAGCT"
[181] "AGCTG" "GCTGA" "CTGAG" "TGAGC" "GAGCT" "AGCTC" "GCTCT" "CTCTG" "TCTGT"
[190] "CTGTG" "TGTGA" "GTGAC" "TGACT" "GACTG" "ACTGC" "CTGCC" "TGCCG" "GCCGC"
[199] "CCGCA" "CGCAG" "GCAGA" "CAGAC" "AGACA" "GACAC" "ACACG" "CACGG" "ACGGC"
[208] "CGGCC" "GGCCG" "GCCGT" "CCGTG" "CGTGT" "GTGTA" "TGTAT" "GTATT" "TATTA"
[217] "ATTAC" "TTACT" "TACTG" "ACTGT" "CTGTN" "TGTNN"

The same code worked in an earlier version of shazam (Version: 0.1.8.999, Date: 2017-12-29).

Comments (2)

  1. Log in to comment