"""Managers which handle the details of processing sequencing reads on clones.
"""
from Parts import ClonedSequence, QualitySequence
from SynBio.OligoDatabase.Database import SequenceMissingError
from SynBio.Sequencing.Process import (LocalReadAligner, PositionMapperPlus,
AnalyzedReadPlus)
class ReadAlignmentManager:
"""Manage the alignment of reads to a reference sequence.
"""
def __init__(self, origin_finder, work_dir, logger, only_ref_seq = True):
"""Initialize a manager for dealing with reads
only_ref_seq is a boolean specifying whether we should only report
sequencing information in the reference sequence. The alternative is
to report read info in the upstream and downstream vector regions.
"""
self._origin_finder = origin_finder
self._read_aligner = LocalReadAligner(work_dir, logger)
self._logger = logger
self._only_ref_seq = only_ref_seq
self._name_match_thresh = 0.9
def align_read(self, qual_seq, clone_ref):
"""Do the work on aligning a read to the reference sequence
qual_seq -- QualitySequence object which can be used to query the
actual read and quality information
clone_ref -- ClonedSequence object used to query the read
Returns an AnalyzedRead object which will be used to provide
information about the alignment.
"""
unique_parts = clone_ref.unique_name()
ref_name = '_'.join([str(x) for x in unique_parts])
self._logger.info('===== Aligning read %s to %s' %
(qual_seq.name, ref_name))
#print qual_seq.name
clone_ref_seqs = clone_ref.get_clone_ref_seqs()
if clone_ref_seqs.is_pool():
raise NotImplementedError('Pooled sequences not done')
else:
base_seq, wvec_seq = clone_ref_seqs.get_base_seqs()
ref_match = self._read_aligner.align_read_to_ref(qual_seq, ref_name,
wvec_seq)
# log the match details
length_percent = ref_match.match_length_percent()
match_score = ref_match.match_score()
self._logger.info("== %s %s %s" % (ref_match.is_rc,
length_percent, match_score))
self._logger.info(str(ref_match))
final_a = None
if ref_match.is_decent():
final_a = self._get_aligned_read(ref_match, wvec_seq,
base_seq)
if final_a is None or final_a.is_vector():
final_a = self._check_non_aligned(qual_seq, clone_ref)
self._logger.info('==================')
return final_a
def _get_aligned_read(self, ref_match, wvec_seq, base_seq):
"""Return details for a read that is correctly aligned to the reference.
"""
mapper = PositionMapperPlus(ref_match.ref_match_seq,
wvec_seq, base_seq, self._only_ref_seq)
return AnalyzedReadPlus(mapper, ref_match.read_match_seq,
ref_match.read_qual_values, ref_match.read_orig_seq,
ref_match.is_rc)
def _check_non_aligned(self, qual_seq, clone_ref):
"""Deal with a sequence that did not align to the reference sequence.
"""
for_seq_rec, for_qual_values = qual_seq.fasta_plus_quality()
design_a = clone_ref.get_design_assembly()
cur_a_name = design_a.get_name()
alt_origin = self._origin_finder.find_origin(
cur_a_name, for_seq_rec.sequence)
if alt_origin is None:
self._logger.info('Matches nothing')
return None
else:
self._logger.info('Alt origin: %s %s %s %s' %
(clone_ref.unique_name(), alt_origin.alt_name,
alt_origin.match_self, alt_origin.match_vector))
return alt_origin
class TrimmedReadManager:
"""Manage the sorting of reads onto associated clones.
Utilizing the information stored about a sequencing plate in the database,
this takes in trimmed reads and orders them according to which clone they
belong to on the plate.
"""
def __init__(self, name_parser, plate_db, work_dir, logger):
self._name_parser = name_parser
self._plate_db = plate_db
self._work_dir = work_dir
self._logger = logger
self._clones = []
self._read_info = {"total" : None, "quality" : 0}
def set_read_count(self, total_reads):
assert self._read_info["total"] is None, "Already set read count"
self._read_info["total"] = total_reads
def get_read_info(self):
return self._read_info
def add_qual_sequence(self, name, seq_list, quality_list, orig_read_seq):
"""Add a quality sequence, getting info from the file name.
"""
plate, pos, final_pos = self._name_parser.parse_abi_name(name)
self._plate_db.verify_plate(plate)
self._process_read(name, plate, pos, final_pos, seq_list, quality_list,
orig_read_seq)
def _process_read(self, name, plate, pos, final_pos, seq_list,
quality_list, orig_read_seq):
self._logger.info(" *> Adding read on plate %s, position %s" %
(plate, pos))
plate_well = self._plate_db.get_well(pos, self._work_dir)
clone_index = self._add_clone(plate_well)
if len(seq_list) > 0 and len(quality_list) > 0:
self._read_info["quality"] += 1
clip_left, clip_right = self._find_clip_values(seq_list,
orig_read_seq)
self._add_qual_read(clone_index, plate, final_pos, name,
seq_list, quality_list, clip_left, clip_right)
def _find_clip_values(self, clip_seq_list, orig_read_seq):
"""Find the regions where the left and right of this read are trimmed.
This returns the total trim (vector plus quality) used to find
the region of a ab1 read under use.
"""
clip_seq = "".join(clip_seq_list)
clip_start = orig_read_seq.find(clip_seq)
assert clip_start > 0, "We lost our sequence in trimming %s : %s" % \
(clip_seq, orig_read_seq)
clip_end = clip_start + len(clip_seq)
return clip_start, clip_end
def _add_clone(self, plate_well):
new_clone = ClonedSequence(plate_well)
if new_clone not in self._clones:
self._clones.append(new_clone)
clone_index = self._clones.index(new_clone)
return clone_index
def _add_qual_read(self, clone_index, plate, final_pos, name,
seq_list, quality_list, clip_left, clip_right):
"""Add a quality trimmed read to our current reads.
"""
new_read = QualitySequence(name, plate, final_pos,
seq_list, quality_list, clip_left, clip_right)
self._clones[clone_index].add_read(new_read)
def get_sequenced_clones(self):
"""Return all unique sequenced clones.
"""
return self._clones