"""Provide design algorithms to work around ambiguous designs.
"""
import re
from Bio.Seq import Seq
from Bio.Alphabet.IUPAC import unambiguous_dna
from SynBio.OligoSequences.Oligo import (DesignLinkerEndCorrect,
OligoDesignerMixin, AssemblyDesignError)
from OligoRegion import FixedEndOligo
class AmbiguousOverlapDesign(OligoDesignerMixin):
"""Design that avoid ambiguous regions in oligo overlaps.
This attempts to do oligo designs based on avoiding putting ambiguous
regions in overlaps. By avoiding this, the necessary ambiguous oligos
can be ordered separately and combined into a PAM reaction following
duplexing and MutS filtering.
Currently, ambiguous regions are recognized by a hack -- special non GATC
bases embedded in the sequence.
"""
def __init__(self, max_size, special_max, parameters,
assembly_props, pre_builder, post_builder, ambig_letters):
assert len(parameters) >= 0
(self._buffer, self._overlap, self._temp_thresh, self._five_size,
self._three_size) = parameters[0]
self._extra_params = parameters[1:]
self._min_size = max(self._overlap + 1, 20)
self._max_size = max_size
self._special_max = special_max
self._assembly_props = assembly_props
self._pre_builder = pre_builder
self._post_builder = post_builder
self._ambig_letters = ambig_letters
self._backup_design = DesignLinkerEndCorrect(self._max_size,
self._five_size, self._three_size,
[(self._buffer, self._overlap, self._temp_thresh)],
self._assembly_props, self._pre_builder, self._post_builder)
def get_builders(self):
"""Retrieve the pre and post builders associated with this breakdown.
"""
new_pre = self._pre_builder.copy()
new_post = self._post_builder.copy()
return new_pre, new_post
def get_flex_splitter(self):
if len(self._extra_params) == 0:
raise ValueError('No additional designers available.')
else:
new_design = AmbiguousOverlapDesign(self._max_size,
self._special_max, self._extra_params, self._assembly_props,
self._pre_builder, self._post_builder, self._ambig_letters)
new_design.set_melt_calc(self._melt_calc)
return new_design
def get_overlap(self):
return self._overlap
def set_melt_calc(self, melt_calc):
self._melt_calc = melt_calc
self._backup_design.set_melt_calc(melt_calc)
def get_min_max(self, conservative = False):
return self._min_size, self._max_size
def split(self, parent_name, sequence, is_first, is_last):
"""Split the sequence, avoiding ambiguous positions in the sequence.
The algorithm is:
Find ambiguous positions in the sequence
If no ambiguous positions in the sequence, use the standard splitter
Otherwise:
Find possible overlaps in the sequence, which are of the specified
minimum melting temperature and contain no ambiguous bases.
Starting at 0
While we haven't reached the end of the sequence
Find the best overlap end within the start and end range
Add piece, move start position to overlap start.
"""
ambig_positions = self._find_ambiguous(sequence)
# if we have no ambiguous positions, use the backup
if len(ambig_positions) == 0:
return self._backup_design.split(parent_name, sequence, is_first,
is_last)
else:
print ambig_positions
overlaps = self._find_overlaps(sequence, ambig_positions)
print '\n'.join([str(x) for x in overlaps])
all_oligos = self._design_breakpoints(parent_name, sequence,
overlaps)
for oligo in all_oligos:
if oligo.bad_size(self._min_size, self._special_max) != 0:
raise AssemblyDesignError("Oligo not in size: %s" % oligo)
return all_oligos
def _design_breakpoints(self, parent_name, sequence, overlaps):
"""Choose oligos in the sequence with allowed overlaps.
This walks from left to right across the sequence and picks oligos
which fall in the allowed overlaps. Oligos are chosen to maximize
size (and thus minimize the number of oligos) and to have reasonable
buffers around ambiguous pieces for adjustment.
"""
all_oligos = []
pos_overlaps = [x for x in overlaps if x.end == self._five_size]
if len(pos_overlaps) == 0:
raise AssemblyDesignError('No overlap for initial oligo: %s' %
self._five_size)
cur_overlap = pos_overlaps[0]
cur_oligo = FixedEndOligo(self._get_name(parent_name, len(all_oligos)),
sequence, 0, self._five_size, self._assembly_props, None,
self._buffer, five_fixed = True)
final_overlaps = [x for x in overlaps if x.start == (len(sequence) -
self._three_size)]
if len(final_overlaps) == 0:
raise AssemblyDesignError('No overlap for final oligo: %s' %
(len(sequence) - self._three_size))
final_overlap = final_overlaps[0]
while cur_oligo is not None:
all_oligos.append(cur_oligo)
cur_min = cur_overlap.start + self._min_size + self._buffer
cur_max = cur_overlap.start + self._max_size - self._buffer
special_max = cur_overlap.start + self._special_max - self._buffer
pos_overlaps = [x for x in overlaps if x.end in
range(cur_min, cur_max)]
special_overlaps = [x for x in overlaps if x.end in
range(cur_min, special_max)]
# terminating condition -- we are done splitting it
if cur_min >= final_overlap.end:
next_overlap = None
elif len(pos_overlaps) > 0:
next_overlap = self._choose_overlap(pos_overlaps,
cur_overlap, final_overlap)
elif len(special_overlaps) > 0:
next_overlap = self._choose_overlap(special_overlaps,
cur_overlap, final_overlap)
else:
raise AssemblyDesignError('No overlap at: %s %s' %
(cur_min, cur_max))
if next_overlap:
cur_oligo = FixedEndOligo(self._get_name(parent_name,
len(all_oligos)), sequence, cur_overlap.start,
next_overlap.end, self._assembly_props, None, self._buffer)
print cur_overlap.start, next_overlap.end
cur_overlap = next_overlap
else:
cur_oligo = None
final_oligo = FixedEndOligo(self._get_name(parent_name,
len(all_oligos)), sequence, final_overlap.start, len(sequence),
self._assembly_props, None, self._buffer, three_fixed = True)
all_oligos.append(final_oligo)
if len(all_oligos) % 2 != 0:
raise AssemblyDesignError('Non-even number of oligos designed')
return all_oligos
def _choose_overlap(self, pos_overlaps, cur_overlap, final_overlap):
"""Pick an overlap to use from the given choices.
We want to pick this with the following restrictions:
- Keep us to an even number of oligos
- Do not choose right on ambiguous position junctions
- Work with the final overlap.
XXX this does not guarantee an even number of oligos right now.
"""
# print final_overlap
# print [str(x) for x in pos_overlaps]
overlap_final = [x for x in pos_overlaps if x.start ==
final_overlap.start]
if len(overlap_final) > 0:
return overlap_final[0]
else:
pos_overlaps.reverse()
for ol in pos_overlaps:
if ol.buffer_distance >= 3:
return ol
return pos_overlaps[0]
def _find_overlaps(self, sequence, ambiguous_positions):
"""Find available overlaps in the sequence.
Overlaps are:
- At least as long as _overlap
- Have a melting temperature at least _temp_thresh
- Do not contain any ambiguous positions
"""
overlaps = []
for index in range(len(sequence) - self._overlap):
cur_end = index + self._overlap
cur_melt = -1.0
while (cur_melt < self._temp_thresh and cur_end < len(sequence)):
cur_seq = sequence[index:cur_end]
cur_ambig = self._find_ambiguous(cur_seq)
if len(cur_ambig) > 0:
break
cur_comp = Seq(cur_seq, unambiguous_dna).complement().data
cur_melt = self._melt_calc.calculate_melting(cur_seq, cur_comp)
cur_end += 1
# if we are above the melt temp and no ambiguous positions
if cur_melt >= self._temp_thresh:
if (len(set(ambiguous_positions) &
set(range(index, cur_end))) == 0):
buffer_distances = []
for ambig in ambiguous_positions:
buffer_distances.append(abs(ambig - index))
buffer_distances.append(abs(ambig - cur_end))
overlaps.append(_OverlapRegion(index, cur_end, cur_melt,
min(buffer_distances)))
return overlaps
def _find_ambiguous(self, sequence):
"""Find any ambiguous positions in the sequence to design.
Currently this looks for specific letters which are use to
estimate ambiguity. Eventually this should be determined from database
information on ambiguous regions.
"""
ambig_positions = []
for check_ambig in self._ambig_letters:
pat = re.compile(r'%s' % check_ambig)
for match in pat.finditer(sequence):
start, end = match.span()
assert end == start + 1
assert sequence[start] == check_ambig
ambig_positions.append(start)
return ambig_positions
class _OverlapRegion:
"""Represent details on a potential overlap region in the sequence.
"""
def __init__(self, start, end, melt_temp, buffer_distance):
self.start = start
self.end = end
self.melt_temp = melt_temp
self.buffer_distance = buffer_distance
def __str__(self):
return 'OL: %s to %s; %s %s' % (self.start, self.end, self.melt_temp,
self.buffer_distance)