"""Design of overlapping oligomers to span a region.
This code does the hard work of designing oligos which can be assembled via a
PAM reaction.
"""
import random
import math
import scipy
from Bio.Seq import Seq
from Bio.Alphabet.IUPAC import unambiguous_dna
from OligoRegion import OligoRegion, AdjustableOligo, FixedOverlapOligo, \
FixedEndOligo
class AssemblyDesignError(Exception):
pass
class AbuttingOligoDesign:
"""Design oligos that touch each other and thus completely overlap.
These oligos are not designed based on calculating melting overlaps,
but rather just perfectly overlap each other from end to end. The impetus
for this design is the idea that fully overlapping oligos help reduce
errors.
"""
def __init__(self, params, melt_calc):
assert params.cut_five == 0 and params.cut_three == 0, \
"Can't handle cut ends"
self._min_size = params.min_size
self._max_size = params.max_size
def split(self, sequence):
"""Split the sequence into completely overlapping oligos.
"""
# first oligo is on the 5' most side
first_oligo = OligoRegion(sequence, 0, self._max_size)
# second oligo is half-way across the first -- integer division
second_start = self._max_size / 2
second_oligo = OligoRegion(sequence, second_start, second_start +
self._max_size)
all_oligos = [first_oligo, second_oligo]
while 1:
# oligos start at one past the end point of the previous sequence
# on the same strand (two oligos back)
oligo_start = all_oligos[-2].end
oligo_end = oligo_start + self._max_size
# if we are off the 3' end, we are finished, and want to make
# sure that the last two oligos are more than the minimum size
if oligo_end > len(sequence):
oligo_end = len(sequence)
# big enough, we can be done with it
if oligo_end - oligo_start >= self._min_size:
new_oligo = OligoRegion(sequence, oligo_start, oligo_end)
# not big enough, adjust the last two oligos to make them both
# bigger than the minimum size
else:
second_oligo, new_oligo = self._adjust_oligos(
all_oligos[-2], sequence, oligo_start, oligo_end)
prev_oligo = all_oligos.pop(-1)
old_second = all_oligos.pop(-1)
all_oligos.append(second_oligo)
all_oligos.append(prev_oligo)
# easy case -- just add the new oligo
else:
new_oligo = OligoRegion(sequence, oligo_start, oligo_end)
all_oligos.append(new_oligo)
if oligo_end == len(sequence):
break
return all_oligos
def _adjust_oligos(self, second_oligo, sequence, new_start, new_end):
"""Adjust the last three oligos so that the final oligo is not small.
This is necessary if the new oligo (end - start) is less than the
minimum specified size of oligos.
"""
assert new_end - new_start < self._min_size
# do the adjustment by the minimum amount needed to
amount_adjust = self._min_size - (new_end - new_start)
second_oligo.end = second_oligo.end - amount_adjust
new_start = new_start - amount_adjust
assert second_oligo.end == new_start
assert new_end - new_start >= self._min_size
new_oligo = OligoRegion(sequence, new_start, new_end)
return second_oligo, new_oligo
class _WorkOligo:
"""An oligo that is being worked on/adjusted in the design process.
This encompasses the work that is done on an oligo, while also letting it
internally maintain its position within a group of oligos.
The usefulness of this is that you can do work with a bit of randomness to
avoid continually adjusting the same oligos. The issue you run into with
continually adjusting the same oligos is that you may swing back and forth
with overlaps between two extremes, being unable to find a happy middle
position. By randomizing, we can check all oligos for adjustments, and
help avoid this problem.
Eventually, we can use this for querying oligos for the "best" oligos to
adjust (based on some criteria for best that I haven't even begun to make
up yet) so that we don't have to rely so much on randomness, which is of
course very much a hack-y way to do this.
"""
def __init__(self, index, oligo, min_size, max_size,
melt_calc, melt_thresh):
"""Initialize with a sequence and oligo position.
"""
self.index = index
self.oligo = oligo
self._min_size = min_size
self._max_size = max_size
self._melt_calc = melt_calc
self._melt_thresh = melt_thresh
def __cmp__(self, other):
"""Allow sorting by oligo indexes.
"""
# sanity check to prevent multiple oligos with the same index
if self.index == other.index:
assert self.start() == other.start() and self.end() == other.end()
return cmp(self.index, other.index)
def __hash__(self):
return hash(self.index)
def copy(self):
new_oligo = self.oligo.copy()
return _WorkOligo(self.index, new_oligo, self._min_size,
self._max_size, self._melt_calc, self._melt_thresh)
def raw_oligo(self):
return self.oligo.copy()
def __str__(self):
"""Print out debugging information.
"""
return "Oligo %s (%s,%s)" % (self.index, self.oligo.start,
self.oligo.end)
# encapsulate operations on this oligo -- these are all pass through
# operations that just try to minimize the passing around of shared
# parameters like min_size, max_size and melting temperature info
def start(self):
return self.oligo.start
def end(self):
return self.oligo.end
def shrink_oligo_three(self, num_adjust):
new_start, change = self.oligo.shrink_oligo_three(num_adjust,
self._melt_calc, self._melt_thresh, self._min_size)
def expand_oligo_three(self, num_adjust):
new_start, change = self.oligo.expand_oligo_three(num_adjust,
self._melt_calc, self._melt_thresh, self._max_size)
def three_side_overlap(self):
new_start, change = self.oligo.three_side_overlap(
self._melt_calc, self._melt_thresh)
return new_start
def shift_start(self, new_start):
self.oligo.shift_start(new_start)
def at_min_size(self):
return self.oligo.at_min_size(self._min_size)
def at_max_size(self):
return self.oligo.at_max_size(self._max_size)
def to_buffer_max(self):
return self.oligo.to_buffer_max(self._max_size)
def to_buffer_min(self):
return self.oligo.to_buffer_min(self._min_size)
def bad_size(self):
return self.oligo.bad_size(self._min_size, self._max_size)
def move_five(self, num_move):
self.oligo.move_five(num_move)
class _WorkOligoManager:
"""Manage oligos that are being adjusted, encapsulating access.
Using this is a bit tricky right now, likely due to bad design on
my part. Mainly, it is stateful and can only be initialized and used
procedurally.
"""
def __init__(self, all_oligos, min_size, max_size, melt_calc, melt_thresh):
self._initialize(all_oligos, min_size, max_size, melt_calc,
melt_thresh)
self.min_size = min_size
self.max_size = max_size
self.melt_calc = melt_calc
self.melt_thresh = melt_thresh
self.debug = False
self.num_allowed_changes = 4
# keep information about how often we've changed a pacticular oligo
# this will prevent infinite loops where we change an oligo back and
# forth
self._oligo_change_counts = {}
for cur_oligo in self._work_oligos:
self._oligo_change_counts[cur_oligo] = 0
def _initialize(self, all_oligos, min_size, max_size, melt_calc,
melt_thresh):
self._work_oligos = []
self._initial_oligos = []
for index, oligo_seq in enumerate(all_oligos):
new_oligo = _WorkOligo(index, oligo_seq, min_size, max_size,
melt_calc, melt_thresh)
self._work_oligos.append(new_oligo)
self._initial_oligos.append(oligo_seq.copy())
def reset(self):
"""Revert all oligos back to their initial state.
"""
self._initialize(self._initial_oligos, self.min_size, self.max_size,
self.melt_calc, self.melt_thresh)
def replace_oligo(self, oligo):
"""Replace an oligo at the given position with the new oligo.
"""
self._work_oligos.sort()
self._work_oligos[oligo.index] = oligo
def get_cur_oligos(self):
"""Retrieve all oligos, post adjustment, for return.
"""
self._work_oligos.sort()
final_oligos = []
for work_oligo in self._work_oligos:
final_oligos.append(work_oligo.oligo)
return final_oligos
def _next_oligo(self, work_oligos, num_adjust):
"""Retrieve a new oligo to adjust by the given amount.
This retrieves an oligo to adjust which has the maximum amount of
potential adjustment, to leave as much possible buffer for future
adjustments. If oligos all have an equivalent amount of adjustment
potential, then oligos near the end of the cascade are preferred which
will cause more predictable amounts of adjustment (since less overlaps
need to be calculated and considered).
"""
assert num_adjust != 0
max_flex = None
cur_max = None
others = []
work_oligos.sort()
for oligo in work_oligos:
if num_adjust > 0:
flex = oligo.to_buffer_max()
else:
flex = oligo.to_buffer_min()
# consider the oligo if it hasn't reached maximum changes
if self._oligo_change_counts[oligo] > self.num_allowed_changes:
others.append(oligo)
else:
# check this oligo if we have flexibility
if flex is not None:
# start collecting the oligos at a maximum flex
if max_flex is None:
assert cur_max is None, cur_max
max_flex = flex
cur_max = oligo
# we have discovered a new maximum flex
elif flex > max_flex:
max_flex = flex
assert cur_max is not None
others.append(cur_max)
cur_max = oligo
# we are at the current maximum flex
# since we are now an oligo closer to the end, we
# choose this as our new max
elif flex == max_flex:
others.append(cur_max)
cur_max = oligo
# we are less than the current flex
else:
others.append(oligo)
# no flexibility
else:
assert flex is None
others.append(oligo)
if cur_max is None:
# in this case, we pick a random oligo -- we have to ignore our
# buffer to work out something
random_max, others = self._random_oligo(work_oligos, num_adjust)
self._oligo_change_counts[random_max] += 1
return random_max, others
assert len(others) == (len(work_oligos) - 1)
# update how many times we've used this oligo
self._oligo_change_counts[cur_max] += 1
return cur_max, others
def _random_oligo(self, work_oligos, num_adjust):
"""Retrieve a random oligo from the mix which has room for adjustment.
Right now this just pops an oligo off the end of a randomized list of
oligos. We could be a lot smarter about this right here.
"""
# randomly shuffle the oligos to allow differential access
reject_oligos = []
while 1:
cur_oligo = random.choice(work_oligos)
work_oligos.remove(cur_oligo)
if self._oligo_change_counts[cur_oligo] > self.num_allowed_changes:
reject_oligos.append(cur_oligo)
else:
# expanding, pick one that is not at its max size
if num_adjust > 0:
if not cur_oligo.at_max_size():
break
else:
reject_oligos.append(cur_oligo)
# shrinking, pick one not at its minimum size
elif num_adjust < 0:
if not cur_oligo.at_min_size():
break
else:
reject_oligos.append(cur_oligo)
else:
raise ValueError("How did we get a zero adjust?")
if len(work_oligos) == 0:
raise ValueError("No oligos that can be adjusted")
rest_oligos = work_oligos + reject_oligos
return cur_oligo, rest_oligos
def adjust_oligo_and_next(self, index, num_adjust):
"""Adjust a given oligo by index and the next oligo.
This deals with the 3' end of target oligo.
"""
# initial check -- we can't do this on the last oligo
if index == len(self._work_oligos) - 1:
return None, None
change, target = None, None
for cur_index, cur_oligo in enumerate(self._work_oligos):
if cur_index == index + 1:
change = cur_oligo.copy()
elif cur_index == index:
target = cur_oligo.copy()
assert change is not None and target is not None, (change, target)
adjust_oligo = self._adjust_oligo(target, num_adjust)
if adjust_oligo is None:
return None, None
else:
new_start = adjust_oligo.three_side_overlap()
next_start = change.start()
change.move_five(new_start - next_start)
# check to be sure we are still within a decent size range -- reset
# otherwise
if change.bad_size() != 0:
return None, None
else:
return target, change
def adjust_oligo_and_prev(self, index, num_adjust):
"""Adjust a given oligo by index and the previous oligo.
This deals with the 5' of a target oligo, and basically moves it by
the given amount by shrinking (increase the target) or expanding
(decrease the target).
"""
# initial check -- we can't do this on the first oligo in a seq
if index == 0:
return None, None
change, target = None, None
for cur_index, cur_oligo in enumerate(self._work_oligos):
if cur_index == index - 1:
change = cur_oligo.copy()
elif cur_index == index:
target = cur_oligo.copy()
assert change is not None and target is not None, (index, change,
target)
adjust_oligo = self._adjust_oligo(change, num_adjust)
if adjust_oligo is None:
return None, None
else:
new_start = adjust_oligo.three_side_overlap()
prev_start = target.start()
target.move_five(new_start - prev_start)
# check to be sure we are still within a decent size range -- reset
# otherwise
if target.bad_size() != 0:
return None, None
else:
return target, change
def adjust_next_oligo(self, num_adjust):
"""Adjust the next oligo in line by the given amount.
"""
num_orig_oligos = len(self._work_oligos)
# print "Original oligos"
# for oligo in self._work_oligos:
# print oligo
start_length = self._work_oligos[-1].end() - \
self._work_oligos[0].start()
work_oligo, rest_oligos = self._next_oligo(self._work_oligos,
num_adjust)
adjusted_oligo = self._adjust_oligo(work_oligo, num_adjust)
# adjust all trailing oligos and calculate how much we've changed
if adjusted_oligo:
all_oligos = self._adjust_trailing_oligos(adjusted_oligo,
rest_oligos)
adjusted = self._calculate_adjust(all_oligos, start_length,
num_adjust)
self._work_oligos = all_oligos
# we could not adjust the oligo, fix our oligos back to their original
# state and return the no adjust information
else:
all_oligos = rest_oligos
assert work_oligo not in all_oligos
all_oligos.append(work_oligo)
all_oligos.sort()
adjusted = self._calculate_adjust(all_oligos, start_length,
num_adjust)
assert adjusted == 0, "Expected no change"
self._work_oligos = all_oligos
assert len(self._work_oligos) == num_orig_oligos, \
"We are not keeping a consistent number of oligos"
return adjusted
def _adjust_trailing_oligos(self, adjust_oligo, rest_oligos):
"""After adjusting an oligo, move all trailing oligos to compensate.
This finishes the job of adjusting an oligo, which is to move all of
the remaining oligos with respect to the adjusted oligo so that we can
maintain consistent overlaps.
"""
all_oligos = rest_oligos
all_oligos.append(adjust_oligo)
all_oligos.sort()
adjust_index = all_oligos.index(adjust_oligo)
if self.debug:
print "Adjust position: %s of %s" % (adjust_index, len(all_oligos))
to_move_oligos = all_oligos[adjust_index + 1:]
moved_oligos = []
new_start = adjust_oligo.three_side_overlap()
for to_move in to_move_oligos:
to_move.shift_start(new_start)
moved_oligos.append(to_move)
new_start = to_move.three_side_overlap()
final_oligos = all_oligos[:adjust_index] + [adjust_oligo] + \
moved_oligos
#print [str(x) for x in final_oligos]
return final_oligos
def _calculate_adjust(self, all_oligos, start_length, target_adjust):
"""Calculate the amount of change caused by an adjustment on an oligo.
"""
#for oligo in all_oligos:
# print oligo
total_size = all_oligos[-1].end() - all_oligos[0].start()
adjust = total_size - start_length
# sanity check that we adjusted in the correct direction
# XXX This isn't a fair test since depending on the vagueries of the
# overlaps we could actually move into a region which causes the
# overlaps to adjust in the wrong direction.
#if target_adjust < 0:
# assert adjust <= 0, (target_adjust, adjust)
#elif target_adjust > 0:
# assert adjust >= 0, (target_adjust, adjust)
#else:
# raise ValueError("Why is the adjust zero?")
#if abs(adjust) > abs(target_adjust):
# for oligo in all_oligos:
# print oligo
return adjust
def _adjust_oligo(self, cur_oligo, num_adjust):
"""Adjust the oligo by the given amount, returning the adjusted oligo.
"""
if num_adjust < 0:
# adjust only if we are not at the minimum size
if cur_oligo.at_min_size():
cur_oligo = None
else:
cur_oligo.shrink_oligo_three(abs(num_adjust))
return cur_oligo
elif num_adjust > 0:
# only adjust if we are less than the maximum size
if cur_oligo.at_max_size():
cur_oligo = None
else:
cur_oligo.expand_oligo_three(abs(num_adjust))
return cur_oligo
else:
raise ValueError("Expected non-zero adjust: %s" % num_adjust)
# make sure we don't go outside our size range
if cur_oligo:
bad_size = cur_oligo.bad_size()
assert bad_size == 0, (bad_size, cur_oligo.end() -
cur_oligo.start())
return cur_oligo
class OligoDesignerMixin:
"""Useful functionality across oligomer design programs.
These require AdjustableOligos.
"""
def _get_name(self, parent_name, index):
assert index < 100, "Only handles up to 100 oligos"
new_name = "%s-%02d" % (parent_name, index)
#assert len(new_name) <= 30, new_name
return new_name
def _add_extra_sequences(self, oligos, five_extra, three_extra):
"""Add sequence to the oligos that will be trimmed off.
"""
for index, oligo in enumerate(oligos):
# reverse complemented sequences -- switch the 5' and 3' ends
if index % 2 == 0:
oligo.adjust_for_cut_seq(three_extra, five_extra)
# regular sequences, 5' and 3' ends are normal
else:
oligo.adjust_for_cut_seq(five_extra, three_extra)
return oligos
def _adjust_oligos(self, all_oligos, num_adjust, remain_adjust, min_size,
max_size):
"""Adjust the given oligos to get to the given adjustment size.
"""
oligo_manager = _WorkOligoManager(all_oligos, min_size, max_size,
self._melt_calc, self._melt_thresh)
while 1:
change = oligo_manager.adjust_next_oligo(num_adjust)
remain_adjust -= change
#print "Made change", change, num_adjust, remain_adjust
# stop when we've finished the amount we need to adjust
if ((num_adjust >= 0 and remain_adjust <= 0) or
(num_adjust <= 0 and remain_adjust >= 0)):
break
return oligo_manager.get_cur_oligos()
class OverlapGuessMixin:
"""Functionality for guessing overlaps for a sequence.
"""
def _find_avg_overlap(self, sequence, size, melt_calc, melt_thresh,
buffer_size):
"""Find an average overlap for the given sequence region.
"""
overlaps = []
for start_pos in range(0, len(sequence) - size, 5):
test_oligo = AdjustableOligo("", sequence, start_pos, start_pos +
size, buffer_size)
overlap, blah = test_oligo.three_side_overlap(melt_calc,
melt_thresh)
overlap_length = test_oligo.end - overlap
overlaps.append(overlap_length)
median_overlap = scipy.median(overlaps)
return int(round(median_overlap))
def _find_piece_size(self, length, overlap, num_pieces):
"""Find the size of pieces spanning a region with given overlaps.
Size is calculated starting from the equation:
(num_pieces - 1) * (size - overlap) + size = total_length
since the region dealt with is all of the pieces minus the overlap
except the last. This can be rearranged to calculate oligo size given
the number of pieces, overlap and total_length:
size = [(total_length - overlap) / num_pieces] + overlap
"""
return (float(length - overlap) / float(num_pieces)) + overlap
def _find_num_pieces(self, length, overlap, size):
"""Retrieve the number of pieces given a particular oligo size.
Using the equation in find_piece_size, the number of pieces is
calculated using:
num_oligos = [(total_length - size) / (size - overlap)] + 1
"""
return (float(length - size) / float(size - overlap)) + 1
class AbstractFixedEndCorrect(OligoDesignerMixin, OverlapGuessMixin):
"""Abstract class to design oligos with fixed end overlaps.
"""
def __init__(self, min_size, max_size, overlap_calc, buffer_size = 5):
self.min_size = min_size
self.max_size = max_size
self._buffer_size = buffer_size
self._melt_calc = None
self._melt_thresh = None
self._overlap_calc = overlap_calc
def set_melt_calc(self, melt_calc):
pass
def split(self, parent_name, sequence):
"""Split the sequence into an even number of oligos.
"""
five_oligo, three_oligo = self._initial_oligos(sequence,
self.max_size, self.min_size)
initial_size, num_oligos = self._predict_even_size(five_oligo,
three_oligo, sequence)
assert num_oligos % 2 == 0, "Expect an even number of oligos"
filler_oligos = self._get_filler_oligos(five_oligo, initial_size,
num_oligos - 1, sequence)
last_filler = self._get_last_fill_oligo(filler_oligos[-1],
three_oligo, sequence)
need_adjust = last_filler.bad_size_with_buffer(self.min_size,
self.max_size)
while need_adjust != 0:
#print "Need adjust", need_adjust, last_filler.end - last_filler.start
num_adjust = int(round(float(need_adjust) /
float(len(filler_oligos))))
if num_adjust == 0: # adjust by a small amount
if need_adjust > 0:
num_adjust = 1
else:
num_adjust = -1
filler_oligos = self._adjust_oligos(filler_oligos, num_adjust,
need_adjust, self.min_size, self.max_size)
last_filler = self._get_last_fill_oligo(filler_oligos[-1],
three_oligo, sequence)
need_adjust = last_filler.bad_size_with_buffer(self.min_size,
self.max_size)
return [five_oligo] + filler_oligos + [last_filler] + [three_oligo]
def _get_last_fill_oligo(self, five_oligo, three_oligo, sequence):
"""Get the very last oligo which may be too small or large.
"""
last_start, melt = five_oligo.three_side_overlap()
last_end, melt = three_oligo.five_side_overlap()
return FixedOverlapOligo(sequence, last_start, last_end,
self._overlap_calc, self._buffer_size)
def _get_filler_oligos(self, five_oligo, oligo_size, total_oligos,
sequence):
"""Fill in a region with the given number of sized oligos.
"""
cur_oligo = five_oligo
filler_oligos = []
for index in range(total_oligos):
overlap_start, melt = cur_oligo.three_side_overlap()
cur_oligo = FixedOverlapOligo(sequence, overlap_start,
overlap_start + oligo_size, self._overlap_calc,
self._buffer_size)
filler_oligos.append(cur_oligo)
return filler_oligos
def _predict_even_size(self, five_oligo, three_oligo, seq):
start, junk = five_oligo.three_side_overlap()
end, junk = three_oligo.five_side_overlap()
length = end - start
assert end - start > 2 * self.min_size, \
"We're expecting a reasonable size to design oligos for"
piece_size = self.max_size - self._buffer_size
while 1:
overlap = self._overlap_calc(0, piece_size)
num_oligos = self._find_num_pieces(length, overlap, piece_size)
#print num_oligos, piece_size
round_oligos = int(round(num_oligos))
if round_oligos % 2 == 0: # if we have an even number
extra = round_oligos - num_oligos
# if we are pretty close to a round number
if extra <= 0.3 and extra >= -0.3:
break
assert piece_size >= self.min_size
piece_size -= 1
return piece_size, round_oligos
def _initial_oligos(self, sequence, max_size, min_size):
"""Design the five and three most oligo for the sequence.
"""
size = (max_size + min_size) / 2 # int division
five_oligo = FixedOverlapOligo(sequence, 0, size, self._overlap_calc,
self._buffer_size)
three_oligo = FixedOverlapOligo(sequence, len(sequence) - size,
len(sequence), self._overlap_calc, self._buffer_size)
return five_oligo, three_oligo
class TileEndCorrect(AbstractFixedEndCorrect):
"""Design a set of tiled oligos with primer correct ends.
"""
def __init__(self, min_size, max_size, tile_distance, buffer_size = 5):
raise NotImplementedError("Tiled oligos design is not up to date")
class TileOverlapCalc:
def __init__(self, tile_distance):
self.tile_distance = tile_distance
def overlap_calc(self, start, end):
size = end - start
return size - self.tile_distance
tile_overlapper = TileOverlapCalc(tile_distance)
AbstractFixedEndCorrect.__init__(self, min_size, max_size,
tile_overlapper.overlap_calc, buffer_size)
class AbutEndCorrect:
"""Design a set of abutting oligos with primer correct ends.
This is very simplistic right now, since we are just testing the idea of
Abutting ends. It needs to still be adjustable, and probably needs testing
across a number of cases.
"""
def __init__(self, min_size, max_size, buffer_size = 5):
self.min_size = min_size
self.max_size = max_size
def set_melt_calc(self, melt_calc):
self.melt_calc = melt_calc
def split(self, sequence):
piece_size, num_oligos = self._find_even_pieces(len(sequence),
self.min_size, self.max_size)
cur_start = 0
prev_end = None
oligos = []
for oligo_index in range(num_oligos - 1):
start = cur_start
end = start + piece_size
print start, end
oligos.append(FixedOverlapOligo(sequence, start, end, None, 0))
if prev_end is None:
assert piece_size % 2 == 0
cur_start = piece_size / 2
prev_end = end
else:
cur_start = prev_end
prev_end = end
# add the last oligo
final_start = cur_start
final_end = len(sequence)
assert (final_end - final_start) >= self.min_size and \
(final_end - final_start) <= self.max_size, \
(final_start, final_end)
oligos.append(FixedOverlapOligo(sequence, final_start, final_end,
None, 0))
return oligos
def _find_even_pieces(self, seq_size, min_size, max_size):
"""Find pieces suitable for use as tiled oligos.
This tries to find an oligo size for abutting oligos that fills two
criteria:
1. Have an even number of oligos
2. Have the oligo size be even.
"""
choices = range(min_size, max_size + 2, 2)
choices.reverse()
for length in choices:
assert length % 2 == 0
overlap = length / 2
piece_target = self._find_num_pieces(seq_size, overlap, length)
piece_target_round = int(round(piece_target))
if abs(piece_target_round - piece_target) < 0.2:
# even number of pieces
if piece_target_round % 2 == 0:
return length, piece_target_round
raise ValueError("Could not find pieces of an appropriate size")
def _find_num_pieces(self, total_length, oligo_length, overlap):
"""Retrieve the number of pieces given a particular oligo size.
Using the equation in find_piece_size, the number of pieces is
calculated using:
num_oligos = [(total_length - oligo_length) /
(oligo_length - overlap)] + 1
"""
return (float(total_length - oligo_length) /
float(oligo_length - overlap)) + 1
class OligoSizeError(Exception):
"""Exception for when we can't find an expected oligo size.
"""
pass
class DesignSingleOligo(OligoDesignerMixin):
"""Simple oligo splitter that expects the sequence to be one oligo.
"""
def __init__(self, min_size, max_size, assembly_props, pre_builder,
post_builder, keep_name = True):
self._min_size = min_size
self._max_size = max_size
self._assembly_props = assembly_props
self._pre_builder = pre_builder
self._post_builder = post_builder
self._keep_name = keep_name
def set_melt_calc(self, melt_calc):
pass
def get_min_max(self, conservative = False):
return self._min_size, self._max_size
def get_overlap(self):
return 0
def get_builders(self):
"""Retrieve the pre and post builders associated with this breakdown.
"""
new_pre = self._pre_builder.copy()
new_post = self._post_builder.copy()
return new_pre, new_post
def split(self, parent_name, sequence, is_first, is_last):
"""Split the sequence, retrieving a single oligo.
"""
assert len(sequence) >= self._min_size and \
len(sequence) <= self._max_size, \
"Sequence not in size range"
if self._keep_name:
name = parent_name
else:
name = self._get_name(parent_name, 0)
this_oligo = FixedEndOligo(name, sequence, 0, len(sequence),
self._assembly_props, None, 0, five_fixed = True, three_fixed = True)
return [this_oligo]
class DesignLinkerEndCorrect(OligoDesignerMixin, OverlapGuessMixin):
"""Design end correct oligos with specific end linkers.
This design has two added features:
- a fixed overlap size
- five side and three side linker regions
The fixed overlap size allows more accurate determination of potential piece
sizes, helping eliminate a lot of the work involved in adjusting pieces to
fit within a shifting overlap framework.
The end linkers allow designs that incorporate end regions. These end
regions can be used for infusion cloning into a vector, and then
amplification out of the vector using the internal oligos.
To not use end linkers, set five_size and/or three size to None.
"""
def __init__(self, max_size, five_size, three_size,
buffer_overlaps, assembly_props, pre_builder, post_builder):
self._five_size = five_size
self._three_size = three_size
self._assembly_props = assembly_props
self._pre_builder = pre_builder
self._post_builder = post_builder
if len(buffer_overlaps[0]) == 3:
cur_buffer, overlap, temp_thresh = buffer_overlaps[0]
elif len(buffer_overlaps[0]) == 5:
(cur_buffer, overlap, temp_thresh, self._five_size,
self._three_size) = buffer_overlaps[0]
else:
raise ValueError('Do not understand buffer overlaps: %s' %
buffer_overlaps[0])
self._flex_buffers = buffer_overlaps[1:]
self.min_size = max(overlap + 1, 20)
self.max_size = max_size
self._overlap = overlap
self._buffer = cur_buffer
self._melt_thresh = temp_thresh
self._temp_buffer = 2
self._use_fixed_ends = False
self._even_fudge = 0.24
assert self._five_size is not None, 'Need to set five size'
assert self._three_size is not None, 'Need to set three size'
if self._five_size:
assert self._five_size > self._overlap, "Five size too small"
if self._three_size:
assert self._three_size > self._overlap, "Three size too small"
class OverlapCalc:
def __init__(self, overlap):
self.overlap = overlap
def overlap_calc(self, start, end):
return self.overlap
self._oc = OverlapCalc(self._overlap).overlap_calc
def get_builders(self):
"""Retrieve the pre and post builders associated with this breakdown.
"""
new_pre = self._pre_builder.copy()
new_post = self._post_builder.copy()
return new_pre, new_post
def get_flex_splitter(self):
"""Return a splitter with a more flexible buffer size.
"""
if self._flex_buffers is None or len(self._flex_buffers) == 0:
raise ValueError("No flexible splitter available")
else:
# Get the next buffer size and overlap to try, which allows us to
# get a new range of oligo solutions.
# We need to search this space more intelligently
print "Getting flex splitter", self._flex_buffers[0]
new_splitter = DesignLinkerEndCorrect(self.max_size,
self._five_size, self._three_size,
self._flex_buffers, self._assembly_props, self._pre_builder,
self._post_builder)
new_splitter.set_melt_calc(self._melt_calc)
return new_splitter
def get_overlap(self):
return self._overlap
def get_pieces_and_size(self, seq):
"""Right now we don't return this since we don't handle hints.
"""
return -1, -1
# return self._find_even_pieces(len(seq), self.min_size,
# self.max_size)
def get_min_max(self, conservative = False):
return self.min_size, self.max_size
def set_melt_calc(self, melt_calc):
self._melt_calc = melt_calc
def split(self, parent_name, sequence, is_first, is_last):
first_oligo = FixedEndOligo(self._get_name(parent_name, 0),
sequence, 0, self._five_size, self._assembly_props,
self._oc, self._buffer, five_fixed = True,
three_fixed = is_first and self._use_fixed_ends)
second_oligo = FixedEndOligo(self._get_name(parent_name, 1),
sequence, self._five_size - self._overlap,
self._five_size + self._overlap,
self._assembly_props, self._oc,
self._buffer)
five_size = self._five_size
five_index = 2
three_size = self._three_size
three_index = 2
piece_size, num_oligos = self._find_even_pieces(
len(sequence) - five_size - three_size,
self.min_size, self.max_size)
#print piece_size, num_oligos
if num_oligos > 0:
third_oligo = FixedEndOligo(self._get_name(parent_name, five_index),
sequence, five_size, five_size + piece_size,
self._assembly_props,
self._oc, self._buffer,
five_fixed = True and self._use_fixed_ends)
cur_start = five_size + piece_size - self._overlap
oligos = []
for oligo_index in range(1, num_oligos - 1):
start = cur_start
end = start + piece_size
oligos.append(FixedEndOligo(self._get_name(parent_name,
oligo_index + five_index), sequence, start, end,
self._assembly_props, self._oc,
self._buffer))
cur_start = end - self._overlap
total_oligos = num_oligos + five_index + three_index
last_middle = FixedEndOligo(self._get_name(parent_name,
total_oligos - 3), sequence, cur_start, len(sequence) -
three_size, self._assembly_props,
self._oc, self._buffer,
three_fixed = True and self._use_fixed_ends)
else:
third_oligo = None
last_middle = None
total_oligos = 4
if total_oligos == 4:
penultimate_start = min(second_oligo.end - self._overlap,
len(sequence) - three_size - self._buffer)
penultimate_start = max(penultimate_start,
second_oligo.start + 2)
penultimate_end = max((len(sequence) - three_size + self._overlap),
second_oligo.end + self._buffer)
penultimate_end = min(penultimate_end, len(sequence) - self._buffer)
else:
penultimate_start = len(sequence) - three_size - self._overlap
penultimate_end = len(sequence) - three_size + self._overlap
penultimate_oligo = FixedEndOligo(self._get_name(parent_name,
total_oligos - 2), sequence,
penultimate_start, penultimate_end,
self._assembly_props, self._oc, self._buffer)
last_oligo = FixedEndOligo(self._get_name(parent_name,
total_oligos - 1),
sequence, len(sequence) - three_size, len(sequence),
self._assembly_props,
self._oc, self._buffer,
five_fixed = is_last and self._use_fixed_ends,
three_fixed = True)
if third_oligo and last_oligo:
all_oligos = self._adjust_overlaps_to_temp(
[first_oligo, second_oligo, third_oligo] + oligos +
[last_middle, penultimate_oligo, last_oligo])
else:
all_oligos = self._adjust_overlaps_to_temp(
[first_oligo, second_oligo, penultimate_oligo, last_oligo])
for oligo in all_oligos:
if oligo.bad_size(self.min_size, self.max_size) != 0:
raise AssemblyDesignError("Oligo not in size: %s" % oligo)
return all_oligos
def _adjust_overlaps_to_temp(self, oligos):
"""Adjust the overlaps of the given overlaps above a melting thresh.
The idea is that the fixed overlaps could leave some overlaps which
have extra low melting temperatures -- this increases these overlaps
(by expanding oligos) to be above this temperature.
"""
min_temp_size = self.min_size + self._temp_buffer
max_temp_size = self.max_size - self._temp_buffer
for oligo_index in range(len(oligos) - 1):
overlap_melt = -1.0
left_adjust = 0
right_adjust = 0
while overlap_melt < self._melt_thresh:
left_oligo = oligos[oligo_index].copy()
right_oligo = oligos[oligo_index + 1].copy()
try:
left_oligo.adjust_three(left_adjust)
right_oligo.adjust_five(right_adjust)
except ValueError:
print left_adjust, right_adjust, left_oligo, right_oligo
raise
left_seq = left_oligo.get_seq()
right_seq = right_oligo.get_seq()
overlap_start = left_seq.find(right_seq[:self._overlap])
if overlap_start == -1:
print left_adjust, right_adjust, left_oligo, right_oligo
raise AssemblyDesignError(
'Could not find overlap: %s %s %s %s' %
(self._overlap, overlap_start, right_seq, left_seq))
overlap = left_seq[overlap_start:]
# special case hacked in -- use '3' to represent pieces which
# should not be in an overlap
has_special_bases = False
if overlap.find('3') != -1:
overlap_melt = -1.0
has_special_bases = True
else:
overlap_comp = \
Seq(overlap, unambiguous_dna).complement().data
overlap_melt = self._melt_calc.calculate_melting(overlap,
overlap_comp)
# print left_oligo.get_name(), overlap_melt, left_adjust, \
# right_adjust
# set out melting temperature to the threshold, if we've
# reached our maximum oligo size on either oligo
left_bad = left_oligo.bad_size(min_temp_size, max_temp_size)
right_bad = right_oligo.bad_size(min_temp_size,
max_temp_size)
# if both of the oligos are over size, or if we've moved
# the left oligo too far so it's past the start of the right
# one
left_too_far = left_oligo.end >= right_oligo.end - 1
right_too_far = right_oligo.start <= left_oligo.start + 1
# if both are out of range or we have moved both too far,
# we have to give up
if ((left_bad != 0 and right_bad != 0) or
(left_bad != 0 and right_too_far) or
(right_bad != 0 and left_too_far) or
(left_too_far and right_too_far)):
overlap_melt = self._melt_thresh
elif ((left_bad == 0) and
(right_adjust >= left_adjust or right_bad != 0 or
right_too_far) and not left_too_far):
left_adjust += 1
elif right_bad == 0 and not right_too_far:
right_adjust += 1
else:
raise AssemblyDesignError('Not moving on adjustments: %s %s'
% (left_oligo, right_oligo))
oligos[oligo_index] = left_oligo
oligos[oligo_index + 1] = right_oligo
return oligos
def _find_even_pieces(self, seq_size, min_size, max_size):
"""Find pieces suitable for use in filling in the sequence.
"""
choices = range(min_size + self._buffer, max_size + 1 - self._buffer)
choices.reverse()
for length in choices:
#print length, seq_size, self._overlap
piece_target = self._find_num_pieces(seq_size, self._overlap,
length)
piece_target_round = int(round(piece_target))
#print length, seq_size, self._overlap, piece_target, \
# piece_target_round
if abs(piece_target_round - piece_target) < self._even_fudge:
# even number of pieces
if piece_target_round % 2 == 0:
return length, piece_target_round
raise AssemblyDesignError("Could not find pieces of appropriate size")
class DesignEndCorrect(OligoDesignerMixin, OverlapGuessMixin):
"""Design a set of oligos in which the ends are primer correct.
Regions with an even number of oligos will have correctly oriented oligos
with respect to the 5' (5 to 3) and 3' (3 to 5) primers, so that these
primers can initially sit down and drive the reaction. Having these ends
correctly oriented will hopefully allow initial primer binding and limit
cross reactivity.
"""
def __init__(self, min_size, max_size, melt_thresh, buffer_size = 5):
self.min_size = min_size
self.max_size = max_size
self._buffer_size = buffer_size
self._melt_calc = None
self._melt_thresh = melt_thresh
self.num_same_adjusts = 5
def get_flex_splitter(self):
"""Return a splitter with a more flexible buffer size.
"""
raise ValueError("No flexible splitter available")
def set_melt_calc(self, melt_calc):
self._melt_calc = melt_calc
def split(self, parent_name, sequence, is_first, is_last):
"""Split the sequence into an even number of oligos.
"""
five_oligo, three_oligo = self._initial_oligos(sequence,
self.max_size, self.min_size)
try:
initial_size, num_oligos = self._predict_even_size(five_oligo,
three_oligo, sequence, self._buffer_size)
# if we can't find a size with buffers, try without
except OligoSizeError:
initial_size, num_oligos = self._predict_even_size(five_oligo,
three_oligo, sequence, 0)
assert num_oligos % 2 == 0, "Expect an even number of oligos"
filler_oligos = self._get_filler_oligos(five_oligo, initial_size,
num_oligos - 1, sequence)
last_filler = self._get_last_fill_oligo(filler_oligos[-1],
three_oligo, sequence)
need_adjust = last_filler.bad_size_with_buffer(self.min_size,
self.max_size)
previous_needs = []
while need_adjust != 0:
previous_needs.append(need_adjust)
num_adjust = int(round(float(need_adjust) /
float(len(filler_oligos))))
print "Need adjust", need_adjust, num_adjust, \
last_filler.end - last_filler.start
if num_adjust == 0: # adjust by a small amount
if need_adjust > 0:
change = 1
else:
change = -1
filler_oligos = self._adjust_oligos(filler_oligos, change,
need_adjust, self.min_size, self.max_size)
else:
filler_oligos = self._adjust_oligos(filler_oligos, num_adjust,
need_adjust, self.min_size, self.max_size)
last_filler = self._get_last_fill_oligo(filler_oligos[-1],
three_oligo, sequence)
need_adjust = last_filler.bad_size_with_buffer(self.min_size,
self.max_size)
# try to avoid an issue where we are moving back and forth across
# small sizes within the buffer range -- just accept not being
# within the buffer
if previous_needs.count(need_adjust) >= self.num_same_adjusts:
need_adjust = last_filler.bad_size(self.min_size,
self.max_size)
final_oligos = [five_oligo] + filler_oligos + [last_filler] + \
[three_oligo]
# make sure we have our target number of oligos (middle oligos + ends)
assert len(final_oligos) == num_oligos + 2, (num_oligos,
len(final_oligos), [str(x) for x in final_oligos])
for index, final_oligo in enumerate(final_oligos):
final_oligo.set_name(self._get_name(parent_name, index))
return final_oligos
def _get_last_fill_oligo(self, five_oligo, three_oligo, sequence):
"""Get the very last oligo which may be too small or large.
"""
last_start, melt = five_oligo.three_side_overlap(self._melt_calc,
self._melt_thresh)
last_end, melt = three_oligo.five_side_overlap(self._melt_calc,
self._melt_thresh)
return AdjustableOligo("", sequence, last_start, last_end,
self._buffer_size)
def _get_filler_oligos(self, five_oligo, oligo_size, total_oligos,
sequence):
"""Fill in a region with the given number of sized oligos.
"""
cur_oligo = five_oligo
filler_oligos = []
for index in range(total_oligos):
overlap_start, melt = cur_oligo.three_side_overlap(self._melt_calc,
self._melt_thresh)
cur_oligo = AdjustableOligo("", sequence, overlap_start,
overlap_start + oligo_size, self._buffer_size)
filler_oligos.append(cur_oligo)
return filler_oligos
def _predict_even_size(self, five_oligo, three_oligo, seq, buffer_size):
"""Predict the size of oligos to give an even number of oligos.
"""
start, junk = five_oligo.three_side_overlap(self._melt_calc,
self._melt_thresh)
end, junk = three_oligo.five_side_overlap(self._melt_calc,
self._melt_thresh)
length = end - start
assert end - start > 2 * self.min_size, \
"We're expecting a reasonable size to design oligos for"
avg_overlap = self._find_avg_overlap(seq[start:end], self.max_size,
self._melt_calc, self._melt_thresh, buffer_size)
#print "avg", avg_overlap
num_oligos = 2
while 1:
size = self._find_piece_size(length, avg_overlap, num_oligos)
#print num_oligos, size
if size >= (self.min_size + buffer_size) and \
size <= (self.max_size - buffer_size):
break
if size <= self.min_size:
raise OligoSizeError(
"Could not find estimated size: (%s, %s, %s, %s)"
% (size, self.min_size, self.max_size, buffer_size))
# advance by two oligos at a time
num_oligos += 2
return int(round(size)), num_oligos
def _initial_oligos(self, sequence, max_size, min_size):
"""Design the five and three most oligo for the sequence.
"""
size = (max_size + min_size) / 2 # int division
five_oligo = AdjustableOligo("", sequence, 0, size, self._buffer_size)
three_oligo = AdjustableOligo("", sequence, len(sequence) - size,
len(sequence), self._buffer_size)
return five_oligo, three_oligo
class DesignOligosFromThreeEnd(OligoDesignerMixin):
"""Design oligos moving from the 3' to 5' region of a sequence.
"""
def __init__(self, params, melt_calc):
self._designer = SimpleOligoDesigner(params.min_size, params.max_size,
params.cut_five, params.cut_three, params.temp, melt_calc)
self._params = params
def split(self, sequence):
"""Split the given sequence into oligos.
"""
self._designer.set_sequence(sequence)
initial_oligo = self._designer.three_most_region()
cur_oligo = initial_oligo
five_oligos = []
while 1:
next_oligo = self._designer.oligo_on_five(cur_oligo)
five_oligos.append(next_oligo)
if next_oligo.at_five_end():
break
cur_oligo = next_oligo
# turn around the five oligos so the 5'-most is first
five_oligos.reverse()
all_oligos = five_oligos + [initial_oligo]
final_oligos = self._add_extra_sequences(all_oligos,
self._params.cut_five, self._params.cut_three)
return final_oligos
class DesignOligosFromInternalRegion(OligoDesignerMixin):
"""Design oligos starting with an initial starting region.
"""
def __init__(self, params, melt_calc, start_function):
"""Initialize to design from a single internal location.
start_function is a callable object which returns a location to start
designing from when given a sequence.
"""
self._designer = SimpleOligoDesigner(params.min_size, params.max_size,
params.cut_five, params.cut_three, params.temp, melt_calc)
self.start_function = start_function
self._params = params
def split(self, sequence):
"""Split the given sequence into oligos.
"""
self._designer.set_sequence(sequence)
start_location = self.start_function(sequence)
initial_oligo = self._designer.region_from_location(start_location)
cur_oligo = initial_oligo
five_oligos = []
while 1:
next_oligo = self._designer.oligo_on_five(cur_oligo)
five_oligos.append(next_oligo)
if next_oligo.at_five_end():
break
cur_oligo = next_oligo
# turn around the five oligos so the 5'-most is first
five_oligos.reverse()
cur_oligo = initial_oligo
three_oligos = []
while 1:
next_oligo = self._designer.oligo_on_three(cur_oligo)
three_oligos.append(next_oligo)
if next_oligo.at_three_end():
break
cur_oligo = next_oligo
all_oligos = five_oligos + [initial_oligo] + three_oligos
final_oligos = self._add_extra_sequences(all_oligos,
self._params.cut_five, self._params.cut_three)
return final_oligos
class SimpleOligoDesigner:
"""Given a sequence, design oligomers that span it.
This designs oligos at a fixed size, moving in a linear fashion in the 5'
or 3' directions.
"""
def __init__(self, min_size, max_size, cut_five, cut_three, melt_temp,
melt_calc):
self.seq = None
assert max_size == min_size, \
"This class can only design fixed size oligos."
self.target_size = max_size - cut_five - cut_three
#self.cut_five = cut_five
#self.cut_three = cut_three
self.melt_temp = melt_temp
self.melt_calc = melt_calc
def set_sequence(self, seq):
self.seq = seq
def region_from_location(self, start_location):
"""Obtain an oligomer region given a location on the sequence.
This function is useful when starting to design oligomers on a
sequence.
"""
oligo = OligoRegion(self.seq, start_location, start_location +
self.target_size)
return oligo
def three_most_region(self):
"""Return an oligo on the 3' end of the sequence.
"""
oligo = OligoRegion(self.seq, (len(self.seq)) - self.target_size,
len(self.seq))
return oligo
def oligo_on_five(self, start_oligo):
"""Design an oligo to the 5' side of the given oligo.
"""
new_end, overlap_melt = start_oligo.five_side_overlap(self.melt_calc,
self.melt_temp)
# get the real start or the sequence start, depending if
# we've reached the start of the sequence
new_start = max(new_end - self.target_size, 0)
new_oligo = OligoRegion(self.seq, new_start, new_end)
return new_oligo
def oligo_on_three(self, start_oligo):
"""Design an oligo to the 3' side of the given oligo.
"""
new_start, overlap_melt = start_oligo.three_side_overlap(self.melt_calc,
self.melt_temp)
# get the real end, or the end of the sequence
new_end = min(new_start + self.target_size, len(self.seq))
new_oligo = OligoRegion(self.seq, new_start, new_end)
return new_oligo