"""Analysis of designed oligos for potential problems.
This modules contain classes that take simple OligoRegion-like objects (have
attributes full_seq, start, end) and analyze them for problem issues, which
need to be detected and designed around.
"""
import re
import copy
from Bio.Seq import Seq
from Bio.Data import IUPACData
from Bio.Alphabet.IUPAC import unambiguous_dna
from SynBio.Primers.Check import FiveHybFinder, ThreeHybFinder
from Oligo import _WorkOligoManager
# analysis constants
HAIRPIN = "End hairpin"
class SplitterAndAnalyzer:
"""Encompass an existing splitter with an oligo analyzer.
This makes use of existing OligoSplitters and Analyzers by combining the
functionality into a single class that does the splitting and
post-analysis and adjustment.
"""
def __init__(self, splitter, analyzer):
self.splitter = splitter
# create a unique copy of the analyzer -- since we may have multiple
# splitters using a single analyzer
# XXX This is a hack, revealing that this is probably not designed
# right.
self.analyzer = copy.deepcopy(analyzer)
self.analyzer.set_oligo_min_max(self.splitter.min_size,
self.splitter.max_size)
def set_melt_calc(self, melt_calc):
self.splitter.set_melt_calc(melt_calc)
self.analyzer.set_melt_info(melt_calc, self.splitter._melt_thresh)
def split(self, segment_name, sequence, region_start, region_end,
index_position, num_pieces):
"""Hijack a splitter "split" function to provide post-analysis.
"""
designed_oligos = self.splitter.split(segment_name, sequence)
if self.analyzer:
self.analyzer.analyze_oligos(segment_name,
designed_oligos, sequence)
#for oligo in designed_oligos:
# print oligo
# print oligo.str_analyses()
return designed_oligos
class _OligoProblems:
"""Hold problems with an oligo and provide functionality for fixing them.
"""
def __init__(self, oligo, hairpin_issues):
self.oligo = oligo
self.hairpin_issues = hairpin_issues
def __str__(self):
out = ""
for hairpin in self.hairpin_issues:
out += "\t%s\n" % (str(hairpin))
return out
def extends_secondary_structure(self, five_primer, three_primer,
num_nts = 2):
"""Determine if the given primers will extend oligo secondary structure.
Using all hairpin problems, this examines whether the addition of
either the five or three primer primer will increase existing
secondary structure.
"""
five_seq = five_primer.get_internal_five_region()
three_seq = three_primer.get_internal_three_region()
five_check = five_seq[-num_nts:]
three_check = three_seq[:num_nts]
assert len(five_check) == num_nts and len(three_check) == num_nts
for hairpin in self.hairpin_issues:
if hairpin.will_extend_five(five_check):
return 1
if hairpin.will_extend_three(three_check):
return 1
# if we got here, we don't have secondary structure issues
return 0
class OligoRepairError(Exception):
"""Error raised when an oligo could not be repaired.
This is raised when we cannot adjust an oligo to account for
cross-hybridization problems
"""
pass
class _SingleOligoRepair:
"""Try and fix issues with a single oligo.
This expand an oligo, trying to work around issues with an oligo end by
burying them in the oligo.
"""
def __init__(self, min_nts, max_nts, min_gcs, max_expand, max_shrink,
loose_flex, desperate_flex):
self.strict_five_analyzer = FiveHybFinder(min_nts, max_nts, min_gcs)
self.strict_three_analyzer = ThreeHybFinder(min_nts, max_nts, min_gcs)
loose_five_analyzer = FiveHybFinder(min_nts + loose_flex,
max_nts + loose_flex, min_gcs)
loose_three_analyzer = ThreeHybFinder(min_nts + loose_flex,
max_nts + loose_flex, min_gcs)
desperate_five_analyzer = FiveHybFinder(min_nts + desperate_flex,
max_nts + desperate_flex, min_gcs)
desperate_three_analyzer = ThreeHybFinder(min_nts + desperate_flex,
max_nts + desperate_flex, min_gcs)
self.five_analyzers = [self.strict_five_analyzer, loose_five_analyzer,
desperate_five_analyzer]
self.three_analyzers = [self.strict_three_analyzer, loose_three_analyzer,
desperate_three_analyzer]
self.max_expand = max_expand
self.max_shrink = max_shrink
self.debug = False
def set_oligo_min_max(self, min_size, max_size):
"""Set the range of sizes for oligos.
"""
self.min_oligo_size = min_size
self.max_oligo_size = max_size
def check_oligo(self, name, oligo, full_sequence):
"""Check an oligo to look for cross hybridization not in the sequence.
"""
fix_oligo = oligo.copy()
test_oligo = fix_oligo.get_seq()
five_hybs = self.strict_five_analyzer.check_oligo(test_oligo,
full_sequence)
# fix 5' issues given that we are not dealing with the 5'-most oligo:
# we can't fix that one
if five_hybs and not fix_oligo.at_five_end():
if self.debug:
print "Five"
for hyb in five_hybs:
print hyb
cur_index = 0
while 1:
analyzer = self.five_analyzers[cur_index]
try:
fix_oligo = self._fix_five(name, fix_oligo, full_sequence,
analyzer)
break
except OligoRepairError, msg:
pass
cur_index += 1
if cur_index >= len(self.five_analyzers):
raise OligoRepairError(msg)
test_oligo = fix_oligo.get_seq()
three_hybs = self.strict_three_analyzer.check_oligo(test_oligo,
full_sequence)
# fix 3' issues given that we are not dealing with the 3'-most oligo
if three_hybs and not fix_oligo.at_three_end():
if self.debug:
print "Three"
for hyb in three_hybs:
print hyb
cur_index = 0
while 1:
analyzer = self.three_analyzers[cur_index]
try:
fix_oligo = self._fix_three(name, fix_oligo, full_sequence,
analyzer)
break
except OligoRepairError, msg:
pass
cur_index += 1
if cur_index >= len(self.three_analyzers):
raise OligoRepairError("3 error: %s" % msg)
oligo.update(fix_oligo)
def _fix_three(self, name, oligo, full_seq, analyzer):
"""Fix an issue with the 3' end of an oligo.
If the 3' end of the oligo cross-reacts, try to fix the problem
by adjusting the end of the oligo.
"""
if self.debug:
print "3", oligo
hybs = []
for expand in range(self.max_expand):
test_oligo = oligo.copy()
test_oligo.adjust_three(expand + 1)
if self.debug:
print "Expand", test_oligo
if test_oligo.bad_size(self.min_oligo_size,
self.max_oligo_size) == 0:
hybs = analyzer.check_oligo(test_oligo.get_seq(),
full_seq)
if len(hybs) == 0:
return test_oligo
elif self.debug:
for hyb in hybs:
print hyb
for shrink in range(self.max_shrink):
test_oligo = oligo.copy()
test_oligo.adjust_three(-(shrink + 1))
if self.debug:
print "Shrink", test_oligo
if test_oligo.bad_size(self.min_oligo_size,
self.max_oligo_size) == 0:
hybs = analyzer.check_oligo(test_oligo.get_seq(),
full_seq)
if len(hybs) == 0:
return test_oligo
elif self.debug:
for hyb in hybs:
print hyb
raise OligoRepairError("Could not adjust to fix issue: %s\n>%s\n%s" %
("\n".join([str(x) for x in hybs]), name, full_seq))
def _fix_five(self, name, oligo, full_seq, analyzer):
"""Fix an issue with the 5' end of an oligo.
If the 5' end of the oligo cross-reacts, try to fix the problem
by adjusting the end of the oligo.
"""
hybs = []
if self.debug:
print "5", oligo
for expand in range(self.max_expand):
test_oligo = oligo.copy()
test_oligo.adjust_five(expand + 1)
if self.debug:
print "Expand", test_oligo
if not test_oligo.bad_size(self.min_oligo_size,
self.max_oligo_size):
hybs = analyzer.check_oligo(test_oligo.get_seq(),
full_seq)
if len(hybs) == 0:
return test_oligo
elif self.debug:
for hyb in hybs:
print hyb
for shrink in range(self.max_shrink):
test_oligo = oligo.copy()
test_oligo.adjust_five(-(shrink + 1))
if self.debug:
print "Shrink", test_oligo
if not test_oligo.bad_size(self.min_oligo_size,
self.max_oligo_size):
hybs = analyzer.check_oligo(test_oligo.get_seq(),
full_seq)
if len(hybs) == 0:
return test_oligo
elif self.debug:
for hyb in hybs:
print hyb
raise OligoRepairError("Could not adjust to fix issue: %s\n>%s\n%s" %
("\n".join([str(x) for x in hybs]), name, full_seq))
class CrossOligoRepair:
def __init__(self, min_nts, max_nts, min_gcs, max_expand = 8, max_shrink =
2, loose_flex = 1, desperate_flex = 2, adjust_amount = 6):
self.single_repair = _SingleOligoRepair(min_nts, max_nts, min_gcs,
max_expand, max_shrink, loose_flex, desperate_flex)
self.adjust_amount = adjust_amount
def set_oligo_min_max(self, min_size, max_size):
"""Set the range of sizes for oligos.
"""
self.single_repair.set_oligo_min_max(min_size, max_size)
self.min_oligo_size = min_size
self.max_oligo_size = max_size
def set_melt_info(self, melt_calc, melt_thresh):
self.melt_calc = melt_calc
self.melt_thresh = melt_thresh
def check_oligos(self, name, oligos, full_sequence):
"""Check all oligos and repair cross hybridization problems.
"""
oligo_manager = _WorkOligoManager(oligos, self.min_oligo_size,
self.max_oligo_size, self.melt_calc, self.melt_thresh)
while 1:
try:
for index, oligo in enumerate(oligo_manager.get_cur_oligos()):
self.single_repair.check_oligo(name, oligo, full_sequence)
break
# if we get an error, we need to adjust and try again
except OligoRepairError, msg:
num_adjust = 0
while 1:
# four different adjustments to try and fix issues
# 1. shrink the previous oligo (5' end change)
if num_adjust == 0:
target, change = oligo_manager.adjust_oligo_and_prev(
index, -self.adjust_amount)
# 2. expand the previous oligo (5' end change)
elif num_adjust == 1:
target, change = oligo_manager.adjust_oligo_and_prev(
index, self.adjust_amount)
# 3. shrink the next oligo (3' end change)
elif num_adjust == 2:
target, change = oligo_manager.adjust_oligo_and_next(
index, -self.adjust_amount)
# 4. expand the next oligo (3' end change)
elif num_adjust == 3:
target, change = oligo_manager.adjust_oligo_and_next(
index, self.adjust_amount)
# give up and raise an error that we can't fix the problem
else:
raise OligoRepairError(
"Can't fix oligo %s %s: %s; %s" %
(name, index, oligo, msg))
is_good = self._check_change_oligos(name, target, change,
full_sequence, oligo_manager)
if is_good:
break
else:
num_adjust += 1
def _check_change_oligos(self, name, target, change, full_sequence,
oligo_manager):
"""Finish up an adjustment, checking oligos for lingering issues.
This checks if a change worked to fix problems, returning True if so
or False if not. If a change did work, then the oligos in the manager
are changed to the good changes.
"""
if target is None:
assert change is None
return False
try:
self.single_repair.check_oligo(name, target.raw_oligo(),
full_sequence)
self.single_repair.check_oligo(name, change.raw_oligo(),
full_sequence)
except OligoRepairError:
return False
# if we got here everything is good
oligo_manager.replace_oligo(target)
oligo_manager.replace_oligo(change)
return True
class OligoAnalyzer:
"""Analyze designed oligos, looking for issues which need changes.
This looks at designed oligos for problematic 3' or 5' ends, internal
hairpins, and other issues.
"""
def __init__(self, single_analyses = [], multi_analyses = []):
self.single_analyses = single_analyses
self.multi_analyses = multi_analyses
def set_oligo_min_max(self, min_size, max_size):
"""Set maximum and minimum oligo sizes, presumedly from splitter info
"""
for analysis in self.single_analyses:
analysis.set_oligo_min_max(min_size, max_size)
for analysis in self.multi_analyses:
analysis.set_oligo_min_max(min_size, max_size)
def set_melt_info(self, melt_calc, melt_thresh):
"""Set information on melting temperatures.
"""
for analysis in self.single_analyses:
analysis.set_melt_info(melt_calc, melt_thresh)
for analysis in self.multi_analyses:
analysis.set_melt_info(melt_calc, melt_thresh)
def analyze_oligos(self, name, oligos, full_sequence):
"""Analyze a list of OligoRegions for potential problems.
"""
self._single_analysis(self.single_analyses, name, oligos, full_sequence)
self._multi_analysis(self.multi_analyses, name, oligos, full_sequence)
def _multi_analysis(self, analyses, name, oligos, full_sequence):
"""Perform analyses that look at all oligos at once.
"""
for analysis in analyses:
analysis.check_oligos(name, oligos, full_sequence)
def _single_analysis(self, analyses, name, oligos, full_sequence):
"""Do analyses that require looking at oligos one at a time.
"""
for oligo in oligos:
for analysis in analyses:
analysis.check_oligo(name, oligo, full_sequence)
class _HairpinProblem:
"""Hold information on a hairpin region detected in a sequence.
"""
def __init__(self, sequence, a_start, a_end, b_start, b_end):
"""Define two regions on a sequence with a hairpin issue.
"""
self.seq = sequence
self.a_region = (a_start, a_end)
self.b_region = (b_start, b_end)
def __str__(self):
out = "Hairpin : %s,%s (%s) vs. %s,%s (%s)" % (
self.a_region[0], self.a_region[1],
self.seq[self.a_region[0]:self.a_region[1]],
self.b_region[0], self.b_region[1],
self.seq[self.b_region[0]:self.b_region[1]])
return out
def __hash__(self):
return hash((self.a_region, self.b_region))
def __cmp__(self, other):
"""Determine if two detected hairpins are identical.
"""
return cmp((self.a_region, self.b_region),
(other.a_region, other.b_region))
def expand(self):
"""Expand the initially found hairpin match if possible.
"""
self.expand_five()
self.expand_three()
def expand_five(self):
"""Expand this issue as far as possible in the 5' direction
The 5' expansion is relative to the a_region, so 3' expanding for the
b_region.
"""
cur_a_start, cur_a_end = self.a_region
cur_b_start, cur_b_end = self.b_region
# print "5'", cur_a_start, cur_a_end, cur_b_start, cur_b_end
expand = 0
while 1:
expand += 1
new_a_test = cur_a_start - expand
# subtract by 1 for end sequences since the end coordinate is
# accessed differently than the range
# >>> b = "012345"
# >>> b[1:4]
# '123'
# >>> b[4]
# '4'
new_b_test = cur_b_end + expand - 1
if new_a_test < 0 or new_b_test >= len(self.seq):
break
if not self._hairpin_matches(new_a_test, new_b_test):
break
# adjust by the expanded amount
# last expansion caused the problem, so we retract by one
expand -= 1
new_a_start = cur_a_start - expand
new_b_end = cur_b_end + expand
self.a_region = (new_a_start, cur_a_end)
self.b_region = (cur_b_start, new_b_end)
def _hairpin_matches(self, first_index, second_index):
"""Decide if the two bases match (are complements of each other).
"""
first_base = self.seq[first_index]
second_base = self.seq[second_index]
#print first_index, first_base, second_index, second_base
if IUPACData.ambiguous_dna_complement[first_base] == second_base:
return 1
else:
return 0
def expand_three(self):
"""Expand this hairpin as far as possible in the 3' direction.
The 3' expansion is relative to the a_region, so 5' expanding for the
b_region.
"""
cur_a_start, cur_a_end = self.a_region
cur_b_start, cur_b_end = self.b_region
#print "3'", cur_a_start, cur_a_end, cur_b_start, cur_b_end
expand = 0
while 1:
expand += 1
new_a_test = cur_a_end + expand - 1
new_b_test = cur_b_start - expand
if new_b_test < new_a_test:
break
if not self._hairpin_matches(new_a_test, new_b_test):
break
# adjust by the expanded amount
# last expansion caused the problem, so we retract by one
expand -= 1
new_a_end = cur_a_end + expand
new_b_start = cur_b_start - expand
self.a_region = (cur_a_start, new_a_end)
self.b_region = (new_b_start, cur_b_end)
def will_extend_five(self, new_nts):
"""Determine if the added 5' nucleotides will increase the hairpin.
These nucleotides are assumed to be added at the 5' end of the 5' most
end of the hairpin, and thus the hairpin must be at the 5' end of the
oligo.
"""
# if we are on the 5' end
if self.a_region[0] == 0:
other_start = self.b_region[1]
other_end = self.b_region[1] + len(new_nts)
# check if we are off the 3' end of the oligo
if other_end > len(self.seq):
return 0
else:
test_nts = self.seq[other_start:other_end]
rc_test_nts = Seq(test_nts,
unambiguous_dna).reverse_complement().data
assert len(rc_test_nts) == len(new_nts), (rc_test_nts, new_nts,
self.seq, self.a_region, self.b_region)
if rc_test_nts == new_nts:
return 1
# if we got here, we can't extend
return 0
def will_extend_three(self, new_nts):
"""Determine if the added 3' nucleotides will increase the hairpin.
These nucleotides are assumed to be added at the 3' end of the 3' most
end of the hairpin, and thus the hairpin must be at the 3' end of the
oligo. So, this checks the opposite side of will_extend_five
"""
# if we are on the 3' end
if self.b_region[1] == len(self.seq):
other_start = self.a_region[0] - len(new_nts)
other_end = self.a_region[0]
# check if we are off the 5' end of the oligo
if other_start < 0:
return 0
else:
test_nts = self.seq[other_start:other_end]
rc_test_nts = Seq(test_nts,
unambiguous_dna).reverse_complement().data
assert len(rc_test_nts) == len(new_nts), (rc_test_nts, new_nts)
if rc_test_nts == new_nts:
return 1
# if we got here, we can't extend
return 0
class HairpinAnalysis:
"""Search for hairpin regions within designed oligos.
This looks for a exact hairpins (defined as the presence of a sequence and
its reverse complement in the same oligo), and returns them.
"""
def __init__(self, min_hairpin):
self.name = HAIRPIN
self.min_hairpin = min_hairpin
def set_oligo_min_max(self, min_size, max_size):
"""We don't need this information.
"""
pass
def set_melt_info(self, melt_calc, melt_thresh):
"""We don't need this information.
"""
pass
def check(self, sequence):
"""Check a sequence returning hairpin problem regions.
"""
all_seed_hairpins = self._find_seed_hairpins(sequence)
expanded_hairpins = []
for hairpin in all_seed_hairpins:
hairpin.expand()
expanded_hairpins.append(hairpin)
# remove any duplicates using sets
final_hairpins = list(set(expanded_hairpins))
return _OligoProblems(sequence, final_hairpins)
def _find_seed_hairpins(self, oligo):
"""Find any hairpin regions which meet the minimum size requirements.
This is a very simple window iterator which walks across the sequence
and searches for the reverse complement of the given sequence. If
found, a seed oligo is created.
"""
hairpins = []
for index in range(len(oligo) - self.min_hairpin):
cur_start = index
cur_end = index + self.min_hairpin
cur_test = oligo[cur_start:cur_end]
rc_search = Seq(cur_test, unambiguous_dna).reverse_complement().data
pat = re.compile(rc_search)
matches = pat.finditer(oligo)
for match in matches:
match_start, match_end = match.span()
if match_start not in range(cur_start, cur_end) and \
match_end not in range(cur_start, cur_end):
# set the ends as the 5' most end of the hairpin first
#print cur_start, cur_end, match_start, match_end
if cur_start < match_start:
assert cur_end <= match_start
hairpins.append(_HairpinProblem(oligo, cur_start,
cur_end, match_start, match_end))
else:
assert cur_start >= match_end
hairpins.append(_HairpinProblem(oligo, match_start,
match_end, cur_start, cur_end))
# remove any duplicates using sets
unique_hairpins = list(set(hairpins))
return unique_hairpins