Sarah Richardson avatar Sarah Richardson committed f2ae27a

Remove segmentation functions from gene design

Comments (0)

Files changed (6)

bin/GD_Design_Building_Blocks.pl

-#!/usr/bin/env perl
-
-use Bio::GeneDesign;
-use Bio::GeneDesign::Basic qw(:GD);
-use Bio::Annotation::Comment;
-use Getopt::Long;
-use Pod::Usage;
-use POSIX;
-use Carp;
-
-use strict;
-use warnings;
-
-my $VERSION = '5.00';
-my $GDV = "GD_Design_Building_Blocks_$VERSION";
-my $GDS = "_BB";
-
-local $| = 1;
-
-##Get Arguments
-my %p = ();
-GetOptions (
-      'input=s'         => \$p{INPUT},
-      'output=s'        => \$p{OUTPUT},
-      'format=s'        => \$p{FORMAT},
-      'length:i'        => \$p{TARBBLEN},
-      'maxlength:i'     => \$p{MAXBBLEN},
-      'minlength:i'     => \$p{MINBBLEN},
-      'lap:i'           => \$p{TARBBLAP},
-      'list:s'          => \$p{LIST},
-      'verbose'         => \$p{VERBOSE},
-      'cloningvector=s' => \$p{CLONVEC},
-      'destvector=s'    => \$p{DESTVEC},
-      'simple'          => \$p{SIMPLE},
-      'stitch:i'        => \$p{STITCH},
-      'help'            => \$p{HELP}
-);
-
-################################################################################
-################################ SANITY  CHECK #################################
-################################################################################
-pod2usage(-verbose=>99, -sections=>"NAME|VERSION|DESCRIPTION|ARGUMENTS|USAGE")
-  if ($p{HELP});
-
-my $GD = Bio::GeneDesign->new();
-
-$p{LIST} = $p{LIST} || "outside";
-  
-#The input file must exist and be a format we care to read.
-die "\n GDERROR: You must supply an input file.\n"
-  if (! $p{INPUT});
-  
-my ($iterator, $filename, $suffix) = $GD->import_seqs($p{INPUT});
-
-$p{FORMAT} = $p{FORMAT} || $suffix || "genbank";
-
-#The output path must exist, and we'll need it to end with a slash
-$p{OUTPUT} = $p{OUTPUT} || ".";
-$p{OUTPUT} .= "/" if (substr($p{OUTPUT}, -1, 1) !~ /[\/]/);
-die "\n GDERROR: $p{OUTPUT} does not exist.\n"
-  if ($p{OUTPUT} && ! -e $p{OUTPUT});
-
-$p{TARBBLEN} = $p{TARBBLEN} || 1000;
-$p{MAXBBLEN} = $p{MAXBBLEN} || 1023;
-$p{MINBBLEN} = $p{MINBBLEN} || 200;
-die "\n ERROR: building block size is outside of allowable range.\n"
-  if ($p{TARBBLEN} < $p{MINBBLEN} || $p{TARBBLEN} > $p{MAXBBLEN});
-
-################################################################################
-################################# CONFIGURING ##################################
-################################################################################
-my @seqstowrite;
-my @seqstoignore;
-my @seqstofail;
-
-$p{TARBBLAP} = $p{TARBBLAP}  ||  40;
-
-#Set up two restriction enzyme sets for filtering enzymes through
-my $ORE = Bio::GeneDesign->new();
-$ORE->set_restriction_enzymes(-enzyme_set => $p{LIST});
-my $ARE = Bio::GeneDesign->new();
-$ARE->set_restriction_enzymes(-enzyme_set => "blunts");
-
-#Set up vectors, if necessary; note which enzymes should be excluded
-my %excludes;
-my ($destvec, $clonvec) = (undef, undef);
-my (%fpnos, %tpnos) = ((), ());
-if ($p{DESTVEC})
-{
-  $destvec = $GD->load_vector(-name => $p{DESTVEC});
-  $excludes{$_}++ foreach $destvec->enzyme_list;
-
-  my $stat5 = $ARE->restriction_status(-sequence => $destvec->chew5);
-  %fpnos = map {$_ => 1} grep {$stat5->{$_} != 0} keys %{$stat5};
-
-  my $stat3 = $ARE->restriction_status(-sequence => $destvec->chew3);
-  %tpnos = map {$_ => 1} grep {$stat3->{$_} != 0} keys %{$stat3};
-}
-if ($p{CLONVEC})
-{
-  $clonvec = $GD->load_vector(-name => $p{CLONVEC});
-  $excludes{$_}++ foreach $clonvec->enzyme_list;
-}
-my @enzes = keys %excludes;
-
-my @outsides = values %{$ORE->enzyme_set};
-$ARE->add_to_enzyme_set(-enzymes => \@outsides);
-
-
-################################################################################
-################################### CARVING ####################################
-################################################################################
-while ( my $obj = $iterator->next_seq() )
-{
-  my $chunkname = $obj->id;
-  my $chseq = uc $obj->seq;
-  my $chlen = length($chseq);
-  my $sublet = 'A';
-  
-  #Check that the chunk has not already been carved
-  my @bbs = grep {$_->primary_tag eq "building_block"} $obj->get_SeqFeatures;
-  if (scalar(@bbs))
-  {
-    print "\t" . $obj->id . " already has building blocks... skipping\n";
-    push @seqstoignore, $obj;
-    push @seqstowrite, $obj;
-    next;
-  }
-
-  #Attempt to carve the chunk
-  print "Working on $chunkname ($chlen bp)...\n";
-  my $tarbblen = $chlen < $p{TARBBLEN}  ? $chlen  : $p{TARBBLEN};
-  my $BBS = $GD->carve_building_blocks(
-          -sequence       => $obj,
-          -algorithm      => "enzyme",
-          -target_length  => $tarbblen,
-          -max_length     => $p{MAXBBLEN},
-          -min_length     => $p{MINBBLEN},
-          -overlap_length => $p{TARBBLAP},
-          -excludes       => \@enzes,
-          -fpexcludes     => \%fpnos,
-          -tpexcludes     => \%tpnos,
-          -verbose        => $p{VERBOSE}
-  );
-  my $num = scalar(@$BBS);
-  unless ($num > 0)
-  {
-    print "\t", $obj->id . " cannot be made into building blocks... skipping\n";
-    push @seqstofail, $obj;
-    push @seqstowrite, $obj;
-    next;
-  }
-  
-  
-  #Decide which chunks should have flanking enzymes
-  my @FINALS;
-  if ($num > 5)
-  {
-    my @proced;
-    my $f = floor $num / 4;
-    my $t = (4 - ($num % 4)) % 4;
-    $f = $f - ($t - 1) if ($t > 1);
-    my $arr = 4 x $f . 3 x $t;
-    foreach my $size (split(q{}, $arr))
-    {
-      my @subset;
-      for (my $x = 1; $x <= $size; $x++)
-      {
-        my $bb = shift @$BBS;
-        push @subset, $bb;
-      }
-
-      my $first = shift @subset;
-      my $last = pop @subset;
-      my $name = join(q{}, $first->get_tag_values('name')) . "-" 
-               . join(q{}, $last->get_tag_values('name'));
-      my $subseq = substr($chseq, $first->start -1, $last->end - $first->start + 1);
-      
-      if (! scalar @proced)
-      {
-        my $inenzid = join(q{}, $last->get_tag_values("enz_t"));
-        my $inenz = $ARE->enzyme_set->{$inenzid};
-        my $prepend = $destvec  ? $destvec->chew5  . $subseq  : $subseq;
-        my $outenzarr = pick_enzyme($prepend, $inenz, $clonvec, undef);
-        carp ("\t$name has no outside enzyme available") unless ($outenzarr);
-          
-        clean_first($first, $outenzarr, 0, $clonvec, $destvec);
-        clean_last($last, undef, 1, $clonvec, undef);
-      }
-      elsif (! scalar @$BBS)
-      {
-        my $inenzid = join(q{}, $first->get_tag_values("enz_f"));
-        my $inenz = $ARE->enzyme_set->{$inenzid};
-        my $postpend = $destvec  ? $subseq . $destvec->chew3  : $subseq;
-        my $outenzarr = pick_enzyme($postpend, $inenz, undef, $clonvec);
-        carp ("\t$name has no outside enzyme available") unless ($outenzarr);
-          
-        clean_first($first, undef, 1, $clonvec, undef);
-        clean_last($last, $outenzarr, 0, $clonvec, $destvec);
-      }
-      else
-      {
-        clean_first($first, undef, 1, $clonvec, undef);
-        clean_last($last, undef, 1, $clonvec, undef);
-      }
-      $first->add_tag_value("subset", $sublet);
-      $last->add_tag_value("subset", $sublet);
-      push @proced, $first, $last;
-
-      foreach my $bb (@subset)
-      {
-        $bb->add_tag_value("subset", $sublet);
-        clean_mundane($bb, $clonvec);
-      }
-      push @proced, @subset;
-      $sublet++;
-    }
-    push @FINALS, @proced;
-  }
-
-  elsif ($num > 1)
-  {
-    my $first = shift @$BBS;
-    my $last = pop @$BBS;
-    my @mundane = @$BBS;
-    if ($p{SIMPLE})
-    {
-      clean_first($first, undef, 0, $clonvec, $destvec);
-      clean_last($last, undef, 0, $clonvec, $destvec);
-    }
-    else
-    {
-      my $name = join(q{}, $first->get_tag_values('name')) . "-" 
-               . join(q{}, $last->get_tag_values('name'));
-      my $rseq = $destvec ? $destvec->chew5 . $chseq . $destvec->chew3 : $chseq;
-      $rseq = $clonvec ? $clonvec->chew5 . $rseq . $clonvec->chew3 : $rseq;
-      my $outenzarr = pick_enzyme($rseq);
-      carp ("\t$name has no outside enzyme available") unless ($outenzarr);
-    
-      clean_first($first, $outenzarr, 0, $clonvec, $destvec);
-      clean_last($last, $outenzarr, 0, $clonvec, $destvec);
-    }
-    foreach my $bb (@mundane)
-    {
-      clean_mundane($bb, $clonvec);
-    }
-    push @FINALS, $first;
-    push @FINALS, @mundane;
-    push @FINALS, $last;
-  }
-
-  else
-  {
-    my $bb = shift @$BBS;
-    if ($p{SIMPLE})
-    {
-      clean_only($bb, undef, $clonvec, $destvec);
-    }
-    else
-    {
-      my $name = join(q{}, $bb->get_tag_values('name'));
-      my $rseq = $destvec ? $destvec->chew5 . $chseq . $destvec->chew3 : $chseq;
-      $rseq = $clonvec ? $clonvec->chew5 . $rseq . $clonvec->chew3 : $rseq;
-      my $outenzarr = pick_enzyme($rseq);
-      carp ("\t$name has no outside enzyme available") unless ($outenzarr);
-    
-      clean_only($bb, $outenzarr, $clonvec, $destvec);
-    }
-    push @FINALS, $bb;
-  }
-  
-  if ($p{DESTVEC} || $p{CLONVEC})
-  {
-    my $vector = $p{DESTVEC}  ? $p{DESTVEC}  : $p{CLONVEC};
-    #$obj->id("(" . $vector . ")$chunkname");
-    
-    my $collec = $obj->annotation;
-    my $comment = Bio::Annotation::Comment->new();
-    $comment->text("destination_vector = $vector");
-    $collec->add_Annotation('comment', $comment);
-    $obj->annotation($collec);
-  }
-  foreach my $bb (sort {$a->start <=> $b->start} @FINALS)
-  {
-    if ($p{STITCH})
-    {
-      my $bbseq = substr($chseq, $bb->start - 1, $bb->end - $bb->start + 1);
-      my ($lprimer, $rprimer) = $GD->make_amplification_primers(
-          -sequence    => $bbseq,
-          -temperature => $p{STITCH}
-      );
-      unless ($bb->has_tag("enz_f"))
-      {
-        $bb->add_tag_value("stitch_left", $lprimer);
-      }
-      unless ($bb->has_tag("enz_t"))
-      {
-        $bb->add_tag_value("stitch_right", $rprimer);
-      }
-    }
-    $bb->add_tag_value("vector", $p{CLONVEC}) if ($p{CLONVEC});
-    #$bb->add_tag_value("gdversion", $GDV);
-    $obj->add_SeqFeature($bb);
-  }
-  push @seqstowrite, $obj;
-}
-
-#Maybe report
-
-print "\n\n";
-my $outputfilename = $filename  . $GDS . "." . $p{FORMAT};
-my $reportpath = $p{OUTPUT} . $filename . "_BBreport.txt";
-open (my $REP, '>', $reportpath);
-if (scalar @seqstoignore)
-{
-  print $REP $_->id . " : IGNORED FOR BUILDING BLOCKS\n" foreach @seqstoignore;
-}
-if (scalar @seqstofail)
-{
-  print $REP $_->id . " : FAILED BUILDING BLOCKS\n" foreach @seqstofail;
-}
-close $REP;
-if (scalar @seqstowrite)
-{
-  $GD->export_seqs(
-          -filename   => $outputfilename,
-          -path       => $p{OUTPUT},
-          -format     => $p{FORMAT},
-          -sequences  => \@seqstowrite
-  );
-}
-
-print "\n";
-print "Wrote " . $p{OUTPUT} . "$outputfilename\n";
-print "Wrote $reportpath\n";
-print "\n";
-print $GD->attitude() . " brought to you by $GDV\n\n";
-
-exit;
-
-sub clean_first
-{
-  my ($bb, $outenzarr, $inenz, $clonvec, $destvec) = @_;
-  my ($fprefix, $fsuffix) = (q{}, q{});
-  if ($outenzarr)
-  {
-    $fprefix = $outenzarr->[1];
-    $bb->add_tag_value("enz_f", $outenzarr->[0]);
-  }
-  if ($destvec)
-  {
-    $fprefix .= $destvec->chew5;
-  }
-  if ($inenz)
-  {
-    $fprefix = join(q{}, $bb->get_tag_values("zne_f"));
-    $fprefix = $GD->replace_ambiguous_bases($fprefix);
-  }
-  if ($clonvec)
-  {
-    $fprefix = $clonvec->chew5 . $fprefix;
-    $fsuffix .= $clonvec->chew3;
-  }
-
-  $bb->remove_tag("enz_t") if ($bb->has_tag("enz_t"));
-  $bb->remove_tag("zne_t") if ($bb->has_tag("zne_t"));
-  $bb->remove_tag("zne_f") if ($bb->has_tag("zne_f"));
-  $bb->add_tag_value("BBsuffix", $fsuffix) if ($fsuffix);
-  $bb->add_tag_value("BBprefix", $fprefix) if ($fprefix);
-  return;
-}
-
-sub clean_last
-{
-  my ($bb, $outenzarr, $inenz, $clonvec, $destvec) = @_;
-  my ($lprefix, $lsuffix) = (q{}, q{});
-  if ($outenzarr)
-  {
-    $lsuffix = $GD->complement($outenzarr->[1], 1);
-    $bb->add_tag_value("enz_t", $outenzarr->[0]);
-  }
-  if ($destvec)
-  {
-    $lsuffix = $destvec->chew3 . $lsuffix;
-  }
-  if ($inenz)
-  {
-    my $znet = join(q{}, $bb->get_tag_values("zne_t"));
-    $znet = $GD->replace_ambiguous_bases($znet);
-    $lsuffix .= $znet;
-  }
-  if ($clonvec)
-  {
-    $lprefix = $lprefix . $clonvec->chew5;
-    $lsuffix .= $clonvec->chew3;
-  }
-  $bb->remove_tag("enz_f") if ($bb->has_tag("enz_f"));
-  $bb->remove_tag("zne_f") if ($bb->has_tag("zne_f"));
-  $bb->remove_tag("zne_t") if ($bb->has_tag("zne_t"));
-  $bb->add_tag_value("BBsuffix", $lsuffix) if ($lsuffix);
-  $bb->add_tag_value("BBprefix", $lprefix) if ($lprefix);
-  return;
-}
-
-sub clean_only
-{
-  my ($bb, $outenzarr, $clonvec, $destvec) = @_;
-  
-  my ($prefix, $suffix) = (q{}, q{});
-  if ($outenzarr)
-  {
-    $prefix = $outenzarr->[1];
-    $suffix = $GD->complement($outenzarr->[1], 1);
-    $bb->add_tag_value("enz_t", $outenzarr->[0]);
-    $bb->add_tag_value("enz_f", $outenzarr->[0]);
-  }
-  if ($clonvec)
-  {
-    $suffix .= $clonvec->chew3;
-    $prefix = $clonvec->chew5 . $prefix;
-    if ($destvec)
-    {
-      $prefix .= $destvec->chew5;
-      $suffix = $destvec->chew3 . $suffix;
-    }
-  }
-  $bb->add_tag_value("BBsuffix", $suffix) if ($suffix);
-  $bb->add_tag_value("BBprefix", $prefix) if ($prefix);
-  return;
-}
-
-sub clean_mundane
-{
-  my ($bb, $clonvec) = @_;
-  my ($nprefix, $nsuffix) = (q{}, q{});
-  $bb->remove_tag("zne_t") if ($bb->has_tag("zne_t"));
-  $bb->remove_tag("enz_t") if ($bb->has_tag("enz_t"));
-  if ($clonvec)
-  {
-    $nprefix = $clonvec->chew5;
-    $nsuffix .= $clonvec->chew3;
-  }
-  $bb->remove_tag("zne_f") if ($bb->has_tag("zne_f"));
-  $bb->remove_tag("enz_f") if ($bb->has_tag("enz_f"));
-  
-  $bb->add_tag_value("BBprefix", $nprefix) if ($nprefix);
-  $bb->add_tag_value("BBsuffix", $nsuffix) if ($nsuffix);
-  return;
-}
-
-sub pick_enzyme
-{
-  my ($seq, $enz, $cprepend, $cpostpend) = @_;
-  
-  my $substats = $ORE->restriction_status(-sequence => $seq);
-  my @candidates =  sort {$a->score <=> $b->score}
-                    grep {$substats->{$_->id} == 0}
-                    map {$ORE->enzyme_set->{$_}}
-                    keys %{$substats};
-  my $outenz = $candidates[0];
-  
-  return q{} unless (scalar @candidates);
-  
-  if ($enz)
-  {
-    my @filtered = grep {$enz->temp == $_->temp} @candidates;
-    $outenz = scalar @filtered ? $filtered[0] : $outenz;
-    
-    @filtered = grep {$_->common_buffers($enz, 1)} @filtered;
-    $outenz = scalar @filtered ? $filtered[0] : $outenz;
-    
-    if ($cprepend || $cpostpend)
-    {
-      my @refiltered = ();
-      foreach my $potenz (@filtered)
-      {
-        if ($cprepend)
-        {
-          my $tempseq = $cprepend->chew5 . $potenz->seq;
-          my $positions = $enz->positions($tempseq);
-          push @refiltered, $potenz if (scalar keys %{$positions} == 0);
-        }
-        if ($cpostpend)
-        {
-          my $tempseq = $potenz->seq . $cpostpend->chew3;
-          my $positions = $enz->positions($tempseq);
-          push @refiltered, $potenz if (scalar keys %{$positions} == 0);
-        }
-      }
-      @filtered = @refiltered;
-      $outenz = scalar @filtered ? $filtered[0] : $outenz;
-    }
-  }
-  
-  my $outenzseq = $ORE->replace_ambiguous_bases($outenz->seq);
-  my $rand = $ORE->random_dna(-length => $outenz->inside_cut);
-  $outenzseq = $outenzseq . $rand;
-  my $finalseq = $seq . $outenzseq . $seq;
-  #Check if the randomly generated sequence contains the site twice !
-  my $positions = $GD->positions(-sequence => $outenzseq, -query => $finalseq);
-  while (scalar keys %{$positions} != 1)
-  {
-    $outenzseq = $ORE->replace_ambiguous_bases($outenz->seq);
-    $rand = $ORE->random_dna(-length => $outenz->inside_cut);
-    $outenzseq = $outenzseq . $rand;
-    $finalseq = $seq . $outenzseq . $seq;
-    $positions = $GD->positions(-sequence => $finalseq, -query => $outenz);
-  }
-  
-  return [$outenz->id, $outenzseq];
-}
-
-__END__
-
-=head1 NAME
-
-  GD_Design_Building_Blocks.pl
-
-=head1 VERSION
-
-  Version 5.00
-
-=head1 DESCRIPTION
-
-    The Design_Building_Blocks_JGI script will break each nucleotide sequence it
-    is given into evenly sized Building Blocks, which can be composed of sets of
-    overlapping oligos.
-
-    Output will be tagged with the GDbb suffix. Default values are assumed for
-    every parameter not provided.
-
-    Any sequence provided that is less than one and a half times the target
-    building block size will not be divided.
-
-    Length Overlap: Building Blocks will overlap by a user-defined overlap
-    length parameter. Input sequences must be at least 1000 bp long. An optional
-    parameter will force building blocks to avoid the presence of more than one
-    specified subsequence per building block.
-
-=head1 ARGUMENTS
-
-  Required arguments:
-
-    -i,  --input : a file containing DNA sequences.
-
-  Optional arguments:
-  
-    -o, --output : a path in which to deposit building block sequences
-    -f, --format : default genbank
-    -le, --length: the length in bp of building blocks (default is 1000)
-    -maxle, --maxlength: the maximum length a building block is allowed to be.
-    -minle, --minlength: the minimum length a building block is allowed to be.
-    -la, --lap: the target overlap between building blocks. (default is 40)
-    -c, --cloningvector: A vector in config/vectors that is to serve as the
-        cloning vector
-    -d, --destvector: A vector in config/vectors that is to serve as the
-        destination vector
-    -si, --simple: If a chunk has five or fewer building blocks, don't bother
-        assigning enzymes to them
-    -st, --stitch: The target melting temperature of assembly primers for
-        the assembly of building blocks.
-    --verbose : show deliberations happening
-    -h,  --help: display this message
-
-=head1 COPYRIGHT AND LICENSE
-
-Copyright (c) 2012, GeneDesign developers
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without modification,
-are permitted provided that the following conditions are met:
-
-* Redistributions of source code must retain the above copyright notice, this
-list of conditions and the following disclaimer.
-
-* Redistributions in binary form must reproduce the above copyright notice, this
-list of conditions and the following disclaimer in the documentation and/or
-other materials provided with the distribution.
-
-* The names of Johns Hopkins, the Joint Genome Institute, the Lawrence Berkeley
-National Laboratory, the Department of Energy, and the GeneDesign developers may
-not be used to endorse or promote products derived from this software without
-specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
-ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED. IN NO EVENT SHALL THE DEVELOPERS BE LIABLE FOR ANY DIRECT, INDIRECT,
-INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
-LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
-OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
-ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-=cut

bin/GD_Design_Oligos.pl

-#!/usr/bin/env perl
-
-use Bio::GeneDesign;
-use Getopt::Long;
-use Pod::Usage;
-
-use strict;
-use warnings;
-
-my $VERSION = '5.00';
-my $GDV = "GD_Design_Oligos_JGI_$VERSION";
-my $GDS = "_OL";
-
-local $| = 1;
-
-##Get Arguments
-my %p = ();
-GetOptions (
-      'input=s'       => \$p{INPUT},
-      'output=s'      => \$p{OUTPUT},
-      'olilen:i'      => \$p{OLILEN},
-      'laptemp:i'     => \$p{LAPTEMP},
-      'poolsize:i'    => \$p{POOLSIZE},
-      'maxpoolnum:i'  => \$p{MAXPOOLNUM},
-      'maxolilen:i'   => \$p{MAXOLILEN},
-      'minolilen:i'   => \$p{MINOLILEN},
-      'minlaplen:i'   => \$p{MINLAPLEN},
-      'alternate'     => \$p{ALTERNATE},
-      'tolerance:f'   => \$p{TMTOL},
-      'verbose'       => \$p{VERBOSE},
-      'format=s'      => \$p{FORMAT},
-      'help'          => \$p{HELP}
-);
-
-################################################################################
-################################ SANITY  CHECK #################################
-################################################################################
-pod2usage(-verbose=>99, -sections=>"NAME|VERSION|DESCRIPTION|ARGUMENTS|USAGE")
-  if ($p{HELP});
-
-my $GD = Bio::GeneDesign->new();
-
-#The input file must exist and be a format we care to read.
-die "\n GDERROR: You must supply an input file.\n"
-  if (! $p{INPUT});
-my ($iterator, $filename, $suffix) = $GD->import_seqs($p{INPUT});
-
-$p{FORMAT} = $p{FORMAT} || $suffix || "genbank";
-
-#The output path must exist, and we'll need it to end with a slash
-$p{OUTPUT} = $p{OUTPUT} || ".";
-$p{OUTPUT} .= "/" if (substr($p{OUTPUT}, -1, 1) !~ /[\/]/);
-die "\n GDERROR: $p{OUTPUT} does not exist.\n"
-  if ($p{OUTPUT} && ! -e $p{OUTPUT});
-
-################################################################################
-################################# CONFIGURING ##################################
-################################################################################
-my @seqstowrite;
-my @seqstoignore;
-my @seqstofail;
-
-$p{TMTOL} = $p{TMTOL}  ||  .5;
-my $alg = $p{ALTERNATE}  ? 'JHU' : 'JGI';
-
-my $totlen = 0;
-my $totnum = 0;
-
-################################################################################
-################################## CHOPPING  ###################################
-################################################################################
-while ( my $obj = $iterator->next_seq() )
-{
-  my $chunkname = $obj->id;
-  my $chseq = uc $obj->seq;
-  my $chlen = length($chseq);
-  
-  print "Working on $chunkname...\n";
-  
-  my @seqfeats = $obj->get_SeqFeatures;
-  
-  #Does this sequence have building blocks?
-  my @bbs = grep {$_->primary_tag eq "building_block"} @seqfeats;
-  unless (scalar(@bbs))
-  {
-    print "\t$chunkname has no annotated building blocks... skipping!\n";
-    push @seqstoignore, $obj;
-    push @seqstowrite, $obj;
-    next;
-  }
-  
-  #Does this sequence already have assembly oligos?
-  my @cos = grep {$_->primary_tag eq "assembly_oligo"} @seqfeats;
-  if (scalar(@cos))
-  {
-    print "\t$chunkname already has assembly oligos... skipping!\n";
-    push @seqstoignore, $obj;
-    push @seqstowrite, $obj;
-    next;
-  }
-  
-  #Chop!
-  foreach my $bb (@bbs)
-  {
-    my $bbname = join q{}, $bb->get_tag_values('name');
-    my $oligos = $GD->chop_oligos(
-        -building_block   => $bb,
-        -algorithm        => $alg,
-        -oligo_len_min    => $p{MINOLILEN},
-        -oligo_len_max    => $p{MAXOLILEN},
-        -oligo_len        => $p{OLILEN},
-        -overlap_tm       => $p{LAPTEMP},
-        -overlap_len_min  => $p{MINLAPLEN},
-        -tm_tolerance     => $p{TMTOL},
-        -pool_size        => $p{POOLSIZE},
-        -pool_num_max     => $p{MAXPOOLNUM},
-        -verbose          => $p{VERBOSE}
-    );
-    my $num = scalar(@$oligos);
-    unless ($num > 0)
-    {
-      print "\t$bbname cannot be made into oligos... skipping\n\n";
-      push @seqstofail, $bbname;
-      next;
-    }
-    foreach my $oligo (@$oligos)
-    {
-      $obj->add_SeqFeature($oligo);
-      $totlen += length(join(q{}, $oligo->get_tag_values("sequence")));
-      $totnum ++;
-    }
-  }
-  push @seqstowrite, $obj;
-}
-
-#Maybe report
-
-print "\n\n";
-my $outputfilename = $filename  . $GDS . "." . $p{FORMAT};
-my $reportpath = $p{OUTPUT} . $filename . "_OLreport.txt";
-open (my $REP, '>', $reportpath);
-if (scalar @seqstoignore)
-{
-  print $REP $_->id . " : IGNORED FOR OLIGOS\n" foreach @seqstoignore;
-}
-if (scalar @seqstofail)
-{
-  print $REP $_ . " : FAILED OLIGOS\n" foreach @seqstofail;
-}
-close $REP;
-if (scalar @seqstowrite)
-{
-  $GD->export_seqs(
-          -filename   => $outputfilename,
-          -path       => $p{OUTPUT},
-          -format     => $p{FORMAT},
-          -sequences  => \@seqstowrite);
-}
-print "\n\n";
-
-print "\n";
-print "Wrote " . $p{OUTPUT} . "$outputfilename\n";
-print "Wrote $reportpath\n";
-print "\n";
-print $GD->attitude() . " brought to you by $GDV\n\n";
-
-exit;
-
-__END__
-
-=head1 NAME
-
-  GD_Design_Oligos.pl
-
-=head1 VERSION
-
-  Version 5.00
-
-=head1 DESCRIPTION
-
-    The Design_Oligos script will break each nucleotide sequence it is
-    given into a set of overlapping assembly oligonucleotides. It uses EMBOSS
-    palindrome to check for potential secondary structures.
-
-    Output will be named by the identification of the part, and will be tagged
-    with the _OL suffix. Default values are assumed for every parameter not
-    provided.
-
-    You must provide either a target oligo length or a minimum maximum range.
-    You can provide both.
-
-=head1 ARGUMENTS
-
-  Required arguments:
-
-    -i,  --input : a file containing building block sequences.
-
-
-  Optional arguments:
-    --output : a path in which to deposit oligo sequences.
-    --minlaplen : minimum length of overlap allowed
-    --laptemp : Tm in temperature degrees C of oligo overlaps (default 70)
-    --olilen : the length in bp of assembly oligos (default 180)
-    --minolilen : minimum length of assembly oligos permitted (default 45)
-    --maxolilen : maximum length of assembly oligos permitted (default 200)
-    --tolerance : amount of +/- variation allowed in Tm (default 2.5)
-    --poolsize : oligos will be pooled; GD will make bridging primers between
-                  pools of the specified size
-    --maxpoolnum : the maximum number of pools to create.
-    --alternate : design oligos such that they alternate on opposite strands;
-                the default is not facing, that is, all oligos except the very
-                last are on the same strand
-    --format : default genbank
-    --verbose : show deliberations happening
-    -h,  --help: display this message
-
-=cut

lib/Bio/GeneDesign/BuildingBlocks/enzyme.pm

-=head1 NAME
-
-Bio::GeneDesign::BuildingBlocks::enzyme
-
-=head1 VERSION
-
-Version 5.00
-
-=head1 DESCRIPTION
-
-=head1 AUTHOR
-
-Sarah Richardson <SMRichardson@lbl.gov>
-
-=cut
-
-package Bio::GeneDesign::BuildingBlocks::enzyme;
-require Exporter;
-
-use Bio::GeneDesign::Basic qw(:GD);
-use Bio::GeneDesign::Exceptions;
-use Bio::SeqFeature::Generic;
-use POSIX;
-
-use strict;
-use warnings;
-
-our $VERSION = 5.00;
-
-use base qw(Exporter);
-our @EXPORT_OK = qw(
-  _carve_building_blocks
-);
-our %EXPORT_TAGS =  ( GD => \@EXPORT_OK );
-  
-=head2 _carve_building_blocks()
-
-=cut
-
-sub _carve_building_blocks
-{
-  my ($GD, $seqobj, $tarbblen, $maxbblen, $minbblen, $tarbblap, $stitch, $excludes, $fpexcludes, $tpexcludes, $v) = @_;
-
-  my $minbblap = 25;
-  my $maxbblap = 50;
-  my $chseq = uc $seqobj->seq;
-  my $chlen = length($chseq);
-  $tarbblen  = $chlen if ($chlen < $tarbblen);
-  my $chname = $seqobj->id;
-
-  ## Decide what size bbs to go for and how many
-  ## Adjust the target building block size so as to avoid outliers.
-  my $tarnum = $chlen > $maxbblen 
-              ? ceil($chlen / ($tarbblen - $tarbblap))
-              : 1;
-  my $diff = $chlen - (($tarnum * $tarbblen) - ($tarbblap * ($tarnum - 1)));
-  print "\ttarget: $tarnum bbs size $tarbblen bp rem $diff\n" if ($v);
-  if (abs($diff) >= $tarnum)
-  {
-    my $rem = sprintf "%.0f", $diff / $tarnum;
-    $tarbblen = $tarbblen + $rem;
-    $diff = $diff - ($tarnum * $rem);
-  }
-  print "\t final: $tarnum bbs size $tarbblen bp rem $diff\n" if ($v);
- 
-  my @BBS;
-  
-  if ($tarnum > 1)
-  {
-    ## Decide what restriction enzyme half sites to use
-    $GD->set_restriction_enzymes(-enzyme_set => "blunts");
-    $GD->remove_from_enzyme_set(-enzymes => $excludes) if ($excludes);
-    my $RES = $GD->enzyme_set;
-    my $objstats = $GD->restriction_status(-sequence => $chseq);
-    my @objcandidates = sort {$RES->{$a}->score <=> $RES->{$b}->score}
-                        grep {$objstats->{$_} == 0}
-                        keys %$objstats;
-    print "\tCH CANDIDATES: @objcandidates\n" if ($v);
-    Bio::GeneDesign::Exception::UnBBable->throw("no RE candidates!")
-      unless scalar(@objcandidates);
-    
-    ## Create a suffix tree for each end of a bb overlap
-    ## The 5' half of an RE site goes in ftree, the 3' half goes in ttree
-    my $ftree = Bio::GeneDesign::PrefixTree->new();
-    my $ttree = Bio::GeneDesign::PrefixTree->new();
-    my %seeks;
-    foreach my $cand (@objcandidates)
-    {
-      my $seq = uc $RES->{$cand}->seq;
-      my $qes = _complement($seq, 1);
-      my $half = length($seq) / 2;
-      $seeks{$cand} = {len => $half};
-      my $a = substr($seq, 0, $half);
-      my $b = substr($seq, $half);
-      my $fnucs = _amb_transcription($b);
-      foreach my $fnuc (@$fnucs)
-      {
-        $ftree->add_prefix($fnuc, $cand);
-        $seeks{$cand}->{$fnuc} = $a;
-      }
-      my $tnucs = _amb_transcription($a);
-      foreach my $tnuc (@$tnucs)
-      {
-        $ttree->add_prefix($tnuc, $cand);
-        $seeks{$cand}->{$tnuc} = $b;
-      }
-      if ($seq ne $qes)
-      {
-        my $c = substr($qes, 0, $half);
-        my $d = substr($qes, $half);
-        $fnucs = _amb_transcription($d);
-        foreach my $fnuc (@$fnucs)
-        {
-          $ftree->add_prefix($fnuc, $cand);
-          $seeks{$cand}->{$fnuc} = $c;
-        }
-        $tnucs = _amb_transcription($c);
-        foreach my $tnuc (@$tnucs)
-        {
-          $ttree->add_prefix($tnuc, $cand);
-          $seeks{$cand}->{$tnuc} = $d;
-        }
-      }
-    }
-
-    
-    ## Search the construct sequence for each half RE with the suffix trees
-    ## Then create every sequences that can overlap BBs
-    my $laps = [];
-    my %anns = ();
-    my $id = 1;
-    my $fref = $GD->search_prefix_tree(-tree => $ftree, -sequence => $chseq);
-    my $tref = $GD->search_prefix_tree(-tree => $ttree, -sequence => $chseq);
-    foreach my $fhit (@$fref)
-    {
-      my $lef = $fhit->[1];
-      next if ($lef < $minbblen - $maxbblap);
-      my $enz_f = $fhit->[0];
-      my $zne_f = $seeks{$enz_f}->{$fhit->[2]};
-      foreach my $thit (@$tref)
-      {
-        my $enz_t = $thit->[0];
-        my $rig = $thit->[1] + $seeks{$enz_t}->{len};
-        my $dist = $rig - $lef;
-        next if ($dist < $minbblap || $dist > $maxbblap);
-        next if ($rig > $chlen - ($minbblen - $maxbblap));
-        my $zne_t = $seeks{$enz_t}->{$thit->[2]};
-        my $ol = substr($chseq, $lef, $dist);
-        my $lapobj = Bio::Seq->new( -seq => $ol, -id => $id);
-        $anns{$id} = {seq => $ol, left => $lef + 1, right => $rig,
-                      enz_f => $enz_f, zne_f => $zne_f,
-                      enz_t => $enz_t, zne_t => $zne_t};
-        push @$laps, $lapobj;
-        $id++;
-      }
-    }
-    print "\t", scalar(@$laps), " overlaps found\n" if ($v);
-    Bio::GeneDesign::Exception::UnBBable->throw("No overlaps passed filtering!")
-      unless scalar(@$laps);
-
-    #Filter overlaps for palindromes and mispriming
-    #If vmatch or blast are not available, filter for uniqueness
-    if ($GD->EMBOSS)
-    {
-      $laps = $GD->filter_palindromes(-sequences => $laps);
-      print "\t", scalar(@$laps), " overlaps palindrome free\n" if ($v);
-    }
-    if ($GD->vmatch)
-    {
-      $laps = $GD->filter_vmatch(-sequences => $laps, -parent => $seqobj);
-      print "\t", scalar(@$laps), " overlaps informative\n" if ($v);
-    }
-    elsif ($GD->BLAST)
-    {
-      $laps = $GD->filter_blast(-sequences => $laps, -parent => $seqobj);
-      print "\t", scalar(@$laps), " overlaps informative\n" if ($v);
-    }
-    else
-    {
-      $laps = $GD->filter_uniqueness(-sequences => $laps);
-      print "\t", scalar(@$laps), " overlaps unique\n" if ($v);
-    }
-    my @overlaps = map {$_->id} @$laps;
-    
-    
-    my $f = floor $tarnum / 4;
-    my $t = (4 - ($tarnum % 4)) % 4;
-    $f = $f - ($t - 1) if ($t > 1);
-    my $arr = 4 x $f . 3 x $t;
-    my $frange = 5;
-    my $trange = 4;
-    
-    
-    #Pick overlaps properly spaced so as to obey length parameters.
-    #Reroll if length parameters are violated by a choice.
-    my $rbound = $maxbblen ? $chlen - $maxbblen : $chlen - $tarbblen;
-    my ($lpos, $rpos) = (1, 1);
-    my $lenz = undef;
-    my %fails;
-    my $fail = 0;
-    my $redoflag = 0;
-    my $counter = 1;
-    my $amideadyet = 1;
-    my @chosenlaps;
-    while ($lpos < $rbound && $amideadyet < 1000)
-    {
-      $amideadyet++;
-      my $rtarget = ($lpos + $tarbblen);
-      my @laps = grep {! exists $fails{$anns{$_}->{seq}}} @overlaps;
-      if ($maxbblen)
-      {
-        @laps = grep {$anns{$_}->{right} <= $lpos + $maxbblen} @laps;
-      }
-      if ($minbblen)
-      {
-        @laps = grep {$anns{$_}->{right} >= $lpos + $minbblen} @laps;
-      }
-      if ($lenz)
-      {
-        @laps = grep {$RES->{$anns{$_}->{enz_t}}->temp == $RES->{$lenz}->temp} @laps;
-        @laps = grep {$RES->{$anns{$_}->{enz_t}}->common_buffers($RES->{$lenz}, 1)} @laps;
-      }
-      if (defined $fpexcludes && scalar @chosenlaps < $frange)
-      {
-        @laps = grep {! exists $fpexcludes->{$anns{$_}->{enz_t}} } @laps;
-      }
-      if (defined $tpexcludes && scalar @chosenlaps > $trange)
-      {
-        @laps = grep {! exists $tpexcludes->{$anns{$_}->{enz_f}} } @laps;
-      }
-      @laps = sort { $RES->{$anns{$a}->{enz_f}}->score + $RES->{$anns{$a}->{enz_t}}->score
-                 <=> $RES->{$anns{$b}->{enz_f}}->score + $RES->{$anns{$b}->{enz_t}}->score
-                 || abs($anns{$a}->{right} - $rtarget) <=> abs($anns{$b}->{right} - $rtarget)
-                 || length($anns{$a}->{left}) <=> length($anns{$b}->{left})}
-              @laps;
-      unless (scalar(@laps))
-      {
-        #discarding last choice
-        my $disgrace = pop @chosenlaps;
-        $fails{$anns{$disgrace}->{seq}}++;
-        if (scalar @chosenlaps)
-        {
-          $rpos = $anns{$chosenlaps[-1]}->{right};
-          $lpos = $anns{$chosenlaps[-1]}->{left};
-          $lenz = $anns{$chosenlaps[-1]}->{enz_f};
-        }
-        else
-        {
-          ($rpos, $lpos) = (1, 1);
-          $lenz = undef;
-          @chosenlaps = ();
-          %fails = ();
-          print "\t\tAdjusting from $tarbblen " if ($v);
-          $tarbblen = $counter % 2 == 0
-                      ? $tarbblen + $counter
-                      : $tarbblen - $counter;
-          print "to $tarbblen " if ($v);
-          if (($maxbblen && $tarbblen > $maxbblen)
-           || ($minbblen && $tarbblen < $minbblen))
-          {
-            print "\n\t\tGDWARNING: giving up... OSCILLATED TOO FAR\n" if ($v);
-            $fail++;
-            Bio::GeneDesign::Exception::UnBBable->throw("no solution apparent");
-          }
-          $counter++;
-        }
-      }
-      else
-      {
-        my $lap = $laps[0];
-        my $laplen = length($anns{$lap}->{seq});
-        $lenz =  $anns{$lap}->{enz_f};
-        print "\tChoosing overlap $laplen bp ", $anns{$lap}->{seq}, if ($v);
-        print " at ", $anns{$lap}->{left}, "..", $anns{$lap}->{right} if ($v);
-        print " (", $anns{$lap}->{enz_f}, "..", $anns{$lap}->{enz_t}, ")" if ($v);
-        print " (", $anns{$lap}->{zne_f}, "..", $anns{$lap}->{zne_t}, ")\n" if ($v);
-        push @chosenlaps, $lap;
-        $rpos = $anns{$chosenlaps[-1]}->{right};
-        $lpos = $anns{$chosenlaps[-1]}->{left};
-        $redoflag = 0;
-        if ($lpos >= $rbound)
-        {
-          if (($minbblen && ($chlen - $lpos) + 1 < $minbblen)
-           || ($maxbblen && ($chlen - $lpos) + 1 > $maxbblen))
-          {
-            print "\t\t\tdiscarding last choice\n" if ($v);
-            my $disgrace = pop @chosenlaps;
-            $fails{$anns{$disgrace}->{seq}}++;
-            if (scalar @chosenlaps)
-            {
-              $rpos = $anns{$chosenlaps[-1]}->{right};
-              $lpos = $anns{$chosenlaps[-1]}->{left};
-              $lenz = $anns{$chosenlaps[-1]}->{enz_f};
-            }
-          }
-        }
-      }
-      $lpos = $chlen if ($fail);
-      if ($amideadyet >= 999)
-      {
-        $fail++;
-        print "\n\t\tGDWARNING: Repeated search over 1000 times!\n" if ($v);
-        next;
-      }
-    }
-    Bio::GeneDesign::Exception::UnBBable->throw("stuck in a rut!") if ($fail);
-    
-    
-    #Make the building blocks
-    $lpos = 1;
-    my $bbnum = 1;
-    my $lastenz = undef;
-    my $lastmatch = undef;
-    while (scalar @chosenlaps)
-    {
-      my $lap = shift @chosenlaps;
-      my $bbseq = substr($chseq, $lpos - 1, $anns{$lap}->{right} - $lpos + 1);
-      my $count = $GD->count($bbseq);
-      my $bbname = $chname . "." . $GD->pad($bbnum, 2);
-      my $atts = {name => $bbname, gcp => $count->{GCp}};
-      $atts->{enz_f} = $lastenz if ($lastenz);
-      $atts->{zne_f} = $lastmatch if ($lastmatch);
-      $atts->{enz_t} = $anns{$lap}->{enz_t};
-      $atts->{zne_t} = $anns{$lap}->{zne_t};
-      $lastenz = $anns{$lap}->{enz_f};
-      $lastmatch = $anns{$lap}->{zne_f};
-      push @BBS, Bio::SeqFeature::Generic->new(
-        -primary    => "building_block",
-        -start      => $lpos,
-        -end        => $anns{$lap}->{right},
-        -tag        => $atts
-      );
-      $bbnum++;
-      print "\t\t$bbname ", length($bbseq), " bp\n" if ($v);
-      $lpos = $anns{$lap}->{left};
-    }
-    my $bbseq = substr($chseq, $lpos - 1);
-    my $count = $GD->count($bbseq);
-    my $bbname = $chname . "." . $GD->pad($bbnum, 2);
-    my $atts = {name => $bbname, gcp => $count->{GCp},
-                enz_f => $lastenz, zne_f => $lastmatch};
-    push @BBS, Bio::SeqFeature::Generic->new(
-      -primary    => "building_block",
-      -start      => $lpos,
-      -end        => $chlen,
-      -tag        => $atts
-    );
-    $bbnum++;
-    print "\t\t$bbname ", length($bbseq), " bp\n" if ($v);
-  }
-  else
-  {
-    my $bbseq = $chseq;
-    my $count = $GD->count($bbseq);
-    my $bbname = $chname . ".01";
-    my $atts = {name => $bbname, gcp => $count->{GCp}};
-    push @BBS, Bio::SeqFeature::Generic->new(
-      -primary    => "building_block",
-      -start      => 1,
-      -end        => $chlen,
-      -tag        => $atts
-    );
-  }
-  
-  print "\n\n" if ($v);
-  return \@BBS;
-}
-
-1;
-
-__END__
-
-=head1 COPYRIGHT AND LICENSE
-
-Copyright (c) 2012, GeneDesign developers
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without modification,
-are permitted provided that the following conditions are met:
-
-* Redistributions of source code must retain the above copyright notice, this
-list of conditions and the following disclaimer.
-
-* Redistributions in binary form must reproduce the above copyright notice, this
-list of conditions and the following disclaimer in the documentation and/or
-other materials provided with the distribution.
-
-* The names of Johns Hopkins, the Joint Genome Institute, the Lawrence Berkeley
-National Laboratory, the Department of Energy, and the GeneDesign developers may
-not be used to endorse or promote products derived from this software without
-specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
-ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED. IN NO EVENT SHALL THE DEVELOPERS BE LIABLE FOR ANY DIRECT, INDIRECT,
-INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
-LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
-OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
-ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-=cut

lib/Bio/GeneDesign/BuildingBlocks/overlap.pm

-=head1 NAME
-
-Bio::GeneDesign::BuildingBlocks::overlap
-
-=head1 VERSION
-
-Version 5.00
-
-=head1 DESCRIPTION
-
-=head1 AUTHOR
-
-Sarah Richardson <SMRichardson@lbl.gov>.
-
-=cut
-
-package Bio::GeneDesign::BuildingBlocks::overlap;
-require Exporter;
-
-use Bio::GeneDesign::Exceptions;
-use Bio::SeqFeature::Generic;
-use POSIX;
-
-use strict;
-use warnings;
-
-our $VERSION = 5.00;
-
-use base qw(Exporter);
-our @EXPORT_OK = qw(
-  _carve_building_blocks
-);
-our %EXPORT_TAGS =  ( GD => \@EXPORT_OK );
-  
-=head2 _carve_building_blocks()
-
-=cut
-
-sub _carve_building_blocks
-{
-  my ($GD, $seqobj, $tarbblen, $maxbblen, $bblenmin, $tarbblap, $stitch, $excludes, $fpexcludes, $tpexcludes, $v)
-    = @_;
-
-  my $chseq = $seqobj->seq;
-  my $chlen = length($chseq);
-  $tarbblen  = $chlen if ($chlen < $tarbblen);
-  my @BBS;
-  
-  my $chname = $seqobj->id;
-  my $len = length($seqobj->seq);
-  
-  
-  ## Decide what size bbs to go for and how many
-  ## Adjust the target building block size so as to avoid outliers.
-  my $tarnum = $chlen > $maxbblen 
-              ? ceil($chlen / ($tarbblen - $tarbblap))
-              : 1;
-  my $diff = $chlen - (($tarnum * $tarbblen) - ($tarbblap * ($tarnum - 1)));
-  print "\ttarget: $tarnum bbs size $tarbblen bp rem $diff\n" if ($v);
-  if (abs($diff) >= $tarnum)
-  {
-    my $rem = sprintf "%.0f", $diff / $tarnum;
-    $tarbblen = $tarbblen + $rem;
-    $diff = $diff - ($tarnum * $rem);
-  }
-  print "\t final: $tarnum bbs size $tarbblen bp rem $diff\n" if ($v);
-
-
-  my $laps = [];
-  if ($tarnum > 1)
-  {
-    ## Search the construct sequence for sequences that can overlap BBs
-    my $lef = $tarbblen - (3 * $tarbblap);
-    my $rig = $lef + $tarbblap - 1;
-    my $id = 1;
-    while ($rig <= ($chlen - ($tarbblap + 1)))
-    {
-      my $ol = substr($chseq, $lef, $rig - $lef + 1);
-      my $lapobj = Bio::Seq->new( -seq => $ol, -id => $id);
-      my $lefta   = Bio::Annotation::SimpleValue->new(-value => $lef + 1);
-      $lapobj->add_Annotation('left', $lefta);
-      my $righta  = Bio::Annotation::SimpleValue->new(-value => $rig + 1);
-      $lapobj->add_Annotation('right', $righta);
-      push @$laps, $lapobj;
-      
-      $id++;
-      $lef++;
-      $rig = $lef + $tarbblap - 1;
-    }
-    print "\t", scalar(@$laps), " overlaps found\n" if ($v);
-    
-    #Filter overlaps for palindromes and mispriming
-    #If vmatch or blast are not available, filter for uniqueness
-    if ($GD->EMBOSS)
-    {
-      $laps = $GD->filter_palindromes(-sequences => $laps);
-      print "\t", scalar(@$laps), " overlaps palindrome free\n" if ($v);
-    }
-    if ($GD->vmatch)
-    {
-      $laps = $GD->filter_vmatch(-sequences => $laps, -parent => $seqobj);
-      print "\t", scalar(@$laps), " overlaps informative\n" if ($v);
-    }
-    elsif ($GD->BLAST)
-    {
-      $laps = $GD->filter_blast(-sequences => $laps, -parent => $seqobj);
-      print "\t", scalar(@$laps), " overlaps informative\n" if ($v);
-    }
-    else
-    {
-      $laps = $GD->filter_uniqueness(-sequences => $laps);
-      print "\t", scalar(@$laps), " overlaps unique\n" if ($v);
-    }
-    
-    
-    #Flatten overlap annotations for speed, figure out the average length of the
-    #overlaps that passed filtering
-    my %LAPS;
-    foreach my $lap (@$laps)
-    {
-      my $lefann  = join(q{}, map {$_->value} $lap->get_Annotations('left'));
-      my $rigann = join(q{}, map {$_->value} $lap->get_Annotations('right'));
-    
-      $LAPS{$lap->seq} = [$lap->seq, $lefann, $rigann];
-    }
-    my @overlaps = values %LAPS;
-    
-    
-    
-    #Pick overlaps properly spaced so as to obey length parameters.
-    #Reroll if length parameters are violated by a choice.
-    my $rbound = $maxbblen ? $chlen - $maxbblen : $chlen - $tarbblen;
-    my ($lpos, $rpos) = (1, 1);
-    my %fails;
-    my $fail = 0;
-    my $redoflag = 0;
-    my $counter = 1;
-    my $amideadyet = 1;
-    my @chosenlaps;
-    while ($lpos < $rbound && $amideadyet < 1000)
-    {
-      $amideadyet++;
-      my $rtarget = ($lpos + $tarbblen);
-      my @laps = grep {! exists $fails{$_->[0]}} @overlaps;
-      if ($maxbblen)
-      {
-        @laps = grep {$_->[2] <= $lpos + $maxbblen} @laps;
-      }
-      if ($bblenmin)
-      {
-        @laps = grep {$_->[2] >= $lpos + $bblenmin} @laps;
-      }
-      @laps = sort {abs($a->[2] - $rtarget) <=> abs($b->[2] - $rtarget)
-                || length($a->[0]) <=> length($b->[0])}
-              @laps;
-      unless (scalar(@laps))
-      {
-        #discarding last choice
-        my $disgrace = pop @chosenlaps;
-        $fails{$disgrace->[0]}++;
-        if (scalar @chosenlaps)
-        {
-          $rpos = $chosenlaps[-1]->[2];
-          $lpos = $chosenlaps[-1]->[1];
-        }
-        else
-        {
-          ($rpos, $lpos) = (1, 1);
-          @chosenlaps = ();
-          %fails = ();
-          print "\t\tAdjusting from $tarbblen " if ($v);
-          $tarbblen = $counter % 2 == 0
-                      ? $tarbblen + $counter
-                      : $tarbblen - $counter;
-          print "to $tarbblen " if ($v);
-          if (($maxbblen && $tarbblen > $maxbblen)
-           || ($bblenmin && $tarbblen < $bblenmin))
-          {
-            print "\n\t\tGDWARNING: giving up... OSCILLATED TOO FAR\n" if ($v);
-            $fail++;
-            Bio::GeneDesign::Exception::UnBBable->throw("no solution apparent");
-          }
-          $counter++;
-        }
-      }
-      else
-      {
-        my $lap = $laps[0];
-        print "\tChoosing overlap $lap->[0] at $lap->[1], $lap->[2]\n" if ($v);
-        push @chosenlaps, $lap;
-        $rpos = $chosenlaps[-1]->[2];
-        $lpos = $chosenlaps[-1]->[1];
-        $redoflag = 0;
-        if ($lpos >= $rbound)
-        {
-          if (($bblenmin && ($chlen - $lpos) + 1 < $bblenmin)
-           || ($maxbblen && ($chlen - $lpos) + 1 > $maxbblen))
-          {
-            print "\t\t\tdiscarding last choice\n" if ($v);
-            my $disgrace = pop @chosenlaps;
-            $fails{$disgrace->[0]}++;
-            if (scalar @chosenlaps)
-            {
-              $rpos = $chosenlaps[-1]->[2];
-              $lpos = $chosenlaps[-1]->[1];
-            }
-          }
-        }
-      }
-      $lpos = $chlen if ($fail);
-      if ($amideadyet >= 999)
-      {
-        $fail++;
-        print "GDWARNING: Repeated search over 1000 times!\n" if ($v);
-        next;
-      }
-    }
-    Bio::GeneDesign::Exception::UnBBable->throw("stuck in a rut!") if ($fail);
-    
-    
-    #Make the building blocks
-    $lpos = 1;
-    my $bbnum = 1;
-    while (scalar @chosenlaps)
-    {
-      my $lap = shift @chosenlaps;
-      my $bbseq = substr($chseq, $lpos - 1, $lap->[2] - $lpos + 1);
-      my $count = $GD->count($bbseq);
-      my $bbname = $chname . "." . $GD->pad($bbnum, 2);
-      push @BBS, Bio::SeqFeature::Generic->new(
-        -primary    => "building_block",
-        -start      => $lpos,
-        -end        => $lap->[2],
-        -tag        => {
-          #sequence   => $bbseq,
-          name       => $bbname,
-          gcp        => $count->{'GCp'}
-        }
-      );
-      $bbnum++;
-      print "\t\t$bbname ", length($bbseq), " bp\n" if ($v);
-      $lpos = $lap->[1];
-    }
-    my $bbseq = substr($chseq, $lpos - 1);
-    my $count = $GD->count($bbseq);
-    my $bbname = $chname . "." . $GD->pad($bbnum, 2);
-    push @BBS, Bio::SeqFeature::Generic->new(
-      -primary    => "building_block",
-      -start      => $lpos,
-      -end        => $chlen,
-      -tag        => {
-        #sequence   => $bbseq,
-        name       => $bbname,
-        gcp        => $count->{'GCp'}
-      }
-    );
-    $bbnum++;
-    print "\t\t$bbname ", length($bbseq), " bp\n" if ($v);
-  }
-  else
-  {
-    my $bbseq = $chseq;
-    my $count = $GD->count($bbseq);
-    my $bbname = $chname . ".01";
-    push @BBS, Bio::SeqFeature::Generic->new(
-      -primary    => "building_block",
-      -start      => 1,
-      -end        => $chlen,
-      -tag        => {
-        #sequence   => $bbseq,
-        name       => $bbname,
-        gcp        => $count->{'GCp'}
-      }
-    );
-  }
-  
-  print "\n\n" if ($v);
-  return \@BBS;
-}
-
-1;
-
-__END__
-
-=head1 COPYRIGHT AND LICENSE
-
-Copyright (c) 2012, GeneDesign developers
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without modification,
-are permitted provided that the following conditions are met:
-
-* Redistributions of source code must retain the above copyright notice, this
-list of conditions and the following disclaimer.
-
-* Redistributions in binary form must reproduce the above copyright notice, this
-list of conditions and the following disclaimer in the documentation and/or
-other materials provided with the distribution.
-
-* The names of Johns Hopkins, the Joint Genome Institute, the Lawrence Berkeley
-National Laboratory, the Department of Energy, and the GeneDesign developers may
-not be used to endorse or promote products derived from this software without
-specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
-ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED. IN NO EVENT SHALL THE DEVELOPERS BE LIABLE FOR ANY DIRECT, INDIRECT,
-INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
-LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
-OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
-ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-=cut

lib/Bio/GeneDesign/Oligos/JGI.pm

-=head1 NAME
-
-Bio::GeneDesign::Oligos::JGI
-
-=head1 VERSION
-
-Version 5.00
-
-=head1 DESCRIPTION
-
-=head1 AUTHOR
-
-Sarah Richardson <SMRichardson@lbl.gov>.
-
-=cut
-
-package Bio::GeneDesign::Oligos::JGI;
-require Exporter;
-
-use Bio::GeneDesign::Basic qw(_complement _melt);
-use Bio::GeneDesign::Exceptions;
-use Bio::Seq;
-use Bio::SeqFeature::Generic;
-
-use strict;
-use warnings;
-
-our $VERSION = 5.00;
-
-use base qw(Exporter);
-our @EXPORT_OK = qw(
-  _chop_oligos
-);
-our %EXPORT_TAGS =  ( GD => \@EXPORT_OK);
-  
-=head2 _carve_building_blocks()
-
-  
-=cut
-
-sub _chop_oligos
-{
-  my ($GD, $bb, $olilenmax, $olilenmin, $olilen, $laptemp, $laplenmin,
-      $tmtol, $poolsize, $poolnummax, $v) = @_;
-
-  my @OLIGOS;
-  my $fail = 0;
-  my $univF = 0;
-  my $univR = 0;
-    
-  #Add prefixes and/or suffixes, if necessary; note whether universals are
-  #indicated. create a seqobj for BB seq for filtering overlaps later
-  my $bbname = join(q{}, $bb->get_tag_values("name"));
-  my $bbseq = $bb->seq->seq;
-  if ($bb->has_tag("BBprefix"))
-  {
-    $bbseq = join(q{}, $bb->get_tag_values("BBprefix")) . $bbseq;
-    $univF = 1;
-  }
-  if ($bb->has_tag("BBsuffix"))
-  {
-    $bbseq = $bbseq . join(q{}, $bb->get_tag_values("BBsuffix"));
-    $univR = 1;
-  }
-  $bbseq        =~ s/\s//xg;
-  $bbseq        = uc $bbseq;
-  my $bblen     = length($bbseq);
-  my $bbstart   = $bb->start;
-  my $bbend     = $bb->end;
-  my $bbseqobj  = Bio::Seq->new(-id => $bbname, -seq => $bbseq);
-  print "Chopping $bbname ($bblen bp)\n" if ($v);
-
-    
-  #Find all of the overlaps in the building block that fit the Tm profile
-  #Store the Tm and position with the overlap sequence as a Bio::Seq object
-  my $laps = [];
-  my $lef = 0;
-  my $id = 0;
-  my $rig = $laplenmin || 7;
-  while ($rig <= $bblen)
-  {
-    my $ol = substr($bbseq, $lef, $rig - $lef + 1);
-    my $Tm = $GD->melt(-sequence => $ol);
-    
-    if ($Tm < $laptemp - $tmtol)
-    {
-      $rig++;
-      next;
-    }
-    elsif ($Tm > $laptemp + $tmtol)
-    {
-      $lef++;
-      $rig = $laplenmin  ? $lef + $laplenmin  : $lef + 7;
-      next;
-    }
-
-    my $lapobj = Bio::Seq->new( -seq => $ol, -id => $id);
-    my $TmA     = Bio::Annotation::SimpleValue->new(-value => $Tm);
-    $lapobj->add_Annotation('Tm', $TmA);
-    my $lefta   = Bio::Annotation::SimpleValue->new(-value => $lef + 1);
-    $lapobj->add_Annotation('left', $lefta);
-    my $righta  = Bio::Annotation::SimpleValue->new(-value => $rig + 1);
-    $lapobj->add_Annotation('right', $righta);
-    push @$laps, $lapobj;
-
-    $lef++;
-    $id++;
-    $rig = $laplenmin  ? $lef + $laplenmin  : $lef + 7;
-  }
-  print "\t", scalar(@$laps), " overlaps found\n" if ($v);
-
-
-  #Filter overlaps for palindromes and mispriming
-  #If vmatch or blast are not available, filter for uniqueness
-  if ($GD->EMBOSS)
-  {
-    $laps = $GD->filter_palindromes(-sequences => $laps);
-    print "\t", scalar(@$laps), " overlaps palindrome free\n" if ($v);
-  }
-  if ($GD->vmatch)
-  {
-    $laps = $GD->filter_vmatch(-sequences => $laps, -parent => $bbseqobj);
-    print "\t", scalar(@$laps), " overlaps informative\n" if ($v);
-  }
-  elsif ($GD->BLAST)
-  {
-    $laps = $GD->filter_blast(-sequences => $laps, -parent => $bbseqobj);
-    print "\t", scalar(@$laps), " overlaps informative\n" if ($v);
-  }
-  else
-  {
-    $laps = $GD->filter_uniqueness(-sequences => $laps);
-    print "\t", scalar(@$laps), " overlaps unique\n" if ($v);
-  }
-
-
-  #Flatten overlap annotations for speed, figure out the average length of the
-  #overlaps that passed filtering
-  my %LAPS;
-  foreach my $lap (@$laps)
-  {
-    my $Tm    = join(q{}, map {$_->value} $lap->get_Annotations('Tm'));
-    my $lefann  = join(q{}, map {$_->value} $lap->get_Annotations('left'));
-    my $rigann = join(q{}, map {$_->value} $lap->get_Annotations('right'));
-    
-    $LAPS{$lap->seq} = [$Tm, $lap->seq, $lefann, $rigann];
-  }
-  my @overlaps = values %LAPS;
-  my $avg = 0;
-  $avg += length($_->[1]) foreach @overlaps;
-  $avg = sprintf "%.0f", $avg / scalar(@overlaps);
-
-
-  #Decide what size oligos to go for and how many.
-  #Adjust the target size to avoid length outliers.
-  my $tarlen = $olilen || sprintf "%.0f", ($olilenmin + $olilenmax) / 2;
-  my $tarnum = sprintf "%.0f", $bblen / ($tarlen - $avg);
-  my $tarolilen = sprintf "%.0f", $bblen / $tarnum;
-  my $diff = $bblen - (($tarnum * $tarolilen) - ($avg * ($tarnum - 1)));
-  print "\ttarget: $tarnum oligos size $tarolilen bp rem $diff\n" if ($v);
-  if (abs($diff) >= $tarnum)
-  {
-    my $rem = sprintf "%.0f", $diff / $tarnum;
-    $tarolilen = $tarolilen + $rem;
-    $diff = $diff - ($tarnum * $rem);
-  }
-  print "\t final: $tarnum oligos size $tarolilen bp rem $diff\n" if ($v);
-
-    
-  #Pick overlaps properly spaced so as to obey length parameters.
-  #Reroll if length parameters are violated by a choice.
-  my $rbound = $olilenmax ? $bblen - $olilenmax : $bblen - $tarolilen;
-  my ($lpos, $rpos) = (1, 1);
-  my %fails;
-  my $redoflag = 0;
-  my $counter = 1;
-  my $amideadyet = 1;
-  my @chosenlaps;
-  while ($lpos < $rbound && $amideadyet < 1000)
-  {
-    $amideadyet++;
-    my $rtarget = ($lpos + $tarolilen);
-    my @laps = grep {! exists $fails{$_->[1]}} @overlaps;
-    if ($olilenmax)
-    {
-      @laps = grep {$_->[3] <= $lpos + $olilenmax} @laps;
-    }
-    if ($olilenmin)
-    {
-      @laps = grep {$_->[3] >= $lpos + $olilenmin} @laps;
-    }
-    @laps = sort {abs($a->[3] - $rtarget) <=> abs($b->[3] - $rtarget)
-              || length($a->[1]) <=> length($b->[1])}
-            @laps;
-    unless (scalar(@laps))
-    {
-      #discarding last choice
-      my $disgrace = pop @chosenlaps;
-      $fails{$disgrace->[1]}++;
-      if (scalar @chosenlaps)
-      {
-        $rpos = $chosenlaps[-1]->[3];
-        $lpos = $chosenlaps[-1]->[2];
-      }
-      else
-      {
-        ($rpos, $lpos) = (1, 1);
-        @chosenlaps = ();
-        %fails = ();
-        print "\t\tAdjusting from $tarolilen " if ($v);
-        $tarolilen = $counter % 2 == 0
-                    ? $tarolilen + $counter
-                    : $tarolilen - $counter;
-        print "to $tarolilen " if ($v);
-        if (($olilenmax && $tarolilen > $olilenmax)
-         || ($olilenmin && $tarolilen < $olilenmin))
-        {
-          print "GDWARNING: giving up... OSCILLATED TOO FAR\n" if ($v);
-          $fail++;
-          Bio::GeneDesign::Exception::UnOLable->throw("no solution apparent");
-        }
-        $counter++;
-      }
-    }
-    else
-    {
-      my $lap = $laps[0];
-      print "\tChoosing overlap $lap->[1] at $lap->[2], $lap->[3]\n" if ($v);
-      push @chosenlaps, $lap;
-      $rpos = $chosenlaps[-1]->[3];
-      $lpos = $chosenlaps[-1]->[2];
-      $redoflag = 0;
-      if ($lpos >= $rbound)
-      {
-        if (($olilenmin && ($bblen - $lpos) + 1 < $olilenmin)
-         || ($olilenmax && ($bblen - $lpos) + 1 > $olilenmax))
-        {
-          print "\t\t\tdiscarding last choice\n" if ($v);
-          my $disgrace = pop @chosenlaps;
-          $fails{$disgrace->[1]}++;
-          if (scalar @chosenlaps)
-          {
-            $rpos = $chosenlaps[-1]->[3];
-            $lpos = $chosenlaps[-1]->[2];
-          }
-        }
-      }
-    }
-    $lpos = $bblen if ($fail);
-    if ($amideadyet >= 999)
-    {
-      $fail++;
-      print "GDWARNING: Repeated search over 1000 times!\n" if ($v);
-      next;
-    }
-  }
-  Bio::GeneDesign::Exception::UnOLable->throw("stuck in a rut!") if ($fail);
-
-
-  #Make the oligos
-  my $y = 1;
-  $lpos = 1;
-  my $divisor = $poolnummax || 2;
-  my $count = scalar(@chosenlaps);
-  my $maxol = $poolnummax  ? $poolnummax * ($poolsize - 1) : undef;
-  if ($maxol && ( $count + 1 ) > $maxol)
-  {
-    print "GDWARNING: $bbname has EXCEEDS MAX POOL NUMBER.\n";
-    $divisor++;
-  }
-  my $flag = $poolsize  ? 1 : 0;
-  my $olcount = $count + 1;
-  $olcount++ if ($olcount % 2 != 0);
-  my $div;
-  if ($flag)
-  {
-    $div = ($count+1) <= $poolsize - 1
-          ?  $poolsize
-          : int($olcount / $divisor);
-  }
-  my $pcanum = 1;
-  my $olnum = 1;
-  
-  ##Make the small forward unless the bb had a prefix
-  unless ($univF)
-  {
-    my $frig = $avg / 2;
-    $frig++ while (_melt(substr($bbseq, 0, $frig)) < $laptemp - $tmtol);
-    my $olseq = substr($bbseq, 0, $frig);
-    my $olname = $bbname . ".$pcanum." . $GD->pad($olnum, 2);
-    push @OLIGOS, Bio::SeqFeature::Generic->new(
-      -primary    => "assembly_oligo",
-      -start      => 1,
-      -end        => $frig + 1,
-      -tag        => {
-        sequence   => $olseq,
-        name       => $olname
-      }
-    );
-    $olnum++;
-    print "\t\t$olname ", length($olseq), " bp\n" if ($v);
-  }
-  else
-  {
-    $olnum++;
-    print "\t\tUNIVF\n" if ($v);
-  }
-  my ($olname, $olseq) = ("", "");
-  while (scalar @chosenlaps)
-  {
-    my $lap = shift @chosenlaps;
-    $olseq = substr($bbseq, $lpos-1, $lap->[3] - $lpos + 1);
-    $olname = $bbname . ".$pcanum." . $GD->pad($olnum, 2);
-    push @OLIGOS, Bio::SeqFeature::Generic->new(
-      -primary    => "assembly_oligo",
-      -start      => $lpos,
-      -end        => $lap->[3],
-      -tag        => {
-        sequence   => $olseq,
-        name       => $olname
-      }
-    );
-    $olnum++;
-    print "\t\t$olname ", length($olseq), " bp\n" if ($v);
-    
-    #Make the pool spanners if necessary
-    #Increment the poolnumber
-    if ($flag && $y % $div == 0)
-    {
-      $olname = $bbname . ".$pcanum." . $GD->pad($olnum, 2);
-      push @OLIGOS, Bio::SeqFeature::Generic->new(
-        -primary    => "assembly_oligo",
-        -start      => $lap->[2],
-        -end        => $lap->[3],
-        -strand     => -1,
-        -tag        => {
-          sequence   => _complement($lap->[1], 1),
-          name       => $olname
-        }
-      );
-      print "\t\t$olname ", length($lap->[1]), " bp\n" if ($v);
-      $pcanum++;
-      $olnum = 1;
-      $olname = $bbname . ".$pcanum." . $GD->pad($olnum, 2);
-      push @OLIGOS, Bio::SeqFeature::Generic->new(
-        -primary    => "assembly_oligo",
-        -start      => $lap->[2],
-        -end        => $lap->[3],
-        -tag        => {
-          sequence   => $lap->[1],
-          name       => $olname
-        }
-      );
-      print "\t\t$olname ", length($lap->[1]), " bp\n" if ($v);
-      $olnum++;
-    }
-    
-    $lpos = $lap->[2];
-    $y++;
-  }
-  
-  #Make the lagging oligo
-  $olseq = substr($bbseq, $lpos-1);
-  $olname = $bbname . ".$pcanum." . $GD->pad($olnum, 2);
-  push @OLIGOS, Bio::SeqFeature::Generic->new(
-    -primary    => "assembly_oligo",
-    -start      => $lpos,
-    -end        => $bblen,
-    -tag        => {
-      sequence   => $olseq,
-      name       => $olname
-    }
-  );
-  $olnum++;
-  $bb->add_tag_value("pca_pool_count", $pcanum);
-  print "\t\t$olname ", length($olseq), " bp\n" if ($v);
-
-  ##Make the small reverse unless the bb had a suffix
-  unless ($univR)
-  {
-    my $flef = $avg / 2;
-    $flef++ while (_melt(substr($bbseq, -$flef)) < $laptemp - $tmtol);
-    $olseq = substr($bbseq, -$flef);
-    $olseq = $GD->complement($olseq, 1);
-    $olname = $bbname . ".$pcanum." . $GD->pad($olnum, 2);
-    push @OLIGOS, Bio::SeqFeature::Generic->new(
-      -primary    => "assembly_oligo",
-      -start      => $bblen - $flef,
-      -strand     => -1,
-      -end        => $bblen,
-      -tag        => {
-        sequence   => $olseq,
-        name       => $olname
-      }
-    );
-    $olnum++;
-    print "\t\t$olname ", length($olseq), " bp\n" if ($v);
-  }
-  else
-  {
-    $olnum++;
-    print "\t\tUNIVR\n" if ($v);
-  }
-  
-  
-  #Long attributes that come in from a genbank file will have corruption
-  #remove spaces and reattribute to fix bbs in genbank file ):
-  foreach my $tag ($bb->get_all_tags())
-  {
-    my $value = join(q{}, $bb->get_tag_values($tag));
-    $value =~ s/\s//xg;
-    $bb->remove_tag($tag);
-    $bb->add_tag_value($tag, $value);
-  }
-    
-    
-  #Adjust the start and stop coordinates of the oligos to reflect the building
-  #blocks placement in its parent sequence and the non-parental prefix/suffixes
-  foreach my $ol (@OLIGOS)
-  {
-    my ($start, $end) = ($ol->start, $ol->end);
-    if ($bbstart != 1)
-    {
-      $start  += $bbstart;
-      $end    += $bbstart;
-    }
-    if ($bb->has_tag("BBprefix"))
-    {
-      my $offset = length(join(q{}, $bb->get_tag_values("BBprefix")));
-      $start  -= $offset;
-      $end    -= $offset;
-    }
-    $ol->start($start);
-    $ol->end($end);
-  }
-  
-  print "\n\n" if ($v);
-  return \@OLIGOS;
-}
-
-1;
-
-__END__
-
-=head1 COPYRIGHT AND LICENSE
-
-Copyright (c) 2012, GeneDesign developers
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without modification,
-are permitted provided that the following conditions are met:
-
-* Redistributions of source code must retain the above copyright notice, this
-list of conditions and the following disclaimer.
-
-* Redistributions in binary form must reproduce the above copyright notice, this
-list of conditions and the following disclaimer in the documentation and/or
-other materials provided with the distribution.
-
-* The names of Johns Hopkins, the Joint Genome Institute, the Lawrence Berkeley
-National Laboratory, the Department of Energy, and the GeneDesign developers may
-not be used to endorse or promote products derived from this software without
-specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
-ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED. IN NO EVENT SHALL THE DEVELOPERS BE LIABLE FOR ANY DIRECT, INDIRECT,
-INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
-LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
-OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
-ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-=cut

lib/Bio/GeneDesign/Oligos/JHU.pm

-
-=head1 NAME
-
-Bio::GeneDesign::Oligos::JHU
-
-=head1 VERSION
-
-Version 5.00
-
-=head1 DESCRIPTION
-
-=head1 AUTHOR
-
-Sarah Richardson <SMRichardson@lbl.gov>.
-
-=cut
-
-package Bio::GeneDesign::Oligos::JHU;
-require Exporter;
-
-use Bio::GeneDesign::Basic qw(_complement _melt);
-use Bio::GeneDesign::Exceptions;
-use Bio::Seq;
-use Bio::SeqFeature::Generic;
-
-use strict;
-use warnings;
-
-our $VERSION = 5.00;
-
-use base qw(Exporter);
-our @EXPORT_OK = qw(
-  _chop_oligos
-);
-our %EXPORT_TAGS =  ( GD => \@EXPORT_OK);
-  
-=head2 _carve_building_blocks()
-
-  
-=cut
-
-sub _chop_oligos
-{
-  my ($GD, $bb, $olilenmax, $olilenmin, $olilen, $laptemp, $laplenmin,
-      $tmtol, $poolsize, $poolnummax, $v) = @_;
-
-  my @OLIGOS;
-  my $fail = 0;
-  my $univF = 0;
-  my $univR = 0;
-    
-  #Add prefixes and/or suffixes, if necessary; note whether universals are
-  #indicated. create a seqobj for BB seq for filtering overlaps later
-  my $bbname = join(q{}, $bb->get_tag_values("name"));
-  my $bbseq = $bb->seq->seq;
-  if ($bb->has_tag("BBprefix"))
-  {
-    $bbseq = join(q{}, $bb->get_tag_values("BBprefix")) . $bbseq;
-    $univF = 1;
-  }
-  if ($bb->has_tag("BBsuffix"))
-  {
-    $bbseq = $bbseq . join(q{}, $bb->get_tag_values("BBsuffix"));
-    $univR = 1;
-  }
-  $bbseq        =~ s/\s//xg;
-  $bbseq        = uc $bbseq;
-  my $bblen     = length($bbseq);
-  my $bbstart   = $bb->start;
-  my $bbend     = $bb->end;
-  my $bbseqobj  = Bio::Seq->new(-id => $bbname, -seq => $bbseq);
-  print "Chopping $bbname ($bblen bp)\n" if ($v);
-
-    
-  #Find all of the overlaps in the building block that fit the Tm profile
-  #Store the Tm and position with the overlap sequence as a Bio::Seq object
-  my $laps = [];
-  my $lef = 0;
-  my $id = 0;
-  my $rig = $laplenmin || 7;
-  while ($rig <= $bblen)
-  {
-    my $ol = substr($bbseq, $lef, $rig - $lef + 1);
-    my $lapobj = Bio::Seq->new( -seq => $ol, -id => $id);
-    my $Tm = $GD->melt(-sequence => $lapobj);
-    
-    if ($Tm < $laptemp - $tmtol)
-    {
-      $rig++;
-      next;
-    }
-    elsif ($Tm > $laptemp + $tmtol)
-    {
-      $lef++;
-      $rig = $laplenmin  ? $lef + $laplenmin  : $lef + 7;
-      next;
-    }
-    
-    my $TmA     = Bio::Annotation::SimpleValue->new(-value => $Tm);
-    $lapobj->add_Annotation('Tm', $TmA);
-    my $lefta   = Bio::Annotation::SimpleValue->new(-value => $lef + 1);
-    $lapobj->add_Annotation('left', $lefta);
-    my $righta  = Bio::Annotation::SimpleValue->new(-value => $rig + 1);
-    $lapobj->add_Annotation('right', $righta);
-    push @$laps, $lapobj;
-
-    $lef++;
-    $id++;
-    $rig = $laplenmin  ? $lef + $laplenmin  : $lef + 7;
-  }
-  print "\t", scalar(@$laps), " overlaps found\n" if ($v);
-
-
-  #Filter overlaps for palindromes and mispriming
-  #If vmatch or blast are not available, filter for uniqueness
-  if ($GD->EMBOSS)
-  {
-    $laps = $GD->filter_palindromes(-sequences => $laps);
-    print "\t", scalar(@$laps), " overlaps palindrome free\n" if ($v);
-  }
-  if ($GD->vmatch)
-  {
-    $laps = $GD->filter_vmatch(-sequences => $laps, -parent => $bbseqobj);
-    print "\t", scalar(@$laps), " overlaps informative\n" if ($v);
-  }
-  elsif ($GD->BLAST)
-  {
-    $laps = $GD->filter_blast(-sequences => $laps, -parent => $bbseqobj);
-    print "\t", scalar(@$laps), " overlaps informative\n" if ($v);
-  }
-  else
-  {
-    $laps = $GD->filter_uniqueness(-sequences => $laps);
-    print "\t", scalar(@$laps), " overlaps unique\n" if ($v);
-  }
-
-
-  #Flatten overlap annotations for speed, figure out the average length of the
-  #overlaps that passed filtering
-  my %LAPS;
-  foreach my $lap (@$laps)
-  {
-    my $Tm    = join(q{}, map {$_->value} $lap->get_Annotations('Tm'));
-    my $lefann  = join(q{}, map {$_->value} $lap->get_Annotations('left'));
-    my $rigann = join(q{}, map {$_->value} $lap->get_Annotations('right'));