Mercurial > repos > dereeper > subseq_protein
view extract_proteic_seq_using_coordinates.pl @ 0:60507a6de56c draft
Uploaded
author | dereeper |
---|---|
date | Sun, 16 Sep 2012 09:26:09 -0400 |
parents | |
children |
line wrap: on
line source
#!/usr/bin/perl use strict; use Bio::SeqIO; use Getopt::Long; my $usage = qq~Usage:$0 <args> [<opts>] where <args> are: -i, --input_fasta <input protein FASTA file> -o, --output_fasta <output FASTA file> -c, --coordinates <coordinates file> ~; $usage .= "\n"; my ($input_fasta,$output_fasta,$coordinate_file); GetOptions( "input_fasta=s" => \$input_fasta, "output_fasta=s" => \$output_fasta, "coordinates=s" => \$coordinate_file ); die $usage if ( !$input_fasta || !$output_fasta || !$coordinate_file ); my %coordinates; open(my $COORD,$coordinate_file); while(<$COORD>) { my $line = $_; chomp($line); my ($id,$start,$end) = split(/\t/,$line); $coordinates{$id}{"start"} = $start; $coordinates{$id}{"end"} = $end; } close($COORD); my $in = Bio::SeqIO->new(-file => "$input_fasta" , '-format' => 'Fasta'); my $out = Bio::SeqIO->new(-file => ">$output_fasta" , '-format' => 'Fasta'); while ( my $seq = $in->next_seq() ) { my $id = $seq -> id(); my $start = $coordinates{$id}{"start"}; my $end = $coordinates{$id}{"end"}; if ($start && $end) { my $subseq = $seq->subseq($start,$end); my $new_seq = Bio::Seq->new( -seq => $subseq, -id => $id ); $out->write_seq($new_seq); } }