Mercurial > repos > dereeper > subseq_protein
diff extract_proteic_seq_using_coordinates.pl @ 0:60507a6de56c draft
Uploaded
author | dereeper |
---|---|
date | Sun, 16 Sep 2012 09:26:09 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/extract_proteic_seq_using_coordinates.pl Sun Sep 16 09:26:09 2012 -0400 @@ -0,0 +1,59 @@ +#!/usr/bin/perl + +use strict; +use Bio::SeqIO; +use Getopt::Long; + +my $usage = qq~Usage:$0 <args> [<opts>] +where <args> are: + -i, --input_fasta <input protein FASTA file> + -o, --output_fasta <output FASTA file> + -c, --coordinates <coordinates file> +~; +$usage .= "\n"; + +my ($input_fasta,$output_fasta,$coordinate_file); + +GetOptions( + "input_fasta=s" => \$input_fasta, + "output_fasta=s" => \$output_fasta, + "coordinates=s" => \$coordinate_file +); + + +die $usage + if ( !$input_fasta || !$output_fasta || !$coordinate_file ); + + +my %coordinates; +open(my $COORD,$coordinate_file); +while(<$COORD>) +{ + my $line = $_; + chomp($line); + my ($id,$start,$end) = split(/\t/,$line); + $coordinates{$id}{"start"} = $start; + $coordinates{$id}{"end"} = $end; +} +close($COORD); + +my $in = Bio::SeqIO->new(-file => "$input_fasta" , '-format' => 'Fasta'); +my $out = Bio::SeqIO->new(-file => ">$output_fasta" , '-format' => 'Fasta'); + +while ( my $seq = $in->next_seq() ) +{ + my $id = $seq -> id(); + my $start = $coordinates{$id}{"start"}; + my $end = $coordinates{$id}{"end"}; + + if ($start && $end) + { + my $subseq = $seq->subseq($start,$end); + my $new_seq = Bio::Seq->new( -seq => $subseq, + -id => $id + ); + $out->write_seq($new_seq); + } + + +}