Mercurial > repos > dereeper > subseq_protein
comparison extract_proteic_seq_using_coordinates.pl @ 4:621bec1d98ea draft default tip
Uploaded
| author | dereeper |
|---|---|
| date | Sun, 16 Sep 2012 13:16:45 -0400 |
| parents | 60507a6de56c |
| children |
comparison
equal
deleted
inserted
replaced
| 3:840657df6623 | 4:621bec1d98ea |
|---|---|
#!/usr/bin/perl

# Extract one sub-sequence per record of a protein FASTA file.
#
# The coordinates file is tab-delimited, one record per line:
#     <sequence id> <TAB> <start> <TAB> <end>
# For every FASTA record whose id has a start and an end in that file, the
# region start..end is written to the output FASTA file under the same id.
# Records without usable coordinates are left out of the output.

use strict;
use warnings;
use Bio::Seq;       # loaded explicitly: Bio::Seq->new is called below
use Bio::SeqIO;
use Getopt::Long;

my $usage = qq~Usage:$0 <args> [<opts>]
where <args> are:
-i, --input_fasta <input protein FASTA file>
-o, --output_fasta <output FASTA file>
-c, --coordinates <coordinates file>
~;
$usage .= "\n";

my ($input_fasta, $output_fasta, $coordinate_file);

# The short forms advertised in the usage text are declared explicitly so
# they do not depend on Getopt::Long's automatic abbreviation. Unparsable
# options abort with the usage message instead of being ignored.
GetOptions(
    "input_fasta|i=s"  => \$input_fasta,
    "output_fasta|o=s" => \$output_fasta,
    "coordinates|c=s"  => \$coordinate_file,
) or die $usage;

die $usage
    if ( !$input_fasta || !$output_fasta || !$coordinate_file );

# Load the coordinates table: id => { start => ..., end => ... }.
# If an id appears several times, the last line wins.
my %coordinates;
open( my $COORD, '<', $coordinate_file )
    or die "Cannot open coordinates file '$coordinate_file': $!\n";
while ( my $line = <$COORD> ) {

    # Strip the line ending, including the \r of CRLF files, which would
    # otherwise stay glued to the end coordinate.
    $line =~ s/\r?\n\z//;

    my ( $id, $start, $end ) = split( /\t/, $line );

    # Blank lines carry no id; skip them rather than store an undef key.
    next unless defined $id && length $id;

    $coordinates{$id}{"start"} = $start;
    $coordinates{$id}{"end"}   = $end;
}
close($COORD);

my $in  = Bio::SeqIO->new( -file => "$input_fasta",   '-format' => 'Fasta' );
my $out = Bio::SeqIO->new( -file => ">$output_fasta", '-format' => 'Fasta' );

while ( my $seq = $in->next_seq() ) {
    my $id = $seq->id();

    # Test for existence first so the lookup does not autovivify an empty
    # entry for every sequence that is absent from the coordinates file.
    next unless exists $coordinates{$id};

    my $start = $coordinates{$id}{"start"};
    my $end   = $coordinates{$id}{"end"};

    # A missing, empty or zero coordinate means "do not extract".
    next unless $start && $end;

    # NOTE(review): coordinates are passed to subseq() unvalidated; an
    # out-of-range or reversed pair is presumably rejected by BioPerl with
    # an exception that stops the run - confirm this is the desired outcome.
    my $subseq  = $seq->subseq( $start, $end );
    my $new_seq = Bio::Seq->new(
        -seq => $subseq,
        -id  => $id,
    );
    $out->write_seq($new_seq);
}
