Mercurial > repos > dereeper > subseq_protein
comparison extract_proteic_seq_using_coordinates.pl @ 4:621bec1d98ea draft default tip
Uploaded
author | dereeper |
---|---|
date | Sun, 16 Sep 2012 13:16:45 -0400 |
parents | 60507a6de56c |
children |
comparison
equal
deleted
inserted
replaced
3:840657df6623 | 4:621bec1d98ea |
---|---|
#!/usr/bin/perl

# Extract one sub-sequence per protein from a FASTA file.
#
# Inputs (all mandatory):
#   --input_fasta   protein FASTA file to read
#   --output_fasta  FASTA file to write (overwritten if it exists)
#   --coordinates   tab-separated file with one "<id>\t<start>\t<end>" per line
#
# For every sequence of the input whose id appears in the coordinates file,
# the region start..end is written to the output under the same id. The
# positions are handed unchanged to BioPerl's subseq(), i.e. they are
# 1-based and inclusive. Sequences without coordinates are not written.
# If an id occurs several times in the coordinates file, the last line wins.

use strict;
use warnings;
use Bio::Seq;
use Bio::SeqIO;
use Getopt::Long;

my $usage = qq~Usage:$0 <args> [<opts>]
where <args> are:
-i, --input_fasta <input protein FASTA file>
-o, --output_fasta <output FASTA file>
-c, --coordinates <coordinates file>
~;
$usage .= "\n";

my ($input_fasta, $output_fasta, $coordinate_file);

# GetOptions returns false on unknown or malformed options; stop with the
# usage text instead of silently continuing with a partial configuration.
GetOptions(
    "input_fasta=s"  => \$input_fasta,
    "output_fasta=s" => \$output_fasta,
    "coordinates=s"  => \$coordinate_file
) or die $usage;

die $usage
    if ( !$input_fasta || !$output_fasta || !$coordinate_file );


# Load the coordinates table: id => { start => N, end => M }.
my %coordinates;
open(my $COORD, '<', $coordinate_file)
    or die "Cannot open coordinates file '$coordinate_file': $!\n";
while (my $line = <$COORD>)
{
    # Remove the line terminator, including the CR of Windows-style files,
    # which would otherwise end up glued to the end coordinate.
    $line =~ s/\r?\n\z//;

    my ($id, $start, $end) = split(/\t/, $line);

    # Lines that cannot describe a region (blank lines, header rows, missing
    # or non-positive coordinates) are skipped quietly; the original script
    # also produced no output for zero or empty coordinates.
    next unless defined $id && length $id;
    next unless defined $start && defined $end;
    for my $value ($start, $end)
    {
        $value =~ s/\A\s+//;
        $value =~ s/\s+\z//;
    }
    next unless $start =~ /\A[0-9]+\z/ && $start > 0;
    next unless $end   =~ /\A[0-9]+\z/ && $end > 0;

    $coordinates{$id} = { "start" => $start, "end" => $end };
}
close($COORD);

my $in  = Bio::SeqIO->new(-file => "$input_fasta",   '-format' => 'Fasta');
my $out = Bio::SeqIO->new(-file => ">$output_fasta", '-format' => 'Fasta');

while ( my $seq = $in->next_seq() )
{
    my $id = $seq->id();

    # exists() avoids creating empty hash entries for ids without coordinates.
    next unless defined $id && exists $coordinates{$id};

    my $start = $coordinates{$id}{"start"};
    my $end   = $coordinates{$id}{"end"};

    # subseq() throws on an inverted or out-of-range region; check first so
    # the failure names the offending sequence and the values involved.
    my $length = $seq->length();
    die "Invalid coordinates for '$id': start=$start end=$end (sequence length $length)\n"
        if ( $start > $end || $end > $length );

    my $subseq  = $seq->subseq($start, $end);
    my $new_seq = Bio::Seq->new( -seq => $subseq,
                                 -id  => $id
                               );
    $out->write_seq($new_seq);
}