#!/usr/bin/perl

# Extract a sub-region from each sequence of a FASTA file.
#
# Reads a tab-separated coordinates file with one record per line
# (<id> TAB <start> TAB <end>). For every sequence in the input FASTA whose
# id has a record, the start..end subsequence is written to the output FASTA
# under the same id. Sequences without a usable record are skipped.

use strict;
use warnings;

use Bio::Seq;
use Bio::SeqIO;
use Getopt::Long;

my $usage = qq~Usage:$0 <args> [<opts>]
where <args> are:
-i, --input_fasta <input protein FASTA file>
-o, --output_fasta <output FASTA file>
-c, --coordinates <coordinates file>
~;
$usage .= "\n";

my ($input_fasta, $output_fasta, $coordinate_file);

# Short aliases are declared explicitly so the flags advertised in the usage
# text do not depend on Getopt::Long's abbreviation matching. Unknown or
# malformed options abort with the usage message.
GetOptions(
    "input_fasta|i=s"  => \$input_fasta,
    "output_fasta|o=s" => \$output_fasta,
    "coordinates|c=s"  => \$coordinate_file,
) or die $usage;

die $usage
    if ( !$input_fasta || !$output_fasta || !$coordinate_file );

# Load the coordinates table: id => { start => ..., end => ... }.
# If an id occurs more than once, the last record wins.
my %coordinates;
open(my $COORD, '<', $coordinate_file)
    or die "Cannot open coordinates file '$coordinate_file': $!\n";
while ( my $line = <$COORD> )
{
    # Strip the line ending, including a carriage return from CRLF files,
    # so the end coordinate is a clean value.
    $line =~ s/\r?\n\z//;

    # Blank lines carry no record.
    next if $line =~ /\A\s*\z/;

    my ($id, $start, $end) = split(/\t/, $line);
    $coordinates{$id}{"start"} = $start;
    $coordinates{$id}{"end"}   = $end;
}
close($COORD);

my $in  = Bio::SeqIO->new(-file => "$input_fasta",   '-format' => 'Fasta');
my $out = Bio::SeqIO->new(-file => ">$output_fasta", '-format' => 'Fasta');

while ( my $seq = $in->next_seq() )
{
    my $id = $seq->id();

    # Check for the record first so that looking up an unknown id does not
    # autovivify an empty entry in %coordinates.
    next unless defined $id && exists $coordinates{$id};

    my $start = $coordinates{$id}{"start"};
    my $end   = $coordinates{$id}{"end"};

    # A missing, empty or zero start/end is treated as "no coordinates".
    next unless $start && $end;

    # NOTE(review): subseq is expected to raise if the range falls outside
    # the sequence - confirm against the BioPerl documentation.
    my $subseq  = $seq->subseq($start, $end);
    my $new_seq = Bio::Seq->new(
        -seq => $subseq,
        -id  => $id,
    );
    $out->write_seq($new_seq);
}