comparison extract_proteic_seq_using_coordinates.pl @ 4:621bec1d98ea draft default tip

Uploaded
author dereeper
date Sun, 16 Sep 2012 13:16:45 -0400
parents 60507a6de56c
children
comparison
equal deleted inserted replaced
3:840657df6623 4:621bec1d98ea
#!/usr/bin/perl

# Extract a sub-sequence from each record of a FASTA file.
#
# Inputs:
#   --input_fasta   FASTA file to read.
#   --output_fasta  FASTA file to write (overwritten).
#   --coordinates   Tab-separated file, one record per line:
#                       <sequence id> <TAB> <start> <TAB> <end>
#
# For every input sequence whose id appears in the coordinates file with a
# true (non-empty, non-zero) start and end, the region start..end is written
# to the output under the same id. Sequences without usable coordinates are
# silently skipped.

use strict;
use warnings;
use Bio::Seq;    # Bio::Seq->new is called below; load it explicitly.
use Bio::SeqIO;
use Getopt::Long;

my $usage = qq~Usage:$0 <args> [<opts>]
where <args> are:
-i, --input_fasta <input protein FASTA file>
-o, --output_fasta <output FASTA file>
-c, --coordinates <coordinates file>
~;
$usage .= "\n";

my ($input_fasta, $output_fasta, $coordinate_file);

# The short aliases are declared explicitly so that the -i/-o/-c forms shown
# in the usage text do not depend on option auto-abbreviation.
GetOptions(
    "input_fasta|i=s"  => \$input_fasta,
    "output_fasta|o=s" => \$output_fasta,
    "coordinates|c=s"  => \$coordinate_file,
) or die $usage;

die $usage
    if ( !$input_fasta || !$output_fasta || !$coordinate_file );

# Load the coordinates table: id => { start => ..., end => ... }.
# If an id occurs more than once, the last line wins.
my %coordinates;
open( my $COORD, '<', $coordinate_file )
    or die "Cannot open coordinates file '$coordinate_file': $!\n";
while ( my $line = <$COORD> ) {

    # Strip the line terminator, including a stray "\r" from CRLF files,
    # so it does not end up glued to the end coordinate.
    $line =~ s/[\r\n]+\z//;
    next if $line =~ /\A\s*\z/;    # ignore blank lines

    my ( $id, $start, $end ) = split( /\t/, $line );
    next unless defined $id && length $id;

    $coordinates{$id} = { "start" => $start, "end" => $end };
}
close($COORD);

my $in  = Bio::SeqIO->new( -file => $input_fasta,     '-format' => 'Fasta' );
my $out = Bio::SeqIO->new( -file => ">$output_fasta", '-format' => 'Fasta' );

while ( my $seq = $in->next_seq() ) {
    my $id = $seq->id();

    # Look the id up without autovivifying an entry for unlisted sequences.
    my $range = $coordinates{$id} or next;
    my $start = $range->{"start"};
    my $end   = $range->{"end"};

    # Skip records whose start or end is missing, empty or zero.
    next unless ( $start && $end );

    # NOTE(review): subseq presumably raises an exception when the requested
    # range falls outside the sequence, which aborts the run - confirm that
    # this is the desired behaviour for out-of-range coordinates.
    my $subseq  = $seq->subseq( $start, $end );
    my $new_seq = Bio::Seq->new(
        -seq => $subseq,
        -id  => $id,
    );
    $out->write_seq($new_seq);
}