annotate extract_proteic_seq_using_coordinates.pl @ 4:621bec1d98ea draft default tip

Uploaded
author dereeper
date Sun, 16 Sep 2012 13:16:45 -0400
parents 60507a6de56c
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
60507a6de56c Uploaded
dereeper
parents:
diff changeset
1 #!/usr/bin/perl
60507a6de56c Uploaded
dereeper
parents:
diff changeset
2
60507a6de56c Uploaded
dereeper
parents:
diff changeset
3 use strict;
60507a6de56c Uploaded
dereeper
parents:
diff changeset
4 use Bio::SeqIO;
60507a6de56c Uploaded
dereeper
parents:
diff changeset
5 use Getopt::Long;
60507a6de56c Uploaded
dereeper
parents:
diff changeset
6
60507a6de56c Uploaded
dereeper
parents:
diff changeset
# Usage text printed when option parsing fails or a required option is missing.
my $usage = qq~Usage:$0 <args> [<opts>]
where <args> are:
-i, --input_fasta <input protein FASTA file>
-o, --output_fasta <output FASTA file>
-c, --coordinates <coordinates file>
~;
$usage .= "\n";

# Filled in by GetOptions below; all three are mandatory.
my ($input_fasta, $output_fasta, $coordinate_file);

# The short forms advertised in the usage text are declared as explicit
# aliases instead of relying on Getopt::Long's automatic abbreviation.
# GetOptions returns false on unknown or malformed options (after printing
# its own diagnostic), so stop there rather than run with a partial setup.
GetOptions(
    "input_fasta|i=s"  => \$input_fasta,
    "output_fasta|o=s" => \$output_fasta,
    "coordinates|c=s"  => \$coordinate_file,
) or die $usage;

# Every option is required: show the usage and exit if any is absent/empty.
die $usage
    if ( !$input_fasta || !$output_fasta || !$coordinate_file );
60507a6de56c Uploaded
dereeper
parents:
diff changeset
26
60507a6de56c Uploaded
dereeper
parents:
diff changeset
27
60507a6de56c Uploaded
dereeper
parents:
diff changeset
# Table of regions to extract, keyed by sequence id:
#   $coordinates{$id}{"start"} and $coordinates{$id}{"end"}
# Each record of the coordinates file is one tab-separated line: id, start, end.
# If an id occurs more than once, the last record wins.
my %coordinates;

# Three-argument open so the file name can never be interpreted as an open
# mode, and fail loudly: continuing with an unreadable file would silently
# produce an empty output.
open(my $COORD, '<', $coordinate_file)
    or die "Cannot open coordinates file '$coordinate_file': $!\n";

while (my $line = <$COORD>) {
    # Remove the line terminator, including the CR of Windows-style files,
    # so the last column does not keep a stray "\r".
    $line =~ s/[\r\n]+\z//;

    # A blank line holds no record; skip it instead of storing an empty id.
    next if $line =~ /\A\s*\z/;

    my ($id, $start, $end) = split(/\t/, $line);
    $coordinates{$id}{"start"} = $start;
    $coordinates{$id}{"end"}   = $end;
}
close($COORD);
60507a6de56c Uploaded
dereeper
parents:
diff changeset
39
60507a6de56c Uploaded
dereeper
parents:
diff changeset
# FASTA reader for the input file and FASTA writer for the extracted regions.
my $in  = Bio::SeqIO->new( -file => $input_fasta,     -format => 'Fasta' );
my $out = Bio::SeqIO->new( -file => ">$output_fasta", -format => 'Fasta' );

# Walk the input records; for each one that has both a start and an end in
# %coordinates, write a new record with the same id holding only that region.
RECORD:
while ( my $record = $in->next_seq() ) {
    my $id    = $record->id();
    my $first = $coordinates{$id}{start};
    my $last  = $coordinates{$id}{end};

    # Records with no (or a false) start/end value are left out of the output.
    next RECORD unless $first && $last;

    my $fragment = $record->subseq( $first, $last );
    $out->write_seq( Bio::Seq->new( -id => $id, -seq => $fragment ) );
}