diff extract_proteic_seq_using_coordinates.pl @ 0:60507a6de56c draft

Uploaded
author dereeper
date Sun, 16 Sep 2012 09:26:09 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/extract_proteic_seq_using_coordinates.pl	Sun Sep 16 09:26:09 2012 -0400
@@ -0,0 +1,59 @@
+#!/usr/bin/perl
+
+use strict;
+use Bio::SeqIO;
+use Getopt::Long;
+
+my $usage = qq~Usage:$0 <args> [<opts>]
+where <args> are:
+    -i, --input_fasta       <input protein FASTA file>
+    -o, --output_fasta      <output FASTA file>
+    -c, --coordinates       <coordinates file>
+~;
+$usage .= "\n";
+
+my ($input_fasta,$output_fasta,$coordinate_file);
+
+GetOptions(
+	"input_fasta=s"  => \$input_fasta,
+	"output_fasta=s" => \$output_fasta,
+	"coordinates=s"  => \$coordinate_file
+);
+
+
+die $usage
+  if ( !$input_fasta || !$output_fasta || !$coordinate_file );
+  
+
+my %coordinates;
+open(my $COORD,$coordinate_file);
+while(<$COORD>)
+{
+	my $line = $_;
+	chomp($line);
+	my ($id,$start,$end) = split(/\t/,$line);
+	$coordinates{$id}{"start"} = $start;
+	$coordinates{$id}{"end"} = $end;
+}
+close($COORD);
+
+my $in  = Bio::SeqIO->new(-file => "$input_fasta" , '-format' => 'Fasta');
+my $out = Bio::SeqIO->new(-file => ">$output_fasta" , '-format' => 'Fasta');
+    
+while ( my $seq = $in->next_seq() ) 
+{
+	my $id = $seq -> id();
+	my $start = $coordinates{$id}{"start"};
+	my $end = $coordinates{$id}{"end"};
+	
+	if ($start && $end)
+	{
+		my $subseq = $seq->subseq($start,$end);
+		my $new_seq = Bio::Seq->new( -seq => $subseq,
+                                 -id  => $id
+                                 );
+		$out->write_seq($new_seq);
+	}
+
+	
+}