view extract_proteic_seq_using_coordinates.pl @ 4:621bec1d98ea draft default tip

Uploaded
author dereeper
date Sun, 16 Sep 2012 13:16:45 -0400
parents 60507a6de56c
children
line wrap: on
line source

#!/usr/bin/perl

use strict;
use Bio::SeqIO;
use Getopt::Long;

my $usage = qq~Usage:$0 <args> [<opts>]
where <args> are:
    -i, --input_fasta       <input protein FASTA file>
    -o, --output_fasta      <output FASTA file>
    -c, --coordinates       <coordinates file>
~;
$usage .= "\n";

my ($input_fasta,$output_fasta,$coordinate_file);

GetOptions(
	"input_fasta=s"  => \$input_fasta,
	"output_fasta=s" => \$output_fasta,
	"coordinates=s"  => \$coordinate_file
);


die $usage
  if ( !$input_fasta || !$output_fasta || !$coordinate_file );
  

my %coordinates;
open(my $COORD,$coordinate_file);
while(<$COORD>)
{
	my $line = $_;
	chomp($line);
	my ($id,$start,$end) = split(/\t/,$line);
	$coordinates{$id}{"start"} = $start;
	$coordinates{$id}{"end"} = $end;
}
close($COORD);

my $in  = Bio::SeqIO->new(-file => "$input_fasta" , '-format' => 'Fasta');
my $out = Bio::SeqIO->new(-file => ">$output_fasta" , '-format' => 'Fasta');
    
while ( my $seq = $in->next_seq() ) 
{
	my $id = $seq -> id();
	my $start = $coordinates{$id}{"start"};
	my $end = $coordinates{$id}{"end"};
	
	if ($start && $end)
	{
		my $subseq = $seq->subseq($start,$end);
		my $new_seq = Bio::Seq->new( -seq => $subseq,
                                 -id  => $id
                                 );
		$out->write_seq($new_seq);
	}

	
}