Mercurial > repos > dereeper > subseq_protein
changeset 3:840657df6623 draft
Deleted selected files
author | dereeper |
---|---|
date | Sun, 16 Sep 2012 13:16:02 -0400 |
parents | 542fcfc6e126 |
children | 621bec1d98ea |
files | extract_proteic_seq_using_coordinates.pl extract_proteic_seq_using_coordinates.xml |
diffstat | 2 files changed, 0 insertions(+), 134 deletions(-) [+] |
line wrap: on
line diff
--- a/extract_proteic_seq_using_coordinates.pl Sun Sep 16 10:20:27 2012 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,59 +0,0 @@ -#!/usr/bin/perl - -use strict; -use Bio::SeqIO; -use Getopt::Long; - -my $usage = qq~Usage:$0 <args> [<opts>] -where <args> are: - -i, --input_fasta <input protein FASTA file> - -o, --output_fasta <output FASTA file> - -c, --coordinates <coordinates file> -~; -$usage .= "\n"; - -my ($input_fasta,$output_fasta,$coordinate_file); - -GetOptions( - "input_fasta=s" => \$input_fasta, - "output_fasta=s" => \$output_fasta, - "coordinates=s" => \$coordinate_file -); - - -die $usage - if ( !$input_fasta || !$output_fasta || !$coordinate_file ); - - -my %coordinates; -open(my $COORD,$coordinate_file); -while(<$COORD>) -{ - my $line = $_; - chomp($line); - my ($id,$start,$end) = split(/\t/,$line); - $coordinates{$id}{"start"} = $start; - $coordinates{$id}{"end"} = $end; -} -close($COORD); - -my $in = Bio::SeqIO->new(-file => "$input_fasta" , '-format' => 'Fasta'); -my $out = Bio::SeqIO->new(-file => ">$output_fasta" , '-format' => 'Fasta'); - -while ( my $seq = $in->next_seq() ) -{ - my $id = $seq -> id(); - my $start = $coordinates{$id}{"start"}; - my $end = $coordinates{$id}{"end"}; - - if ($start && $end) - { - my $subseq = $seq->subseq($start,$end); - my $new_seq = Bio::Seq->new( -seq => $subseq, - -id => $id - ); - $out->write_seq($new_seq); - } - - -}
--- a/extract_proteic_seq_using_coordinates.xml Sun Sep 16 10:20:27 2012 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,75 +0,0 @@ -<tool id="extract_proteic_seq_from_coordinates" name="Extract protein sequences"> - <description>using coordinates</description> - <command interpreter="perl">extract_proteic_seq_using_coordinates.pl -i $input -o $output -c $coordinates</command> - <inputs> - <param format="fasta" name="input" type="data" label="Protein FASTA file"/> - <param format="tabular" name="coordinates" type="data" label="Coordinates for extraction (tabular)"/> - </inputs> - <outputs> - <data format="fasta" name="output" label="Extracted proteins"/> - </outputs> - <help> - -.. class:: infomark - -**Program encapsulated in Galaxy by Southgreen** - -.. class:: infomark - -**extract_proteic_seq_using_coordinates.pl version 1.0, 2012** - ------ - -========== - Authors: -========== - -**Dereeper A** - ------ - -=========== - Overview: -=========== - -Extract sequences from a protein FASTA file using coordinates. - - ------ - -**Example** - -If the input dataset is:: - - >MCCS00001-0.9-1 - MRLQLGLRRLHFLRRRDHCNHHRRGFATKYSGRVVVETDNGRSFAVEVDNPILQTDVRGY - PLPRRDLICKVVSILQSPPSTASSSSFDDLFMDLSDYLETLNVMITPSEASEILKSLKSP - NLALKFFQFCSSEIPDFRHNSFTYNRILLILSKAYLPNRLDLVRNILNEMDQSATGGSIS - TVNILIGIFSDGQEYGGIDELEKCLGLVKKWELSLNCYTYKCLMQGYLRLNDSKKALEVY - REMTRRGYKLDIFAYNMLLDALAKDEK - >MCCS00001-0.1-1 - MRLNSRFGTSSLIHVSLVLLLCFKASGGSAERSSAFFIFGDSTVDPGNNNYIKTTPENQA - NYKPYGQNGFFKEPTGRFSDGRIIVDYIAEYAKLPIIPPYLQPSADYSHGVNFASGGAGI - LSTTNPGVVIDLKTQLEYFHKVQRSLAEKLGTAEAEEIISNAVYFISMGSNDYMGGYLGN - PEMQQLHPPEDYVRMVIGNLTQGIQELYDRGARKFGFLSLCPLGCLPALRVLNPKGHDAG - CFEQASALALAHSNALQAVLPNLELLLPKGFKYCNSNFYDWLLDRINDPTKYGFKEGESA - CCGAGPYRGIFTCGGTKKDPNYELCDNPSDYVWFDSFHPTERIHEQFAKALWDGLSPSVG - PYNLEGLFFNKQTIADVVDNPETQQIF - -Interval file must be in the form:: - - MCCS00001-0.9-1 2 6 - MCCS00001-0.1-1 5 132 - -Extracting sequences returns:: - - >MCCS00001-0.9-1 - RLQLG - >MCCS00001-0.1-1 - SRFGTSSLIHVSLVLLLCFKASGGSAERSSAFFIFGDSTVDPGNNNYIKTTPENQANYKP - YGQNGFFKEPTGRFSDGRIIVDYIAEYAKLPIIPPYLQPSADYSHGVNFASGGAGILSTT - NPGVVIDL - - - </help> -</tool>