# HG changeset patch # User dereeper # Date 1347802897 14400 # Node ID 5547f1dfd12e0c9566f80bb10dba4417c2cb27d7 # Parent 60507a6de56cef173644dc14ab86c50f26b92f4b Deleted selected files diff -r 60507a6de56c -r 5547f1dfd12e extract_proteic_seq_using_coordinates.pl --- a/extract_proteic_seq_using_coordinates.pl Sun Sep 16 09:26:09 2012 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,59 +0,0 @@ -#!/usr/bin/perl - -use strict; -use Bio::SeqIO; -use Getopt::Long; - -my $usage = qq~Usage:$0 [] -where are: - -i, --input_fasta - -o, --output_fasta - -c, --coordinates -~; -$usage .= "\n"; - -my ($input_fasta,$output_fasta,$coordinate_file); - -GetOptions( - "input_fasta=s" => \$input_fasta, - "output_fasta=s" => \$output_fasta, - "coordinates=s" => \$coordinate_file -); - - -die $usage - if ( !$input_fasta || !$output_fasta || !$coordinate_file ); - - -my %coordinates; -open(my $COORD,$coordinate_file); -while(<$COORD>) -{ - my $line = $_; - chomp($line); - my ($id,$start,$end) = split(/\t/,$line); - $coordinates{$id}{"start"} = $start; - $coordinates{$id}{"end"} = $end; -} -close($COORD); - -my $in = Bio::SeqIO->new(-file => "$input_fasta" , '-format' => 'Fasta'); -my $out = Bio::SeqIO->new(-file => ">$output_fasta" , '-format' => 'Fasta'); - -while ( my $seq = $in->next_seq() ) -{ - my $id = $seq -> id(); - my $start = $coordinates{$id}{"start"}; - my $end = $coordinates{$id}{"end"}; - - if ($start && $end) - { - my $subseq = $seq->subseq($start,$end); - my $new_seq = Bio::Seq->new( -seq => $subseq, - -id => $id - ); - $out->write_seq($new_seq); - } - - -} diff -r 60507a6de56c -r 5547f1dfd12e extract_proteic_seq_using_coordinates.sh --- a/extract_proteic_seq_using_coordinates.sh Sun Sep 16 09:26:09 2012 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,5 +0,0 @@ -#!/bin/bash -input=$1 -output=$2 -coordinates=$3 -$HOME/galaxy_dist/tools/extract/extract_proteic_seq_using_coordinates.pl -i $input -o $output -c $coordinates; diff -r 60507a6de56c -r 5547f1dfd12e extract_proteic_seq_using_coordinates.xml --- a/extract_proteic_seq_using_coordinates.xml Sun Sep 16 09:26:09 2012 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,75 +0,0 @@ - - using coordinates - ./extract_proteic_seq_using_coordinates.sh $input $output $coordinates - - - - - - - - - -.. class:: infomark - -**Program encapsulated in Galaxy by Southgreen** - -.. class:: infomark - -**extract_proteic_seq_using_coordinates.pl version 1.0, 2012** - ------ - -========== - Authors: -========== - -**Dereeper A** - ------ - -=========== - Overview: -=========== - -Extract sequences from a protein FASTA file using coordinates. - - ------ - -**Example** - -If the input dataset is:: - - >MCCS00001-0.9-1 - MRLQLGLRRLHFLRRRDHCNHHRRGFATKYSGRVVVETDNGRSFAVEVDNPILQTDVRGY - PLPRRDLICKVVSILQSPPSTASSSSFDDLFMDLSDYLETLNVMITPSEASEILKSLKSP - NLALKFFQFCSSEIPDFRHNSFTYNRILLILSKAYLPNRLDLVRNILNEMDQSATGGSIS - TVNILIGIFSDGQEYGGIDELEKCLGLVKKWELSLNCYTYKCLMQGYLRLNDSKKALEVY - REMTRRGYKLDIFAYNMLLDALAKDEK - >MCCS00001-0.1-1 - MRLNSRFGTSSLIHVSLVLLLCFKASGGSAERSSAFFIFGDSTVDPGNNNYIKTTPENQA - NYKPYGQNGFFKEPTGRFSDGRIIVDYIAEYAKLPIIPPYLQPSADYSHGVNFASGGAGI - LSTTNPGVVIDLKTQLEYFHKVQRSLAEKLGTAEAEEIISNAVYFISMGSNDYMGGYLGN - PEMQQLHPPEDYVRMVIGNLTQGIQELYDRGARKFGFLSLCPLGCLPALRVLNPKGHDAG - CFEQASALALAHSNALQAVLPNLELLLPKGFKYCNSNFYDWLLDRINDPTKYGFKEGESA - CCGAGPYRGIFTCGGTKKDPNYELCDNPSDYVWFDSFHPTERIHEQFAKALWDGLSPSVG - PYNLEGLFFNKQTIADVVDNPETQQIF - -Interval file must be in the form:: - - MCCS00001-0.9-1 2 6 - MCCS00001-0.1-1 5 132 - -Extracting sequences returns:: - - >MCCS00001-0.9-1 - RLQLG - >MCCS00001-0.1-1 - SRFGTSSLIHVSLVLLLCFKASGGSAERSSAFFIFGDSTVDPGNNNYIKTTPENQANYKP - YGQNGFFKEPTGRFSDGRIIVDYIAEYAKLPIIPPYLQPSADYSHGVNFASGGAGILSTT - NPGVVIDL - - - -