changeset 3:840657df6623 draft

Deleted selected files
author dereeper
date Sun, 16 Sep 2012 13:16:02 -0400
parents 542fcfc6e126
children 621bec1d98ea
files extract_proteic_seq_using_coordinates.pl extract_proteic_seq_using_coordinates.xml
diffstat 2 files changed, 0 insertions(+), 134 deletions(-) [+]
line wrap: on
line diff
--- a/extract_proteic_seq_using_coordinates.pl	Sun Sep 16 10:20:27 2012 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,59 +0,0 @@
-#!/usr/bin/perl
-
-use strict;
-use Bio::SeqIO;
-use Getopt::Long;
-
-my $usage = qq~Usage:$0 <args> [<opts>]
-where <args> are:
-    -i, --input_fasta       <input protein FASTA file>
-    -o, --output_fasta      <output FASTA file>
-    -c, --coordinates       <coordinates file>
-~;
-$usage .= "\n";
-
-my ($input_fasta,$output_fasta,$coordinate_file);
-
-GetOptions(
-	"input_fasta=s"  => \$input_fasta,
-	"output_fasta=s" => \$output_fasta,
-	"coordinates=s"  => \$coordinate_file
-);
-
-
-die $usage
-  if ( !$input_fasta || !$output_fasta || !$coordinate_file );
-  
-
-my %coordinates;
-open(my $COORD,$coordinate_file);
-while(<$COORD>)
-{
-	my $line = $_;
-	chomp($line);
-	my ($id,$start,$end) = split(/\t/,$line);
-	$coordinates{$id}{"start"} = $start;
-	$coordinates{$id}{"end"} = $end;
-}
-close($COORD);
-
-my $in  = Bio::SeqIO->new(-file => "$input_fasta" , '-format' => 'Fasta');
-my $out = Bio::SeqIO->new(-file => ">$output_fasta" , '-format' => 'Fasta');
-    
-while ( my $seq = $in->next_seq() ) 
-{
-	my $id = $seq -> id();
-	my $start = $coordinates{$id}{"start"};
-	my $end = $coordinates{$id}{"end"};
-	
-	if ($start && $end)
-	{
-		my $subseq = $seq->subseq($start,$end);
-		my $new_seq = Bio::Seq->new( -seq => $subseq,
-                                 -id  => $id
-                                 );
-		$out->write_seq($new_seq);
-	}
-
-	
-}
--- a/extract_proteic_seq_using_coordinates.xml	Sun Sep 16 10:20:27 2012 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,75 +0,0 @@
-<tool id="extract_proteic_seq_from_coordinates" name="Extract protein sequences">
-	<description>using coordinates</description>
-	<command interpreter="perl">extract_proteic_seq_using_coordinates.pl -i $input -o $output -c $coordinates</command>
-	<inputs>
-		<param format="fasta" name="input" type="data" label="Protein FASTA file"/>
-		<param format="tabular" name="coordinates" type="data" label="Coordinates for extraction (tabular)"/>
-	</inputs>
-	<outputs>
-		<data format="fasta" name="output" label="Extracted proteins"/>
-	</outputs>
-	<help>
-	
-.. class:: infomark
-
-**Program encapsulated in Galaxy by Southgreen**
-
-.. class:: infomark
-
-**extract_proteic_seq_using_coordinates.pl version 1.0, 2012**
-
------
-
-==========
- Authors:
-==========
-
-**Dereeper A**
-
------
-
-===========
- Overview:
-===========
-
-Extract sequences from a protein FASTA file using coordinates.  
-
-
------
-
-**Example**
-
-If the input dataset is::
-
-	>MCCS00001-0.9-1
-	MRLQLGLRRLHFLRRRDHCNHHRRGFATKYSGRVVVETDNGRSFAVEVDNPILQTDVRGY
-	PLPRRDLICKVVSILQSPPSTASSSSFDDLFMDLSDYLETLNVMITPSEASEILKSLKSP
-	NLALKFFQFCSSEIPDFRHNSFTYNRILLILSKAYLPNRLDLVRNILNEMDQSATGGSIS
-	TVNILIGIFSDGQEYGGIDELEKCLGLVKKWELSLNCYTYKCLMQGYLRLNDSKKALEVY
-	REMTRRGYKLDIFAYNMLLDALAKDEK
-	>MCCS00001-0.1-1
-	MRLNSRFGTSSLIHVSLVLLLCFKASGGSAERSSAFFIFGDSTVDPGNNNYIKTTPENQA
-	NYKPYGQNGFFKEPTGRFSDGRIIVDYIAEYAKLPIIPPYLQPSADYSHGVNFASGGAGI
-	LSTTNPGVVIDLKTQLEYFHKVQRSLAEKLGTAEAEEIISNAVYFISMGSNDYMGGYLGN
-	PEMQQLHPPEDYVRMVIGNLTQGIQELYDRGARKFGFLSLCPLGCLPALRVLNPKGHDAG
-	CFEQASALALAHSNALQAVLPNLELLLPKGFKYCNSNFYDWLLDRINDPTKYGFKEGESA
-	CCGAGPYRGIFTCGGTKKDPNYELCDNPSDYVWFDSFHPTERIHEQFAKALWDGLSPSVG
-	PYNLEGLFFNKQTIADVVDNPETQQIF
-
-Interval file must be in the form::
-
-	MCCS00001-0.9-1	2	6
-	MCCS00001-0.1-1	5	132
-
-Extracting sequences returns::
-
-	>MCCS00001-0.9-1
-	RLQLG
-	>MCCS00001-0.1-1
-	SRFGTSSLIHVSLVLLLCFKASGGSAERSSAFFIFGDSTVDPGNNNYIKTTPENQANYKP
-	YGQNGFFKEPTGRFSDGRIIVDYIAEYAKLPIIPPYLQPSADYSHGVNFASGGAGILSTT
-	NPGVVIDL
-
-
-	</help>
-</tool>