changeset 5:0b8fa8b7f356

Uploaded
author rlegendre
date Mon, 20 Oct 2014 11:07:29 -0400
parents eea5fec46e5c
children 29c9c86e17e1
files metagene_frameshift_analysis.xml
diffstat 1 files changed, 73 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/metagene_frameshift_analysis.xml	Mon Oct 20 11:07:29 2014 -0400
@@ -0,0 +1,73 @@
+<tool id="frameshift_analysis" name="Frame">
+	<description>Analyse Ribo-seq alignments to extract translational ambiguity events</description>
+	<requirements> <!-- Declared dependencies of this tool: one package (samtools) and four Python modules -->
+		<requirement type="package">samtools</requirement>
+		<requirement type="python-module">matplotlib</requirement>
+		<requirement type="python-module">numpy</requirement>
+		<requirement type="python-module">PIL</requirement>
+		<requirement type="python-module">Bio</requirement>
+	</requirements>
+	<command interpreter="python"> 
+		metagene_frameshift_analysis.py --input $reference --bam $mapping --cutoff $cutoff --kmer $kmer --fasta $fasta --dirout $output,$output.files_path --box $boxplot> $log
+
+	</command><!-- Runs metagene_frameshift_analysis.py with the python interpreter. The dirout option receives the html output path and its files_path joined by a comma, the box option receives the boxplot output, and standard output is redirected to the "log" output. -->
+
+	<inputs> <!-- Three input datasets plus two integer settings; kmer is a read length and cutoff a percentage, so both are range-checked -->
+		<param name="reference" type="data" format="gff" label="References Input Annotation File (gff)" />
+		<param name="mapping" type="data" format="bam" label="Bam Input File" />
+		<param name="fasta" type="data" format="fasta" label="Reference in fasta format" />
+		<param name="kmer" type="integer" value="28" min="1" label="Length of the best phasing reads" />
+		<param name="cutoff" type="integer" value="60" min="0" max="100" label="Cutoff for frame proportion in coding phase (default = 60 %)" />
+	</inputs>
+
+	<outputs>
+		<!-- Three result datasets: "log" captures the script's standard output, "output" is the html report, "boxplot" is the png figure -->
+		<data name="log" format="tabular" label="[RP]Stat File on ${on_string}" />
+		<data name="output" format="html" label="[RP]Dual coding results on ${on_string}" />
+		<data name="boxplot" format="png" label="[RP]Boxplot on ${on_string}" />
+	</outputs>
+
+	<help>
+Summary
+-------    
+This tool uses Ribo-seq data (bam file) to extract out-of-frame footprints in all genes from a reference annotation file (GFF3). Subprofiles are plotted for each gene with dual coding events.
+
+
+*- GFF3 file* : It must contain 9 tab-delimited columns : Chromosome, source, feature, start, stop, score, strand, phasing, note. The gene ID is retrieved from the note field using the "ID=" tag.
+
+*- Fasta file* : Reference fasta file. Be careful about the chromosome nomenclature used : it must be compatible with your GFF3 annotation file.
+
+*- BAM file* : It must be sorted. It may contain multiple-mapped or unaligned footprints.
+
+*- Kmer* : Length of the best phasing footprint. You can compute it by running kmer_analysis.
+				
+*- Cutoff* : Integer percentage used to select all genes that have less than this proportion (60 % by default) of footprints in the coding frame.
+
+
+
+.................................................................................................................................................................................................
+
+
+
+Output 
+------- 
+This tool generates 3 output files :
+
+*- html file* : results of the detection and visualization of translational ambiguities.
+
+*- Stat file* : statistics about processed footprints and phasing.
+
+*- Boxplot* : Proportion of footprints in the three frames for all genes.
+
+
+Dependencies
+------------
+
+.. class:: warningmark
+
+This tool depends on Python (>=2.7) and the following packages : numpy 1.8.0, Biopython 1.58, matplotlib 1.3.1. Samtools is used for bam manipulation.
+
+
+	</help>
+</tool>
+