changeset 7:b5cbe8152d65 draft

Uploaded
author dsobral
date Sat, 03 Mar 2018 11:47:02 -0500
parents 66d27a0c2b3c
children 22580c27a02d
files breseq.xml breseq_wrapper.py tool-data/genbank_files.loc.sample tool_data_table_conf.xml.sample
diffstat 4 files changed, 4 insertions(+), 239 deletions(-) [+]
line wrap: on
line diff
--- a/breseq.xml	Sat Mar 03 11:46:10 2018 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,160 +0,0 @@
-<tool id="breseq" name="Breseq Variant Report" version="0.31.1" hidden="false">
-    <description>Runs Breseq software on a set of fastq files</description>
-
-    <requirements>
-        <requirement type="package" version="0.31.1">breseq</requirement>
-    </requirements>
-
-    <command interpreter="python">
-        breseq_wrapper.py
-
-        $outfile
-        $outfile.files_path
-
-        --num-processors \${GALAXY_SLOTS:-4}
-
-        #if str($reference.source) == "history":
-	    #for $i, $s in enumerate( $reference.ref_series )
-            	-r $s.own_genome
-            #end for
-        #else:
-            -r $reference.fixed_genome.fields.path
-        #end if
-        #for $i, $s in enumerate( $read_series )
-            ${s.input}
-        #end for
-
-        #if str($polymorphism.selection) == "yes":
-            --polymorphism-prediction
-            --polymorphism-reject-indel-homopolymer-length $polymorphism.indel_homopolymer_length
-            --polymorphism-reject-surrounding-homopolymer-length $polymorphism.surrounding_homopolymer_length
-            --polymorphism-minimum-coverage-each-strand $polymorphism.strand_coverage
-            --polymorphism-bias-cutoff $polymorphism.bias_pvalue
-        #end if
-
-        #if str($junction_reference.selection) == "yes":
-            #for $i, $s in enumerate( $junction_reference.j_series )
-                --junction-only-reference $s.jc_genome
-            #end for
-        #end if
-
-        ${cnv_evidence}
- 
-        -b $minqvalue
-
-    </command>
-
-    <stdio>
-        <exit_code range="1:"  level="fatal"   description="Fatal ERROR exit code greater than 1" />
-    </stdio>
-
-    <inputs>
-        <!-- reference genome -->
-        <conditional name="reference">
-            <param name="source" type="select" label="Will you select a reference genome from your history or use a built-in genome?" >
-                <option value="indexed">Use a built-in genome</option>
-                <option value="history">Use one from the history</option>
-            </param>
-            <when value="indexed">
-                <param name="fixed_genome" type="select" label="Select a reference genome" help="If your genome of interest is not listed, contact the Bioinformatics team">
-                    <options from_data_table="genbank_files">
-                        <filter type="sort_by" column="2"/>
-                        <validator type="no_options" message="No indexes are available for the selected input dataset"/>
-                    </options>
-                </param>
-            </when>
-            <when value="history">
-                <!-- <param name="own_genome" type="data" label="Select the reference genome (fasta or genbank)" /> -->
-                <repeat name="ref_series" title="Reference Genome" min="1">
-                        <param name="own_genome" type="data" label="Select the reference genome (fasta or genbank)" />
-                </repeat>
-            </when>
-        </conditional>
-
-
-        <!-- input Fastq files -->
-        <repeat name="read_series" title="Read File" min="1">
-            <param name="input" type="data" format="fastq" label="Dataset" />
-        </repeat>
-
-
-        <!-- select polymorphism -->
-        <conditional name="polymorphism">
-            <param name="selection" type="select" label="Perform polimorphism detection" help="Do you want to perform polimorphism detection in a population">
-                <option value="no">Do not perform polymorphism detection</option>
-                <option value="yes">Perform polymorphism detection</option>
-            </param>
-            <when value="yes">
-                <param name="indel_homopolymer_length" type="integer" value="0" label="Reject insertion/deletion polymorphisms due to homopolymer repeats with this length or greater" />
-                <param name="surrounding_homopolymer_length" type="integer" value="0" label="Do not predict polymorphic base substitutions that create a homopolymer with this length on each side (with 2 TTATT->TTTTT is rejected)" />
-                <param name="strand_coverage" type="integer" value="3" label="Only accept polymorphisms if coverage in each strand is at least this" />
-                <param name="bias_pvalue" type="float" value="0.05" label="Only accept polymorphisms if pvalue of strand or read quality bias is greater than this" />
-            </when>
-            <when value="no" />
-        </conditional>
-
-        <!-- junction only reference(s) -->
-        <conditional name="junction_reference">
-            <param name="selection" type="select" label="Detect external sequence insertion" help="You can select external sequences to detect insertions">
-                <option value="no">Do not detect external sequence insertion</option>
-                <option value="yes">Detect external sequence insertions</option>
-            </param>
-            <when value="yes">
-        	<repeat name="j_series" title="Junction-only references" min="1">
-            		<param name="jc_genome" type="data" label="Select an external sequence (fasta or genbank)" />
-        	</repeat>
-            </when>
-            <when value="no" />
-        </conditional>
-
-        <!-- Copy Number Evidence -->
-        <param name="cnv_evidence" type="select" label="Copy number variation prediction (experimental option)" help="Do you want to perform copy number variation prediction">
-         <option value="">Do not perform copy number variaion prediction</option>
-         <option value="--cnv">Perform copy number variation prediction (--cnv)</option>
-        </param>
-
-        <param name="minqvalue" type="integer" value="3" label="Minimum Phred Q for a base to be considered" />
-
-
-    </inputs>
-
-    <outputs>
-        <data format="prezip.html" name="outfile" label="Breseq HTML report" />
-    </outputs>
-
-    <help>
-**Breseq**
-
-Breseq_ is a computational pipeline for finding mutations relative to a reference sequence in short-read DNA re-sequencing data for microbial sized genomes.
-
-.. _Breseq: http://barricklab.org/twiki/bin/view/Lab/ToolsBacterialGenomeResequencing
-
-------
-
-**Inputs**
-
-Breseq accepts files in FASTQ format. It does not take pair-end information into account.
-
-You can either run in clonal (consensus) mode or search for polymorphisms in a population.
-
-You can also select an external sequence (eg. a transposon) to detect for insertions or horizontal transfer.
-
-
-------
-
-**Outputs**
-
-Breseq outputs a number of files. These are all condensed in a single zipped file.
-
-It contains output files with the final results, accessible through ``output/index.html``
-
-It also contains data files with accessory data, including:
-
-- ``data/reference.fasta`` (file with reference genome: can be used in eg. IGV browser)
-- ``data/reference.gff`` (file with genomic annotations: can be used in eg. IGV browser)
-- ``data/areference.bam`` (file with read alignments: can be used in eg. IGV browser)
-- ``data/unmatched.*`` (files with read that failed to align: can be used to build an assembly or to eg. blast against NCBI)
-
-    </help>
-</tool>
-
--- a/breseq_wrapper.py	Sat Mar 03 11:46:10 2018 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,73 +0,0 @@
-# -*- coding: utf-8 -*-
-
-from __future__ import print_function
-import sys
-import os
-import shutil
-from subprocess import call
-from glob import glob
-
-"""
-Expected input format:
-    ./thiscript output_file output_folder zipfile (args)
-"""
-
-
-def fail(exitcode):
-    sys.stderr.write("Breseq did not finish successfully\n")
-    sys.stderr.write("Exit code was: {0}\n".format(exitcode))
-    sys.exit(1)
-
-
-def main(args):
-    output, outdir = args[1:3]
-    cmd = ["breseq"] + args[3:]
-
-    returncode = call(cmd)
-
-    if returncode != 0:
-        fail(returncode)
-
-    # the short HTML report
-    summary = "output/summary.html"
-    if not os.path.isfile(summary):
-        fail(returncode)
-
-    # Data will be in the zipfile together will all the content of "output"
-    shutil.move("data", "output/")
-
-    if not os.path.isfile(outdir):
-        os.mkdir(outdir)
-
-    # Zip the whole folder
-    shutil.make_archive("results", "zip", os.path.dirname(outdir), "output/")
-    shutil.move("results.zip", outdir)
-
-    shutil.copy(summary, output)
-
-    # move all the files needed for preview of HTML
-    for file in glob("output/*"):
-        dest = file.split("/", 1)[1]
-        # Folder "data" only needs to go in the zipfile not the HTML preview
-        if file == "data":
-            continue
-
-        shutil.move(file, os.path.join(outdir, dest))
-
-
-def usage():
-    err = sys.stderr
-    err.write("Usage:\n")
-    err.write("  {0} output_filename output_folder [args]\n".format(
-        sys.argv[0]))
-    sys.exit(1)
-
-if __name__ == "__main__":
-    args = sys.argv
-
-    if len(args) <= 3:
-        usage()
-
-    main(args)
-
-# vim: ai sts=4 et sw=4
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/genbank_files.loc.sample	Sat Mar 03 11:47:02 2018 -0500
@@ -0,0 +1,4 @@
+#This file lists the GenBank files available to specific Galaxy tools (e.g. breseq)
+#<unique_build_id>\t<dbkey>\t<display_name>\t<file_path>
+#ecoliMC4100	ecoliMC4100	E. coli MC4100	/afs/igc.gulbenkian.pt/folders/UBI/PROJECTS/GALAXY/genomes/genbank/NC_012759.1_BW2952.gb
+#ecoliMG1655	ecoliMG1655	E. coli MG1655	/afs/igc.gulbenkian.pt/folders/UBI/PROJECTS/GALAXY/genomes/genbank/NC_000913.3_MG1655.gb
--- a/tool_data_table_conf.xml.sample	Sat Mar 03 11:46:10 2018 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,6 +0,0 @@
-<tables>
-    <table name="genbank_files" comment_char="#">
-        <columns>value, dbkey, name, path</columns>
-        <file path="tool-data/genbank_files.loc" />
-    </table>
-</tables>