Mercurial > repos > dsobral > breseq
changeset 7:b5cbe8152d65 draft
Uploaded
author | dsobral |
---|---|
date | Sat, 03 Mar 2018 11:47:02 -0500 |
parents | 66d27a0c2b3c |
children | 22580c27a02d |
files | breseq.xml breseq_wrapper.py tool-data/genbank_files.loc.sample tool_data_table_conf.xml.sample |
diffstat | 4 files changed, 4 insertions(+), 239 deletions(-) [+] |
line wrap: on
line diff
<!-- breseq.xml: Galaxy tool definition for breseq (the version removed by changeset 7:b5cbe8152d65). -->
<tool id="breseq" name="Breseq Variant Report" version="0.31.1" hidden="false">
    <description>Runs Breseq software on a set of fastq files</description>

    <requirements>
        <requirement type="package" version="0.31.1">breseq</requirement>
    </requirements>

    <!--
        The wrapper takes the HTML report path and the extra-files folder as
        its first two arguments; everything after that is handed to breseq
        unchanged.
    -->
    <command interpreter="python">
        breseq_wrapper.py

        $outfile
        $outfile.files_path

        --num-processors \${GALAXY_SLOTS:-4}

        #if str($reference.source) == "history":
            #for $s in $reference.ref_series
                -r $s.own_genome
            #end for
        #else:
            -r $reference.fixed_genome.fields.path
        #end if
        #for $s in $read_series
            ${s.input}
        #end for

        #if str($polymorphism.selection) == "yes":
            --polymorphism-prediction
            --polymorphism-reject-indel-homopolymer-length $polymorphism.indel_homopolymer_length
            --polymorphism-reject-surrounding-homopolymer-length $polymorphism.surrounding_homopolymer_length
            --polymorphism-minimum-coverage-each-strand $polymorphism.strand_coverage
            --polymorphism-bias-cutoff $polymorphism.bias_pvalue
        #end if

        #if str($junction_reference.selection) == "yes":
            #for $s in $junction_reference.j_series
                --junction-only-reference $s.jc_genome
            #end for
        #end if

        ${cnv_evidence}

        -b $minqvalue

    </command>

    <stdio>
        <!-- range "1:" matches every non-zero exit code, including 1 itself -->
        <exit_code range="1:" level="fatal" description="Fatal ERROR: non-zero exit code" />
    </stdio>

    <inputs>
        <!-- reference genome -->
        <conditional name="reference">
            <param name="source" type="select" label="Will you select a reference genome from your history or use a built-in genome?">
                <option value="indexed">Use a built-in genome</option>
                <option value="history">Use one from the history</option>
            </param>
            <when value="indexed">
                <param name="fixed_genome" type="select" label="Select a reference genome" help="If your genome of interest is not listed, contact the Bioinformatics team">
                    <options from_data_table="genbank_files">
                        <filter type="sort_by" column="2"/>
                        <validator type="no_options" message="No indexes are available for the selected input dataset"/>
                    </options>
                </param>
            </when>
            <when value="history">
                <repeat name="ref_series" title="Reference Genome" min="1">
                    <param name="own_genome" type="data" label="Select the reference genome (fasta or genbank)" />
                </repeat>
            </when>
        </conditional>

        <!-- input Fastq files -->
        <repeat name="read_series" title="Read File" min="1">
            <param name="input" type="data" format="fastq" label="Dataset" />
        </repeat>

        <!-- select polymorphism -->
        <conditional name="polymorphism">
            <param name="selection" type="select" label="Perform polymorphism detection" help="Do you want to perform polymorphism detection in a population">
                <option value="no">Do not perform polymorphism detection</option>
                <option value="yes">Perform polymorphism detection</option>
            </param>
            <when value="yes">
                <param name="indel_homopolymer_length" type="integer" value="0" label="Reject insertion/deletion polymorphisms due to homopolymer repeats with this length or greater" />
                <param name="surrounding_homopolymer_length" type="integer" value="0" label="Do not predict polymorphic base substitutions that create a homopolymer with this length on each side (with 2 TTATT->TTTTT is rejected)" />
                <param name="strand_coverage" type="integer" value="3" label="Only accept polymorphisms if coverage in each strand is at least this" />
                <param name="bias_pvalue" type="float" value="0.05" label="Only accept polymorphisms if pvalue of strand or read quality bias is greater than this" />
            </when>
            <when value="no" />
        </conditional>

        <!-- junction only reference(s) -->
        <conditional name="junction_reference">
            <param name="selection" type="select" label="Detect external sequence insertion" help="You can select external sequences to detect insertions">
                <option value="no">Do not detect external sequence insertion</option>
                <option value="yes">Detect external sequence insertions</option>
            </param>
            <when value="yes">
                <repeat name="j_series" title="Junction-only references" min="1">
                    <param name="jc_genome" type="data" label="Select an external sequence (fasta or genbank)" />
                </repeat>
            </when>
            <when value="no" />
        </conditional>

        <!-- Copy Number Evidence: the option value is inserted verbatim into the command line -->
        <param name="cnv_evidence" type="select" label="Copy number variation prediction (experimental option)" help="Do you want to perform copy number variation prediction">
            <option value="">Do not perform copy number variation prediction</option>
            <option value="--cnv">Perform copy number variation prediction (--cnv)</option>
        </param>

        <param name="minqvalue" type="integer" value="3" label="Minimum Phred Q for a base to be considered" />
    </inputs>

    <outputs>
        <data format="prezip.html" name="outfile" label="Breseq HTML report" />
    </outputs>

    <help>
**Breseq**

Breseq_ is a computational pipeline for finding mutations relative to a reference sequence in short-read DNA re-sequencing data for microbial sized genomes.

.. _Breseq: http://barricklab.org/twiki/bin/view/Lab/ToolsBacterialGenomeResequencing

------

**Inputs**

Breseq accepts files in FASTQ format. It does not take paired-end information into account.

You can either run in clonal (consensus) mode or search for polymorphisms in a population.

You can also select an external sequence (eg. a transposon) to detect insertions or horizontal transfer.

------

**Outputs**

Breseq outputs a number of files. These are all condensed in a single zipped file.

It contains output files with the final results, accessible through ``output/index.html``

It also contains data files with accessory data, including:

- ``data/reference.fasta`` (file with reference genome: can be used in eg. IGV browser)
- ``data/reference.gff`` (file with genomic annotations: can be used in eg. IGV browser)
- ``data/reference.bam`` (file with read alignments: can be used in eg. IGV browser)
- ``data/unmatched.*`` (files with reads that failed to align: can be used to build an assembly or to eg. blast against NCBI)

    </help>
</tool>
# -*- coding: utf-8 -*-
"""Galaxy wrapper around the ``breseq`` command line tool.

Expected input format::

    ./breseq_wrapper.py output_file output_folder [breseq args]

``output_file`` receives a copy of ``output/summary.html``.
``output_folder`` receives ``results.zip`` (the whole ``output`` folder,
including ``data``) plus the report files needed to preview the HTML.
"""

from __future__ import print_function
import sys
import os
import shutil
from subprocess import call
from glob import glob

# breseq_wrapper.py: the version removed by changeset 7:b5cbe8152d65.

# All three names are relative to the current working directory, which is
# where breseq is launched and therefore where these paths are looked up.
OUTPUT_DIR = "output"
DATA_DIR = "data"
SUMMARY = "output/summary.html"


def fail(exitcode, reason=None):
    """Report a failed breseq run on stderr and exit with status 1.

    :param exitcode: exit code returned by the breseq process.
    :param reason: optional extra line explaining the failure; useful when
        breseq itself exited with 0 but the run is still unusable.
    :raises SystemExit: always.
    """
    sys.stderr.write("Breseq did not finish successfully\n")
    sys.stderr.write("Exit code was: {0}\n".format(exitcode))
    if reason:
        sys.stderr.write("{0}\n".format(reason))
    sys.exit(1)


def main(args):
    """Run breseq and collect its results.

    :param args: ``sys.argv``-style list: script name, path of the HTML
        report to write, path of the folder for the extra files, then the
        arguments passed to breseq unchanged.
    :raises SystemExit: when breseq exits with a non-zero code or does not
        produce ``output/summary.html``.
    """
    output, outdir = args[1:3]
    cmd = ["breseq"] + args[3:]

    returncode = call(cmd)

    if returncode != 0:
        fail(returncode)

    # the short HTML report
    if not os.path.isfile(SUMMARY):
        fail(returncode,
             "Expected report {0} was not produced".format(SUMMARY))

    # Data will be in the zipfile together with all the content of "output"
    shutil.move(DATA_DIR, OUTPUT_DIR)

    # isdir, not isfile: the folder may already exist
    if not os.path.isdir(outdir):
        os.makedirs(outdir)

    # Zip the whole folder. The archive root is the working directory because
    # that is where "output" lives, wherever outdir happens to be.
    shutil.make_archive("results", "zip", os.getcwd(), OUTPUT_DIR)
    shutil.move("results.zip", outdir)

    shutil.copy(SUMMARY, output)

    # move all the files needed for preview of HTML
    for path in glob(os.path.join(OUTPUT_DIR, "*")):
        name = os.path.basename(path)
        # Folder "data" only needs to go in the zipfile not the HTML preview
        if name == DATA_DIR:
            continue

        shutil.move(path, os.path.join(outdir, name))


def usage():
    """Print the command line synopsis on stderr and exit with status 1."""
    err = sys.stderr
    err.write("Usage:\n")
    err.write("    {0} output_filename output_folder [args]\n".format(
        sys.argv[0]))
    sys.exit(1)


if __name__ == "__main__":
    args = sys.argv

    # script name + report path + folder + at least one breseq argument
    if len(args) <= 3:
        usage()

    main(args)

# vim: ai sts=4 et sw=4
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/genbank_files.loc.sample Sat Mar 03 11:47:02 2018 -0500 @@ -0,0 +1,4 @@ +#This configures genbank files available for specific galaxy tools (eg. breseq) +#<unique_build_id>\t<dbkey>\t<display_name>\t<file_path> +#ecoliMC4100 ecoliMC4100 E. coli MC4100 /afs/igc.gulbenkian.pt/folders/UBI/PROJECTS/GALAXY/genomes/genbank/NC_012759.1_BW2952.gb +#ecoliMG1655 ecoliMG1655 E. coli MG1655 /afs/igc.gulbenkian.pt/folders/UBI/PROJECTS/GALAXY/genomes/genbank/NC_012759.1_BW2952.gb
--- a/tool_data_table_conf.xml.sample Sat Mar 03 11:46:10 2018 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,6 +0,0 @@ -<tables> - <table name="genbank_files" comment_char="#"> - <columns>value, dbkey, name, path</columns> - <file path="tool-data/genbank_files.loc" /> - </table> -</tables>