Mercurial > repos > iuc > delly_classify
changeset 1:148b595025b3 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/delly commit e0d4688a59e6eeba33adcfe803ac43d0bc2863e7"
author | iuc |
---|---|
date | Tue, 31 Aug 2021 08:00:10 +0000 |
parents | 10b025ea9d24 |
children | 15a02df32c2b |
files | cnv.xml.orig macros.xml macros.xml.orig merge.xml.orig |
diffstat | 4 files changed, 554 insertions(+), 2 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/cnv.xml.orig Tue Aug 31 08:00:10 2021 +0000
@@ -0,0 +1,207 @@
+<?xml version="1.0"?>
+<<<<<<< HEAD:tools/delly/cnv.xml
+<tool id="delly_cnv" name="Delly cnv" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="18.01">
+    <description>discover and genotype copy-number variants</description>
+=======
+<tool id="delly_rd" name="Delly read-depth (rd)" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="18.01">
+    <description>normalization on read-depth profiles</description>
+    <expand macro="bio_tools"/>
+>>>>>>> 20ed9dd6f (add bio.tools ID):tools/delly/rd.xml
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements"/>
+    <expand macro="version_command"/>
+    <command detect_errors="exit_code"><![CDATA[
+## run
+delly cnv
+## generic options
+--genome '$generic.genome'
+--quality '$generic.quality'
+--mappability '$generic.mappability'
+--ploidy $generic.ploidy
+--outfile 'result.bcf'
+--covfile 'result.gz'
+## cnv calling options
+--sdrd $cnv.sdrd
+--cn-offset $cnv.cnoffset
+--cnv-size $cnv.cnvsize
+#if $cnv.svfile
+    --svfile '$cnv.svfile'
+#end if
+#if $cnv.vcffile
+    --vcffile '$cnv.vcffile'
+#end if
+$cnv.segmentation
+## read-depth window options
+--window-size $read.windowsize
+--window-offset $read.windowoffset
+#if $read.bedintervals
+    --bed-intervals '$read.bedintervals'
+#end if
+--fraction-window $read.fractionwindow
+$read.adaptivewindowing
+## gc fragment normalization options
+--scan-window $gc.scanwindow
+--fraction-unique $gc.fractionunique
+#if $gc.scanregions
+    --scan-regions '$gc.scanregions'
+#end if
+--mad-cutoff $gc.madcutoff
+--percentile $gc.percentile
+$gc.nowindowselection
+## input
+'$input'
+
+## postprocessing
+@LOG@
+    ]]></command>
+    <inputs>
+        <expand macro="input" format="bam" label="Select input file"/>
+        <section name="generic" title="Generic options" expanded="true">
+            <expand macro="genome"/>
+            <param argument="--quality" type="integer" value="10" label="Set minimum mapping quality"/>
+            <param argument="--mappability" type="data" format="fasta" label="Select mappability map file"/>
+            <param argument="--ploidy" type="integer" value="2" label="Set baseline ploidy"/>
+        </section>
+        <section name="cnv" title="CNV calling options" expanded="true">
+            <param argument="--sdrd" type="integer" value="2" label="Set minimum SD read-depth shift"/>
+            <expand macro="cnoffset" default="0.1"/>
+            <param name="cnvsize" type="integer" value="1000" label="Set minimum CNV size" help="(--cnv-size)"/>
+            <param argument="--svfile" type="data" format="bcf" optional="true" label="Select delly SV file for breakpoint refinement"/> <!-- filetype sv.bcf not supported by galaxy -->
+            <expand macro="vcffile"/>
+            <param argument="--segmentation" type="boolean" truevalue="--segmentation" falsevalue="" label="Use copy-number segmentation?"/>
+        </section>
+        <section name="read" title="Read-depth window options" expanded="true">
+            <param name="windowsize" type="integer" value="10000" label="Set window size" help="(--window-size)"/>
+            <param name="windowoffset" type="integer" value="10000" label="Set window offset" help="(--window-offset)"/>
+            <param name="bedintervals" type="data" format="bed" optional="true" label="Select input BED file" help="(--bed-intervals)"/>
+            <param name="fractionwindow" type="float" min="0.0" max="1.0" value="0.25" label="Set minimum callable window fraction" help="(--fraction-window)"/>
+            <param name="adaptivewindowing" type="boolean" truevalue="-a" falsevalue="" label="Use mappable bases for window size?" help="(--adaptive-windowing)"/>
+        </section>
+        <section name="gc" title="GC fragment normalization options" expanded="true">
+            <param name="scanwindow" type="integer" value="10000" label="Set scan window size" help="(--scan-window)"/>
+            <param name="fractionunique" type="float" min="0.0" max="1.0" value="0.8" label="Set uniqueness filter for scan windows" help="(--fraction-unique)"/>
+            <param name="scanregions" type="data" format="bed" optional="true" label="Select file with scanning regions" help="(--scan-regions)"/>
+            <param name="madcutoff" type="integer" value="3" label="Set count cutoff" help="(median + 3 * mad) (--mad-cutoff)"/>
+            <param argument="--percentile" type="float" min="0.0" max="1.0" value="0.0005" label="Set threshold for excluding extreme GC fraction"/>
+            <param name="nowindowselection" type="boolean" truevalue="-n" falsevalue="" label="Skip scan window selection?" help="(--no-window-selection)"/>
+        </section>
+        <section name="oo" title="Output options" expanded="true"> <!-- the selected values are tested by the filter of each output below -->
+            <param name="out" type="select" multiple="true" optional="false" label="Select output file(s)">
+                <option value="cnv" selected="true">CNV</option>
+                <option value="coverage">Coverage</option>
+                <option value="log">Log</option>
+            </param>
+        </section>
+    </inputs>
+    <outputs>
+        <data name="out_cnv" format="bcf" from_work_dir="result.bcf" label="${tool.name} on ${on_string}: CNV"> <!-- result.bcf is the outfile named in the command -->
+            <filter>'cnv' in oo['out']</filter>
+        </data>
+        <data name="out_coverage" format="tabular.gz" from_work_dir="result.gz" label="${tool.name} on ${on_string}: Coverage"> <!-- result.gz is the covfile named in the command -->
+            <filter>'coverage' in oo['out']</filter>
+        </data>
+        <expand macro="log"/> <!-- log output, defined in macros.xml -->
+    </outputs>
+    <tests>
+        <!-- no test implemented for vcffile, svfile, bed-intervals, scanregions -->
+
+        <!-- #1 default; test data too small, results are empty -->
+        <test expect_num_outputs="3">
+            <param name="input" value="normal.bam"/>
+            <section name="generic">
+                <param name="genome" value="genome.fasta"/>
+                <param name="mappability" value="map.fasta"/>
+            </section>
+            <section name="oo">
+                <param name="out" value="cnv,coverage,log"/>
+            </section>
+            <output name="out_cnv">
+                <assert_contents>
+                    <has_size value="0"/>
+                </assert_contents>
+            </output>
+            <output name="out_coverage">
+                <assert_contents>
+                    <has_size value="0"/>
+                </assert_contents>
+            </output>
+            <output name="out_log">
+                <assert_contents>
+                    <has_text_matching expression=".+Scanning Windows"/>
+                    <has_line line="***************************************************"/>
+                </assert_contents>
+            </output>
+        </test>
+        <!-- #2 -->
+        <test expect_num_outputs="3">
+            <param name="input" value="normal.bam"/>
+            <section name="generic">
+                <param name="genome" value="genome.fasta"/>
+                <param name="quality" value="11"/>
+                <param name="mappability" value="map.fasta"/>
+                <param name="ploidy" value="3"/>
+            </section>
+            <section name="cnv">
+                <param name="sdrd" value="3"/>
+                <param name="cnoffset" value="0.2"/>
+                <param name="cnvsize" value="1001"/>
+                <param name="segmentation" value="true"/>
+            </section>
+            <section name="read">
+                <param name="windowsize" value="10001"/>
+                <param name="windowoffset" value="9999"/>
+                <param name="fractionwindow" value="0.24"/>
+                <param name="adaptivewindowing" value="true"/>
+            </section>
+            <section name="gc">
+                <param name="scanwindow" value="10001"/>
+                <param name="fractionunique" value="0.79"/>
+                <param name="madcutoff" value="2"/>
+                <param name="percentile" value="0.0006"/>
+                <param name="nowindowselection" value="true"/>
+            </section>
+            <section name="oo">
+                <param name="out" value="cnv,coverage,log"/>
+            </section>
+            <output name="out_cnv">
+                <assert_contents>
+                    <has_size value="700" delta="10"/>
+                </assert_contents>
+            </output>
+            <output name="out_coverage">
+                <assert_contents>
+                    <has_size value="61"/>
+                </assert_contents>
+            </output>
+            <output name="out_log">
+                <assert_contents>
+                    <has_text_matching expression=".+Done.+"/>
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help><![CDATA[
+.. class:: infomark
+
+**What it does**
+
+@WID@
+
+**Input**
+
+Delly *cnv* requires a sample (BAM), a genome (FASTA) and a mappability map (FASTA); mappability maps are available `here <https://gear.embl.de/data/delly/>`_. Intervals (BED), scanning regions (BED) and a delly SV file for breakpoint refinement (BCF) can be provided optionally.
+
+**Output**
+
+CNV (BCF) and coverage (compressed tabular) files are created.
+
+.. class:: infomark
+
+**References**
+
+@REFERENCES@
+    ]]></help>
+    <expand macro="citations"/>
+</tool>
\ No newline at end of file
--- a/macros.xml Fri Jan 22 14:30:26 2021 +0000 +++ b/macros.xml Tue Aug 31 08:00:10 2021 +0000 @@ -16,7 +16,11 @@ <citation type="doi">10.1093/bioinformatics/bts378</citation> </citations> </xml> - + <xml name="bio_tools"> + <xrefs> + <xref type="bio.tools">delly2</xref> + </xrefs> + </xml> <!-- command --> <token name="@BAM@"><![CDATA[ @@ -148,4 +152,4 @@ <token name="@REFERENCES@"><![CDATA[ More information are available on `GitHub <https://github.com/dellytools/delly>`_. ]]></token> -</macros> \ No newline at end of file +</macros>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml.orig Tue Aug 31 08:00:10 2021 +0000
@@ -0,0 +1,162 @@
+<?xml version="1.0"?>
+<macros>
+    <token name="@TOOL_VERSION@">0.8.7</token>
+    <token name="@VERSION_SUFFIX@">0</token>
+    <xml name="requirements"> <!-- packages required by the tools; delly is pinned to @TOOL_VERSION@ -->
+        <requirements>
+            <requirement type="package" version="@TOOL_VERSION@">delly</requirement>
+            <requirement type="package" version="1.10.2">bcftools</requirement>
+        </requirements>
+    </xml>
+    <xml name="version_command">
+        <version_command><![CDATA[delly -v 2>&1 | grep 'Delly version' | cut -f 3 -d ' ']]></version_command>
+    </xml>
+    <xml name="citations">
+        <citations>
+            <citation type="doi">10.1093/bioinformatics/bts378</citation>
+        </citations>
+    </xml>
+<<<<<<< HEAD
+
+    <!-- command -->
+=======
+    <xml name="bio_tools">
+        <xrefs>
+            <xref type='bio.tools'>delly2</xref>
+        </xrefs>
+    </xml>
+    <!--
+        command
+    -->
+>>>>>>> 20ed9dd6f (add bio.tools ID)
+
+    <token name="@BAM@"><![CDATA[
+#for $i, $current in enumerate($input)
+    ln -s '${current}' 'input_${i}.bam' &&
+    ln -s '${current.metadata.bam_index}' 'input_${i}.bam.bai' &&
+#end for
+    ]]></token> <!-- @BAM@: links every input and its index as input_<i>.bam and input_<i>.bam.bai -->
+    <token name="@DUMP@"><![CDATA[
+#if 'dump' in $oo.out
+    && test -f 'dump.tsv.gz' && bgzip -d 'dump.tsv.gz' || echo 'No dump file.'
+#end if
+    ]]></token> <!-- @DUMP@: decompresses dump.tsv.gz if it exists, only when 'dump' is selected -->
+    <token name="@LOG@"><![CDATA[
+#if 'log' in $oo.out
+    |& tee '$out_log'
+#end if
+    ]]></token> <!-- @LOG@: pipes stdout and stderr of the preceding command through tee into out_log when 'log' is selected -->
+    <token name="@VCF@"><![CDATA[
+#if 'vcf' in $oo.out
+    && test -f 'result.bcf' && bcftools view 'result.bcf' > 'result.vcf' || echo 'No results.'
+#end if
+    ]]></token> <!-- @VCF@: converts result.bcf to result.vcf with bcftools when 'vcf' is selected -->
+
+    <!-- input -->
+
+    <xml name="cnoffset" token_default="">
+        <param name="cnoffset" type="float" min="0.0" max="1.0" value="@DEFAULT@" label="Set minimum CN offset" help="(--cn-offset)"/>
+    </xml>
+    <xml name="coverage" token_label="">
+        <param argument="--coverage" type="integer" value="10" label="@LABEL@"/>
+    </xml>
+    <xml name="exclude">
+        <param argument="--exclude" type="data" format="tabular" optional="true" label="Select file with regions to exclude"/>
+    </xml>
+    <xml name="genome">
+        <param argument="--genome" type="data" format="fasta" label="Select genome file"/>
+    </xml>
+    <xml name="genoqual">
+        <param name="genoqual" type="integer" value="5" label="Set minimum mapping quality for genotyping" help="(--geno-qual)"/>
+    </xml>
+    <xml name="input" token_format="" token_multiple="false" token_label=""> <!-- format, multiple and label are supplied by the expanding tool -->
+        <param name="input" type="data" format="@FORMAT@" multiple="@MULTIPLE@" label="@LABEL@"/>
+    </xml>
+    <xml name="maxreadsep" token_default="">
+        <param argument="--maxreadsep" type="integer" value="@DEFAULT@" label="Set maximum read separation"/>
+    </xml>
+    <xml name="maxsize" token_default="" token_label="">
+        <param argument="--maxsize" type="integer" value="@DEFAULT@" label="@LABEL@"/>
+    </xml>
+    <xml name="minclip">
+        <param argument="--minclip" type="integer" value="25" label="Set minimum clipping length"/>
+    </xml>
+    <xml name="mincliquesize">
+        <param name="mincliquesize" type="integer" value="2" label="Set minimum paired-end/single-read clique size" help="(--min-clique-size)"/>
+    </xml>
+    <xml name="minrefsep" token_default="">
+        <param argument="--minrefsep" type="integer" value="@DEFAULT@" label="Set minimum reference separation"/>
+    </xml>
+    <xml name="minsize" token_default="" token_label="">
+        <param argument="--minsize" type="integer" value="@DEFAULT@" label="@LABEL@"/>
+    </xml>
+    <xml name="pass">
+        <param argument="--pass" type="boolean" truevalue="--pass" falsevalue="" label="Filter sites for PASS?"/>
+    </xml>
+    <xml name="ploidy">
+        <param argument="--ploidy" type="integer" value="2" label="Set baseline ploidy"/>
+    </xml>
+    <xml name="samples">
+        <param argument="--samples" type="data" format="tabular" label="Select sample file" help="Two-column sample file listing sample name and tumor or control."/>
+    </xml>
+    <xml name="svtype">
+        <param argument="--svtype" type="select" label="Select type(s) of structural variants to detect">
+            <option value="ALL" selected="true">All types (ALL)</option>
+            <option value="DEL">Deletion (DEL)</option>
+            <option value="DUP">Duplication (DUP)</option>
+            <option value="INS">Insertion (INS)</option>
+            <option value="INV">Inversion (INV)</option>
+            <option value="BND">Translocation (BND)</option>
+        </param>
+    </xml>
+    <xml name="vcffile">
+        <param argument="--vcffile" type="data" format="bcf,vcf" optional="true" label="Select genotyping file"/>
+    </xml>
+
+    <!-- output -->
+
+    <xml name="bcf">
+        <data name="out_bcf" format="bcf" from_work_dir="result.bcf" label="${tool.name} on ${on_string}: Result (BCF)">
+            <filter>'bcf' in oo['out']</filter>
+        </data>
+    </xml>
+    <xml name="vcf">
+        <data name="out_vcf" format="vcf" from_work_dir="result.vcf" label="${tool.name} on ${on_string}: Result (VCF)">
+            <filter>'vcf' in oo['out']</filter>
+        </data>
+    </xml>
+    <xml name="dump">
+        <data name="out_dump" format="tabular" from_work_dir="dump.tsv" label="${tool.name} on ${on_string}: SV-reads">
+            <filter>'dump' in oo['out']</filter>
+        </data>
+    </xml>
+    <xml name="log"> <!-- no from_work_dir here: the @LOG@ token writes to $out_log directly -->
+        <data name="out_log" format="txt" label="${tool.name} on ${on_string}: Log">
+            <filter>'log' in oo['out']</filter>
+        </data>
+    </xml>
+
+    <!-- help -->
+
+    <token name="@WID@"><![CDATA[
+Delly is an integrated structural variant (SV) prediction method that can discover, genotype and visualize deletions, tandem duplications, inversions and translocations at single-nucleotide resolution in short-read massively parallel sequencing data. It uses paired-ends, split-reads and read-depth to sensitively and accurately delineate genomic rearrangements throughout the genome.
+
+Short-read SV calling
+
+- *call* to discover and genotype structural variants
+- *merge* structural variants across VCF/BCF files and within a single VCF/BCF file
+- *filter* somatic or germline structural variants
+
+Long-read SV calling
+
+- *lr* for long-read SV discovery
+
+Copy-number variant calling
+
+- *cnv* to discover and genotype copy-number variants
+- *classify* somatic or germline copy-number variants
+    ]]></token>
+    <token name="@REFERENCES@"><![CDATA[
+More information is available on `GitHub <https://github.com/dellytools/delly>`_.
+    ]]></token>
+</macros>
\ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/merge.xml.orig Tue Aug 31 08:00:10 2021 +0000
@@ -0,0 +1,179 @@
+<?xml version="1.0"?>
+<tool id="delly_merge" name="Delly merge" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="18.01">
+<<<<<<< HEAD
+    <description>structural variants across/within BCF/VCF file(s)</description>
+=======
+    <description>structural variants across/within VCF/BCF file(s)</description>
+    <expand macro="bio_tools"/>
+>>>>>>> 20ed9dd6f (add bio.tools ID)
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements"/>
+    <expand macro="version_command"/>
+    <command detect_errors="exit_code"><![CDATA[
+## initialize
+#for $i, $current in enumerate($input)
+    #if $current.is_of_type('vcf')
+        bcftools view -Ob '$current' > 'input_${i}.bcf.gz' &&
+        bcftools index 'input_${i}.bcf.gz' &&
+    #else
+        ln -s '${current}' 'input_${i}.bcf.gz' &&
+        ln -s '${current.metadata.bcf_index}' 'input_${i}.bcf.gz.csi' &&
+    #end if
+#end for
+
+## run
+delly merge
+## generic options
+--outfile 'result.bcf'
+--chunks $generic.chunks
+--vaf $generic.vaf
+--coverage $generic.coverage
+--minsize $generic.minsize
+--maxsize $generic.maxsize
+$generic.cnvmode
+$generic.precise
+$generic.pass
+## overlap options
+--bp-offset $overlap.bpoffset
+--rec-overlap $overlap.recoverlap
+## input
+#for $i, $current in enumerate($input)
+    'input_${i}.bcf.gz'
+#end for
+
+## postprocessing
+@LOG@
+@VCF@
+    ]]></command>
+    <inputs>
+        <expand macro="input" format="bcf,vcf" multiple="true" label="Select input files"/>
+        <section name="generic" title="Generic options" expanded="true">
+            <param argument="--chunks" type="integer" value="500" label="Set maximum chunk size to merge groups of BCF files"/>
+            <param argument="--vaf" type="float" value="0.15" min="0.0" max="1.0" label="Set minimum fractional ALT support"/>
+            <expand macro="coverage" label="Set minimum coverage"/>
+            <expand macro="minsize" default="0" label="Set minimum SV size"/>
+            <expand macro="maxsize" default="1000000" label="Set maximum SV size"/>
+            <param argument="--cnvmode" type="boolean" truevalue="--cnvmode" falsevalue="" label="Merge Delly CNV files?"/>
+            <param argument="--precise" type="boolean" truevalue="--precise" falsevalue="" label="Filter sites for PRECISE?"/>
+            <expand macro="pass"/>
+        </section>
+        <section name="overlap" title="Overlap options" expanded="true"> <!-- param names carry no hyphen; tests must use bpoffset and recoverlap -->
+            <param name="bpoffset" type="integer" value="1000" label="Set maximum breakpoint offset" help="(--bp-offset)"/>
+            <param name="recoverlap" type="float" value="0.8" min="0.0" max="1.0" label="Set minimum reciprocal overlap" help="(--rec-overlap)"/>
+        </section>
+        <section name="oo" title="Output options" expanded="true"> <!-- the selected values are tested by the filter of each output -->
+            <param name="out" type="select" multiple="true" optional="false" label="Select output file(s)">
+                <option value="bcf" selected="true">BCF</option>
+                <option value="log">Log</option>
+                <option value="vcf">VCF</option>
+            </param>
+        </section>
+    </inputs>
+    <outputs>
+        <expand macro="bcf"/>
+        <expand macro="log"/>
+        <expand macro="vcf"/>
+    </outputs>
+    <tests>
+        <!-- #1 bcf, default -->
+        <test expect_num_outputs="2">
+            <param name="input" value="call_1.bcf.gz,call_2.bcf.gz"/>
+            <section name="oo">
+                <param name="out" value="vcf,bcf"/>
+            </section>
+            <output name="out_bcf">
+                <assert_contents>
+                    <has_size value="1851" delta="10"/>
+                </assert_contents>
+            </output>
+            <output name="out_vcf">
+                <assert_contents>
+                    <has_n_lines n="128"/>
+                    <has_line line="##fileformat=VCFv4.2"/>
+                    <has_line line="#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO"/>
+                </assert_contents>
+            </output>
+        </test>
+        <!-- #2 bcf -->
+        <test expect_num_outputs="3">
+            <param name="input" value="call_1.bcf.gz,call_2.bcf.gz"/>
+            <section name="generic">
+                <param name="chunks" value="500"/>
+                <param name="vaf" value="0.16"/>
+                <param name="coverage" value="10"/>
+                <param name="minsize" value="0"/>
+                <param name="maxsize" value="1000000"/>
+                <param name="cnvmode" value="true"/>
+                <param name="precise" value="true"/>
+                <param name="pass" value="true"/>
+            </section>
+            <section name="overlap">
+                <param name="bpoffset" value="1000"/>
+                <param name="recoverlap" value="0.79"/>
+            </section>
+            <section name="oo">
+                <param name="out" value="vcf,bcf,log"/>
+            </section>
+            <output name="out_bcf">
+                <assert_contents>
+                    <has_size value="1021" delta="10"/>
+                </assert_contents>
+            </output>
+            <output name="out_log">
+                <assert_contents>
+                    <has_text_matching expression=".+Done\."/>
+                </assert_contents>
+            </output>
+            <output name="out_vcf">
+                <assert_contents>
+                    <has_n_lines n="108"/>
+                    <has_line line="##fileformat=VCFv4.2"/>
+                    <has_line line="#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO"/>
+                </assert_contents>
+            </output>
+        </test>
+        <!-- #3 vcf, default -->
+        <test expect_num_outputs="2">
+            <param name="input" value="call_1.vcf.gz,call_2.vcf.gz"/>
+            <section name="oo">
+                <param name="out" value="vcf,bcf"/>
+            </section>
+            <output name="out_bcf">
+                <assert_contents>
+                    <has_size value="1851" delta="10"/>
+                </assert_contents>
+            </output>
+            <output name="out_vcf">
+                <assert_contents>
+                    <has_n_lines n="128"/>
+                    <has_line line="##fileformat=VCFv4.2"/>
+                    <has_line line="#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO"/>
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help><![CDATA[
+.. class:: infomark
+
+**What it does**
+
+@WID@
+
+**Input**
+
+Delly *merge* requires BCF or VCF files.
+
+**Output**
+
+A single file in BCF/VCF format. Additionally a log file is provided.
+
+.. class:: infomark
+
+**References**
+
+@REFERENCES@
+    ]]></help>
+    <expand macro="citations"/>
+</tool>
\ No newline at end of file