changeset 0:ee65e517769d draft

planemo upload for repository https://github.com/youyuh48/galaxy-tools/tree/master/tools/sniffles
author youyuh48
date Sun, 25 Aug 2019 01:58:51 -0400
parents
children ba3aaae7a229
files sniffles.xml test-data/expected_output.vcf test-data/reads_region.bam
diffstat 3 files changed, 260 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/sniffles.xml	Sun Aug 25 01:58:51 2019 -0400
@@ -0,0 +1,142 @@
+<tool id="sniffles" name="sniffles" version="0.1.0">
+  <description>Structural variation caller using third generation sequencing</description>
+  <requirements>
+    <requirement type="package" version="1.0.11">sniffles</requirement>
+  </requirements>
+  <command detect_errors="exit_code">
+    <![CDATA[
+sniffles
+-t \${GALAXY_SLOTS:-2}
+-m '$input'
+--${output_format} '$output'
+## Optional flags from the general_options section: passed only when set; the str() test keeps an explicit 0 from being dropped.
+#if str($general_options.s) != '':
+    -s $general_options.s
+#end if
+#if str($general_options.max_num_splits) != '':
+    --max_num_splits $general_options.max_num_splits
+#end if
+#if str($general_options.l) != '':
+    -l $general_options.l
+#end if
+#if str($general_options.q) != '':
+    -q $general_options.q
+#end if
+#if str($general_options.n) != '':
+    -n $general_options.n
+#end if
+#if str($general_options.r) != '':
+    -r $general_options.r
+#end if
+    ]]>
+  </command>
+  <inputs>
+    <param name="input" type="data" format="bam" label="Input BAM file"/>
+    <param name="output_format" type="select" label="Select an output format">
+      <option value="vcf">VCF</option>
+      <option value="bedpe">bedpe</option>
+    </param>
+    <section name="general_options" title="Set general options" expanded="False">
+      <param argument="-s" type="integer" optional="true" value="" label="Minimum number of reads that support a SV. [10]"/>
+      <param name="max_num_splits" argument="--max_num_splits" type="integer" optional="true" value="" label="Maximum number of splits per read to be still taken into account. [7]"/>
+      <param argument="-l" type="integer" optional="true" value="" label="Minimum length of SV to be reported. [30]"/>
+      <param argument="-q" type="integer" optional="true" value="" label="Minimum Mapping Quality. [20]"/>
+      <param argument="-n" type="integer" optional="true" value="" label="Report up to N reads that support the SV in the vcf file. -1: report all. [0]"/>
+      <param argument="-r" type="integer" optional="true" value="" label="Discard read if non of its segment is larger then this. [2000]"/>
+    </section>
+  </inputs>
+  <outputs>
+    <data name="output" format="vcf" label="${tool.name} on ${on_string} (${output_format} format)"> <!-- single result dataset; VCF unless overridden below -->
+      <change_format>
+        <when input="output_format" value="bedpe" format="tabular"/> <!-- bedpe results are stored with the tabular datatype -->
+      </change_format>
+    </data>
+  </outputs>
+  <tests>
+    <test>
+      <param name="input" value="reads_region.bam"/>
+      <param name="output_format" value="vcf"/> <!-- must name the real select param; its value is the option value, not a CLI flag -->
+      <output name="output" file="expected_output.vcf" lines_diff="4"/> <!-- allows the fileDate header and the SVMETHOD version string to differ from the fixture; NOTE(review): confirm against sniffles 1.0.11 output -->
+    </test>
+  </tests>
+  <help>
+    <![CDATA[
+Usage: sniffles [options] -m <sorted.bam> -v <output.vcf>
+
+Input/Output:
+    -m <string>,  --mapped_reads <string>
+        (required)  Sorted bam File
+    -v <string>,  --vcf <string>
+        VCF output file name []
+    -b <string>,  --bedpe <string>
+         bedpe output file name []
+    \--Ivcf <string>
+        Input VCF file name. Enable force calling []
+    \--tmp_file <string>
+        path to temporary file otherwise Sniffles will use the current directory. []
+
+General:
+    -s <int>,  --min_support <int>
+        Minimum number of reads that support a SV. [10]
+    \--max_num_splits <int>
+        Maximum number of splits per read to be still taken into account. [7]
+    -d <int>,  --max_distance <int>
+        Maximum distance to group SV together. [1000]
+    -t <int>,  --threads <int>
+        Number of threads to use. [3]
+    -l <int>,  --min_length <int>
+        Minimum length of SV to be reported. [30]
+    -q <int>,  --minmapping_qual <int>
+        Minimum Mapping Quality. [20]
+    -n <int>,  --num_reads_report <int>
+        Report up to N reads that support the SV in the vcf file. -1: report all. [0]
+    -r <int>,  --min_seq_size <int>
+        Discard read if non of its segment is larger then this. [2000]
+    -z <int>,  --min_zmw <int>
+        Discard SV that are not supported by at least x zmws. This applies only for PacBio recognizable reads. [0]
+    \--cs_string
+        Enables the scan of CS string instead of Cigar and MD.  [false]
+
+Clustering/phasing and genotyping:
+    \--genotype
+        Enables Sniffles to compute the genotypes. [false]
+    \--cluster
+        Enables Sniffles to phase SVs that occur on the same reads [false]
+    \--cluster_support <int>
+        Minimum number of reads supporting clustering of SV. [1]
+    -f <float>,  --allelefreq <float>
+        Threshold on allele frequency (0-1).  [0]
+    \--min_homo_af <float>
+        Threshold on allele frequency (0-1).  [0.8]
+    \--min_het_af <float>
+        Threshold on allele frequency (0-1).  [0.3]
+
+Advanced:
+    \--report_BND
+        Dont report BND instead use Tra in vcf output.  [true]
+    \--report_seq
+        Report sequences for indels in vcf output. (Beta version!)  [false]
+    \--ignore_sd
+        Ignores the sd based filtering.  [false]
+    \--report_read_strands
+        Enables the report of the strand categories per read. (Beta)  [false]
+    \--ccs_reads
+        Preset CCS Pacbio setting. (Beta)  [false]
+
+Parameter estimation:
+    \--skip_parameter_estimation
+        Enables the scan if only very few reads are present.  [false]
+    \--del_ratio <float>
+        Estimated ratio of deletions per read (0-1).  [0.0458369]
+    \--ins_ratio <float>
+        Estimated ratio of insertions per read (0-1).  [0.049379]
+    \--max_diff_per_window <int>
+        Maximum differences per 100bp. [50]
+    \--max_dist_aln_events <int>
+        Maximum distance between alignment (indel) events. [4]
+    ]]>
+  </help>
+    <citations>
+        <citation type="doi">10.1038/s41592-018-0001-7</citation>
+    </citations>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/expected_output.vcf	Sun Aug 25 01:58:51 2019 -0400
@@ -0,0 +1,118 @@
+##fileformat=VCFv4.2
+##source=Sniffles
+##fileDate=20180307
+##contig=<ID=1,length=249250621>
+##contig=<ID=2,length=243199373>
+##contig=<ID=3,length=198022430>
+##contig=<ID=4,length=191154276>
+##contig=<ID=5,length=180915260>
+##contig=<ID=6,length=171115067>
+##contig=<ID=7,length=159138663>
+##contig=<ID=8,length=146364022>
+##contig=<ID=9,length=141213431>
+##contig=<ID=10,length=135534747>
+##contig=<ID=11,length=135006516>
+##contig=<ID=12,length=133851895>
+##contig=<ID=13,length=115169878>
+##contig=<ID=14,length=107349540>
+##contig=<ID=15,length=102531392>
+##contig=<ID=16,length=90354753>
+##contig=<ID=17,length=81195210>
+##contig=<ID=18,length=78077248>
+##contig=<ID=19,length=59128983>
+##contig=<ID=20,length=63025520>
+##contig=<ID=21,length=48129895>
+##contig=<ID=22,length=51304566>
+##contig=<ID=X,length=155270560>
+##contig=<ID=Y,length=59373566>
+##contig=<ID=MT,length=16569>
+##contig=<ID=GL000207.1,length=4262>
+##contig=<ID=GL000226.1,length=15008>
+##contig=<ID=GL000229.1,length=19913>
+##contig=<ID=GL000231.1,length=27386>
+##contig=<ID=GL000210.1,length=27682>
+##contig=<ID=GL000239.1,length=33824>
+##contig=<ID=GL000235.1,length=34474>
+##contig=<ID=GL000201.1,length=36148>
+##contig=<ID=GL000247.1,length=36422>
+##contig=<ID=GL000245.1,length=36651>
+##contig=<ID=GL000197.1,length=37175>
+##contig=<ID=GL000203.1,length=37498>
+##contig=<ID=GL000246.1,length=38154>
+##contig=<ID=GL000249.1,length=38502>
+##contig=<ID=GL000196.1,length=38914>
+##contig=<ID=GL000248.1,length=39786>
+##contig=<ID=GL000244.1,length=39929>
+##contig=<ID=GL000238.1,length=39939>
+##contig=<ID=GL000202.1,length=40103>
+##contig=<ID=GL000234.1,length=40531>
+##contig=<ID=GL000232.1,length=40652>
+##contig=<ID=GL000206.1,length=41001>
+##contig=<ID=GL000240.1,length=41933>
+##contig=<ID=GL000236.1,length=41934>
+##contig=<ID=GL000241.1,length=42152>
+##contig=<ID=GL000243.1,length=43341>
+##contig=<ID=GL000242.1,length=43523>
+##contig=<ID=GL000230.1,length=43691>
+##contig=<ID=GL000237.1,length=45867>
+##contig=<ID=GL000233.1,length=45941>
+##contig=<ID=GL000204.1,length=81310>
+##contig=<ID=GL000198.1,length=90085>
+##contig=<ID=GL000208.1,length=92689>
+##contig=<ID=GL000191.1,length=106433>
+##contig=<ID=GL000227.1,length=128374>
+##contig=<ID=GL000228.1,length=129120>
+##contig=<ID=GL000214.1,length=137718>
+##contig=<ID=GL000221.1,length=155397>
+##contig=<ID=GL000209.1,length=159169>
+##contig=<ID=GL000218.1,length=161147>
+##contig=<ID=GL000220.1,length=161802>
+##contig=<ID=GL000213.1,length=164239>
+##contig=<ID=GL000211.1,length=166566>
+##contig=<ID=GL000199.1,length=169874>
+##contig=<ID=GL000217.1,length=172149>
+##contig=<ID=GL000216.1,length=172294>
+##contig=<ID=GL000215.1,length=172545>
+##contig=<ID=GL000205.1,length=174588>
+##contig=<ID=GL000219.1,length=179198>
+##contig=<ID=GL000224.1,length=179693>
+##contig=<ID=GL000223.1,length=180455>
+##contig=<ID=GL000195.1,length=182896>
+##contig=<ID=GL000212.1,length=186858>
+##contig=<ID=GL000222.1,length=186861>
+##contig=<ID=GL000200.1,length=187035>
+##contig=<ID=GL000193.1,length=189789>
+##contig=<ID=GL000194.1,length=191469>
+##contig=<ID=GL000225.1,length=211173>
+##contig=<ID=GL000192.1,length=547496>
+##contig=<ID=NC_007605,length=171823>
+##contig=<ID=hs37d5,length=35477943>
+##ALT=<ID=DEL,Description="Deletion">
+##ALT=<ID=DUP,Description="Duplication">
+##ALT=<ID=INV,Description="Inversion">
+##ALT=<ID=INVDUP,Description="InvertedDUP with unknown boundaries">
+##ALT=<ID=TRA,Description="Translocation">
+##ALT=<ID=INS,Description="Insertion">
+##INFO=<ID=CHR2,Number=1,Type=String,Description="Chromosome for END coordinate in case of a translocation">
+##INFO=<ID=END,Number=1,Type=Integer,Description="End position of the structural variant">
+##INFO=<ID=MAPQ,Number=1,Type=Integer,Description="Median mapping quality of paired-ends">
+##INFO=<ID=RE,Number=1,Type=Integer,Description="read support">
+##INFO=<ID=IMPRECISE,Number=0,Type=Flag,Description="Imprecise structural variation">
+##INFO=<ID=PRECISE,Number=0,Type=Flag,Description="Precise structural variation">
+##INFO=<ID=SVLEN,Number=1,Type=Integer,Description="Length of the SV">
+##INFO=<ID=SVMETHOD,Number=1,Type=String,Description="Type of approach used to detect SV">
+##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant">
+##INFO=<ID=STD_quant_start,Number=A,Type=Integer,Description="STD of the start breakpoints across the reads.">
+##INFO=<ID=STD_quant_stop,Number=A,Type=Integer,Description="STD of the stop breakpoints across the reads.">
+##INFO=<ID=Kurtosis_quant_start,Number=A,Type=Integer,Description="Kurtosis value of the start breakpoints accross the reads.">
+##INFO=<ID=Kurtosis_quant_stop,Number=A,Type=Integer,Description="Kurtosis value of the stop breakpoints accross the reads.">
+##INFO=<ID=SUPTYPE,Number=1,Type=String,Description="Type by which the variant is supported.(SR,ALN)">
+##INFO=<ID=SUPTYPE,Number=1,Type=String,Description="Type by which the variant is supported.(SR,ALN)">
+##INFO=<ID=STRANDS,Number=A,Type=String,Description="Strand orientation of the adjacency in BEDPE format (DEL:+-, DUP:-+, INV:++/--)">
+##INFO=<ID=AF,Number=A,Type=Float,Description="Allele Frequency.">
+##INFO=<ID=ZMW,Number=A,Type=Integer,Description="Number of ZMWs (Pacbio) supporting SV.">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=DR,Number=1,Type=Integer,Description="# high-quality reference reads">
+##FORMAT=<ID=DV,Number=1,Type=Integer,Description="# high-quality variant reads">
+#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	reads_region.bam
+21	21492142	0	N	<DEL>	.	PASS	PRECISE;SVMETHOD=Snifflesv1.0.8;CHR2=21;END=21492648;STD_quant_start=0.000000;STD_quant_stop=0.000000;Kurtosis_quant_start=0.572582;Kurtosis_quant_stop=1.417662;SVTYPE=DEL;SUPTYPE=AL,SR;SVLEN=506;STRANDS=+-;RE=48	GT:DR:DV	./.:.:48
Binary file test-data/reads_region.bam has changed