annotate downsample.xml @ 1:03aeb837e398 draft default tip

Uploaded
author dave
date Tue, 01 Oct 2019 16:25:02 -0400
parents 20823bce09e7
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
20823bce09e7 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/downsample commit dd00e60bdbb1fb45d395bb59dd60795e39867624
dave
parents:
diff changeset
1 <?xml version="1.0"?>
1
03aeb837e398 Uploaded
dave
parents: 0
diff changeset
2 <tool id="dynamic_downsample" name="Downsample" version="1.0.0">
0
20823bce09e7 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/downsample commit dd00e60bdbb1fb45d395bb59dd60795e39867624
dave
parents:
diff changeset
3 <description>reads to desired coverage</description>
20823bce09e7 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/downsample commit dd00e60bdbb1fb45d395bb59dd60795e39867624
dave
parents:
diff changeset
4 <requirements>
20823bce09e7 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/downsample commit dd00e60bdbb1fb45d395bb59dd60795e39867624
dave
parents:
diff changeset
5 <requirement type="package" version="1.9">samtools</requirement>
20823bce09e7 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/downsample commit dd00e60bdbb1fb45d395bb59dd60795e39867624
dave
parents:
diff changeset
6 <requirement type="package" version="5.0.1">gawk</requirement>
20823bce09e7 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/downsample commit dd00e60bdbb1fb45d395bb59dd60795e39867624
dave
parents:
diff changeset
7 </requirements>
20823bce09e7 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/downsample commit dd00e60bdbb1fb45d395bb59dd60795e39867624
dave
parents:
diff changeset
8 <command><![CDATA[
1
03aeb837e398 Uploaded
dave
parents: 0
diff changeset
9 if FACTOR=\$(samtools depth -d 0 '$reads' | awk '{ readcovs[x++]=\$3; } END { if (x == 0) exit 1 ; n = asort(readcovs) ; idx=int((x+1)/2) ; coverage = ((idx==(x+1)/2) ? readcovs[idx] : (readcovs[idx]+readcovs[idx+1])/2) ; factor = 1/(coverage/$target_coverage) ; if (factor >= 1) exit 1 ; else print factor }') ;
03aeb837e398 Uploaded
dave
parents: 0
diff changeset
10 then samtools view '$reads' -s \$FACTOR -O BAM -o '$output' -@ \${GALAXY_SLOTS:-1} ;
03aeb837e398 Uploaded
dave
parents: 0
diff changeset
11 else samtools view -O BAM '$reads' -o '$output' ;
0
20823bce09e7 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/downsample commit dd00e60bdbb1fb45d395bb59dd60795e39867624
dave
parents:
diff changeset
12 fi
20823bce09e7 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/downsample commit dd00e60bdbb1fb45d395bb59dd60795e39867624
dave
parents:
diff changeset
13 ]]>
20823bce09e7 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/downsample commit dd00e60bdbb1fb45d395bb59dd60795e39867624
dave
parents:
diff changeset
14 </command>
20823bce09e7 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/downsample commit dd00e60bdbb1fb45d395bb59dd60795e39867624
dave
parents:
diff changeset
15 <inputs>
20823bce09e7 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/downsample commit dd00e60bdbb1fb45d395bb59dd60795e39867624
dave
parents:
diff changeset
16 <param name="reads" type="data" format="sam,bam" label="Reads to downsample" />
1
03aeb837e398 Uploaded
dave
parents: 0
diff changeset
17 <param name="target_coverage" type="integer" value="1000" min="1" label="Target coverage" help="Desired median coverage. Inputs whose median coverage is already at or below this value are written to the output without downsampling." />
0
20823bce09e7 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/downsample commit dd00e60bdbb1fb45d395bb59dd60795e39867624
dave
parents:
diff changeset
18 </inputs>
20823bce09e7 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/downsample commit dd00e60bdbb1fb45d395bb59dd60795e39867624
dave
parents:
diff changeset
19 <outputs>
1
03aeb837e398 Uploaded
dave
parents: 0
diff changeset
20 <data format="bam" name="output" label="Downsample ${on_string} to ${target_coverage}x coverage" />
0
20823bce09e7 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/downsample commit dd00e60bdbb1fb45d395bb59dd60795e39867624
dave
parents:
diff changeset
21 </outputs>
20823bce09e7 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/downsample commit dd00e60bdbb1fb45d395bb59dd60795e39867624
dave
parents:
diff changeset
22 <tests>
1
03aeb837e398 Uploaded
dave
parents: 0
diff changeset
23 <test>
03aeb837e398 Uploaded
dave
parents: 0
diff changeset
24 <param name="reads" ftype="bam" value="downsample-in1.bam" />
03aeb837e398 Uploaded
dave
parents: 0
diff changeset
25 <param name="target_coverage" value="100" />
03aeb837e398 Uploaded
dave
parents: 0
diff changeset
26 <output name="output" file="downsample-out1.bam" />
03aeb837e398 Uploaded
dave
parents: 0
diff changeset
27 </test>
0
20823bce09e7 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/downsample commit dd00e60bdbb1fb45d395bb59dd60795e39867624
dave
parents:
diff changeset
28 </tests>
1
03aeb837e398 Uploaded
dave
parents: 0
diff changeset
29 <help><![CDATA[
03aeb837e398 Uploaded
dave
parents: 0
diff changeset
30 .. role:: bash(code)
03aeb837e398 Uploaded
dave
parents: 0
diff changeset
31 :language: bash
03aeb837e398 Uploaded
dave
parents: 0
diff changeset
32
03aeb837e398 Uploaded
dave
parents: 0
diff changeset
33
03aeb837e398 Uploaded
dave
parents: 0
diff changeset
34 Dynamic Downsampling
03aeb837e398 Uploaded
dave
parents: 0
diff changeset
35 ~~~~~~~~~~~~~~~~~~~~
03aeb837e398 Uploaded
dave
parents: 0
diff changeset
36
03aeb837e398 Uploaded
dave
parents: 0
diff changeset
37 A known issue with variant analysis is that when small genomes are sequenced,
03aeb837e398 Uploaded
dave
parents: 0
diff changeset
38 e.g. HIV at 9.7 kb or the human mitochondrial genome at 16.6 kb, the resulting
03aeb837e398 Uploaded
dave
parents: 0
diff changeset
39 coverage can easily exceed 10,000x. This can cause performance issues for some
03aeb837e398 Uploaded
dave
parents: 0
diff changeset
40 variant callers, especially those that employ a haplotyping approach to variant
03aeb837e398 Uploaded
dave
parents: 0
diff changeset
41 detection.
03aeb837e398 Uploaded
dave
parents: 0
diff changeset
42
03aeb837e398 Uploaded
dave
parents: 0
diff changeset
43 This tool attempts to ameliorate that issue by downsampling its input files to
03aeb837e398 Uploaded
dave
parents: 0
diff changeset
44 the target coverage using :bash:`samtools depth` to determine the median
03aeb837e398 Uploaded
dave
parents: 0
diff changeset
45 coverage for a given BAM file, then running :bash:`samtools view -s` on the file
03aeb837e398 Uploaded
dave
parents: 0
diff changeset
46 if 1 / (median coverage / desired coverage) is less than 1.
03aeb837e398 Uploaded
dave
parents: 0
diff changeset
47
03aeb837e398 Uploaded
dave
parents: 0
diff changeset
48 .. code-block:: bash
03aeb837e398 Uploaded
dave
parents: 0
diff changeset
49
03aeb837e398 Uploaded
dave
parents: 0
diff changeset
50 -s FLOAT subsample reads (given INT.FRAC option value, 0.FRAC is the fraction of templates/read pairs to keep; INT part sets seed)
03aeb837e398 Uploaded
dave
parents: 0
diff changeset
51
03aeb837e398 Uploaded
dave
parents: 0
diff changeset
52 The median coverage is determined by passing the :bash:`samtools depth` command
03aeb837e398 Uploaded
dave
parents: 0
diff changeset
53 through the following :bash:`awk` script, where :bash:`$target_coverage` is the
03aeb837e398 Uploaded
dave
parents: 0
diff changeset
54 value specified in the tool form:
03aeb837e398 Uploaded
dave
parents: 0
diff changeset
55
03aeb837e398 Uploaded
dave
parents: 0
diff changeset
56 .. code-block:: awk
03aeb837e398 Uploaded
dave
parents: 0
diff changeset
57
03aeb837e398 Uploaded
dave
parents: 0
diff changeset
58 '{ readcovs[x++]=$3; }
03aeb837e398 Uploaded
dave
parents: 0
diff changeset
59 END {
03aeb837e398 Uploaded
dave
parents: 0
diff changeset
60 n = asort(readcovs) ;
03aeb837e398 Uploaded
dave
parents: 0
diff changeset
61 idx=int((x+1)/2) ;
03aeb837e398 Uploaded
dave
parents: 0
diff changeset
62 coverage = ((idx==(x+1)/2) ? readcovs[idx] : (readcovs[idx]+readcovs[idx+1])/2) ;
03aeb837e398 Uploaded
dave
parents: 0
diff changeset
63 factor = 1/(coverage/$target_coverage) ;
03aeb837e398 Uploaded
dave
parents: 0
diff changeset
64 if (factor >= 1) exit 1 ;
03aeb837e398 Uploaded
dave
parents: 0
diff changeset
65 else print factor
03aeb837e398 Uploaded
dave
parents: 0
diff changeset
66 }'
03aeb837e398 Uploaded
dave
parents: 0
diff changeset
67
03aeb837e398 Uploaded
dave
parents: 0
diff changeset
68 On an exit code of 1, the tool will simply copy the input to the output without
03aeb837e398 Uploaded
dave
parents: 0
diff changeset
69 altering it. If the :bash:`awk` step returns a value instead, the tool then runs
03aeb837e398 Uploaded
dave
parents: 0
diff changeset
70 :bash:`samtools view -s FACTOR`, where FACTOR is 1 / (median coverage / desired coverage).
03aeb837e398 Uploaded
dave
parents: 0
diff changeset
71
03aeb837e398 Uploaded
dave
parents: 0
diff changeset
72 ]]>
0
20823bce09e7 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/downsample commit dd00e60bdbb1fb45d395bb59dd60795e39867624
dave
parents:
diff changeset
73 </help>
20823bce09e7 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/downsample commit dd00e60bdbb1fb45d395bb59dd60795e39867624
dave
parents:
diff changeset
74 <citations>
20823bce09e7 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/downsample commit dd00e60bdbb1fb45d395bb59dd60795e39867624
dave
parents:
diff changeset
75 </citations>
20823bce09e7 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/downsample commit dd00e60bdbb1fb45d395bb59dd60795e39867624
dave
parents:
diff changeset
76 </tool>