annotate picard_MarkDuplicates.xml @ 128:f2604c713ebd draft

Uploaded
author devteam
date Wed, 26 Feb 2014 00:33:29 -0500
parents 8d15620a9420
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
122
8d15620a9420 Uploaded
devteam
parents:
diff changeset
1 <tool name="Mark Duplicate reads" id="rgPicardMarkDups" version="1.106.0">
8d15620a9420 Uploaded
devteam
parents:
diff changeset
2 <description>locates duplicate molecules</description>
8d15620a9420 Uploaded
devteam
parents:
diff changeset
3 <command interpreter="python">
8d15620a9420 Uploaded
devteam
parents:
diff changeset
4 picard_wrapper.py -i "${input_file}" -n "${out_prefix}" --tmpdir "${__new_file_path__}" -o "${out_file}"
8d15620a9420 Uploaded
devteam
parents:
diff changeset
5 --remdups "${remDups}" --assumesorted "${assumeSorted}" --readregex "${readRegex}" --optdupdist "${optDupeDist}"
8d15620a9420 Uploaded
devteam
parents:
diff changeset
6 -j "\$JAVA_JAR_PATH/MarkDuplicates.jar" -d "${html_file.files_path}" -t "${html_file}" -e "${input_file.ext}"
8d15620a9420 Uploaded
devteam
parents:
diff changeset
7 </command>
8d15620a9420 Uploaded
devteam
parents:
diff changeset
8 <requirements><requirement type="package" version="1.106.0">picard</requirement></requirements>
8d15620a9420 Uploaded
devteam
parents:
diff changeset
9 <inputs>
8d15620a9420 Uploaded
devteam
parents:
diff changeset
10 <param format="bam,sam" name="input_file" type="data" label="SAM/BAM dataset to mark duplicates in"
8d15620a9420 Uploaded
devteam
parents:
diff changeset
11 help="If empty, upload or import a SAM/BAM dataset."/>
8d15620a9420 Uploaded
devteam
parents:
diff changeset
<!-- Free-text job title: handed to picard_wrapper.py via -n and interpolated into both output labels. -->
12 <param name="out_prefix" value="Dupes Marked" type="text"
8d15620a9420 Uploaded
devteam
parents:
diff changeset
13 label="Title for the output file" help="Use this to remind you what the job was for" size="80" />
8d15620a9420 Uploaded
devteam
parents:
diff changeset
<!-- Boolean default comes from `checked`; it stays off so duplicates are flagged rather than dropped unless the user opts in (Picard REMOVE_DUPLICATES defaults to false). -->
14 <param name="remDups" value="false" type="boolean" label="Remove duplicates from output file"
8d15620a9420 Uploaded
devteam
parents:
diff changeset
15 truevalue="true" falsevalue="false" checked="no"
8d15620a9420 Uploaded
devteam
parents:
diff changeset
16 help="If true do not write duplicates to the output file instead of writing them with appropriate flags set." />
8d15620a9420 Uploaded
devteam
parents:
diff changeset
17 <param name="assumeSorted" value="true" type="boolean" label="Assume reads are already ordered"
8d15620a9420 Uploaded
devteam
parents:
diff changeset
18 truevalue="true" falsevalue="false" checked="yes"
8d15620a9420 Uploaded
devteam
parents:
diff changeset
19 help="If true assume input data are already sorted (most Galaxy SAM/BAM should be)." />
8d15620a9420 Uploaded
devteam
parents:
diff changeset
20 <param name="readRegex" value="[a-zA-Z0-9]+:[0-9]:([0-9]+):([0-9]+):([0-9]+).*" type="text" size="80"
8d15620a9420 Uploaded
devteam
parents:
diff changeset
21 label="Regular expression that can be used to parse read names in the incoming SAM file"
8d15620a9420 Uploaded
devteam
parents:
diff changeset
22 help="Names are parsed to extract: tile/region, x coordinate and y coordinate, to estimate optical duplication rate" >
8d15620a9420 Uploaded
devteam
parents:
diff changeset
23 <sanitizer>
8d15620a9420 Uploaded
devteam
parents:
diff changeset
24 <valid initial="string.printable">
8d15620a9420 Uploaded
devteam
parents:
diff changeset
25 <remove value="&apos;"/>
8d15620a9420 Uploaded
devteam
parents:
diff changeset
26 </valid>
8d15620a9420 Uploaded
devteam
parents:
diff changeset
27 <mapping initial="none">
8d15620a9420 Uploaded
devteam
parents:
diff changeset
28 <add source="&apos;" target="__sq__"/>
8d15620a9420 Uploaded
devteam
parents:
diff changeset
29 </mapping>
8d15620a9420 Uploaded
devteam
parents:
diff changeset
30 </sanitizer>
8d15620a9420 Uploaded
devteam
parents:
diff changeset
31 </param>
8d15620a9420 Uploaded
devteam
parents:
diff changeset
<!-- Maximum optical-duplicate offset, passed to picard_wrapper.py as the optdupdist option; zero is accepted, negative values are rejected. -->
32 <param name="optDupeDist" value="100" type="integer"
8d15620a9420 Uploaded
devteam
parents:
diff changeset
33 label="The maximum offset between two duplicate clusters in order to consider them optical duplicates." size="5"
8d15620a9420 Uploaded
devteam
parents:
diff changeset
34 help="e.g. 5-10 pixels. Later Illumina software versions multiply pixel values by 10, in which case 50-100." >
8d15620a9420 Uploaded
devteam
parents:
diff changeset
35 <validator type="in_range" message="Optical duplicate distance must not be negative" min="0" />
8d15620a9420 Uploaded
devteam
parents:
diff changeset
36 </param>
8d15620a9420 Uploaded
devteam
parents:
diff changeset
37
8d15620a9420 Uploaded
devteam
parents:
diff changeset
38 </inputs>
8d15620a9420 Uploaded
devteam
parents:
diff changeset
39 <outputs>
8d15620a9420 Uploaded
devteam
parents:
diff changeset
40 <data format="bam" name="out_file" label="MarkDups_${out_prefix}.bam"/>
8d15620a9420 Uploaded
devteam
parents:
diff changeset
41 <data format="html" name="html_file" label="MarkDups_${out_prefix}.html"/>
8d15620a9420 Uploaded
devteam
parents:
diff changeset
42 </outputs>
8d15620a9420 Uploaded
devteam
parents:
diff changeset
43 <tests>
8d15620a9420 Uploaded
devteam
parents:
diff changeset
44 <test>
8d15620a9420 Uploaded
devteam
parents:
diff changeset
45 <param name="input_file" value="picard_input_tiny_coord.bam" ftype="bam" />
8d15620a9420 Uploaded
devteam
parents:
diff changeset
46 <param name="out_prefix" value="Dupes Marked" />
8d15620a9420 Uploaded
devteam
parents:
diff changeset
47 <param name="remDups" value="false" />
8d15620a9420 Uploaded
devteam
parents:
diff changeset
48 <param name="assumeSorted" value="true" />
8d15620a9420 Uploaded
devteam
parents:
diff changeset
49 <param name="readRegex" value="[a-zA-Z0-9]+:[0-9]:([0-9]+):([0-9]+):([0-9]+).*" />
8d15620a9420 Uploaded
devteam
parents:
diff changeset
50 <param name="optDupeDist" value="100" />
8d15620a9420 Uploaded
devteam
parents:
diff changeset
51 <output name="out_file" file="picard_output_markdups_sortedpairsam.bam" ftype="bam" compare="diff" />
8d15620a9420 Uploaded
devteam
parents:
diff changeset
52 <output name="html_file" file="picard_output_markdups_sortedpairsam.html" ftype="html" lines_diff="75" />
8d15620a9420 Uploaded
devteam
parents:
diff changeset
53 </test>
8d15620a9420 Uploaded
devteam
parents:
diff changeset
54 <test>
8d15620a9420 Uploaded
devteam
parents:
diff changeset
55 <param name="input_file" value="picard_input_tiny_coord.sam" ftype="sam" />
8d15620a9420 Uploaded
devteam
parents:
diff changeset
56 <param name="out_prefix" value="Dupes Marked" />
8d15620a9420 Uploaded
devteam
parents:
diff changeset
57 <param name="remDups" value="true" />
8d15620a9420 Uploaded
devteam
parents:
diff changeset
58 <param name="assumeSorted" value="true" />
8d15620a9420 Uploaded
devteam
parents:
diff changeset
59 <param name="readRegex" value="[a-zA-Z0-9]+:[0-9]:([0-9]+):([0-9]+):([0-9]+).*" />
8d15620a9420 Uploaded
devteam
parents:
diff changeset
60 <param name="optDupeDist" value="100" />
8d15620a9420 Uploaded
devteam
parents:
diff changeset
61 <output name="out_file" file="picard_output_markdups_remdupes.bam" ftype="bam" compare="diff" />
8d15620a9420 Uploaded
devteam
parents:
diff changeset
62 <output name="html_file" file="picard_output_markdups_sortedpairsam.html" ftype="html" lines_diff="75" />
8d15620a9420 Uploaded
devteam
parents:
diff changeset
63 </test>
8d15620a9420 Uploaded
devteam
parents:
diff changeset
64 </tests>
8d15620a9420 Uploaded
devteam
parents:
diff changeset
65
8d15620a9420 Uploaded
devteam
parents:
diff changeset
66 <help>
8d15620a9420 Uploaded
devteam
parents:
diff changeset
67
8d15620a9420 Uploaded
devteam
parents:
diff changeset
68 .. class:: infomark
8d15620a9420 Uploaded
devteam
parents:
diff changeset
69
8d15620a9420 Uploaded
devteam
parents:
diff changeset
70 **Purpose**
8d15620a9420 Uploaded
devteam
parents:
diff changeset
71
8d15620a9420 Uploaded
devteam
parents:
diff changeset
72 Marks all duplicate reads in a provided SAM or BAM file and either removes them or flags them.
8d15620a9420 Uploaded
devteam
parents:
diff changeset
73
8d15620a9420 Uploaded
devteam
parents:
diff changeset
74 **Picard documentation**
8d15620a9420 Uploaded
devteam
parents:
diff changeset
75
8d15620a9420 Uploaded
devteam
parents:
diff changeset
76 This is a Galaxy wrapper for MarkDuplicates, a part of the external package Picard-tools_.
8d15620a9420 Uploaded
devteam
parents:
diff changeset
77
8d15620a9420 Uploaded
devteam
parents:
diff changeset
78 .. _Picard-tools: http://broadinstitute.github.io/picard/
8d15620a9420 Uploaded
devteam
parents:
diff changeset
79
8d15620a9420 Uploaded
devteam
parents:
diff changeset
80 -----
8d15620a9420 Uploaded
devteam
parents:
diff changeset
81
8d15620a9420 Uploaded
devteam
parents:
diff changeset
82 .. class:: infomark
8d15620a9420 Uploaded
devteam
parents:
diff changeset
83
8d15620a9420 Uploaded
devteam
parents:
diff changeset
84 **Inputs, outputs, and parameters**
8d15620a9420 Uploaded
devteam
parents:
diff changeset
85
8d15620a9420 Uploaded
devteam
parents:
diff changeset
86 Picard documentation says (reformatted for Galaxy):
8d15620a9420 Uploaded
devteam
parents:
diff changeset
87
8d15620a9420 Uploaded
devteam
parents:
diff changeset
88 .. csv-table:: Mark Duplicates docs
8d15620a9420 Uploaded
devteam
parents:
diff changeset
89 :header-rows: 1
8d15620a9420 Uploaded
devteam
parents:
diff changeset
90
8d15620a9420 Uploaded
devteam
parents:
diff changeset
91 Option,Description
8d15620a9420 Uploaded
devteam
parents:
diff changeset
92 "INPUT=File","The input SAM or BAM file to analyze. Must be coordinate sorted. Required."
8d15620a9420 Uploaded
devteam
parents:
diff changeset
93 "OUTPUT=File","The output file to write marked records to. Required."
8d15620a9420 Uploaded
devteam
parents:
diff changeset
94 "METRICS_FILE=File","File to write duplication metrics to. Required."
8d15620a9420 Uploaded
devteam
parents:
diff changeset
95 "REMOVE_DUPLICATES=Boolean","If true do not write duplicates to the output file instead of writing them with appropriate flags set. Default value: false."
8d15620a9420 Uploaded
devteam
parents:
diff changeset
96 "ASSUME_SORTED=Boolean","If true, assume that the input file is coordinate sorted, even if the header says otherwise. Default value: false."
8d15620a9420 Uploaded
devteam
parents:
diff changeset
97 "MAX_SEQUENCES_FOR_DISK_READ_ENDS_MAP=Integer","This option is obsolete. ReadEnds will always be spilled to disk. Default value: 50000."
8d15620a9420 Uploaded
devteam
parents:
diff changeset
98 "MAX_FILE_HANDLES_FOR_READ_ENDS_MAP=Integer","Maximum number of file handles to keep open when spilling read ends to disk."
8d15620a9420 Uploaded
devteam
parents:
diff changeset
99 "READ_NAME_REGEX=String","Regular expression that can be used to parse read names in the incoming SAM file. Read names are parsed to extract three variables: tile/region, x coordinate and y coordinate. "
8d15620a9420 Uploaded
devteam
parents:
diff changeset
100 "OPTICAL_DUPLICATE_PIXEL_DISTANCE=Integer","The maximum offset between two duplicate clusters in order to consider them optical duplicates. This should usually be set to some fairly small number (e.g. 5-10 pixels) unless using later versions of the Illumina pipeline that multiply pixel values by 10, in which case 50-100 is more normal. Default value: 100"
8d15620a9420 Uploaded
devteam
parents:
diff changeset
101
8d15620a9420 Uploaded
devteam
parents:
diff changeset
102 .. class:: warningmark
8d15620a9420 Uploaded
devteam
parents:
diff changeset
103
8d15620a9420 Uploaded
devteam
parents:
diff changeset
104 **Warning on SAM/BAM quality**
8d15620a9420 Uploaded
devteam
parents:
diff changeset
105
8d15620a9420 Uploaded
devteam
parents:
diff changeset
106 Many SAM/BAM files produced externally and uploaded to Galaxy do not fully conform to SAM/BAM specifications. Galaxy deals with this by using the **LENIENT**
8d15620a9420 Uploaded
devteam
parents:
diff changeset
107 flag when it runs Picard, which allows reads to be discarded if they're empty or don't map. This appears
8d15620a9420 Uploaded
devteam
parents:
diff changeset
108 to be the only way to deal with SAM/BAM that cannot be parsed.
8d15620a9420 Uploaded
devteam
parents:
diff changeset

109 .. class:: infomark
8d15620a9420 Uploaded
devteam
parents:
diff changeset
110
8d15620a9420 Uploaded
devteam
parents:
diff changeset
111 **Note on the Regular Expression**
8d15620a9420 Uploaded
devteam
parents:
diff changeset
112
8d15620a9420 Uploaded
devteam
parents:
diff changeset
113 (from the Picard docs)
8d15620a9420 Uploaded
devteam
parents:
diff changeset
114 This tool requires a valid regular expression to parse out the read names in the incoming SAM or BAM file. These values are used to estimate the rate of optical duplication in order to give a more accurate estimated library size. The regular expression should contain three capture groups for the three variables, in order. Default value: ``[a-zA-Z0-9]+:[0-9]:([0-9]+):([0-9]+):([0-9]+).*``
8d15620a9420 Uploaded
devteam
parents:
diff changeset
115
8d15620a9420 Uploaded
devteam
parents:
diff changeset
116 Examines aligned records in the supplied SAM or BAM file to locate duplicate molecules. All records are then written to the output file with the duplicate records flagged unless the remove duplicates option is selected. In some cases you may want to do this, but please only do this if you really understand what you are doing.
8d15620a9420 Uploaded
devteam
parents:
diff changeset
117
8d15620a9420 Uploaded
devteam
parents:
diff changeset
118 </help>
8d15620a9420 Uploaded
devteam
parents:
diff changeset
119 </tool>
8d15620a9420 Uploaded
devteam
parents:
diff changeset
120
8d15620a9420 Uploaded
devteam
parents:
diff changeset
121
8d15620a9420 Uploaded
devteam
parents:
diff changeset
122
8d15620a9420 Uploaded
devteam
parents:
diff changeset
123
8d15620a9420 Uploaded
devteam
parents:
diff changeset
124
8d15620a9420 Uploaded
devteam
parents:
diff changeset
125
8d15620a9420 Uploaded
devteam
parents:
diff changeset
126
8d15620a9420 Uploaded
devteam
parents:
diff changeset
127
8d15620a9420 Uploaded
devteam
parents:
diff changeset
128
8d15620a9420 Uploaded
devteam
parents:
diff changeset
129
8d15620a9420 Uploaded
devteam
parents:
diff changeset
130
8d15620a9420 Uploaded
devteam
parents:
diff changeset
131