annotate rgPicardMarkDups.xml @ 0:ff4ec13e496e draft

Uploaded tarball to repository
author devteam
date Tue, 23 Oct 2012 10:49:35 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
1 <tool name="Mark Duplicate reads" id="rgPicardMarkDups" version="1.56.0">
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
2 <command interpreter="python">
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
3 picard_wrapper.py -i "$input_file" -n "$out_prefix" --tmpdir "${__new_file_path__}" -o "$out_file"
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
4 --remdups "$remDups" --assumesorted "$assumeSorted" --readregex "$readRegex" --optdupdist "$optDupeDist"
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
5 -j "\$JAVA_JAR_PATH/MarkDuplicates.jar" -d "$html_file.files_path" -t "$html_file" -e "$input_file.ext"
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
6 </command>
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
7 <requirements><requirement type="package" version="1.56.0">picard</requirement></requirements>
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
8 <inputs>
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
9 <param format="bam,sam" name="input_file" type="data" label="SAM/BAM dataset to mark duplicates in"
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
10 help="If empty, upload or import a SAM/BAM dataset."/>
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
11 <param name="out_prefix" value="Dupes Marked" type="text"
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
12 label="Title for the output file" help="Use this remind you what the job was for" size="80" />
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
13 <param name="remDups" value="false" type="boolean" label="Remove duplicates from output file"
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
14 truevalue="true" falsevalue="false" checked="yes"
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
15 help="If true do not write duplicates to the output file instead of writing them with appropriate flags set." />
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
16 <param name="assumeSorted" value="true" type="boolean" label="Assume reads are already ordered"
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
17 truevalue="true" falsevalue="false" checked="yes"
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
18 help="If true assume input data are already sorted (most Galaxy SAM/BAM should be)." />
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
19 <param name="readRegex" value="[a-zA-Z0-9]+:[0-9]:([0-9]+):([0-9]+):([0-9]+).*" type="text" size="80"
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
20 label="Regular expression that can be used to parse read names in the incoming SAM file"
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
21 help="Names are parsed to extract: tile/region, x coordinate and y coordinate, to estimate optical duplication rate" >
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
22 <sanitizer>
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
23 <valid initial="string.printable">
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
24 <remove value="&apos;"/>
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
25 </valid>
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
26 <mapping initial="none">
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
27 <add source="&apos;" target="__sq__"/>
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
28 </mapping>
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
29 </sanitizer>
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
30 </param>
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
31 <param name="optDupeDist" value="100" type="integer"
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
32 label="The maximum offset between two duplicate clusters in order to consider them optical duplicates." size="5"
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
33 help="e.g. 5-10 pixels. Later Illumina software versions multiply pixel values by 10, in which case 50-100." >
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
34 <validator type="in_range" message="Minimum optical dupe distance must be positive" min="0" />
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
35 </param>
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
36
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
37 </inputs>
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
38 <outputs>
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
39 <data format="bam" name="out_file" label="MarkDups_${out_prefix}.bam"/>
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
40 <data format="html" name="html_file" label="MarkDups_${out_prefix}.html"/>
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
41 </outputs>
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
42 <tests>
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
43 <test>
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
44 <param name="input_file" value="picard_input_tiny_coord.bam" ftype="bam" />
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
45 <param name="out_prefix" value="Dupes Marked" />
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
46 <param name="remDups" value="false" />
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
47 <param name="assumeSorted" value="true" />
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
48 <param name="readRegex" value="[a-zA-Z0-9]+:[0-9]:([0-9]+):([0-9]+):([0-9]+).*" />
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
49 <param name="optDupeDist" value="100" />
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
50 <output name="out_file" file="picard_output_markdups_sortedpairsam.bam" ftype="bam" compare="diff" />
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
51 <output name="html_file" file="picard_output_markdups_sortedpairsam.html" ftype="html" lines_diff="75" />
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
52 </test>
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
53 <test>
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
54 <param name="input_file" value="picard_input_tiny_coord.sam" ftype="sam" />
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
55 <param name="out_prefix" value="Dupes Marked" />
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
56 <param name="remDups" value="true" />
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
57 <param name="assumeSorted" value="true" />
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
58 <param name="readRegex" value="[a-zA-Z0-9]+:[0-9]:([0-9]+):([0-9]+):([0-9]+).*" />
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
59 <param name="optDupeDist" value="100" />
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
60 <output name="out_file" file="picard_output_markdups_remdupes.bam" ftype="bam" compare="diff" />
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
61 <output name="html_file" file="picard_output_markdups_sortedpairsam.html" ftype="html" lines_diff="75" />
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
62 </test>
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
63 </tests>
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
64
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
65 <help>
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
66
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
67 .. class:: infomark
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
68
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
69 **Purpose**
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
70
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
71 Marks all duplicate reads in a provided SAM or BAM file and either removes them or flags them.
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
72
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
73 **Picard documentation**
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
74
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
75 This is a Galaxy wrapper for MarkDuplicates, a part of the external package Picard-tools_.
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
76
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
77 .. _Picard-tools: http://www.google.com/search?q=picard+samtools
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
78
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
79 -----
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
80
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
81 .. class:: infomark
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
82
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
83 **Inputs, outputs, and parameters**
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
84
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
85 Picard documentation says (reformatted for Galaxy):
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
86
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
87 .. csv-table:: Mark Duplicates docs
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
88 :header-rows: 1
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
89
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
90 Option,Description
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
91 "INPUT=File","The input SAM or BAM file to analyze. Must be coordinate sorted. Required."
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
92 "OUTPUT=File","The output file to write marked records to. Required."
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
93 "METRICS_FILE=File","File to write duplication metrics to. Required."
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
94 "REMOVE_DUPLICATES=Boolean","If true do not write duplicates to the output file instead of writing them with appropriate flags set. Default value: false."
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
95 "ASSUME_SORTED=Boolean","If true, assume that the input file is coordinate sorted, even if the header says otherwise. Default value: false."
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
96 "MAX_SEQUENCES_FOR_DISK_READ_ENDS_MAP=Integer","This option is obsolete. ReadEnds will always be spilled to disk. Default value: 50000."
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
97 "MAX_FILE_HANDLES_FOR_READ_ENDS_MAP=Integer","Maximum number of file handles to keep open when spilling read ends to disk."
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
98 "READ_NAME_REGEX=String","Regular expression that can be used to parse read names in the incoming SAM file. Read names are parsed to extract three variables: tile/region, x coordinate and y coordinate. "
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
99 "OPTICAL_DUPLICATE_PIXEL_DISTANCE=Integer","The maximum offset between two duplicate clusters in order to consider them optical duplicates. This should usually be set to some fairly small number (e.g. 5-10 pixels) unless using later versions of the Illumina pipeline that multiply pixel values by 10, in which case 50-100 is more normal. Default value: 100"
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
100
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
101 .. class:: warningmark
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
102
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
103 **Warning on SAM/BAM quality**
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
104
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
105 Many SAM/BAM files produced externally and uploaded to Galaxy do not fully conform to SAM/BAM specifications. Galaxy deals with this by using the **LENIENT**
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
106 flag when it runs Picard, which allows reads to be discarded if they're empty or don't map. This appears
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
107 to be the only way to deal with SAM/BAM that cannot be parsed.
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
108 .. class:: infomark
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
109
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
110 **Note on the Regular Expression**
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
111
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
112 (from the Picard docs)
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
113 This tool requires a valid regular expression to parse the read names in the incoming SAM or BAM file. Read names are parsed to extract three variables: tile/region, x coordinate and y coordinate. These values are used to estimate the rate of optical duplication in order to give a more accurate estimated library size. The regular expression should contain three capture groups for the three variables, in order. Default value: ``[a-zA-Z0-9]+:[0-9]:([0-9]+):([0-9]+):([0-9]+).*``
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
114
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
115 Examines aligned records in the supplied SAM or BAM file to locate duplicate molecules. All records are then written to the output file with the duplicate records flagged, unless the remove duplicates option is selected, in which case duplicates are not written at all. In some cases you may want to remove duplicates, but please only do so if you really understand what you are doing.
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
116
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
117 </help>
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
118 </tool>
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
119
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
120
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
121
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
122
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
123
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
124
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
125
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
126
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
127
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
128
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
129
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
130