annotate rgPicardHsMetrics.xml @ 2:93ace7e49295 draft

Uploaded valid tools.
author devteam
date Fri, 21 Feb 2014 12:06:18 -0500
parents ff4ec13e496e
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
1 <tool name="SAM/BAM Hybrid Selection Metrics" id="PicardHsMetrics" version="1.56.0">
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
2 <description>for targeted resequencing data</description>
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
3 <command interpreter="python">
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
4
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
5 picard_wrapper.py -i "$input_file" -d "$html_file.files_path" -t "$html_file" --datatype "$input_file.ext"
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
6 --baitbed "$bait_bed" --targetbed "$target_bed" -n "$out_prefix" --tmpdir "${__new_file_path__}"
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
7 -j "\$JAVA_JAR_PATH/CalculateHsMetrics.jar"
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
8
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
9 </command>
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
10 <requirements><requirement type="package" version="1.56.0">picard</requirement></requirements>
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
11 <inputs>
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
12 <param format="sam,bam" name="input_file" type="data" label="SAM/BAM dataset to generate statistics for" />
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
13 <param name="out_prefix" value="Picard HS Metrics" type="text" label="Title for the output file" help="Use to remind you what the job was for." size="80" />
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
14 <param name="bait_bed" type="data" format="bed,interval" label="Bait intervals: Sequences for bait in the design" help="Note specific format requirements below!" size="80" />
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
15 <param name="target_bed" type="data" format="bed,interval" label="Target intervals: Sequences for targets in the design" help="Note specific format requirements below!" size="80" />
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
16 <!--
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
17
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
18 Users can be enabled to set Java heap size by uncommenting this option and adding '-x "$maxheap"' to the <command> tag.
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
19 If commented out the heapsize defaults to the value specified within picard_wrapper.py
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
20
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
21 <param name="maxheap" type="select"
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
22 help="If in doubt, try the default. If it fails with a complaint about java heap size, try increasing it please - larger jobs will require your own hardware."
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
23 label="Java heap size">
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
24 <option value="4G" selected = "true">4GB default </option>
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
25 <option value="8G" >8GB use if 4GB fails</option>
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
26 <option value="16G">16GB - try this if 8GB fails</option>
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
27 </param>
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
28
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
29 -->
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
30 </inputs>
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
31 <outputs>
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
32 <data format="html" name="html_file" label="${out_prefix}.html" />
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
33 </outputs>
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
34 <tests>
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
35 <test>
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
36 <!-- Uncomment this if maxheap parameter is enabled
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
37 <param name="maxheap" value="8G" />
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
38 -->
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
39 <param name="out_prefix" value="HSMetrics" />
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
40 <param name="input_file" value="picard_input_summary_alignment_stats.sam" ftype="sam" />
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
41 <param name="bait_bed" value="picard_input_bait.bed" />
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
42 <param name="target_bed" value="picard_input_bait.bed" />
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
43 <output name="html_file" file="picard_output_hs_transposed_summary_alignment_stats.html" ftype="html" lines_diff="212"/>
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
44 </test>
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
45 </tests>
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
46 <help>
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
47
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
48 .. class:: infomark
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
49
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
50 **Summary**
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
51
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
52 Calculates a set of Hybrid Selection specific metrics from an aligned SAM or BAM file.
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
53
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
54 .. class:: warningmark
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
55
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
56 **WARNING about bait and target files**
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
57
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
58 Picard is very fussy about the bait and target file format. If these are not exactly right, it will fail with an error something like:
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
59
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
60 Exception in thread "main" net.sf.picard.PicardException: Invalid interval record contains 6 fields: chr1 45787123 45787316 CASO_22G_25063 1000 +
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
61
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
62 If you see an error like that from this tool, please do NOT report it to any of the Galaxy mailing lists as it is not a bug!
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
63 It means you must reformat your bait and target files. Unfortunately, Galaxy cannot do that for you automatically.
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
64
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
65 The required definition is described in the documentation at http://www.broadinstitute.org/gsa/wiki/index.php/Built-in_command-line_arguments
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
66 and the sample provided looks like this:
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
67
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
68 chr1 1104841 1104940 + target_1
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
69 chr1 1105283 1105599 + target_2
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
70 chr1 1105712 1105860 + target_3
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
71 chr1 1105960 1106119 + target_4
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
72
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
73 So your bait and target files MUST have 5 tab-delimited columns (chr, start, end, strand and name) in exactly that order.
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
74 Note that the Picard mandated sam header described in the documentation linked above is automagically added by the tool in Galaxy.
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
75
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
76 .. class:: infomark
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
77
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
78 **Picard documentation**
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
79
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
80 This is a Galaxy wrapper for CalculateHsMetrics.jar, a part of the external package Picard-tools_.
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
81
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
82 .. _Picard-tools: http://www.google.com/search?q=picard+samtools
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
83
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
84 -----
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
85
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
86 .. class:: infomark
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
87
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
88 **Inputs, outputs, and parameters**
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
89
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
90 Picard documentation says (reformatted for Galaxy):
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
91
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
92 Calculates a set of Hybrid Selection specific metrics from an aligned SAM or BAM file.
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
93
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
94 .. csv-table::
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
95 :header-rows: 1
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
96
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
97 "Option", "Description"
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
98 "BAIT_INTERVALS=File","An interval list file that contains the locations of the baits used. Required."
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
99 "TARGET_INTERVALS=File","An interval list file that contains the locations of the targets. Required."
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
100 "INPUT=File","An aligned SAM or BAM file. Required."
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
101 "OUTPUT=File","The output file to write the metrics to. Required. Cannot be used in conjunction with option(s) METRICS_FILE (M)"
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
102 "METRICS_FILE=File","Legacy synonym for OUTPUT, should not be used. Required. Cannot be used in conjunction with option(s) OUTPUT (O)"
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
103 "CREATE_MD5_FILE=Boolean","Whether to create an MD5 digest for any BAM files created. Default value: false"
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
104
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
105 HsMetrics
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
106
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
107 The set of metrics captured that are specific to a hybrid selection analysis.
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
108
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
109 Output Column Definitions::
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
110
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
111 1. BAIT_SET: The name of the bait set used in the hybrid selection.
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
112 2. GENOME_SIZE: The number of bases in the reference genome used for alignment.
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
113 3. BAIT_TERRITORY: The number of bases which have one or more baits on top of them.
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
114 4. TARGET_TERRITORY: The unique number of target bases in the experiment where target is usually exons etc.
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
115 5. BAIT_DESIGN_EFFICIENCY: Target territory / bait territory. 1 == perfectly efficient, 0.5 = half of baited bases are not target.
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
116 6. TOTAL_READS: The total number of reads in the SAM or BAM file examined.
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
117 7. PF_READS: The number of reads that pass the vendor's filter.
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
118 8. PF_UNIQUE_READS: The number of PF reads that are not marked as duplicates.
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
119 9. PCT_PF_READS: PF reads / total reads. The percent of reads passing filter.
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
120 10. PCT_PF_UQ_READS: PF Unique Reads / Total Reads.
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
121 11. PF_UQ_READS_ALIGNED: The number of PF unique reads that are aligned with mapping score > 0 to the reference genome.
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
122 12. PCT_PF_UQ_READS_ALIGNED: PF Reads Aligned / PF Reads.
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
123 13. PF_UQ_BASES_ALIGNED: The number of bases in the PF aligned reads that are mapped to a reference base. Accounts for clipping and gaps.
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
124 14. ON_BAIT_BASES: The number of PF aligned bases that mapped to a baited region of the genome.
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
125 15. NEAR_BAIT_BASES: The number of PF aligned bases that mapped to within a fixed interval of a baited region, but not on a baited region.
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
126 16. OFF_BAIT_BASES: The number of PF aligned bases that mapped neither on nor near a bait.
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
127 17. ON_TARGET_BASES: The number of PF aligned bases that mapped to a targeted region of the genome.
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
128 18. PCT_SELECTED_BASES: On+Near Bait Bases / PF Bases Aligned.
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
129 19. PCT_OFF_BAIT: The percentage of aligned PF bases that mapped neither on nor near a bait.
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
130 20. ON_BAIT_VS_SELECTED: The percentage of on+near bait bases that are on as opposed to near.
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
131 21. MEAN_BAIT_COVERAGE: The mean coverage of all baits in the experiment.
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
132 22. MEAN_TARGET_COVERAGE: The mean coverage of targets that received at least coverage depth = 2 at one base.
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
133 23. PCT_USABLE_BASES_ON_BAIT: The number of aligned, de-duped, on-bait bases out of the PF bases available.
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
134 24. PCT_USABLE_BASES_ON_TARGET: The number of aligned, de-duped, on-target bases out of the PF bases available.
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
135 25. FOLD_ENRICHMENT: The fold by which the baited region has been amplified above genomic background.
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
136 26. ZERO_CVG_TARGETS_PCT: The fraction of targets that did not reach coverage=2 over any base.
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
137 27. FOLD_80_BASE_PENALTY: The fold over-coverage necessary to raise 80% of bases in "non-zero-cvg" targets to the mean coverage level in those targets.
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
138 28. PCT_TARGET_BASES_2X: The percentage of ALL target bases achieving 2X or greater coverage.
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
139 29. PCT_TARGET_BASES_10X: The percentage of ALL target bases achieving 10X or greater coverage.
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
140 30. PCT_TARGET_BASES_20X: The percentage of ALL target bases achieving 20X or greater coverage.
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
141 31. PCT_TARGET_BASES_30X: The percentage of ALL target bases achieving 30X or greater coverage.
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
142 32. HS_LIBRARY_SIZE: The estimated number of unique molecules in the selected part of the library.
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
143 33. HS_PENALTY_10X: The "hybrid selection penalty" incurred to get 80% of target bases to 10X. This metric should be interpreted as: if I have a design with 10 megabases of target, and want to get 10X coverage I need to sequence until PF_ALIGNED_BASES = 10^7 * 10 * HS_PENALTY_10X.
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
144 34. HS_PENALTY_20X: The "hybrid selection penalty" incurred to get 80% of target bases to 20X. This metric should be interpreted as: if I have a design with 10 megabases of target, and want to get 20X coverage I need to sequence until PF_ALIGNED_BASES = 10^7 * 20 * HS_PENALTY_20X.
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
145 35. HS_PENALTY_30X: The "hybrid selection penalty" incurred to get 80% of target bases to 30X. This metric should be interpreted as: if I have a design with 10 megabases of target, and want to get 30X coverage I need to sequence until PF_ALIGNED_BASES = 10^7 * 30 * HS_PENALTY_30X.
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
146
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
147 .. class:: warningmark
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
148
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
149 **Warning on SAM/BAM quality**
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
150
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
151 Many SAM/BAM files produced externally and uploaded to Galaxy do not fully conform to SAM/BAM specifications. Galaxy deals with this by using the **LENIENT**
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
152 flag when it runs Picard, which allows reads to be discarded if they're empty or don't map. This appears to be the only way to deal with SAM/BAM that cannot be parsed.
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
153
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
154
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
155 </help>
ff4ec13e496e Uploaded tarball to repository
devteam
parents:
diff changeset
156 </tool>