comparison picard_GCBiasMetrics.xml @ 104:0cab05ac1cbd draft

Uploaded
author devteam
date Mon, 24 Feb 2014 23:29:58 -0500
parents
children
comparison
equal deleted inserted replaced
103:ae3d9a7c8399 104:0cab05ac1cbd
1 <tool name="SAM/BAM GC Bias Metrics" id="PicardGCBiasMetrics" version="1.106.0">
<!-- Cheetah command template. Runs picard_wrapper.py with the input dataset, the HTML output
     and its extra-files directory, the window size, the minimum genome fraction, the output
     title, a temp directory and the CollectGcBiasMetrics jar. The reference is passed as a
     FASTA file from the history when refGenomeSource is "history"; for the other two choices
     ("default" and "indexed") the path field of the selected all_fasta entry is used.
     NOTE(review): JAVA_JAR_PATH is escaped so it is left for the shell to expand; presumably
     it is set by the picard package requirement below (confirm). -->
2 <command interpreter="python">
3 picard_wrapper.py -i "${input_file}" -d "${html_file.files_path}" -t "${html_file}"
4 --windowsize "${windowsize}" --mingenomefrac "${mingenomefrac}" -n "${out_prefix}" --tmpdir "${__new_file_path__}"
5 -j "\$JAVA_JAR_PATH/CollectGcBiasMetrics.jar"
6 #if $genomeSource.refGenomeSource == "history":
7 --ref-file "${genomeSource.ownFile}"
8 #else:
9 --ref "${genomeSource.index.fields.path}"
10 #end if
11 </command>
<!-- Declares the picard package (version 1.106.0) as a dependency of this tool. -->
12 <requirements><requirement type="package" version="1.106.0">picard</requirement></requirements>
<!-- Tool parameters: the SAM/BAM input dataset, a title for the output, the reference genome
     source (assigned build, a built-in genome, or a FASTA dataset from the history), and the
     window size and minimum genome fraction values handed to picard_wrapper.py. -->
13 <inputs>
14 <param format="sam,bam" name="input_file" type="data" label="SAM/BAM dataset to generate GC bias metrics"
15 help="If empty, upload or import a SAM/BAM dataset."/>
16 <param name="out_prefix" value="Short Read GC Bias Metrics" type="text"
17 label="Title for the output file" help="Use this to remind you what the job was for." size="80" />
18 <conditional name="genomeSource">
19 <param name="refGenomeSource" type="select" label="Select Reference Genome">
20 <option value="default" selected="true">Use the assigned data genome/build</option>
21 <option value="indexed">Select a different built-in genome</option>
22 <option value="history">Use a genome (fasta format) from my history</option>
23 </param>
24 <when value="default">
25 <param name="index" type="select" label="Check the assigned reference genome" help="Galaxy thinks that the reads in your dataset were aligned against this reference. If this is not correct, use the 'Select a different built-in genome' option of the 'Select Reference Genome' dropdown to select the appropriate reference.">
26 <options from_data_table="all_fasta">
27 <filter type="data_meta" ref="input_file" key="dbkey" column="1" multiple="True" separator=","/>
28 <validator type="no_options" message="No reference build available for the selected input data" />
29 </options>
30 </param>
31 </when>
32 <when value="indexed">
33 <param name="index" type="select" label="Select a built-in reference genome" help="This list contains genomes cached at this Galaxy instance. If your genome of interest is not present here request it by using 'Help' link at the top of Galaxy interface or use the 'Use a genome (fasta format) from my history' option of the 'Select Reference Genome' dropdown.">
34 <options from_data_table="all_fasta"/>
35 </param>
36 </when>
37 <when value="history">
38 <param name="ownFile" type="data" format="fasta" metadata_name="dbkey" label="Select a reference genome from history" help="This option works best for relatively small genomes. If you are working with large human-sized genomes, send request to Galaxy team for adding your reference to this Galaxy instance by using 'Help' link at the top of Galaxy interface."/>
39 </when>
40 </conditional>
41 <param name="windowsize" type="integer" label="GC minimum window size" value="100"
42 help="The size of windows on the genome that are used to bin reads. Default value: 100."/>
43 <param name="mingenomefrac" value="0.00001" type="float" label="Minimum Genome Fraction"
44 help="For summary metrics, exclude GC windows that include less than this fraction of the genome. Default value: 1.0E-5." />
45 <!--
46
47 Users can be enabled to set Java heap size by uncommenting this option and adding '-x "$maxheap"' to the <command> tag.
48 If commented out the heapsize defaults to the value specified within picard_wrapper.py
49
50 <param name="maxheap" type="select" help="If in doubt, choose 8G and read Picard documentation please"
51 label="Java heap size">
52 <option value="1G">1GB: very small data</option>
53 <option value="2G" selected="true">2GB</option>
54 <option value="4G">4GB for larger datasets </option>
55 <option value="8G" >8GB use if 4GB fails</option>
56 <option value="16G">16GB - try this if 8GB fails</option>
57 </param>
58
59 -->
60
61 </inputs>
<!-- Single HTML output dataset, labelled with the user-supplied title. -->
62 <outputs>
63 <data name="html_file" format="html" label="${out_prefix}.html"/>
64 </outputs>
<!-- Functional test: uses a trimmed hg18 FASTA from the history as the reference together with
     a SAM input (both tagged dbkey hg18), and compares the HTML output with the stored expected
     file using lines_diff="50". -->
65 <tests>
66 <test>
67 <!-- Uncomment this if maxheap is enabled above
68 <param name="maxheap" value="8G" />
69 -->
70 <param name="out_prefix" value="CollectGCBias" />
71 <param name="windowsize" value="100" />
72 <param name="mingenomefrac" value="0.00001" />
73 <param name="refGenomeSource" value="history" />
74 <param name="ownFile" value="picard_input_hg18.trimmed.fasta" dbkey="hg18" />
75 <param name="input_file" value="picard_input_summary_alignment_stats.sam" ftype="sam" dbkey="hg18"/>
76 <output name="html_file" file="picard_output_GcBias_uploaded_hg18_summary_alignment_stats.html" ftype="html" lines_diff="50"/>
77 </test>
78 </tests>
79 <help>
80
81
82 .. class:: infomark
83
84 **Summary**
85
86 This Galaxy tool uses Picard to report detailed metrics about reads that fall within windows of a certain GC bin on the reference genome.
87
88 Requires R to be installed on the Galaxy server (it generally is).
89
90 **Picard documentation**
91
92 This is a Galaxy wrapper for CollectGcBiasMetrics, a part of the external package Picard-tools_.
93
94 .. _Picard-tools: http://www.google.com/search?q=picard+samtools
95
96 -----
97
98 .. class:: infomark
99
100 **Syntax**
101
102 - **Input** - SAM/BAM format aligned short read data in your current history
103 - **Title** - the title to use for all output files from this job - use it for high level metadata
104 - **Reference Genome** - Galaxy (and Picard) needs to know which genomic reference was used to generate alignments within the input SAM/BAM dataset. Here you have three choices:
105
106 - *Assigned data genome/build* - a genome specified for this dataset. If your SAM/BAM dataset has an assigned reference genome it will be displayed below this dropdown. If it does not -> use one of the following two options.
107 - *Select a different built-in genome* - this option will list all reference genomes presently cached at this instance of Galaxy.
108 - *Select a reference genome from history* - alternatively you can upload your own version of the reference genome into your history and use it with this option. This is however not advisable with large human-sized genomes. If your genome is large, contact the Galaxy team using the "Help" link at the top of the interface and provide exact details on where we can download the sequences you would like to use as the reference. We will then install them as a part of locally cached genomic references.
109
110 - **Window Size** - see Picard documentation at http://picard.sourceforge.net/command-line-overview.shtml#CollectGCBiasMetrics
111 - **Minimum Genome Fraction** - see Picard documentation at http://picard.sourceforge.net/command-line-overview.shtml#CollectGCBiasMetrics
112
113 -----
114
115 .. class:: infomark
116
117 **Inputs, outputs, and parameters**
118
119 The Picard documentation (reformatted for Galaxy) says:
120
121 .. csv-table::
122 :header-rows: 1
123
124 Option,Description
125 "REFERENCE_SEQUENCE=File","The reference sequence fasta file. Required."
126 "INPUT=File","The BAM or SAM file containing aligned reads. Required."
127 "OUTPUT=File","The text file to write the metrics table to. Required."
128 "CHART_OUTPUT=File","The PDF file to render the chart to. Required."
129 "SUMMARY_OUTPUT=File","The text file to write summary metrics to. Default value: null."
130 "WINDOW_SIZE=Integer","The size of windows on the genome that are used to bin reads. Default value: 100."
131 "MINIMUM_GENOME_FRACTION=Double","For summary metrics, exclude GC windows that include less than this fraction of the genome. Default value: 1.0E-5."
132 "CREATE_MD5_FILE=Boolean","Whether to create an MD5 digest for any BAM files created. Default value: false."
133
134 The output produced by the tool has the following columns::
135
136 1. GC: The G+C content of the reference sequence represented by this bin. Values are from 0% to 100%
137 2. WINDOWS: The number of windows on the reference genome that have this G+C content.
138 3. READ_STARTS: The number of reads whose start position is at the start of a window of this GC.
139 4. MEAN_BASE_QUALITY: The mean quality (determined via the error rate) of all bases of all reads that are assigned to windows of this GC.
140 5. NORMALIZED_COVERAGE: The ratio of "coverage" in this GC bin vs. the mean coverage of all GC bins. A number of 1 represents mean coverage, a number less than one represents lower than mean coverage (e.g. 0.5 means half as much coverage as average) while a number greater than one represents higher than mean coverage (e.g. 3.1 means this GC bin has 3.1 times more reads per window than average).
141 6. ERROR_BAR_WIDTH: The radius of error bars in this bin based on the number of observations made. For example if the normalized coverage is 0.75 and the error bar width is 0.1 then the error bars would be drawn from 0.65 to 0.85.
142
143 .. class:: warningmark
144
145 **Warning on SAM/BAM quality**
146
147 Many SAM/BAM files produced externally and uploaded to Galaxy do not fully conform to SAM/BAM specifications. Galaxy deals with this by using the **LENIENT**
148 flag when it runs Picard, which allows reads to be discarded if they're empty or don't map. This appears
149 to be the only way to deal with SAM/BAM that cannot be parsed.
150
151 </help>
152 </tool>