picard: picard_AddOrReplaceReadGroups.xml annotate

author	devteam
date	Tue, 23 Oct 2012 10:49:35 -0400 (2012-10-23)
parents
children	52fdfc45590a

rev	line source
0 ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	1 <tool name="Add or Replace Groups" id="picard_ARRG" version="1.56.0">
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	2 <requirements><requirement type="package" version="1.56.0">picard</requirement></requirements>
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	3 <command interpreter="python">
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	4 picard_wrapper.py
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	5 --input="$inputFile"
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	6 --rg-lb="$rglb"
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	7 --rg-pl="$rgpl"
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	8 --rg-pu="$rgpu"
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	9 --rg-sm="$rgsm"
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	10 --rg-id="$rgid"
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	11 --rg-opts=${readGroupOpts.rgOpts}
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	12 #if $readGroupOpts.rgOpts == "full"
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	13 --rg-cn="$readGroupOpts.rgcn"
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	14 --rg-ds="$readGroupOpts.rgds"
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	15 #end if
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	16 --output-format=$outputFormat
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	17 --output=$outFile
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	18 -j "\$JAVA_JAR_PATH/AddOrReplaceReadGroups.jar"
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	19 --tmpdir "${__new_file_path__}"
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	20 </command>
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	21 <inputs>
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	22 <param format="bam,sam" name="inputFile" type="data" label="SAM/BAM dataset to add or replace read groups in"
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	23 help="If empty, upload or import a SAM/BAM dataset." />
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	24 <param name="rgid" value="1" type="text" label="Read group ID (ID tag)" help="The most important read group tag. Galaxy will use a value of '1' if nothing provided." />
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	25 <param name="rgsm" value="" type="text" label="Read group sample name (SM tag)" />
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	26 <param name="rglb" value="" type="text" label="Read group library (LB tag)" />
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	27 <param name="rgpl" value="" type="text" label="Read group platform (PL tag)" help="illumina, solid, 454, pacbio, helicos" />
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	28 <param name="rgpu" value="" type="text" label="Read group platform unit" help="like run barcode, etc." />
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	29 <conditional name="readGroupOpts">
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	30 <param name="rgOpts" type="select" label="Specify additional (optional) arguments" help="Allows you to set RGCN and RGDS.">
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	31 <option value="preSet">Use pre-set defaults</option>
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	32 <option value="full">Set optional arguments</option>
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	33 </param>
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	34 <when value="preSet" />
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	35 <when value="full">
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	36 <param name="rgcn" value="" type="text" label="Read group sequencing center name" help="Leave set to <null> for default (none)" />
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	37 <param name="rgds" value="" type="text" label="Read group description" help="Leave set to <null> for default (none)" />
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	38 </when>
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	39 </conditional>
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	40 <param name="outputFormat" type="boolean" checked="True" truevalue="bam" falsevalue="sam" label="Output bam instead of sam" help="Uncheck for sam output" />
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	41 </inputs>
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	42 <outputs>
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	43 <data name="outFile" format="bam" label="${tool.name} on ${on_string}: ${outputFormat} with read groups replaced">
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	44 <change_format>
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	45 <when input="outputFormat" value="sam" format="sam" />
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	46 </change_format>
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	47 </data>
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	48 </outputs>
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	49 <tests>
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	50 <test>
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	51 <!-- Command for replacing read groups in bam:
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	52 java -jar AddOrReplaceReadGroups.jar VALIDATION_STRINGENCY=LENIENT I=test-data/picard_ARRG_input1.bam O=picard_ARRG_output1.sam RGID=one RGLB=lib RGPL=illumina RGPU=peaewe RGSM=sam1
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	53 -->
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	54 <param name="inputFile" value="picard_ARRG_input1.bam" />
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	55 <param name="rglb" value="lib" />
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	56 <param name="rgpl" value="illumina" />
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	57 <param name="rgpu" value="peaewe" />
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	58 <param name="rgsm" value="sam1" />
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	59 <param name="rgid" value="one" />
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	60 <param name="rgOpts" value="preSet" />
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	61 <param name="outputFormat" value="False" />
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	62 <output name="outFile" file="picard_ARRG_output1.sam" ftype="sam" />
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	63 </test>
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	64 <test>
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	65 <!-- Command for replacing read groups in sam:
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	66 java -jar AddOrReplaceReadGroups.jar VALIDATION_STRINGENCY=LENIENT I=test-data/picard_ARRG_input1.sam O=picard_ARRG_output2.sam RGLB=LIB RGPL=IL RGPU=PLAT RGSM=smp RGID=M5 RGCN=FamousCenter RGDS="description with spaces"
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	67 picard_ARRG_input1.bam can be created from picard_ARRG_input1.sam
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	68 -->
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	69 <param name="inputFile" value="picard_ARRG_input1.sam" />
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	70 <param name="rglb" value="LIB" />
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	71 <param name="rgpl" value="IL" />
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	72 <param name="rgpu" value="PLAT" />
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	73 <param name="rgsm" value="smp" />
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	74 <param name="rgid" value="M5" />
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	75 <param name="rgOpts" value="full" />
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	76 <param name="rgcn" value="FamousCenter" />
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	77 <param name="rgds" value="description with spaces" />
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	78 <param name="outputFormat" value="False" />
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	79 <output name="outFile" file="picard_ARRG_output2.sam" ftype="sam" />
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	80 </test>
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	81 <test>
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	82 <!-- Command for adding read groups in sam:
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	83 java -jar AddOrReplaceReadGroups.jar VALIDATION_STRINGENCY=LENIENT I=test-data/picard_ARRG_input2.sam O=picard_ARRG_output3.bam RGID=M6 RGLB=LIB RGPL=IL RGPU=PLAT RGSM=smp1
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	84 -->
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	85 <param name="inputFile" value="picard_ARRG_input2.sam" />
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	86 <param name="rglb" value="LIB" />
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	87 <param name="rgpl" value="IL" />
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	88 <param name="rgpu" value="PLAT" />
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	89 <param name="rgsm" value="smp1" />
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	90 <param name="rgid" value="M6" />
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	91 <param name="rgOpts" value="preSet" />
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	92 <param name="outputFormat" value="True" />
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	93 <output name="outFile" file="picard_ARRG_output3.bam" ftype="bam" />
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	94 </test>
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	95 </tests>
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	96 <help>
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	97
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	98 .. class:: infomark
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	99
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	100 Purpose
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	101
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	102 Add or Replace Read Groups in an input BAM or SAM file.
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	103
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	104 Read Groups are Important!
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	105
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	106 Many downstream analysis tools (such as GATK, for example) require BAM datasets to contain read groups. Even if you are not going to use GATK, setting read groups correctly from the start will simplify your life greatly. Below we provide an explanation of read groups fields taken from GATK FAQ webpage:
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	107
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	108 .. csv-table::
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	109 :header-rows: 1
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	110
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	111 Tag,Importance,Definition,Meaning
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	112 "ID","Required","Read group identifier. Each @RG line must have a unique ID. The value of ID is used in the RG tags of alignment records. Must be unique among all read groups in header section. Read group IDs may be modified when merging SAM files in order to handle collisions.","Ideally, this should be a globally unique identifier across all sequencing data in the world, such as the Illumina flowcell + lane name and number. Will be referenced by each read with the RG:Z field, allowing tools to determine the read group information associated with each read, including the sample from which the read came. Also, a read group is effectively treated as a separate run of the NGS instrument in tools like base quality score recalibration (a GATK component) -- all reads within a read group are assumed to come from the same instrument run and to therefore share the same error model."
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	113 "SM","Required. As important as ID.","Sample. Use pool name where a pool is being sequenced.","The name of the sample sequenced in this read group. GATK tools treat all read groups with the same SM value as containing sequencing data for the same sample. Therefore it's critical that the SM field be correctly specified, especially when using multi-sample tools like the Unified Genotyper (a GATK component)."
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	114 "PL","Important. Not currently used in the GATK, but was in the past, and may return. The only way to know the sequencing technology used to generate the sequencing data","Platform/technology used to produce the read. Valid values: ILLUMINA, SOLID, LS454, HELICOS and PACBIO.","It's a good idea to use this field."
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	115 "LB","Essential for MarkDuplicates","DNA preparation library identifier","MarkDuplicates uses the LB field to determine which read groups might contain molecular duplicates, in case the same DNA library was sequenced on multiple lanes."
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	116
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	117 Example of Read Group usage
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	118
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	119 Suppose we have a trio of samples: MOM, DAD, and KID. Each has two DNA libraries prepared, one with 400 bp inserts and another with 200 bp inserts. Each of these libraries is run on two lanes of an Illumina HiSeq, requiring 3 x 2 x 2 = 12 lanes of data. When the data come off the sequencer, we would create 12 BAM files, with the following @RG fields in the header::
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	120
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	121 Dad's data:
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	122 @RG ID:FLOWCELL1.LANE1 PL:illumina LB:LIB-DAD-1 SM:DAD PI:200
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	123 @RG ID:FLOWCELL1.LANE2 PL:illumina LB:LIB-DAD-1 SM:DAD PI:200
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	124 @RG ID:FLOWCELL1.LANE3 PL:illumina LB:LIB-DAD-2 SM:DAD PI:400
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	125 @RG ID:FLOWCELL1.LANE4 PL:illumina LB:LIB-DAD-2 SM:DAD PI:400
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	126
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	127 Mom's data:
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	128 @RG ID:FLOWCELL1.LANE5 PL:illumina LB:LIB-MOM-1 SM:MOM PI:200
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	129 @RG ID:FLOWCELL1.LANE6 PL:illumina LB:LIB-MOM-1 SM:MOM PI:200
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	130 @RG ID:FLOWCELL1.LANE7 PL:illumina LB:LIB-MOM-2 SM:MOM PI:400
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	131 @RG ID:FLOWCELL1.LANE8 PL:illumina LB:LIB-MOM-2 SM:MOM PI:400
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	132
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	133 Kid's data:
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	134 @RG ID:FLOWCELL2.LANE1 PL:illumina LB:LIB-KID-1 SM:KID PI:200
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	135 @RG ID:FLOWCELL2.LANE2 PL:illumina LB:LIB-KID-1 SM:KID PI:200
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	136 @RG ID:FLOWCELL2.LANE3 PL:illumina LB:LIB-KID-2 SM:KID PI:400
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	137 @RG ID:FLOWCELL2.LANE4 PL:illumina LB:LIB-KID-2 SM:KID PI:400
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	138
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	139 Note the hierarchical relationship between read groups (unique for each lane) to libraries (sequenced on two lanes) and samples (across four lanes, two lanes for each library).
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	140
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	141 Picard documentation
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	142
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	143 This is a Galaxy wrapper for AddOrReplaceReadGroups, a part of the external package Picard-tools_.
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	144
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	145 .. _Picard-tools: http://www.google.com/search?q=picard+samtools
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	146
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	147 ------
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	148
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	149 .. class:: infomark
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	150
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	151 Inputs, outputs, and parameters
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	152
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	153 Either a sam file or a bam file must be supplied. If a bam file is used, it must
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	154 be coordinate-sorted. Galaxy currently coordinate-sorts all bam files.
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	155
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	156 The output file is either bam (the default) or sam, according to user selection,
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	157 and contains the same information as the input file except for the appropriate
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	158 additional (or modified) read group tags. Bam is recommended since it is smaller.
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	159
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	160 From the Picard documentation.
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	161
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	162 AddOrReplaceReadGroups REQUIRED parameters::
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	163
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	164 Option (Type) Description
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	165
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	166 RGLB=String Read Group Library
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	167 RGPL=String Read Group platform (e.g. illumina, solid)
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	168 RGPU=String Read Group platform unit (eg. run barcode)
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	169 RGSM=String Read Group sample name
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	170 RGID=String Read Group ID; Default value: null (empty)
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	171
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	172 AddOrReplaceReadGroups OPTIONAL parameters::
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	173
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	174 Option (Type) Description
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	175
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	176 RGCN=String Read Group sequencing center name; Default value: null (empty)
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	177 RGDS=String Read Group description Default value: null (empty)
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	178
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	179 One parameter that Picard's AddOrReplaceReadGroups offers that is automatically
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	180 set by Galaxy is the SORT_ORDER, which is set to coordinate.
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	181
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	182 .. class:: warningmark
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	183
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	184 Warning on SAM/BAM quality
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	185
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	186 Many SAM/BAM files produced externally and uploaded to Galaxy do not fully conform to SAM/BAM specifications. Galaxy deals with this by using the LENIENT
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	187 flag when it runs Picard, which allows reads to be discarded if they're empty or don't map. This appears
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	188 to be the only way to deal with SAM/BAM that cannot be parsed.
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	189
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	190
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	191
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	192 </help>
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	193 </tool>
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	194
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	195
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	196
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	197
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	198
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	199
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	200
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	201
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	202
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	203
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	204
ff4ec13e496e Uploaded tarball to repository devteam parents: diff changeset	205

0

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

1 <tool name="Add or Replace Groups" id="picard_ARRG" version="1.56.0">

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

2 <requirements><requirement type="package" version="1.56.0">picard</requirement></requirements>

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

3 <command interpreter="python">

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

4 picard_wrapper.py

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

5 --input="$inputFile"

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

6 --rg-lb="$rglb"

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

7 --rg-pl="$rgpl"

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

8 --rg-pu="$rgpu"

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

9 --rg-sm="$rgsm"

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

10 --rg-id="$rgid"

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

11 --rg-opts=${readGroupOpts.rgOpts}

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

12 #if $readGroupOpts.rgOpts == "full"

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

13 --rg-cn="$readGroupOpts.rgcn"

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

14 --rg-ds="$readGroupOpts.rgds"

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

15 #end if

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

16 --output-format=$outputFormat

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

17 --output=$outFile

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

18 -j "\$JAVA_JAR_PATH/AddOrReplaceReadGroups.jar"

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

19 --tmpdir "${__new_file_path__}"

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

20 </command>

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

21 <inputs>

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

22 <param format="bam,sam" name="inputFile" type="data" label="SAM/BAM dataset to add or replace read groups in"

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

23 help="If empty, upload or import a SAM/BAM dataset." />

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

24 <param name="rgid" value="1" type="text" label="Read group ID (ID tag)" help="The most important read group tag. Galaxy will use a value of '1' if nothing provided." />

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

25 <param name="rgsm" value="" type="text" label="Read group sample name (SM tag)" />

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

26 <param name="rglb" value="" type="text" label="Read group library (LB tag)" />

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

27 <param name="rgpl" value="" type="text" label="Read group platform (PL tag)" help="illumina, solid, 454, pacbio, helicos" />

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

28 <param name="rgpu" value="" type="text" label="Read group platform unit" help="like run barcode, etc." />

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

29 <conditional name="readGroupOpts">

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

30 <param name="rgOpts" type="select" label="Specify additional (optional) arguments" help="Allows you to set RGCN and RGDS.">

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

31 <option value="preSet">Use pre-set defaults</option>

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

32 <option value="full">Set optional arguments</option>

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

33 </param>

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

34 <when value="preSet" />

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

35 <when value="full">

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

36 <param name="rgcn" value="" type="text" label="Read group sequencing center name" help="Leave set to <null> for default (none)" />

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

37 <param name="rgds" value="" type="text" label="Read group description" help="Leave set to <null> for default (none)" />

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

38 </when>

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

39 </conditional>

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

40 <param name="outputFormat" type="boolean" checked="True" truevalue="bam" falsevalue="sam" label="Output bam instead of sam" help="Uncheck for sam output" />

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

41 </inputs>

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

42 <outputs>

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

43 <data name="outFile" format="bam" label="${tool.name} on ${on_string}: ${outputFormat} with read groups replaced">

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

44 <change_format>

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

45 <when input="outputFormat" value="sam" format="sam" />

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

46 </change_format>

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

47 </data>

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

48 </outputs>

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

49 <tests>

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

50 <test>

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

51 <!-- Command for replacing read groups in bam:

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

52 java -jar AddOrReplaceReadGroups.jar VALIDATION_STRINGENCY=LENIENT I=test-data/picard_ARRG_input1.bam O=picard_ARRG_output1.sam RGID=one RGLB=lib RGPL=illumina RGPU=peaewe RGSM=sam1

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

53 -->

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

54 <param name="inputFile" value="picard_ARRG_input1.bam" />

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

55 <param name="rglb" value="lib" />

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

56 <param name="rgpl" value="illumina" />

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

57 <param name="rgpu" value="peaewe" />

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

58 <param name="rgsm" value="sam1" />

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

59 <param name="rgid" value="one" />

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

60 <param name="rgOpts" value="preSet" />

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

61 <param name="outputFormat" value="False" />

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

62 <output name="outFile" file="picard_ARRG_output1.sam" ftype="sam" />

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

63 </test>

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

64 <test>

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

65 <!-- Command for replacing read groups in sam:

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

66 java -jar AddOrReplaceReadGroups.jar VALIDATION_STRINGENCY=LENIENT I=test-data/picard_ARRG_input1.sam O=picard_ARRG_output2.sam RGLB=LIB RGPL=IL RGPU=PLAT RGSM=smp RGID=M5 RGCN=FamousCenter RGDS="description with spaces"

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

67 picard_ARRG_input1.bam can be created from picard_ARRG_input1.sam

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

68 -->

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

69 <param name="inputFile" value="picard_ARRG_input1.sam" />

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

70 <param name="rglb" value="LIB" />

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

71 <param name="rgpl" value="IL" />

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

72 <param name="rgpu" value="PLAT" />

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

73 <param name="rgsm" value="smp" />

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

74 <param name="rgid" value="M5" />

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

75 <param name="rgOpts" value="full" />

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

76 <param name="rgcn" value="FamousCenter" />

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

77 <param name="rgds" value="description with spaces" />

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

78 <param name="outputFormat" value="False" />

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

79 <output name="outFile" file="picard_ARRG_output2.sam" ftype="sam" />

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

80 </test>

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

81 <test>

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

82 <!-- Command for adding read groups in sam:

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

83 java -jar AddOrReplaceReadGroups.jar VALIDATION_STRINGENCY=LENIENT I=test-data/picard_ARRG_input2.sam O=picard_ARRG_output3.bam RGID=M6 RGLB=LIB RGPL=IL RGPU=PLAT RGSM=smp1

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

84 -->

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

85 <param name="inputFile" value="picard_ARRG_input2.sam" />

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

86 <param name="rglb" value="LIB" />

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

87 <param name="rgpl" value="IL" />

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

88 <param name="rgpu" value="PLAT" />

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

89 <param name="rgsm" value="smp1" />

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

90 <param name="rgid" value="M6" />

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

91 <param name="rgOpts" value="preSet" />

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

92 <param name="outputFormat" value="True" />

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

93 <output name="outFile" file="picard_ARRG_output3.bam" ftype="bam" />

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

94 </test>

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

95 </tests>

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

96 <help>

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

97

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

98 .. class:: infomark

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

99

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

100 **Purpose**

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

101

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

102 Add or Replace Read Groups in an input BAM or SAM file.

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

103

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

104 **Read Groups are Important!**

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

105

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

106 Many downstream analysis tools (such as GATK, for example) require BAM datasets to contain read groups. Even if you are not going to use GATK, setting read groups correctly from the start will simplify your life greatly. Below we provide an explanation of read groups fields taken from GATK FAQ webpage:

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

107

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

108 .. csv-table::

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

109 :header-rows: 1

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

110

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

111 Tag,Importance,Definition,Meaning

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

112 "ID","Required","Read group identifier. Each @RG line must have a unique ID. The value of ID is used in the RG tags of alignment records. Must be unique among all read groups in header section. Read group IDs may be modified when merging SAM files in order to handle collisions.","Ideally, this should be a globally unique identifier across all sequencing data in the world, such as the Illumina flowcell + lane name and number. Will be referenced by each read with the RG:Z field, allowing tools to determine the read group information associated with each read, including the sample from which the read came. Also, a read group is effectively treated as a separate run of the NGS instrument in tools like base quality score recalibration (a GATK component) -- all reads within a read group are assumed to come from the same instrument run and to therefore share the same error model."

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

113 "SM","Required. As important as ID.","Sample. Use pool name where a pool is being sequenced.","The name of the sample sequenced in this read group. GATK tools treat all read groups with the same SM value as containing sequencing data for the same sample. Therefore it's critical that the SM field be correctly specified, especially when using multi-sample tools like the Unified Genotyper (a GATK component)."

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

114 "PL","Important. Not currently used in the GATK, but was in the past, and may return. The only way to know the sequencing technology used to generate the sequencing data.","Platform/technology used to produce the read. Valid values: ILLUMINA, SOLID, LS454, HELICOS and PACBIO.","It's a good idea to use this field."

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

115 "LB","Essential for MarkDuplicates","DNA preparation library identifier","MarkDuplicates uses the LB field to determine which read groups might contain molecular duplicates, in case the same DNA library was sequenced on multiple lanes."

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

116

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

117 **Example of Read Group usage**

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

118

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

119 Suppose we have a trio of samples: MOM, DAD, and KID. Each has two DNA libraries prepared, one with 400 bp inserts and another with 200 bp inserts. Each of these libraries is run on two lanes of an illumina hiseq, requiring 3 x 2 x 2 = 12 lanes of data. When the data come off the sequencer, we would create 12 BAM files, with the following @RG fields in the header::

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

120

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

121 Dad's data:

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

122 @RG ID:FLOWCELL1.LANE1 PL:illumina LB:LIB-DAD-1 SM:DAD PI:200

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

123 @RG ID:FLOWCELL1.LANE2 PL:illumina LB:LIB-DAD-1 SM:DAD PI:200

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

124 @RG ID:FLOWCELL1.LANE3 PL:illumina LB:LIB-DAD-2 SM:DAD PI:400

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

125 @RG ID:FLOWCELL1.LANE4 PL:illumina LB:LIB-DAD-2 SM:DAD PI:400

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

126

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

127 Mom's data:

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

128 @RG ID:FLOWCELL1.LANE5 PL:illumina LB:LIB-MOM-1 SM:MOM PI:200

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

129 @RG ID:FLOWCELL1.LANE6 PL:illumina LB:LIB-MOM-1 SM:MOM PI:200

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

130 @RG ID:FLOWCELL1.LANE7 PL:illumina LB:LIB-MOM-2 SM:MOM PI:400

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

131 @RG ID:FLOWCELL1.LANE8 PL:illumina LB:LIB-MOM-2 SM:MOM PI:400

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

132

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

133 Kid's data:

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

134 @RG ID:FLOWCELL2.LANE1 PL:illumina LB:LIB-KID-1 SM:KID PI:200

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

135 @RG ID:FLOWCELL2.LANE2 PL:illumina LB:LIB-KID-1 SM:KID PI:200

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

136 @RG ID:FLOWCELL2.LANE3 PL:illumina LB:LIB-KID-2 SM:KID PI:400

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

137 @RG ID:FLOWCELL2.LANE4 PL:illumina LB:LIB-KID-2 SM:KID PI:400

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

138

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

139 Note the hierarchical relationship between read groups (unique for each lane) to libraries (sequenced on two lanes) and samples (across four lanes, two lanes for each library).

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

140

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

141 **Picard documentation**

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

142

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

143 This is a Galaxy wrapper for AddOrReplaceReadGroups, a part of the external package Picard-tools_.

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

144

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

145 .. _Picard-tools: http://www.google.com/search?q=picard+samtools

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

146

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

147 ------

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

148

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

149 .. class:: infomark

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

150

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

151 **Inputs, outputs, and parameters**

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

152

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

153 Either a sam file or a bam file must be supplied. If a bam file is used, it must

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

154 be coordinate-sorted. Galaxy currently coordinate-sorts all bam files.

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

155

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

156 The output file is either bam (the default) or sam, according to user selection,

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

157 and contains the same information as the input file except for the appropriate

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

158 additional (or modified) read group tags. Bam is recommended since it is smaller.

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

159

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

160 From the Picard documentation.

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

161

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

162 AddOrReplaceReadGroups REQUIRED parameters::

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

163

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

164 Option (Type) Description

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

165

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

166 RGLB=String Read Group Library

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

167 RGPL=String Read Group platform (e.g. illumina, solid)

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

168 RGPU=String Read Group platform unit (e.g. run barcode)

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

169 RGSM=String Read Group sample name

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

170 RGID=String Read Group ID; Default value: null (empty)

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

171

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

172 AddOrReplaceReadGroups OPTIONAL parameters::

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

173

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

174 Option (Type) Description

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

175

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

176 RGCN=String Read Group sequencing center name; Default value: null (empty)

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

177 RGDS=String Read Group description; Default value: null (empty)

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

178

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

179 One parameter that Picard's AddOrReplaceReadGroups offers that is automatically

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

180 set by Galaxy is the SORT_ORDER, which is set to coordinate.

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

181

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

182 .. class:: warningmark

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

183

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

184 **Warning on SAM/BAM quality**

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

185

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

186 Many SAM/BAM files produced externally and uploaded to Galaxy do not fully conform to SAM/BAM specifications. Galaxy deals with this by using the **LENIENT**

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

187 flag when it runs Picard, which allows reads to be discarded if they're empty or don't map. This appears

ff4ec13e496e Uploaded tarball to repository

devteam

parents:

diff changeset

188 to be the only way to deal with SAM/BAM that cannot be parsed.

ff4ec13e496e Uploaded tarball to repository

devteam