changeset 0:10e211ab5e5c draft

Uploaded
author eugen
date Mon, 13 Aug 2012 08:07:00 -0400
parents
children d7e73e809691
files bismark.xml bismark_wrapper.sh
diffstat 2 files changed, 295 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bismark.xml	Mon Aug 13 08:07:00 2012 -0400
@@ -0,0 +1,181 @@
+<tool id="bismark" name="Bismark Mapper" version="0.7.3">
+        <command interpreter="bash">
+               bismark_wrapper.sh
+                        ##Reference genome
+                        ref="${indices.fields.path}"
+                        ##Output files (SAM output, Bismark summary)
+                        mapped="${mapped}"
+                        summary="${summary}"
+                        ##Temp directory
+                        tempdir="${mapped.files_path}"
+                        ##Preset mode is the default; the wrapper keeps the last value of a repeated key, so fullparam=true below overrides it
+                        fullparam=false
+                        #if str($singlePaired.sPaired) == "single":
+                          library=single
+                          mate1="${singlePaired.sInput1}"
+                          #if $singlePaired.sParams.sSettingsType == "full":
+                            fullparam=true
+                            qual=$singlePaired.sParams.qual
+                            seedmms=$singlePaired.sParams.seedmms
+                            seedlen=$singlePaired.sParams.seedlen
+                            maqerr=$singlePaired.sParams.maqerr
+                            directional=$singlePaired.sParams.non_directional
+                            header=$singlePaired.sParams.sam_no_hd
+                          #end if
+                        #else:
+                          library=paired
+                          mate1="${singlePaired.pInput1}"
+                          mate2="${singlePaired.pInput2}"
+                          #if $singlePaired.pParams.pSettingsType == "full":
+                            fullparam=true
+                            qual=$singlePaired.pParams.qual
+                            seedmms=$singlePaired.pParams.seedmms
+                            seedlen=$singlePaired.pParams.seedlen
+                            maqerr=$singlePaired.pParams.maqerr
+                            directional=$singlePaired.pParams.non_directional
+                            header=$singlePaired.pParams.sam_no_hd
+                            minins=$singlePaired.pParams.minins
+                            maxins=$singlePaired.pParams.maxins
+                          #end if
+                        #end if
+        </command>
+  <inputs>
+    <param name="indices" type="select" label="Select a reference genome">
+      <options from_data_table="bismark_bs_indeces">
+        <filter type="sort_by" column="2" />
+        <validator type="no_options" message="No indexes are available" />
+      </options>
+    </param>
+
+    <conditional name="singlePaired">
+      <param name="sPaired" type="select" label="Is this library mate-paired?">
+        <option value="single">Single-end</option>
+        <option value="paired">Paired-end</option>
+      </param>
+      <when value="single">
+        <param name="sInput1" type="data" format="fastq" label="FASTQ file" help="Must have ASCII encoded quality scores"/>
+        <conditional name="sParams">
+          <param name="sSettingsType" type="select" label="Bismark settings to use" help="For most mapping needs use Commonly used settings. If you want full control use Full parameter list">
+            <option value="preSet">Commonly used</option>
+            <option value="full">Full parameter list</option>
+          </param>
+          <when value="preSet" />
+          <when value="full">
+            <param name="qual" type="select" label="Select the type of FastQ qualities">
+              <option value="--phred33-quals">phred33-quals</option>
+              <option value="--phred64-quals">phred64-quals</option>
+              <option value="--solexa-quals">solexa-quals</option>
+            </param>
+            <param name="seedmms" type="integer" value="2" label="The maximum number of mismatches permitted in the seed" />
+            <param name="seedlen" type="integer" value="28" label="The seed length" />
+            <param name="maqerr" type="integer" value="70" label="Maximum permitted total of quality values at all mismatched read positions throughout the entire alignment, not just in the seed" />
+            <param name="non_directional" type="select" label="Is the library a non-directional one?">
+              <option value="">No</option>
+              <option value="--non_directional">Yes</option>
+            </param>
+            <param name="sam_no_hd" type="select" label="Should the SAM header lines (starting with @) be suppressed?">
+              <option value="">No</option>
+              <option value="--sam-no-hd">Yes</option>
+            </param>
+          </when> <!-- full -->
+        </conditional> <!-- sParams -->
+      </when> <!-- single -->
+
+      <when value="paired">
+        <param name="pInput1" type="data" format="fastq" label="Forward FASTQ file" />
+        <param name="pInput2" type="data" format="fastq" label="Reverse FASTQ file" />
+
+        <conditional name="pParams">
+          <param name="pSettingsType" type="select" label="Bismark settings to use" help="For most mapping needs use Commonly used settings. If you want full control use Full parameter list">
+            <option value="preSet">Commonly used</option>
+            <option value="full">Full parameter list</option>
+          </param>
+          <when value="preSet" />
+          <when value="full">
+            <param name="minins" type="integer" value="0" label="The minimum insert size for valid paired-end alignments" />
+            <param name="maxins" type="integer" value="500" label="The maximum insert size for valid paired-end alignments" />
+            <param name="qual" type="select" label="Select the type of FastQ qualities">
+              <option value="--phred33-quals">phred33-quals</option>
+              <option value="--phred64-quals">phred64-quals</option>
+              <option value="--solexa-quals">solexa-quals</option>
+            </param>
+            <param name="seedmms" type="integer" value="2" label="The maximum number of mismatches permitted in the seed" />
+            <param name="seedlen" type="integer" value="28" label="The seed length" />
+            <param name="maqerr" type="integer" value="70" label="Maximum permitted total of quality values at all mismatched read positions throughout the entire alignment, not just in the seed" />
+            <param name="non_directional" type="select" label="Is the library a non-directional one?">
+              <option value="">No</option>
+              <option value="--non_directional">Yes</option>
+            </param>
+            <param name="sam_no_hd" type="select" label="Should the SAM header lines (starting with @) be suppressed?">
+              <option value="">No</option>
+              <option value="--sam-no-hd">Yes</option>
+            </param>
+          </when> <!-- full -->
+        </conditional> <!-- pParams -->
+      </when> <!-- paired -->
+    </conditional> <!-- singlePaired -->
+
+
+  </inputs>
+ <outputs>
+   <data name="mapped" format="sam" label="Bismark Mapped Reads" />
+   <data name="summary" format="txt" label="Bismark Mapping Summary" />
+ </outputs>
+ <help>
+**What it does**
+
+Bismark is a program to map bisulfite treated sequencing reads to a genome of interest and perform methylation calls in a single step. The output can be easily imported into a genome viewer, such as SeqMonk, and enables a researcher to analyse the methylation levels of their samples straight away. Its main features are:
+
+   - Bisulfite mapping and methylation calling in one single step
+   
+   - Supports single-end and paired-end read alignments
+   
+   - Supports ungapped and gapped alignments
+   
+   - Alignment seed length, number of mismatches etc. are adjustable
+   
+   - Output discriminates between cytosine methylation in CpG, CHG and CHH context
+
+.. _Bismark: http://www.bioinformatics.babraham.ac.uk/projects/bismark/
+
+**Input formats**
+
+Bismark accepts files in Sanger FASTQ format.
+
+**Outputs**
+
+The output is in SAM format, and has the following columns::
+
+    Column  	Description
+  --------  	--------------------------------------------------------   
+  1 QNAME	seq-ID
+  2 FLAG 	this flag tries to take the strand a bisulfite read originated from into account (this is different from ordinary DNA alignment flags!)
+  3 RNAME 	chromosome
+  4 POS 	start position
+  5 MAPQ 	always 255
+  6 CIGAR
+  7 RNEXT
+  8 PNEXT
+  9 TLEN
+ 10 SEQ
+ 11 QUAL 	Phred33 scale
+ 12 NM-tag 	edit distance to the reference
+ 13 XX-tag 	base-by-base mismatches to the reference, not including indels
+ 14 XM-tag 	methylation call string
+ 15 XR-tag 	read conversion state for the alignment
+ 16 XG-tag 	genome conversion state for the alignment
+   
+ </help>
+ 
+ <tests> <!-- single-end run using the preset ("Commonly used") settings -->
+   <test>
+     <param name="sPaired" value="single" />
+     <param name="indices" value="bismark" />
+     <param name="sInput1" ftype="fastq" value="bismark_test_single.fastq" />
+     <param name="sSettingsType" value="preSet" />
+     <output name="mapped" ftype="sam" file="bismark_result_single_1.SAM" />
+     <output name="summary" ftype="txt" file="bismark_result_single_2.txt" />
+   </test>
+ </tests>
+</tool>
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bismark_wrapper.sh	Mon Aug 13 08:07:00 2012 -0400
@@ -0,0 +1,114 @@
+#!/bin/bash
+#
+# Galaxy wrapper for Bismark
+#
+
+set -e
+
+export PATH="/home/galaxy/dependencies/bismark:$PATH"
+export PATH="/home/galaxy/dependencies/diverse:$PATH"
+
+#get parameters
+
+until [ $# -eq 0 ]
+do
+	case $1 in
+		ref=*)
+			ref=${1#ref=}
+			;;
+		library=*)
+			library=${1#library=}
+			;;
+		fullparam=*)
+			fullparam=${1#fullparam=}
+			;;
+		mate1=*)
+			mate1=${1#mate1=}
+			;;
+		mate2=*)
+			mate2=${1#mate2=}
+			;;
+		qual=*)
+			qual=${1#qual=}
+			;;
+		seedmms=*)
+			seedmms="--seedmms ${1#seedmms=}"
+			;;
+		seedlen=*)
+			seedlen="--seedlen ${1#seedlen=}"
+			;;
+		maqerr=*)
+			maqerr="--maqerr ${1#maqerr=}"
+			;;
+		directional=*)
+			directional=${1#directional=}
+			;;
+		header=*)
+			header=${1#header=}
+			;;
+		minins=*)
+			minins="--minins ${1#minins=}"
+			;;
+		maxins=*)
+			maxins="--maxins ${1#maxins=}"
+			;;
+		mapped=*)
+			mapped=${1#mapped=}
+			;;
+		summary=*)
+			summary=${1#summary=}
+			;;
+		tempdir=*)
+			tempdir=${1#tempdir=}
+			;;
+	esac
+	shift
+done
+
+
+if [ "$library" == "single" ]
+then
+    if [ "$fullparam" == 'false' ]
+    then      
+      bismark --output_dir $tempdir --temp_dir $tempdir --quiet $ref $mate1  2>&1 > /dev/null
+    else
+      bismark --output_dir $tempdir --temp_dir $tempdir --quiet $qual $seedmms $seedlen $maqerr $directional $header $ref $mate1  2>&1 > /dev/null
+    fi
+else
+    if [ "$fullparam" == 'false' ]
+    then
+      bismark --output_dir $tempdir --temp_dir $tempdir --quiet $ref -1 $mate1 -2 $mate2 2>&1 > /dev/null
+    else
+      bismark --output_dir $tempdir --temp_dir $tempdir --quiet $qual $seedmms $seedlen $maqerr $directional $header $minins $maxins $ref -1 $mate1 -2 $mate2 2>&1 > /dev/null
+    fi
+fi
+
+
+#call bismark. output in temp-directory (files_path)
+
+
+#parse the filename of the input -> same as output
+IFS="/"
+set - $mate1
+outfile=${*:$#:1}
+
+#sort the mapped reads by chromosome
+#sort -k 3,3 -k 4,4n "$tempdir/${outfile}_bismark_pe.sam" > "$tempdir/${outfile}_bismark_pe_sorted.sam"
+
+#copy resultfiles back into galaxy
+#cp "$tempdir/${outfile}_bismark_sorted.sam" "$mapped"
+if [ "$library" == "single" ]
+then
+    cp "$tempdir/${outfile}_bismark.sam" "$mapped"
+    cp "$tempdir/${outfile}_Bismark_mapping_report.txt" "$summary"
+else
+    cp "$tempdir/${outfile}_bismark_pe.sam" "$mapped"
+    cp "$tempdir/${outfile}_Bismark_paired-end_mapping_report.txt" "$summary"
+fi
+
+
+
+
+
+
+