# HG changeset patch # User eugen # Date 1344859620 14400 # Node ID 10e211ab5e5cc3fefad8af6e0f26888b3e7286a3 Uploaded diff -r 000000000000 -r 10e211ab5e5c bismark.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bismark.xml Mon Aug 13 08:07:00 2012 -0400 @@ -0,0 +1,181 @@ + + + bismark_wrapper.sh + ##Reference genome + ref="${indices.fields.path}" + ##Output files (SAM output, Bismark summary) + mapped=$mapped + summary=$summary + ##Temp directory + tempdir=$mapped.files_path + #if str($singlePaired.sPaired) == "single": + library=single + mate1=$singlePaired.sInput1 + #if $singlePaired.sParams.sSettingsType == "full": + fullparam=true + qual=$singlePaired.sParams.qual + seedmms=$singlePaired.sParams.seedmms + seedlen=$singlePaired.sParams.seedlen + maqerr=$singlePaired.sParams.maqerr + directional=$singlePaired.sParams.non_directional + header=$singlePaired.sParams.sam_no_hd + #end if + #else: + library=paired + mate1=$singlePaired.pInput1 + mate2=$singlePaired.pInput2 + #if $singlePaired.pParams.pSettingsType == "full": + fullparam="true" + qual=$singlePaired.pParams.qual + seedmms=$singlePaired.pParams.seedmms + seedlen=$singlePaired.pParams.seedlen + maqerr=$singlePaired.pParams.maqerr + directional=$singlePaired.pParams.non_directional + header=$singlePaired.pParams.sam_no_hd + minins=$singlePaired.pParams.minins + maxins=$singlePaired.pParams.maxins + #end if + #end if + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +**What it does** + +Bismark is a program to map bisulfite treated sequencing reads to a genome of interest and perform methylation calls in a single step. The output can be easily imported into a genome viewer, such as SeqMonk, and enables a researcher to analyse the methylation levels of their samples straight away. 
Its main features are: + + - Bisulfite mapping and methylation calling in one single step + + - Supports single-end and paired-end read alignments + + - Supports ungapped and gapped alignments + + - Alignment seed length, number of mismatches etc. are adjustable + + - Output discriminates between cytosine methylation in CpG, CHG and CHH context + +.. _Bismark: http://www.bioinformatics.babraham.ac.uk/projects/bismark/ + +**Input formats** + +Bismark accepts files in Sanger FASTQ format. + +**Outputs** + +The output is in SAM format, and has the following columns:: + + Column Description + -------- -------------------------------------------------------- + 1 QNAME seq-ID + 2 FLAG this flag tries to take the strand a bisulfite read originated from into account (this is different from ordinary DNA alignment flags!) + 3 RNAME chromosome + 4 POS start position + 5 MAPQ always 255 + 6 CIGAR + 7 RNEXT + 8 PNEXT + 9 TLEN + 10 SEQ + 11 QUAL Phred33 scale + 12 NM-tag edit distance to the reference + 13 XX-tag base-by-base mismatches to the reference, not including indels + 14 XM-tag methylation call string + 15 XR-tag read conversion state for the alignment + 16 XG-tag genome conversion state for the alignment + + + + + + + + + + + + + + + diff -r 000000000000 -r 10e211ab5e5c bismark_wrapper.sh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bismark_wrapper.sh Mon Aug 13 08:07:00 2012 -0400 @@ -0,0 +1,114 @@ +#!/bin/bash +# +# Galaxy wrapper for Bismark +# + +set -e + +export PATH="/home/galaxy/dependencies/bismark:$PATH" +export PATH="/home/galaxy/dependencies/diverse:$PATH" + +#get parameters + +until [ $# -eq 0 ] +do + case $1 in + ref=*) + ref=${1#ref=} + ;; + library=*) + library=${1#library=} + ;; + fullparam=*) + fullparam=${1#fullparam=} + ;; + mate1=*) + mate1=${1#mate1=} + ;; + mate2=*) + mate2=${1#mate2=} + ;; + qual=*) + qual=${1#qual=} + ;; + seedmms=*) + seedmms="--seedmms ${1#seedmms=}" + ;; + seedlen=*) + seedlen="--seedlen ${1#seedlen=}" + ;; + maqerr=*) 
+ maqerr="--maqerr ${1#maqerr=}" + ;; + directional=*) + directional=${1#directional=} + ;; + header=*) + header=${1#header=} + ;; + minins=*) + minins="--minins ${1#minins=}" + ;; + maxins=*) + maxins="--maxins ${1#maxins=}" + ;; + mapped=*) + mapped=${1#mapped=} + ;; + summary=*) + summary=${1#summary=} + ;; + tempdir=*) + tempdir=${1#tempdir=} + ;; + esac + shift +done + + +if [ "$library" == "single" ] +then + if [ "$fullparam" == 'false' ] + then + bismark --output_dir $tempdir --temp_dir $tempdir --quiet $ref $mate1 2>&1 > /dev/null + else + bismark --output_dir $tempdir --temp_dir $tempdir --quiet $qual $seedmms $seedlen $maqerr $directional $header $ref $mate1 2>&1 > /dev/null + fi +else + if [ "$fullparam" == 'false' ] + then + bismark --output_dir $tempdir --temp_dir $tempdir --quiet $ref -1 $mate1 -2 $mate2 2>&1 > /dev/null + else + bismark --output_dir $tempdir --temp_dir $tempdir --quiet $qual $seedmms $seedlen $maqerr $directional $header $minins $maxins $ref -1 $mate1 -2 $mate2 2>&1 > /dev/null + fi +fi + + +#call bismark. output in temp-directory (files_path) + + +#parse the filename of the input -> same as output +IFS="/" +set - $mate1 +outfile=${*:$#:1} + +#sort the mapped reads by chromosome +#sort -k 3,3 -k 4,4n "$tempdir/${outfile}_bismark_pe.sam" > "$tempdir/${outfile}_bismark_pe_sorted.sam" + +#copy resultfiles back into galaxy +#cp "$tempdir/${outfile}_bismark_sorted.sam" "$mapped" +if [ "$library" == "single" ] +then + cp "$tempdir/${outfile}_bismark.sam" "$mapped" + cp "$tempdir/${outfile}_Bismark_mapping_report.txt" "$summary" +else + cp "$tempdir/${outfile}_bismark_pe.sam" "$mapped" + cp "$tempdir/${outfile}_Bismark_paired-end_mapping_report.txt" "$summary" +fi + + + + + + +