# HG changeset patch # User jbrayet # Date 1448439855 18000 # Node ID e3bc0a0be3431c188743ed13129aa33a23d708c2 # Parent 25c5a69f5324663c9188d74f531aa1ac726e7fa7 Uploaded diff -r 25c5a69f5324 -r e3bc0a0be343 samtools_stats.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/samtools_stats.xml Wed Nov 25 03:24:15 2015 -0500 @@ -0,0 +1,296 @@ + + generate statistics for BAM dataset + + macros.xml + + + + + "${output}" + #if $split_output.split_output_selector == "yes": + #set outputs_to_split = str($split_output.generate_tables).split(',') + && mkdir split && echo ${split_output.generate_tables} && + + #if 'sn' in $outputs_to_split: + echo "# Summary Numbers\n" > "split/Summary numbers.tab" && + grep -q ^SN "${output}" ; if [ $? = 0 ] ; then grep ^SN "${output}" | cut -f 2- >> "split/Summary numbers.tab" ; fi && + #end if + + #if 'ffq' in $outputs_to_split: + echo "# Columns correspond to qualities and rows to cycles. First column is the cycle number\n" > "split/First Fragment Qualities.tab" && + grep -q ^FFQ "${output}" ; if [ $? = 0 ] ; then grep ^FFQ "${output}" | cut -f 2- >> "split/First Fragment Qualities.tab" ; fi && + #end if + + #if 'lfq' in $outputs_to_split: + echo "# Columns correspond to qualities and rows to cycles. First column is the cycle number" > "split/Last Fragment Qualities.tab" && + grep -q ^LFQ "${output}" ; if [ $? = 0 ] ; then grep ^LFQ "${output}" | cut -f 2- >> "split/Last Fragment Qualities.tab" ; fi && + #end if + + #if 'mpc' in $outputs_to_split: + echo "# Columns correspond to qualities, rows to cycles. First column is the cycle number, second is the number of N's and the rest is the number of mismatches" > "split/Mismatches per cycle.tab" && + grep -q ^MPC "${output}" ; if [ $? = 0 ] ; then grep ^MPC "${output}" | cut -f 2- >> "split/Mismatches per cycle.tab" ; fi && + #end if + + #if 'gcf' in $outputs_to_split: + echo "# GC Content of first fragments" > "split/GC Content of first fragments.tab" && + grep -q ^GCF "${output}" ; if [ $? = 0 ] ; then grep ^GCF "${output}" | cut -f 2- >> "split/GC Content of first fragments.tab" ; fi && + #end if + + #if 'gcl' in $outputs_to_split: + echo "# GC Content of last fragments" > "split/GC Content of last fragments.tab" && + grep -q ^GCL "${output}" ; if [ $? = 0 ] ; then grep ^GCL "${output}" | cut -f 2- >> "split/GC Content of last fragments.tab" ; fi && + #end if + + #if 'gcc' in $outputs_to_split: + echo "# ACGT content per cycle. The columns are: cycle, and A,C,G,T counts (percent)" > "split/ACGT content per cycle.tab" && + grep -q ^GCC "${output}" ; if [ $? = 0 ] ; then grep ^GCC "${output}" | cut -f 2- >> "split/ACGT content per cycle.tab" ; fi && + #end if + + #if 'is' in $outputs_to_split: + echo "# Insert sizes. The columns are: insert size, pairs total, inward oriented pairs, outward oriented pairs, other pairs" > "split/Insert sizes.tab" && + grep -q ^IS "${output}" ; if [ $? = 0 ] ; then grep ^IS "${output}" | cut -f 2- >> "split/Insert sizes.tab" ; fi && + #end if + + #if 'rl' in $outputs_to_split: + echo "# Read lengths. The columns are: read length, count" > "split/Read lengths.tab" && + grep -q ^RL "${output}" ; if [ $? = 0 ] ; then grep ^RL "${output}" | cut -f 2- >> "split/Read lengths.tab" ; fi && + #end if + + #if 'id' in $outputs_to_split: + echo "# Indel distribution. The columns are: length, number of insertions, number of deletions" > "split/Indel distribution.tab" && + grep -q ^ID "${output}" ; if [ $? = 0 ] ; then grep ^ID "${output}" | cut -f 2- >> "split/Indel distribution.tab" ; fi && + #end if + + #if 'ic' in $outputs_to_split: + echo "# Indels per cycle. The columns are: cycle, number of insertions (fwd), .. (rev) , number of deletions (fwd), .. (rev)" > "split/Indels per cycle.tab" && + grep -q ^IC "${output}" ; if [ $? = 0 ] ; then grep ^IC "${output}" | cut -f 2- >> "split/Indels per cycle.tab" ; fi && + #end if + + #if 'cov' in $outputs_to_split: + echo "# Coverage distribution" > "split/Coverage distribution.tab" && + grep -q ^COV "${output}" ; if [ $? = 0 ] ; then grep ^COV "${output}" | cut -f 2- >> "split/Coverage distribution.tab" ; fi && + #end if + + #if 'gcd' in $outputs_to_split: + echo "# GC-depth. The columns are: GC%, unique sequence percentiles, 10th, 25th, 50th, 75th and 90th depth percentile" > "split/GC depth.tab" && + grep -q ^GCD "${output}" ; if [ $? = 0 ] ; then grep ^GCD "${output}" | cut -f 2- >> "split/GC depth.tab" ; fi && + #end if + + ## Unix true command below + + true + + #end if + ]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ,, Coverage distribution min,max,step [1,1000,1] + -d, --remove-dups Exclude from statistics reads marked as duplicates + -f, --required-flag Required flag, 0 for unset. See also `samtools flags` [0] + -F, --filtering-flag Filtering flag, 0 for unset. See also `samtools flags` [0] + --GC-depth the size of GC-depth bins (decreasing bin size increases memory requirement) [2e4] + -h, --help This help message + -i, --insert-size Maximum insert size [8000] + -I, --id Include only listed read group or sample name + -l, --read-length Include in the statistics only reads with the given read length [] + -m, --most-inserts Report only the main part of inserts [0.99] + -q, --trim-quality The BWA trimming parameter [0] + -r, --ref-seq Reference sequence (required for GC-depth and mismatches-per-cycle calculation). Galaxy + will provide options for selecting a reference cached as this Galaxy instance or choosing + one from history. + + + ]]> + + +