# HG changeset patch # User jjohnson # Date 1494880038 14400 # Node ID 8c17ddca0eeee85c11fdd4e0db83c8a64f2ef048 Uploaded diff -r 000000000000 -r 8c17ddca0eee ._bamtools-split-ref.xml Binary file ._bamtools-split-ref.xml has changed diff -r 000000000000 -r 8c17ddca0eee ._bamtools-split-tag.xml Binary file ._bamtools-split-tag.xml has changed diff -r 000000000000 -r 8c17ddca0eee bamtools-split-mapped.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bamtools-split-mapped.xml Mon May 15 16:27:18 2017 -0400 @@ -0,0 +1,55 @@ + + + + bamtools + + + + + + + + + + + + + + + + + + + +**What it does** + +BAMTools split is a utility for splitting BAM files. It is based on BAMtools suite of tools by Derek Barnett (https://github.com/pezmaster31/bamtools). + +----- + +.. class:: warningmark + + +**How it works** + +Splits the input BAM file into 2 output files named (MAPPED) and (UNMAPPED) containing mapped and unmapped reads, respectively. + +----- + +.. class:: infomark + +**More information** + +Additional information about BAMtools can be found at https://github.com/pezmaster31/bamtools/wiki + + + + 10.1093/bioinformatics/btr174 + + diff -r 000000000000 -r 8c17ddca0eee bamtools-split-paired.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bamtools-split-paired.xml Mon May 15 16:27:18 2017 -0400 @@ -0,0 +1,56 @@ + + + + bamtools + + + + + + + + + + + + + + + + + + + +**What it does** + +BAMTools split is a utility for splitting BAM files. It is based on BAMtools suite of tools by Derek Barnett (https://github.com/pezmaster31/bamtools). + +----- + +.. class:: warningmark + + +**How it works** + + +Splits the input BAM file into 2 output files named (SINGLE_END) and (PAIRED_END) containing single_end and paired_end reads, respectively. + +----- + +.. 
class:: infomark + +**More information** + +Additional information about BAMtools can be found at https://github.com/pezmaster31/bamtools/wiki + + + + 10.1093/bioinformatics/btr174 + + diff -r 000000000000 -r 8c17ddca0eee bamtools-split-ref.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bamtools-split-ref.xml Mon May 15 16:27:18 2017 -0400 @@ -0,0 +1,88 @@ + + into dataset list collection + + bamtools + + + = 0 else n for n in str($input_bam.metadata.reference_names).split(',')]) + #end if + && mkdir -p outputs + && (export I=0; + for i in $ref_list; + do I=\$((++I)); SN=`printf "split_bam.REF_%s.bam" "\$i"`; + if [ -e \$SN ]; then FN=`printf "outputs/split_bam%05d%s.%s.bam" \$((I)) '$name' "\$i"`; mv \$SN \$FN; fi; + done) + ]]> + + + + + + + + + + + + + + + + + + + + + + + + +**What it does** + +BAMTools split is a utility for splitting BAM files. It is based on BAMtools suite of tools by Derek Barnett (https://github.com/pezmaster31/bamtools). + +----- + +.. class:: warningmark + +**DANGER: Multiple Outputs** + +As described below, splitting a BAM dataset(s) on reference name or a tag value can produce very large numbers of outputs. Read below and know what you are doing. + +----- + +**How it works** + +Split alignments by reference name into a dataset list collection. The collection will be in the same order as the input BAM references. + +In cases of unfinished genomes with very large number of reference sequences (scaffolds) +it can generate thousands (if not millions) of output datasets. + + +----- + +.. 
class:: infomark + +**More information** + +Additional information about BAMtools can be found at https://github.com/pezmaster31/bamtools/wiki + + + + 10.1093/bioinformatics/btr174 + + diff -r 000000000000 -r 8c17ddca0eee bamtools-split-tag.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bamtools-split-tag.xml Mon May 15 16:27:18 2017 -0400 @@ -0,0 +1,70 @@ + + into dataset list collection + + bamtools + + + + + + + + + + + + + + + + + + + + + + + + +**What it does** + +BAMTools split is a utility for splitting BAM files. It is based on BAMtools suite of tools by Derek Barnett (https://github.com/pezmaster31/bamtools). + +----- + +.. class:: warningmark + +**DANGER: Multiple Outputs** + +As described below, splitting a BAM dataset(s) on tag value can produce very large numbers of outputs. Read below and know what you are doing. + +----- + +**How it works** + +Split alignments by tag name into a dataset list collection. + +This can generate a huge number of output datasets depending on the number of distinct values of the TAG. + + +----- + +.. 
class:: infomark + +**More information** + +Additional information about BAMtools can be found at https://github.com/pezmaster31/bamtools/wiki + + + + 10.1093/bioinformatics/btr174 + + diff -r 000000000000 -r 8c17ddca0eee bamtools-split.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bamtools-split.xml Mon May 15 16:27:18 2017 -0400 @@ -0,0 +1,108 @@ + + BAM datasets on variety of attributes + + bamtools + + + $report && + #for $bam_count, $input_bam in enumerate( $input_bams ): + ln -s "${input_bam}" "localbam_${bam_count}.bam" && + ln -s "${input_bam.metadata.bam_index}" "localbam_${bam_count}.bam.bai" && + #end for + bamtools + split + #if str ( $analysis_type.analysis_type_selector ) == "-tag" : + ${analysis_type.analysis_type_selector} "${analysis_type.tag_name}" + #else + ${analysis_type.analysis_type_selector} + #end if + -stub split_bam + #for $bam_count, $input_bam in enumerate( $input_bams ): + -in "localbam_${bam_count}.bam" + #end for + ]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +**What it does** + +BAMTools split is a utility for splitting BAM files. It is based on BAMtools suite of tools by Derek Barnett (https://github.com/pezmaster31/bamtools). + +----- + +.. class:: warningmark + +**DANGER: Multiple Outputs** + +As described below, splitting a BAM dataset(s) on reference name or a tag value can produce very large numbers of outputs. Read below and know what you are doing. + +----- + +**How it works** + +The following options can be specified via "**Split BAM dataset(s) by**" dropdown:: + + Mapping status (-mapped) split mapped/unmapped and generate two output files + named (MAPPED) and (UNMAPPED) containing mapped and unmapped + reads, respectively. + + Pairing status (-paired) split single-end/paired-end alignments and generate two output files + named (SINGLE_END) and (PAIRED_END) containing unpaired and paired + reads, respectively. + + Reference name (-reference) split alignments by reference name. 
In cases of unfinished genomes with + very large number of reference sequences (scaffolds) it can generate + thousands (if not millions) of output datasets. + + Specific tag (-tag) split alignments based on all values of TAG encountered. Choosing this + option from the menu will allow you to enter the tag name. As was the + case with the reference splitting above, this option can produce very + large number of outputs if a tag has a large number of unique values. + +----- + +.. class:: infomark + +**More information** + +Additional information about BAMtools can be found at https://github.com/pezmaster31/bamtools/wiki + + + + 10.1093/bioinformatics/btr174 + + diff -r 000000000000 -r 8c17ddca0eee bamtools-split.xml.save --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bamtools-split.xml.save Mon May 15 16:27:18 2017 -0400 @@ -0,0 +1,146 @@ + + BAM datasets on variety of attributes + + + + + + + + + + bamtools + + + $report && + #for $bam_count, $input_bam in enumerate( $input_bams ): + ln -s "${input_bam}" "localbam_${bam_count}.bam" && + ln -s "${input_bam.metadata.bam_index}" "localbam_${bam_count}.bam.bai" && + #end for + bamtools + split + #if str ( $analysis_type.analysis_type_selector ) == "-tag" : + ${analysis_type.analysis_type_selector} "${analysis_type.tag_name}" + #else + ${analysis_type.analysis_type_selector} + #end if + -stub split_bam + #for $bam_count, $input_bam in enumerate( $input_bams ): + -in "localbam_${bam_count}.bam" + #end for + #if str ( $analysis_type.analysis_type_selector ) == "-reference" and $analysis_type.output_type == "dataset_collection": + #import re + #set $name = $re.sub('\W','_',$re.sub('\.bam$','',$input_bams[0].name)) + #set $ref_list = ' '.join([$re.sub('^.*__sq__(.+)__sq__.*$','\\1',n) if n.find('__sq__') >= 0 else n for n in str($input_bam.metadata.reference_names).split(',')]) + && (export I=0; + for i in $ref_list; + do I=\$((++I)); SN=`printf "split_bam.REF_%s.bam" "\$i"`; + if [ -e \$SN ]; + then FN=`printf 
"split_bam%05d%s.%s.bam" \$((I)) "$name" "\$i"`; + mv \$SN \$FN; + fi; + done) + #end if + ]]> + + + + + + + + + + + + + + + + + + + + + + + + + + analysis_type['analysis_type_selector'] in ('-reference','-tag') and analysis_type['output_type'] == 'dataset_collection' + + + + + + + + + + + + + + + + + + + + + + + + +**What it does** + +BAMTools split is a utility for splitting BAM files. It is based on BAMtools suite of tools by Derek Barnett (https://github.com/pezmaster31/bamtools). + +----- + +.. class:: warningmark + +**DANGER: Multiple Outputs** + +As described below, splitting a BAM dataset(s) on reference name or a tag value can produce very large numbers of outputs. Read below and know what you are doing. + +----- + +**How it works** + +The following options can be specified via "**Split BAM dataset(s) by**" dropdown:: + + Mapping status (-mapped) split mapped/unmapped and generate two output files + named (MAPPED) and (UNMAPPED) containing mapped and unmapped + reads, respectively. + + Pairing status (-paired) split single-end/paired-end alignments and generate two output files + named (SINGLE_END) and (PAIRED_END) containing unpaired and paired + reads, respectively. + + Reference name (-reference) split alignments by reference name. In cases of unfinished genomes with + very large number of reference sequences (scaffolds) it can generate + thousands (if not millions) of output datasets. + + Specific tag (-tag) split alignments based on all values of TAG encountered. Choosing this + option from the menu will allow you to enter the tag name. As was the + case with the reference splitting above, this option can produce very + large number of outputs if a tag has a large number of unique values. + +----- + +.. 
class:: infomark + +**More information** + +Additional information about BAMtools can be found at https://github.com/pezmaster31/bamtools/wiki + + + + 10.1093/bioinformatics/btr174 + + diff -r 000000000000 -r 8c17ddca0eee test-data/bamtools-input-paired.bam Binary file test-data/bamtools-input-paired.bam has changed diff -r 000000000000 -r 8c17ddca0eee test-data/bamtools-input-tags.bam Binary file test-data/bamtools-input-tags.bam has changed diff -r 000000000000 -r 8c17ddca0eee test-data/bamtools-input1.bam Binary file test-data/bamtools-input1.bam has changed diff -r 000000000000 -r 8c17ddca0eee test-data/bamtools-input2.bam Binary file test-data/bamtools-input2.bam has changed diff -r 000000000000 -r 8c17ddca0eee test-data/bamtools-split-test1.bam Binary file test-data/bamtools-split-test1.bam has changed diff -r 000000000000 -r 8c17ddca0eee test-data/bamtools_input2.chr1 Binary file test-data/bamtools_input2.chr1 has changed diff -r 000000000000 -r 8c17ddca0eee test-data/bamtools_input_tags.TAG_XG_N.bam Binary file test-data/bamtools_input_tags.TAG_XG_N.bam has changed diff -r 000000000000 -r 8c17ddca0eee test-data/bamtools_input_tags.TAG_XG_V.bam Binary file test-data/bamtools_input_tags.TAG_XG_V.bam has changed diff -r 000000000000 -r 8c17ddca0eee test-data/split_bam.MAPPED.bam Binary file test-data/split_bam.MAPPED.bam has changed diff -r 000000000000 -r 8c17ddca0eee test-data/split_bam.PAIRED_END.bam Binary file test-data/split_bam.PAIRED_END.bam has changed diff -r 000000000000 -r 8c17ddca0eee test-data/split_bam.SINGLE_END.bam Binary file test-data/split_bam.SINGLE_END.bam has changed diff -r 000000000000 -r 8c17ddca0eee test-data/split_bam.UNMAPPED.bam Binary file test-data/split_bam.UNMAPPED.bam has changed