# HG changeset patch
# User jjohnson
# Date 1494880038 14400
# Node ID 8c17ddca0eeee85c11fdd4e0db83c8a64f2ef048
Uploaded
diff -r 000000000000 -r 8c17ddca0eee ._bamtools-split-ref.xml
Binary file ._bamtools-split-ref.xml has changed
diff -r 000000000000 -r 8c17ddca0eee ._bamtools-split-tag.xml
Binary file ._bamtools-split-tag.xml has changed
diff -r 000000000000 -r 8c17ddca0eee bamtools-split-mapped.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/bamtools-split-mapped.xml Mon May 15 16:27:18 2017 -0400
@@ -0,0 +1,55 @@
+
+
+
+ bamtools
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+**What it does**
+
+BAMTools split is a utility for splitting BAM files. It is based on the BAMtools suite of tools by Derek Barnett (https://github.com/pezmaster31/bamtools).
+
+-----
+
+.. class:: warningmark
+
+
+**How it works**
+
+Splits the input BAM file into 2 output files named (MAPPED) and (UNMAPPED) containing mapped and unmapped reads, respectively.
+
+-----
+
+.. class:: infomark
+
+**More information**
+
+Additional information about BAMtools can be found at https://github.com/pezmaster31/bamtools/wiki
+
+
+
+ 10.1093/bioinformatics/btr174
+
+
diff -r 000000000000 -r 8c17ddca0eee bamtools-split-paired.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/bamtools-split-paired.xml Mon May 15 16:27:18 2017 -0400
@@ -0,0 +1,56 @@
+
+
+
+ bamtools
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+**What it does**
+
+BAMTools split is a utility for splitting BAM files. It is based on the BAMtools suite of tools by Derek Barnett (https://github.com/pezmaster31/bamtools).
+
+-----
+
+.. class:: warningmark
+
+
+**How it works**
+
+
+Splits the input BAM file into 2 output files named (SINGLE_END) and (PAIRED_END) containing single-end and paired-end reads, respectively.
+
+-----
+
+.. class:: infomark
+
+**More information**
+
+Additional information about BAMtools can be found at https://github.com/pezmaster31/bamtools/wiki
+
+
+
+ 10.1093/bioinformatics/btr174
+
+
diff -r 000000000000 -r 8c17ddca0eee bamtools-split-ref.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/bamtools-split-ref.xml Mon May 15 16:27:18 2017 -0400
@@ -0,0 +1,88 @@
+
+ into dataset list collection
+
+ bamtools
+
+
+ = 0 else n for n in str($input_bam.metadata.reference_names).split(',')])
+ #end if
+ && mkdir -p outputs
+ && (export I=0;
+ for i in $ref_list;
+ do I=\$((++I)); SN=`printf "split_bam.REF_%s.bam" "\$i"`;
+ if [ -e \$SN ]; then FN=`printf "outputs/split_bam%05d%s.%s.bam" \$((I)) '$name' "\$i"`; mv \$SN \$FN; fi;
+ done)
+ ]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+**What it does**
+
+BAMTools split is a utility for splitting BAM files. It is based on the BAMtools suite of tools by Derek Barnett (https://github.com/pezmaster31/bamtools).
+
+-----
+
+.. class:: warningmark
+
+**DANGER: Multiple Outputs**
+
+As described below, splitting BAM dataset(s) by reference name can produce a very large number of outputs. Read below and make sure you know what you are doing.
+
+-----
+
+**How it works**
+
+Split alignments by reference name into a dataset list collection. The collection will be in the same order as the input BAM references.
+
+In cases of unfinished genomes with a very large number of reference sequences (scaffolds),
+it can generate thousands (if not millions) of output datasets.
+
+
+-----
+
+.. class:: infomark
+
+**More information**
+
+Additional information about BAMtools can be found at https://github.com/pezmaster31/bamtools/wiki
+
+
+
+ 10.1093/bioinformatics/btr174
+
+
diff -r 000000000000 -r 8c17ddca0eee bamtools-split-tag.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/bamtools-split-tag.xml Mon May 15 16:27:18 2017 -0400
@@ -0,0 +1,70 @@
+
+ into dataset list collection
+
+ bamtools
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+**What it does**
+
+BAMTools split is a utility for splitting BAM files. It is based on the BAMtools suite of tools by Derek Barnett (https://github.com/pezmaster31/bamtools).
+
+-----
+
+.. class:: warningmark
+
+**DANGER: Multiple Outputs**
+
+As described below, splitting BAM dataset(s) by tag value can produce a very large number of outputs. Read below and make sure you know what you are doing.
+
+-----
+
+**How it works**
+
+Split alignments by the values of the specified tag into a dataset list collection.
+
+This can generate a huge number of output datasets depending on the number of distinct values of the TAG.
+
+
+-----
+
+.. class:: infomark
+
+**More information**
+
+Additional information about BAMtools can be found at https://github.com/pezmaster31/bamtools/wiki
+
+
+
+ 10.1093/bioinformatics/btr174
+
+
diff -r 000000000000 -r 8c17ddca0eee bamtools-split.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/bamtools-split.xml Mon May 15 16:27:18 2017 -0400
@@ -0,0 +1,108 @@
+
+ BAM datasets on variety of attributes
+
+ bamtools
+
+
+ $report &&
+ #for $bam_count, $input_bam in enumerate( $input_bams ):
+ ln -s "${input_bam}" "localbam_${bam_count}.bam" &&
+ ln -s "${input_bam.metadata.bam_index}" "localbam_${bam_count}.bam.bai" &&
+ #end for
+ bamtools
+ split
+ #if str ( $analysis_type.analysis_type_selector ) == "-tag" :
+ ${analysis_type.analysis_type_selector} "${analysis_type.tag_name}"
+ #else
+ ${analysis_type.analysis_type_selector}
+ #end if
+ -stub split_bam
+ #for $bam_count, $input_bam in enumerate( $input_bams ):
+ -in "localbam_${bam_count}.bam"
+ #end for
+ ]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+**What it does**
+
+BAMTools split is a utility for splitting BAM files. It is based on the BAMtools suite of tools by Derek Barnett (https://github.com/pezmaster31/bamtools).
+
+-----
+
+.. class:: warningmark
+
+**DANGER: Multiple Outputs**
+
+As described below, splitting BAM dataset(s) by reference name or tag value can produce a very large number of outputs. Read below and make sure you know what you are doing.
+
+-----
+
+**How it works**
+
+The following options can be specified via "**Split BAM dataset(s) by**" dropdown::
+
+ Mapping status (-mapped) split mapped/unmapped and generate two output files
+ named (MAPPED) and (UNMAPPED) containing mapped and unmapped
+ reads, respectively.
+
+ Pairing status (-paired) split single-end/paired-end alignments and generate two output files
+ named (SINGLE_END) and (PAIRED_END) containing unpaired and paired
+ reads, respectively.
+
+ Reference name (-reference) split alignments by reference name. In cases of unfinished genomes with
+ very large number of reference sequences (scaffolds) it can generate
+ thousands (if not millions) of output datasets.
+
+ Specific tag (-tag) split alignments based on all values of TAG encountered. Choosing this
+ option from the menu will allow you to enter the tag name. As was the
+ case with the reference splitting above, this option can produce very
+ large number of outputs if a tag has a large number of unique values.
+
+-----
+
+.. class:: infomark
+
+**More information**
+
+Additional information about BAMtools can be found at https://github.com/pezmaster31/bamtools/wiki
+
+
+
+ 10.1093/bioinformatics/btr174
+
+
diff -r 000000000000 -r 8c17ddca0eee bamtools-split.xml.save
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/bamtools-split.xml.save Mon May 15 16:27:18 2017 -0400
@@ -0,0 +1,146 @@
+
+ BAM datasets on variety of attributes
+
+
+
+
+
+
+
+
+
+ bamtools
+
+
+ $report &&
+ #for $bam_count, $input_bam in enumerate( $input_bams ):
+ ln -s "${input_bam}" "localbam_${bam_count}.bam" &&
+ ln -s "${input_bam.metadata.bam_index}" "localbam_${bam_count}.bam.bai" &&
+ #end for
+ bamtools
+ split
+ #if str ( $analysis_type.analysis_type_selector ) == "-tag" :
+ ${analysis_type.analysis_type_selector} "${analysis_type.tag_name}"
+ #else
+ ${analysis_type.analysis_type_selector}
+ #end if
+ -stub split_bam
+ #for $bam_count, $input_bam in enumerate( $input_bams ):
+ -in "localbam_${bam_count}.bam"
+ #end for
+ #if str ( $analysis_type.analysis_type_selector ) == "-reference" and $analysis_type.output_type == "dataset_collection":
+ #import re
+ #set $name = $re.sub('\W','_',$re.sub('\.bam$','',$input_bams[0].name))
+ #set $ref_list = ' '.join([$re.sub('^.*__sq__(.+)__sq__.*$','\\1',n) if n.find('__sq__') >= 0 else n for n in str($input_bam.metadata.reference_names).split(',')])
+ && (export I=0;
+ for i in $ref_list;
+ do I=\$((++I)); SN=`printf "split_bam.REF_%s.bam" "\$i"`;
+ if [ -e \$SN ];
+ then FN=`printf "split_bam%05d%s.%s.bam" \$((I)) "$name" "\$i"`;
+ mv \$SN \$FN;
+ fi;
+ done)
+ #end if
+ ]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ analysis_type['analysis_type_selector'] in ('-mapped','-paired') or analysis_type['output_type'] != 'dataset_collection'
+
+
+
+ analysis_type['analysis_type_selector'] in ('-reference','-tag') and analysis_type['output_type'] == 'dataset_collection'
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+**What it does**
+
+BAMTools split is a utility for splitting BAM files. It is based on the BAMtools suite of tools by Derek Barnett (https://github.com/pezmaster31/bamtools).
+
+-----
+
+.. class:: warningmark
+
+**DANGER: Multiple Outputs**
+
+As described below, splitting BAM dataset(s) by reference name or tag value can produce a very large number of outputs. Read below and make sure you know what you are doing.
+
+-----
+
+**How it works**
+
+The following options can be specified via "**Split BAM dataset(s) by**" dropdown::
+
+ Mapping status (-mapped) split mapped/unmapped and generate two output files
+ named (MAPPED) and (UNMAPPED) containing mapped and unmapped
+ reads, respectively.
+
+ Pairing status (-paired) split single-end/paired-end alignments and generate two output files
+ named (SINGLE_END) and (PAIRED_END) containing unpaired and paired
+ reads, respectively.
+
+ Reference name (-reference) split alignments by reference name. In cases of unfinished genomes with
+ very large number of reference sequences (scaffolds) it can generate
+ thousands (if not millions) of output datasets.
+
+ Specific tag (-tag) split alignments based on all values of TAG encountered. Choosing this
+ option from the menu will allow you to enter the tag name. As was the
+ case with the reference splitting above, this option can produce very
+ large number of outputs if a tag has a large number of unique values.
+
+-----
+
+.. class:: infomark
+
+**More information**
+
+Additional information about BAMtools can be found at https://github.com/pezmaster31/bamtools/wiki
+
+
+
+ 10.1093/bioinformatics/btr174
+
+
diff -r 000000000000 -r 8c17ddca0eee test-data/bamtools-input-paired.bam
Binary file test-data/bamtools-input-paired.bam has changed
diff -r 000000000000 -r 8c17ddca0eee test-data/bamtools-input-tags.bam
Binary file test-data/bamtools-input-tags.bam has changed
diff -r 000000000000 -r 8c17ddca0eee test-data/bamtools-input1.bam
Binary file test-data/bamtools-input1.bam has changed
diff -r 000000000000 -r 8c17ddca0eee test-data/bamtools-input2.bam
Binary file test-data/bamtools-input2.bam has changed
diff -r 000000000000 -r 8c17ddca0eee test-data/bamtools-split-test1.bam
Binary file test-data/bamtools-split-test1.bam has changed
diff -r 000000000000 -r 8c17ddca0eee test-data/bamtools_input2.chr1
Binary file test-data/bamtools_input2.chr1 has changed
diff -r 000000000000 -r 8c17ddca0eee test-data/bamtools_input_tags.TAG_XG_N.bam
Binary file test-data/bamtools_input_tags.TAG_XG_N.bam has changed
diff -r 000000000000 -r 8c17ddca0eee test-data/bamtools_input_tags.TAG_XG_V.bam
Binary file test-data/bamtools_input_tags.TAG_XG_V.bam has changed
diff -r 000000000000 -r 8c17ddca0eee test-data/split_bam.MAPPED.bam
Binary file test-data/split_bam.MAPPED.bam has changed
diff -r 000000000000 -r 8c17ddca0eee test-data/split_bam.PAIRED_END.bam
Binary file test-data/split_bam.PAIRED_END.bam has changed
diff -r 000000000000 -r 8c17ddca0eee test-data/split_bam.SINGLE_END.bam
Binary file test-data/split_bam.SINGLE_END.bam has changed
diff -r 000000000000 -r 8c17ddca0eee test-data/split_bam.UNMAPPED.bam
Binary file test-data/split_bam.UNMAPPED.bam has changed