changeset 0:f11c1dd0b4ae draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tool_collections/samtools/bam_to_cram commit fdc0553f8451316c1f0b63488db3ef250e99b3ad
author iuc
date Fri, 05 May 2017 13:12:05 -0400
parents
children 38fa15df86b2
files macros.xml samtools_bam_to_cram.xml test-data/test.bam test-data/test.bed test-data/test.cram test-data/test.fa test-data/test.sam
diffstat 7 files changed, 215 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml	Fri May 05 13:12:05 2017 -0400
@@ -0,0 +1,71 @@
+<macros>
+    <xml name="requirements"><!-- Package dependencies for tools that expand this macro. -->
+        <requirements>
+            <requirement version="1.3" type="package">samtools</requirement>
+            <yield/><!-- replaced by the child elements of the expanding tag, if any -->
+        </requirements>
+    </xml>
+
+    <xml name="citations"><!-- Citation list: the SAM/BAM format definition, three papers given by DOI, bcftools/SAMtools technical notes and the SAMtools GitHub page. -->
+        <citations>
+            <citation type="bibtex">
+                @misc{SAM_def,
+                title={Definition of SAM/BAM format},
+                url = {https://samtools.github.io/hts-specs/SAMv1.pdf},}
+            </citation>
+            <citation type="doi">10.1093/bioinformatics/btp352</citation>
+            <citation type="doi">10.1093/bioinformatics/btr076</citation>
+            <citation type="doi">10.1093/bioinformatics/btr509</citation>
+            <citation type="bibtex">
+                @misc{Danecek_et_al,
+                Author={Danecek, P., Schiffels, S., Durbin, R.},
+                title={Multiallelic calling model in bcftools (-m)},
+                url = {http://samtools.github.io/bcftools/call-m.pdf},}
+            </citation>
+            <citation type="bibtex">
+                @misc{Durbin_VCQC,
+                Author={Durbin, R.},
+                title={Segregation based metric for variant call QC},
+                url = {http://samtools.github.io/bcftools/rd-SegBias.pdf},}
+            </citation>
+            <citation type="bibtex">
+                @misc{Li_SamMath,
+                Author={Li, H.},
+                title={Mathematical Notes on SAMtools Algorithms},
+                url = {http://www.broadinstitute.org/gatk/media/docs/Samtools.pdf},}
+            </citation>
+            <citation type="bibtex">
+                @misc{SamTools_github,
+                title={SAMTools GitHub page},
+                url = {https://github.com/samtools/samtools},}
+            </citation>
+        </citations>
+    </xml>
+    <xml name="version_command"><!-- Merges stderr into stdout and keeps the line(s) containing "Version". -->
+        <version_command><![CDATA[samtools 2>&1 | grep Version]]></version_command>
+    </xml>
+    <xml name="stdio"><!-- Any exit code of 1 or above is reported as a fatal error. -->
+        <stdio>
+            <exit_code level="fatal" range="1:" description="Error"/>
+        </stdio>
+    </xml>
+    <token name="@no-chrom-options@">
+-----
+
+.. class:: warningmark
+
+**No options available? How to re-detect metadata**
+
+If you see &quot;No options available&quot; in the &quot;**Select references (chromosomes and contigs) you would like to restrict bam to**&quot; drop-down, you need to re-detect metadata for the dataset you are trying to process. To do this, follow these steps:
+
+1. Click on the **pencil** icon adjacent to the dataset in the history
+2. A new menu will appear in the center pane of the interface
+3. Click **Datatype** tab
+4. Set **New Type** to **BAM**
+5. Click **Save**
+
+The metadata will be re-detected and you will be able to see the list of reference sequences in the &quot;**Select references (chromosomes and contigs) you would like to restrict bam to**&quot; drop-down.
+
+    </token>
+
+</macros>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/samtools_bam_to_cram.xml	Fri May 05 13:12:05 2017 -0400
@@ -0,0 +1,125 @@
+<tool id="samtools_bam_to_cram" name="samtools BAM to CRAM" version="1.2.0">
+    <description>convert BAM alignments to CRAM format</description>
+    <!-- The requirements, stdio and version_command definitions expanded below are imported from macros.xml. -->
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements"/>
+    <expand macro="stdio"/>
+    <expand macro="version_command"/>
+
+    <command><![CDATA[
+        ## Link the BAM index (when the input has one) next to the input file as <input>.bai.
+        #if str( $input_alignment.metadata.bam_index ) != "None":
+            ln -f -s '${input_alignment.metadata.bam_index}' '${input_alignment}.bai' &&
+        #end if
+
+        samtools view
+            #if $parameter_regions.target_region == "regions_bed_file"
+                -L '${parameter_regions.regions_bed_file}'
+            #end if
+
+            -@\${GALAXY_SLOTS:-1}
+            -C
+            ## A cached genome is a row of the all_fasta data table, so the FASTA location is its
+            ## "path" field; a reference from the history is a dataset that expands to its file path.
+            #if $reference_source.reference_source_selector == "cached"
+                -T '${reference_source.input_reference.fields.path}'
+            #else
+                -T '${reference_source.input_reference}'
+            #end if
+            -o '${output_alignment}'
+            '${input_alignment}'
+            #if $parameter_regions.target_region == "region"
+                '${parameter_regions.region_string}'
+            #end if
+    ]]></command>
+
+    <inputs>
+        <param name="input_alignment" type="data" format="bam,sam" label="BAM (or SAM) alignment file"/>
+        <!-- The reference is either a built-in genome matching the input's dbkey or a FASTA dataset from the history. -->
+        <conditional name="reference_source">
+            <param name="reference_source_selector" type="select" label="Load reference genome from">
+                <option value="cached">Local cache</option>
+                <option value="history">History</option>
+            </param>
+            <when value="cached">
+                <param name="input_reference" type="select" label="Genome reference FASTA file">
+                    <options from_data_table="all_fasta">
+                        <filter type="data_meta" ref="input_alignment" key="dbkey" column="1" />
+                    </options>
+                    <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file"/>
+                </param>
+            </when>
+            <when value="history">
+                <param name="input_reference" type="data" format="fasta" label="Genome reference FASTA file"/>
+            </when>
+        </conditional>
+        <conditional name="parameter_regions">
+            <param name="target_region" type="select" label="Choose conversion within specific genomic region(s)">
+                <option value="entire_input_file">Entire BAM alignment file</option>
+                <option value="region">Specific region</option>
+                <option value="regions_bed_file">List of specific regions (BED file)</option>
+            </param>
+            <when value="entire_input_file" />
+            <when value="region">
+                <param name="region_string" type="text" label="Region of the alignment to convert" help="e.g. chrX or chr:start-end" />
+            </when>
+            <when value="regions_bed_file">
+                <param name="regions_bed_file" type="data" format="bed" label="only include reads overlapping this BED FILE" argument="-L"/>
+            </when>
+        </conditional>
+    </inputs>
+
+    <outputs>
+        <data name="output_alignment" format="cram" label="$tool.name on ${on_string}.cram"/>
+    </outputs>
+
+    <tests>
+        <test>
+            <!-- BAM input, reference from the history, whole file -->
+            <param name="input_alignment" ftype="bam" value="test.bam"/>
+            <param name="reference_source_selector" value="history"/>
+            <param name="input_reference" value="test.fa"/>
+            <param name="target_region" value="entire_input_file"/>
+            <output name="output_alignment" file="test.cram" compare="sim_size" delta="250"/>
+        </test>
+        <test>
+            <!-- SAM input, reference from the history, whole file -->
+            <param name="input_alignment" ftype="sam" value="test.sam"/>
+            <param name="reference_source_selector" value="history"/>
+            <param name="input_reference" value="test.fa"/>
+            <param name="target_region" value="entire_input_file"/>
+            <output name="output_alignment" file="test.cram" compare="sim_size" delta="250"/>
+        </test>
+        <test>
+            <!-- BAM input restricted to a single named region -->
+            <param name="input_alignment" ftype="bam" value="test.bam"/>
+            <param name="reference_source_selector" value="history"/>
+            <param name="input_reference" value="test.fa"/>
+            <param name="target_region" value="region"/>
+            <param name="region_string" value="CHROMOSOME_I"/>
+            <output name="output_alignment" file="test.cram" compare="sim_size" delta="250"/>
+        </test>
+        <test>
+            <!-- BAM input restricted to the regions listed in a BED file -->
+            <param name="input_alignment" ftype="bam" value="test.bam"/>
+            <param name="reference_source_selector" value="history"/>
+            <param name="input_reference" value="test.fa"/>
+            <param name="target_region" value="regions_bed_file"/>
+            <param name="regions_bed_file" ftype="bed" value="test.bed"/>
+            <output name="output_alignment" file="test.cram" compare="sim_size" delta="250"/>
+        </test>
+    </tests>
+
+    <help>
+
+**What this tool does**
+
+Converts alignments from the BAM (or SAM) format to the CRAM format using the SAMTools_ toolkit. The CRAM format adds reference-based compression: reads are stored relative to the reference genome, which makes the resulting files smaller.
+
+.. _SAMTools: http://samtools.sourceforge.net/samtools.shtml
+    </help>
+
+    <expand macro="citations"/>
+</tool>
Binary file test-data/test.bam has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test.bed	Fri May 05 13:12:05 2017 -0400
@@ -0,0 +1,1 @@
+CHROMOSOME_I	1	120
Binary file test-data/test.cram has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test.fa	Fri May 05 13:12:05 2017 -0400
@@ -0,0 +1,3 @@
+>CHROMOSOME_I
+gcctaagcctaagcctaagcctaagcctaagcctaagcctaagcctaagc
+ctaagcctaagcctaagcctaagcctaagcctaagcctaagcctaagcct
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test.sam	Fri May 05 13:12:05 2017 -0400
@@ -0,0 +1,15 @@
+@HD	VN:1.4	SO:unsorted
+@SQ	SN:CHROMOSOME_I	LN:100
+@RG	ID:UNKNOWN	SM:UNKNOWN
+@PG	ID:bowtie2	PN:bowtie2	VN:2.0.0-beta5
+@PG	ID:0	CL:aaaaa/aaa/aaaaa/aaaaaa/aaaaaaaaa/aaa/iuc/package_aaaaaaaaa_x_y/aaaaaaaaaaaa/bin/aaaaaaaaaaaaaaaaa aaaaaaaaaa /aaaa/aaaaa/aaa/aaaaaaaaaaaaaaaaaaa/tools/aaaaaaaaa/test-data/test.cram aa /aaaa/aaaaa/aaa/aaaaaaaaaaaaaaaaaaa/tools/aaaaaaaaa/test-data/test.fa -O test	PN:samtools	VN:1.2
+SRR065390.14978392	16	CHROMOSOME_I	2	1	27M1D73M	*	0	0	CCTAGCCCTAACCCTAACCCTAACCCTAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA	#############################@B?8B?BA@@DDBCDDCBC@CDCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC	RG:Z:UNKNOWN	XG:i:1	XM:i:5	XN:i:0	XO:i:1	AS:i:-18	XS:i:-18	YT:Z:UU
+SRR065390.921023	16	CHROMOSOME_I	3	12	100M	*	0	0	CTAAGCCTAAATCTAAGCCTAACCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA	###############################################???88:;98768700000<>:BBA?BBAB?BBBBBBBB>B>BB::;?:00000	RG:Z:UNKNOWN	XG:i:0	XM:i:3	XN:i:0	XO:i:0	AS:i:-6	XS:i:-13	YT:Z:UU
+SRR065390.1871511	16	CHROMOSOME_I	3	1	100M	*	0	0	CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA	<?@<@A8>0:BB@>B<=B@???@=8@B>BB@CA@DACDCBBCCCA@CCCCACCBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC	RG:Z:UNKNOWN	XG:i:0	XM:i:0	XN:i:0	XO:i:0	AS:i:0	XS:i:0	YT:Z:UU
+SRR065390.3743423	16	CHROMOSOME_I	3	1	100M	*	0	0	CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA	##################?6@:7<=@3=@ABAAB>BDBBABADABDDDBDDBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC	RG:Z:UNKNOWN	XG:i:0	XM:i:0	XN:i:0	XO:i:0	AS:i:0	XS:i:0	YT:Z:UU
+SRR065390.4251890	16	CHROMOSOME_I	3	1	100M	*	0	0	CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA	###########@BB=BCBBC?B>B;>B@@ADBBB@DBBBBDCCBBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC	RG:Z:UNKNOWN	XG:i:0	XM:i:0	XN:i:0	XO:i:0	AS:i:0	XS:i:0	YT:Z:UU
+SRR065390.5238868	16	CHROMOSOME_I	3	1	100M	*	0	0	CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA	@,=@@D8D;?BBB>;?BBB==BB@D;>D>BBB>BBDDB<DABADCACDCCBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC	RG:Z:UNKNOWN	XG:i:0	XM:i:0	XN:i:0	XO:i:0	AS:i:0	XS:i:0	YT:Z:UU
+SRR065390.6023338	0	CHROMOSOME_I	3	1	100M	*	0	0	CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAAGCTAC	CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC@CCDDDBCCABB=DABBA?################	RG:Z:UNKNOWN	XG:i:0	XM:i:3	XN:i:0	XO:i:0	AS:i:-6	XS:i:-6	YT:Z:UU
+SRR065390.6815812	16	CHROMOSOME_I	3	1	100M	*	0	0	CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA	###############?@.@?B;B0B=;<DADB@@BDDBBDDBCBCBD@CCDCCCCCCCDCCCCCCCCACCCCCCCCCCBCCCCCCDCCCCCCCCCCCBCC	RG:Z:UNKNOWN	XG:i:0	XM:i:0	XN:i:0	XO:i:0	AS:i:0	XS:i:0	YT:Z:UU
+SRR065390.6905811	16	CHROMOSOME_I	3	1	100M	*	0	0	CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA	#######################BB@>A<BC>@@BCCB@=BACBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC	RG:Z:UNKNOWN	XG:i:0	XM:i:0	XN:i:0	XO:i:0	AS:i:0	XS:i:0	YT:Z:UU
+SRR065390.8289592	16	CHROMOSOME_I	3	1	100M	*	0	0	CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA	###############################A?@C9@@BC=AABDD@A@DC@CB=@BA?6@CCAAC@+CCCCCCCCCCCCCCC@CCCCCCCCCCCCCCCC	RG:Z:UNKNOWN	XG:i:0	XM:i:0	XN:i:0	XO:i:0	AS:i:0	XS:i:0	YT:Z:UU