changeset 9:0d2460a1ce93 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tool_collections/samtools/bam_to_sam commit 9c5a35ce695c3d134e41d8695487edd5f71ea33c
author iuc
date Sun, 08 Sep 2024 03:21:37 +0000
parents c124315e2b88
children
files bam_to_sam.xml macros.xml test-data/bam_to_sam_out1.sam test-data/bam_to_sam_out2.sam
diffstat 4 files changed, 61 insertions(+), 51 deletions(-) [+]
line wrap: on
line diff
--- a/bam_to_sam.xml	Tue Sep 28 15:57:02 2021 +0000
+++ b/bam_to_sam.xml	Sun Sep 08 03:21:37 2024 +0000
@@ -1,4 +1,4 @@
-<tool id="bam_to_sam" name="BAM-to-SAM" version="2.0.2" profile="@PROFILE@">
+<tool id="bam_to_sam" name="BAM-to-SAM" version="2.0.4" profile="@PROFILE@">
     <description>convert BAM to SAM</description>
 
     <macros>
--- a/macros.xml	Tue Sep 28 15:57:02 2021 +0000
+++ b/macros.xml	Sun Sep 08 03:21:37 2024 +0000
@@ -5,8 +5,15 @@
             <yield/>
         </requirements>
     </xml>
-    <token name="@TOOL_VERSION@">1.13</token>
-    <token name="@PROFILE@">20.05</token>
+    <!-- NOTE: for some tools only the version of the requirement, but not the
+        tool's own version, is controlled by the TOOL_VERSION token
+        (because their version is ahead of the requirement version;
+         please only bump the minor version in order to let the requirement
+         version catch up eventually). To find these tools, run:
+        `grep "<tool" . -r | grep -v VERSION_SUFFIX | cut -d":" -f 1` -->
+    <token name="@TOOL_VERSION@">1.20</token>
+    <token name="@VERSION_SUFFIX@">2</token>
+    <token name="@PROFILE@">22.05</token>
     <token name="@FLAGS@"><![CDATA[
         #set $flags = 0
         #if $filter
@@ -50,27 +57,60 @@
         #end for
     ]]></token>
     <token name="@PREPARE_FASTA_IDX@"><![CDATA[
-        ##checks for reference data ($addref_cond.addref_select=="history" or =="cached")
-        ##and sets the -t/-T parameters accordingly:
-        ##- in case of history a symbolic link is used because samtools (view) will generate
-        ##  the index which might not be possible in the directory containing the fasta file
-        ##- in case of cached the absolute path is used which allows to read the cram file
-        ##  without specifying the reference
+        ## Make the user-selected reference genome, if any, accessible through
+        ## a shell variable $reffa, index the reference if necessary, and make
+        ## the fai-index file available through a shell variable $reffai.
+
+        ## For a cached genome, the code simply sets the shell variables to
+        ## point to the genome file and its precalculated index.
+        ## For a genome from the user's history, if that genome is a plain
+        ## fasta file, the code creates a symlink in the pwd, creates the fai
+        ## index file next to it, then sets the shell variables to point to the
+        ## symlink and its index.
+        ## For a fasta.gz dataset from the user's history, it tries the same,
+        ## but this will only succeed if the file got compressed with bgzip.
+        ## For a regular gzipped file samtools faidx will fail, in which case
+        ## the code falls back to decompressing to plain fasta before
+        ## reattempting the indexing.
+        ## Indexing of a bgzipped file produces a regular fai index file *and*
+        ## a compressed gzi file. The former is identical to the fai index of
+        ## the uncompressed fasta.
+
+        ## If the user has not selected a reference (it's an optional parameter
+        ## in some samtools wrappers), a cheetah boolean use_ref is set to
+        ## False to encode that fact.
+
+        #set use_ref=True
         #if $addref_cond.addref_select == "history":
-            ln -s '${addref_cond.ref}' reference.fa &&
-            samtools faidx reference.fa &&
-            #set reffa="reference.fa"
-            #set reffai="reference.fa.fai"
+            #if $addref_cond.ref.is_of_type('fasta'):
+                reffa="reference.fa" &&
+                ln -s '${addref_cond.ref}' \$reffa &&
+                samtools faidx \$reffa &&
+            #else:
+                reffa="reference.fa.gz" &&
+                ln -s '${addref_cond.ref}' \$reffa &&
+                {
+                    samtools faidx \$reffa ||
+                    {
+                        echo "Failed to index compressed reference. Trying decompressed ..." 1>&2 &&
+                        gzip -dc \$reffa > reference.fa &&
+                        reffa="reference.fa" &&
+                        samtools faidx \$reffa;
+                    }
+                } &&
+            #end if
+            reffai=\$reffa.fai &&
         #elif $addref_cond.addref_select == "cached":
-            #set reffa=str($addref_cond.ref.fields.path)
-            #set reffai=str($addref_cond.ref.fields.path)+".fai"
+            ## For a cached genome the absolute path is used, which allows
+            ## reading a CRAM file without specifying the reference.
+            reffa='${addref_cond.ref.fields.path}' &&
+            reffai=\$reffa.fai &&
         #else
-            #set reffa=None
-            #set reffai=None
+            #set use_ref=False
         #end if
     ]]></token>
 
-    <xml name="optional_reference">
+    <xml name="optional_reference" token_help="" token_argument="">
         <conditional name="addref_cond">
             <param name="addref_select" type="select" label="Use a reference sequence">
                 <help>@HELP@</help>
@@ -179,37 +219,7 @@
 
     <xml name="citations">
         <citations>
-            <citation type="bibtex">
-                @misc{SAM_def,
-                title={Definition of SAM/BAM format},
-                url = {https://samtools.github.io/hts-specs/},}
-            </citation>
-            <citation type="doi">10.1093/bioinformatics/btp352</citation>
-            <citation type="doi">10.1093/bioinformatics/btr076</citation>
-            <citation type="doi">10.1093/bioinformatics/btr509</citation>
-            <citation type="bibtex">
-                @misc{Danecek_et_al,
-                Author={Danecek, P., Schiffels, S., Durbin, R.},
-                title={Multiallelic calling model in bcftools (-m)},
-                url = {http://samtools.github.io/bcftools/call-m.pdf},}
-            </citation>
-            <citation type="bibtex">
-                @misc{Durbin_VCQC,
-                Author={Durbin, R.},
-                title={Segregation based metric for variant call QC},
-                url = {http://samtools.github.io/bcftools/rd-SegBias.pdf},}
-            </citation>
-            <citation type="bibtex">
-                @misc{Li_SamMath,
-                Author={Li, H.},
-                title={Mathematical Notes on SAMtools Algorithms},
-                url = {http://www.broadinstitute.org/gatk/media/docs/Samtools.pdf},}
-            </citation>
-            <citation type="bibtex">
-                @misc{SamTools_github,
-                title={SAMTools GitHub page},
-                url = {https://github.com/samtools/samtools},}
-            </citation>
+            <citation type="doi">10.1093/gigascience/giab008</citation>
         </citations>
     </xml>
     <xml name="version_command">
--- a/test-data/bam_to_sam_out1.sam	Tue Sep 28 15:57:02 2021 +0000
+++ b/test-data/bam_to_sam_out1.sam	Sun Sep 08 03:21:37 2024 +0000
@@ -4,7 +4,7 @@
 @SQ	SN:chr8	LN:202
 @RG	ID:0	SM:Hi,Mom!
 @PG	ID:1	PN:Hey!	VN:2.0
-@PG	ID:samtools	PN:samtools	PP:1	VN:1.12	CL:samtools view -o /tmp/tmpl_eh5cxt/files/3/b/2/dataset_3b2eeebc-108d-4ce9-a5b9-17e224264121.dat -h /tmp/tmpl_eh5cxt/files/1/2/d/dataset_12de9904-ebd8-436b-a125-8f034068385e.dat
+@PG	ID:samtools	PN:samtools	PP:1	VN:1.20	CL:samtools view -o /tmp/tmp3bwmizj1/job_working_directory/000/2/outputs/dataset_f1239316-7904-41f9-b3dd-a740a70acf8b.dat -h /tmp/tmp3bwmizj1/files/4/4/a/dataset_44af64ee-5cc1-4454-9cf0-6a3a24296b69.dat
 both_reads_align_clip_marked	83	chr7	1	255	101M	=	302	201	CAACAGAAGCNGGNATCTGTGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN	)'.*.+2,))&&'&*/)-&*-)&.-)&)&),/-&&..)./.,.).*&&,&.&&-)&&&0*&&&&&&&&/32/,01460&&/6/*0*/2/283//36868/&	RG:Z:0
 both_reads_present_only_first_aligns	89	chr7	1	255	101M	*	0	0	CAACAGAAGCNGGNATCTGTGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN	)'.*.+2,))&&'&*/)-&*-)&.-)&)&),/-&&..)./.,.).*&&,&.&&-)&&&0*&&&&&&&&/32/,01460&&/6/*0*/2/283//36868/&	RG:Z:0
 read_2_too_many_gaps	83	chr7	1	255	101M	=	302	201	CAACAGAAGCNGGNATCTGTGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN	)'.*.+2,))&&'&*/)-&*-)&.-)&)&),/-&&..)./.,.).*&&,&.&&-)&&&0*&&&&&&&&/32/,01460&&/6/*0*/2/283//36868/&	RG:Z:0
--- a/test-data/bam_to_sam_out2.sam	Tue Sep 28 15:57:02 2021 +0000
+++ b/test-data/bam_to_sam_out2.sam	Sun Sep 08 03:21:37 2024 +0000
@@ -4,4 +4,4 @@
 @SQ	SN:chr8	LN:202
 @RG	ID:0	SM:Hi,Mom!
 @PG	ID:1	PN:Hey!	VN:2.0
-@PG	ID:samtools	PN:samtools	PP:1	VN:1.12	CL:samtools view -o /tmp/tmpl_eh5cxt/files/3/0/2/dataset_3025757e-159d-4ced-a398-2d7dc823e34b.dat -H /tmp/tmpl_eh5cxt/files/9/5/d/dataset_95dbe51c-ade9-4f37-9399-79ec158c1516.dat
+@PG	ID:samtools	PN:samtools	PP:1	VN:1.20	CL:samtools view -o /tmp/tmp3bwmizj1/job_working_directory/000/4/outputs/dataset_e689a4f6-7c14-4a65-9155-214ed5cbcfa7.dat -H /tmp/tmp3bwmizj1/files/7/d/5/dataset_7d5cef18-567a-4e59-86c6-0dcadae11b36.dat