# HG changeset patch
# User iuc
# Date 1725765836 0
# Node ID 8fecc86e574a26ea157d9421651dd8d6cb721f2a
# Parent a95f78faca04a5127a6358ea38a2edfb87580a58
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tool_collections/samtools/samtools_stats commit 9c5a35ce695c3d134e41d8695487edd5f71ea33c
diff -r a95f78faca04 -r 8fecc86e574a macros.xml
--- a/macros.xml Wed Jun 22 07:47:48 2022 +0000
+++ b/macros.xml Sun Sep 08 03:23:56 2024 +0000
@@ -5,8 +5,15 @@
- 1.13
- 20.05
+
+ 1.20
+ 2
+ 22.05
-
+ @HELP@
@@ -212,37 +219,7 @@
-
- @misc{SAM_def,
- title={Definition of SAM/BAM format},
- url = {https://samtools.github.io/hts-specs/},}
-
- 10.1093/bioinformatics/btp352
- 10.1093/bioinformatics/btr076
- 10.1093/bioinformatics/btr509
-
- @misc{Danecek_et_al,
- Author={Danecek, P., Schiffels, S., Durbin, R.},
- title={Multiallelic calling model in bcftools (-m)},
- url = {http://samtools.github.io/bcftools/call-m.pdf},}
-
-
- @misc{Durbin_VCQC,
- Author={Durbin, R.},
- title={Segregation based metric for variant call QC},
- url = {http://samtools.github.io/bcftools/rd-SegBias.pdf},}
-
-
- @misc{Li_SamMath,
- Author={Li, H.},
- title={Mathematical Notes on SAMtools Algorithms},
- url = {http://www.broadinstitute.org/gatk/media/docs/Samtools.pdf},}
-
-
- @misc{SamTools_github,
- title={SAMTools GitHub page},
- url = {https://github.com/samtools/samtools},}
-
+ 10.1093/gigascience/giab008
diff -r a95f78faca04 -r 8fecc86e574a samtools_stats.xml
--- a/samtools_stats.xml Wed Jun 22 07:47:48 2022 +0000
+++ b/samtools_stats.xml Sun Sep 08 03:23:56 2024 +0000
@@ -1,4 +1,4 @@
-
+generate statistics for BAM datasetmacros.xml
@@ -45,7 +45,7 @@
#if $use_ref:
--ref-seq "\$reffa"
#end if
- ## TODO currently not implemented in Galaxy
+ ## currently not implemented in Galaxy
## generates STR_VALUE.bamstat where STR is given by -P and VALUE is a value of the TAG given by -S
## needs some discover data sets action...
## -P, --split-prefix STR
@@ -84,7 +84,7 @@
-
+
@@ -131,16 +131,16 @@
-
+
-
-
-
+
+
+
@@ -153,18 +153,17 @@
-
-
+
split_output_cond['split_output_selector'] == 'no'
-
+ split_output_cond['split_output_selector'] == 'yes'
-
+
@@ -173,7 +172,7 @@
-
+
@@ -185,7 +184,7 @@
-
+
@@ -199,7 +198,7 @@
-
+
@@ -212,7 +211,7 @@
-
+
@@ -227,7 +226,7 @@
-
+
@@ -240,7 +239,7 @@
-
+
@@ -255,7 +254,7 @@
-
+
@@ -267,7 +266,7 @@
-
+
@@ -280,7 +279,7 @@
-
+
@@ -292,7 +291,7 @@
-
+
@@ -305,7 +304,7 @@
-
+
@@ -313,7 +312,7 @@
-
+
diff -r a95f78faca04 -r 8fecc86e574a test-data/1.stats.expected
--- a/test-data/1.stats.expected Wed Jun 22 07:47:48 2022 +0000
+++ b/test-data/1.stats.expected Sun Sep 08 03:23:56 2024 +0000
@@ -1,4 +1,4 @@
-# This file was produced by samtools stats (1.12+htslib-1.12) and can be plotted using plot-bamstats
+# This file was produced by samtools stats (1.20+htslib-1.20) and can be plotted using plot-bamstats
# This file contains statistics for all reads.
# The command line was: stats --ref-seq reference.fa -@ 0 infile
# CHK, Checksum [2]Read Names [3]Sequences [4]Qualities
@@ -419,6 +419,8 @@
FRL 35 1
# Read lengths - last fragments. Use `grep ^LRL | cut -f 2-` to extract this part. The columns are: read length, count
LRL 35 1
+# Mapping qualities for reads !(UNMAP|SECOND|SUPPL|QCFAIL|DUP). Use `grep ^MAPQ | cut -f 2-` to extract this part. The columns are: mapq, count
+MAPQ 40 2
# Indel distribution. Use `grep ^ID | cut -f 2-` to extract this part. The columns are: length, number of insertions, number of deletions
# Indels per cycle. Use `grep ^IC | cut -f 2-` to extract this part. The columns are: cycle, number of insertions (fwd), .. (rev) , number of deletions (fwd), .. (rev)
# Coverage distribution. Use `grep ^COV | cut -f 2-` to extract this part.
diff -r a95f78faca04 -r 8fecc86e574a test-data/11.stats.expected
--- a/test-data/11.stats.expected Wed Jun 22 07:47:48 2022 +0000
+++ b/test-data/11.stats.expected Sun Sep 08 03:23:56 2024 +0000
@@ -1,4 +1,4 @@
-# This file was produced by samtools stats (1.12+htslib-1.12) and can be plotted using plot-bamstats
+# This file was produced by samtools stats (1.20+htslib-1.20) and can be plotted using plot-bamstats
# This file contains statistics for all reads.
# The command line was: stats -@ 0 infile ref1:10-24 ref1:30-46 ref1:39-56
# CHK, Checksum [2]Read Names [3]Sequences [4]Qualities
@@ -183,6 +183,32 @@
FRL 10 14
# Read lengths - last fragments. Use `grep ^LRL | cut -f 2-` to extract this part. The columns are: read length, count
LRL 10 12
+# Mapping qualities for reads !(UNMAP|SECOND|SUPPL|QCFAIL|DUP). Use `grep ^MAPQ | cut -f 2-` to extract this part. The columns are: mapq, count
+MAPQ 0 1
+MAPQ 1 1
+MAPQ 2 1
+MAPQ 3 1
+MAPQ 4 1
+MAPQ 5 1
+MAPQ 6 1
+MAPQ 7 1
+MAPQ 8 1
+MAPQ 9 1
+MAPQ 10 1
+MAPQ 11 1
+MAPQ 12 1
+MAPQ 13 1
+MAPQ 14 1
+MAPQ 15 1
+MAPQ 16 1
+MAPQ 17 1
+MAPQ 18 1
+MAPQ 19 1
+MAPQ 20 1
+MAPQ 21 1
+MAPQ 22 1
+MAPQ 23 1
+MAPQ 50 2
# Indel distribution. Use `grep ^ID | cut -f 2-` to extract this part. The columns are: length, number of insertions, number of deletions
# Indels per cycle. Use `grep ^IC | cut -f 2-` to extract this part. The columns are: cycle, number of insertions (fwd), .. (rev) , number of deletions (fwd), .. (rev)
# Coverage distribution. Use `grep ^COV | cut -f 2-` to extract this part.
diff -r a95f78faca04 -r 8fecc86e574a test-data/11.stats.g4.expected
--- a/test-data/11.stats.g4.expected Wed Jun 22 07:47:48 2022 +0000
+++ b/test-data/11.stats.g4.expected Sun Sep 08 03:23:56 2024 +0000
@@ -1,4 +1,4 @@
-# This file was produced by samtools stats (1.12+htslib-1.12) and can be plotted using plot-bamstats
+# This file was produced by samtools stats (1.20+htslib-1.20) and can be plotted using plot-bamstats
# This file contains statistics for all reads.
# The command line was: stats -g 4 -@ 0 infile ref1:10-24 ref1:30-46 ref1:39-56
# CHK, Checksum [2]Read Names [3]Sequences [4]Qualities
@@ -183,6 +183,32 @@
FRL 10 14
# Read lengths - last fragments. Use `grep ^LRL | cut -f 2-` to extract this part. The columns are: read length, count
LRL 10 12
+# Mapping qualities for reads !(UNMAP|SECOND|SUPPL|QCFAIL|DUP). Use `grep ^MAPQ | cut -f 2-` to extract this part. The columns are: mapq, count
+MAPQ 0 1
+MAPQ 1 1
+MAPQ 2 1
+MAPQ 3 1
+MAPQ 4 1
+MAPQ 5 1
+MAPQ 6 1
+MAPQ 7 1
+MAPQ 8 1
+MAPQ 9 1
+MAPQ 10 1
+MAPQ 11 1
+MAPQ 12 1
+MAPQ 13 1
+MAPQ 14 1
+MAPQ 15 1
+MAPQ 16 1
+MAPQ 17 1
+MAPQ 18 1
+MAPQ 19 1
+MAPQ 20 1
+MAPQ 21 1
+MAPQ 22 1
+MAPQ 23 1
+MAPQ 50 2
# Indel distribution. Use `grep ^ID | cut -f 2-` to extract this part. The columns are: length, number of insertions, number of deletions
# Indels per cycle. Use `grep ^IC | cut -f 2-` to extract this part. The columns are: cycle, number of insertions (fwd), .. (rev) , number of deletions (fwd), .. (rev)
# Coverage distribution. Use `grep ^COV | cut -f 2-` to extract this part.
diff -r a95f78faca04 -r 8fecc86e574a test-data/12.2reads.nooverlap.expected
--- a/test-data/12.2reads.nooverlap.expected Wed Jun 22 07:47:48 2022 +0000
+++ b/test-data/12.2reads.nooverlap.expected Sun Sep 08 03:23:56 2024 +0000
@@ -1,6 +1,6 @@
-# This file was produced by samtools stats (1.12+htslib-1.12) and can be plotted using plot-bamstats
+# This file was produced by samtools stats (1.20+htslib-1.20) and can be plotted using plot-bamstats
# This file contains statistics for all reads.
-# The command line was: stats -t /tmp/tmp0r5zs075/files/b/2/1/dataset_b2175431-044e-449d-8f60-1bfd33679b61.dat -p -@ 0 infile
+# The command line was: stats -t /tmp/tmp5q_kwqc0/files/5/e/6/dataset_5e6cda5d-79e8-4b8f-9e14-97f3be9f87de.dat -p -@ 0 infile
# CHK, Checksum [2]Read Names [3]Sequences [4]Qualities
# CHK, CRC32 of reads which passed filtering followed by addition (32bit overflow)
CHK 5b31676a b0edee94 471895da
@@ -855,6 +855,9 @@
FRL 100 1
# Read lengths - last fragments. Use `grep ^LRL | cut -f 2-` to extract this part. The columns are: read length, count
LRL 100 1
+# Mapping qualities for reads !(UNMAP|SECOND|SUPPL|QCFAIL|DUP). Use `grep ^MAPQ | cut -f 2-` to extract this part. The columns are: mapq, count
+MAPQ 7 1
+MAPQ 37 1
# Indel distribution. Use `grep ^ID | cut -f 2-` to extract this part. The columns are: length, number of insertions, number of deletions
ID 2 0 1
ID 3 1 0
diff -r a95f78faca04 -r 8fecc86e574a test-data/12.2reads.overlap.expected
--- a/test-data/12.2reads.overlap.expected Wed Jun 22 07:47:48 2022 +0000
+++ b/test-data/12.2reads.overlap.expected Sun Sep 08 03:23:56 2024 +0000
@@ -1,6 +1,6 @@
-# This file was produced by samtools stats (1.12+htslib-1.12) and can be plotted using plot-bamstats
+# This file was produced by samtools stats (1.20+htslib-1.20) and can be plotted using plot-bamstats
# This file contains statistics for all reads.
-# The command line was: stats -t /tmp/tmp0r5zs075/files/2/a/2/dataset_2a2a3ee9-3133-4880-a37c-50b6354c9000.dat -@ 0 infile
+# The command line was: stats -t /tmp/tmp5q_kwqc0/files/4/0/f/dataset_40fb1106-2fe0-491f-8790-2139e9b1b3bd.dat -@ 0 infile
# CHK, Checksum [2]Read Names [3]Sequences [4]Qualities
# CHK, CRC32 of reads which passed filtering followed by addition (32bit overflow)
CHK 5b31676a b0edee94 471895da
@@ -855,6 +855,9 @@
FRL 100 1
# Read lengths - last fragments. Use `grep ^LRL | cut -f 2-` to extract this part. The columns are: read length, count
LRL 100 1
+# Mapping qualities for reads !(UNMAP|SECOND|SUPPL|QCFAIL|DUP). Use `grep ^MAPQ | cut -f 2-` to extract this part. The columns are: mapq, count
+MAPQ 7 1
+MAPQ 37 1
# Indel distribution. Use `grep ^ID | cut -f 2-` to extract this part. The columns are: length, number of insertions, number of deletions
ID 2 0 1
ID 3 1 0
diff -r a95f78faca04 -r 8fecc86e574a test-data/12.3reads.nooverlap.expected
--- a/test-data/12.3reads.nooverlap.expected Wed Jun 22 07:47:48 2022 +0000
+++ b/test-data/12.3reads.nooverlap.expected Sun Sep 08 03:23:56 2024 +0000
@@ -1,6 +1,6 @@
-# This file was produced by samtools stats (1.12+htslib-1.12) and can be plotted using plot-bamstats
+# This file was produced by samtools stats (1.20+htslib-1.20) and can be plotted using plot-bamstats
# This file contains statistics for all reads.
-# The command line was: stats -t /tmp/tmp0r5zs075/files/1/3/0/dataset_13082855-efe1-437e-8a91-ff7d013770db.dat -p -@ 0 infile
+# The command line was: stats -t /tmp/tmp5q_kwqc0/files/b/2/6/dataset_b262e070-b3ea-4ab2-970d-29726b177ea2.dat -p -@ 0 infile
# CHK, Checksum [2]Read Names [3]Sequences [4]Qualities
# CHK, CRC32 of reads which passed filtering followed by addition (32bit overflow)
CHK 09f8b87f 140798ec 2b989f07
@@ -872,6 +872,9 @@
FRL 100 1
# Read lengths - last fragments. Use `grep ^LRL | cut -f 2-` to extract this part. The columns are: read length, count
LRL 100 2
+# Mapping qualities for reads !(UNMAP|SECOND|SUPPL|QCFAIL|DUP). Use `grep ^MAPQ | cut -f 2-` to extract this part. The columns are: mapq, count
+MAPQ 37 1
+MAPQ 60 2
# Indel distribution. Use `grep ^ID | cut -f 2-` to extract this part. The columns are: length, number of insertions, number of deletions
ID 1 1 2
# Indels per cycle. Use `grep ^IC | cut -f 2-` to extract this part. The columns are: cycle, number of insertions (fwd), .. (rev) , number of deletions (fwd), .. (rev)
diff -r a95f78faca04 -r 8fecc86e574a test-data/12.3reads.overlap.expected
--- a/test-data/12.3reads.overlap.expected Wed Jun 22 07:47:48 2022 +0000
+++ b/test-data/12.3reads.overlap.expected Sun Sep 08 03:23:56 2024 +0000
@@ -1,6 +1,6 @@
-# This file was produced by samtools stats (1.12+htslib-1.12) and can be plotted using plot-bamstats
+# This file was produced by samtools stats (1.20+htslib-1.20) and can be plotted using plot-bamstats
# This file contains statistics for all reads.
-# The command line was: stats -t /tmp/tmp0r5zs075/files/6/5/e/dataset_65ea4e4d-a70d-4001-911d-9d81ff2829a6.dat -@ 0 infile
+# The command line was: stats -t /tmp/tmp5q_kwqc0/files/8/f/b/dataset_8fbfc56f-17f3-4728-895b-34544e586ee7.dat -@ 0 infile
# CHK, Checksum [2]Read Names [3]Sequences [4]Qualities
# CHK, CRC32 of reads which passed filtering followed by addition (32bit overflow)
CHK 09f8b87f 140798ec 2b989f07
@@ -872,6 +872,9 @@
FRL 100 1
# Read lengths - last fragments. Use `grep ^LRL | cut -f 2-` to extract this part. The columns are: read length, count
LRL 100 2
+# Mapping qualities for reads !(UNMAP|SECOND|SUPPL|QCFAIL|DUP). Use `grep ^MAPQ | cut -f 2-` to extract this part. The columns are: mapq, count
+MAPQ 37 1
+MAPQ 60 2
# Indel distribution. Use `grep ^ID | cut -f 2-` to extract this part. The columns are: length, number of insertions, number of deletions
ID 1 1 2
# Indels per cycle. Use `grep ^IC | cut -f 2-` to extract this part. The columns are: cycle, number of insertions (fwd), .. (rev) , number of deletions (fwd), .. (rev)
diff -r a95f78faca04 -r 8fecc86e574a test-data/2.stats.expected
--- a/test-data/2.stats.expected Wed Jun 22 07:47:48 2022 +0000
+++ b/test-data/2.stats.expected Sun Sep 08 03:23:56 2024 +0000
@@ -1,4 +1,4 @@
-# This file was produced by samtools stats (1.12+htslib-1.12) and can be plotted using plot-bamstats
+# This file was produced by samtools stats (1.20+htslib-1.20) and can be plotted using plot-bamstats
# This file contains statistics for all reads.
# The command line was: stats --ref-seq reference.fa -@ 0 infile
# CHK, Checksum [2]Read Names [3]Sequences [4]Qualities
@@ -419,6 +419,8 @@
FRL 35 1
# Read lengths - last fragments. Use `grep ^LRL | cut -f 2-` to extract this part. The columns are: read length, count
LRL 35 1
+# Mapping qualities for reads !(UNMAP|SECOND|SUPPL|QCFAIL|DUP). Use `grep ^MAPQ | cut -f 2-` to extract this part. The columns are: mapq, count
+MAPQ 40 2
# Indel distribution. Use `grep ^ID | cut -f 2-` to extract this part. The columns are: length, number of insertions, number of deletions
# Indels per cycle. Use `grep ^IC | cut -f 2-` to extract this part. The columns are: cycle, number of insertions (fwd), .. (rev) , number of deletions (fwd), .. (rev)
# Coverage distribution. Use `grep ^COV | cut -f 2-` to extract this part.
diff -r a95f78faca04 -r 8fecc86e574a test-data/6.stats.expected
--- a/test-data/6.stats.expected Wed Jun 22 07:47:48 2022 +0000
+++ b/test-data/6.stats.expected Sun Sep 08 03:23:56 2024 +0000
@@ -1,4 +1,4 @@
-# This file was produced by samtools stats (1.12+htslib-1.12) and can be plotted using plot-bamstats
+# This file was produced by samtools stats (1.20+htslib-1.20) and can be plotted using plot-bamstats
# This file contains statistics for all reads.
# The command line was: stats --insert-size 0 --ref-seq reference.fa -@ 0 infile
# CHK, Checksum [2]Read Names [3]Sequences [4]Qualities
@@ -419,6 +419,8 @@
FRL 35 1
# Read lengths - last fragments. Use `grep ^LRL | cut -f 2-` to extract this part. The columns are: read length, count
LRL 35 1
+# Mapping qualities for reads !(UNMAP|SECOND|SUPPL|QCFAIL|DUP). Use `grep ^MAPQ | cut -f 2-` to extract this part. The columns are: mapq, count
+MAPQ 40 2
# Indel distribution. Use `grep ^ID | cut -f 2-` to extract this part. The columns are: length, number of insertions, number of deletions
ID 1 1 0
# Indels per cycle. Use `grep ^IC | cut -f 2-` to extract this part. The columns are: cycle, number of insertions (fwd), .. (rev) , number of deletions (fwd), .. (rev)
diff -r a95f78faca04 -r 8fecc86e574a test-data/samtools_stats_out1.tab
--- a/test-data/samtools_stats_out1.tab Wed Jun 22 07:47:48 2022 +0000
+++ b/test-data/samtools_stats_out1.tab Sun Sep 08 03:23:56 2024 +0000
@@ -1,4 +1,4 @@
-# This file was produced by samtools stats (1.12+htslib-1.12) and can be plotted using plot-bamstats
+# This file was produced by samtools stats (1.20+htslib-1.20) and can be plotted using plot-bamstats
# This file contains statistics for all reads.
# The command line was: stats --ref-seq reference.fa -@ 0 infile
# CHK, Checksum [2]Read Names [3]Sequences [4]Qualities
@@ -1862,6 +1862,14 @@
FRL 251 100
# Read lengths - last fragments. Use `grep ^LRL | cut -f 2-` to extract this part. The columns are: read length, count
LRL 251 100
+# Mapping qualities for reads !(UNMAP|SECOND|SUPPL|QCFAIL|DUP). Use `grep ^MAPQ | cut -f 2-` to extract this part. The columns are: mapq, count
+MAPQ 0 6
+MAPQ 3 6
+MAPQ 8 3
+MAPQ 23 2
+MAPQ 24 4
+MAPQ 40 2
+MAPQ 42 2
# Indel distribution. Use `grep ^ID | cut -f 2-` to extract this part. The columns are: length, number of insertions, number of deletions
ID 1 1 0
ID 2 1 0