# HG changeset patch # User iuc # Date 1725765836 0 # Node ID 8fecc86e574a26ea157d9421651dd8d6cb721f2a # Parent a95f78faca04a5127a6358ea38a2edfb87580a58 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tool_collections/samtools/samtools_stats commit 9c5a35ce695c3d134e41d8695487edd5f71ea33c diff -r a95f78faca04 -r 8fecc86e574a macros.xml --- a/macros.xml Wed Jun 22 07:47:48 2022 +0000 +++ b/macros.xml Sun Sep 08 03:23:56 2024 +0000 @@ -5,8 +5,15 @@ - 1.13 - 20.05 + + 1.20 + 2 + 22.05 - + @HELP@ @@ -212,37 +219,7 @@ - - @misc{SAM_def, - title={Definition of SAM/BAM format}, - url = {https://samtools.github.io/hts-specs/},} - - 10.1093/bioinformatics/btp352 - 10.1093/bioinformatics/btr076 - 10.1093/bioinformatics/btr509 - - @misc{Danecek_et_al, - Author={Danecek, P., Schiffels, S., Durbin, R.}, - title={Multiallelic calling model in bcftools (-m)}, - url = {http://samtools.github.io/bcftools/call-m.pdf},} - - - @misc{Durbin_VCQC, - Author={Durbin, R.}, - title={Segregation based metric for variant call QC}, - url = {http://samtools.github.io/bcftools/rd-SegBias.pdf},} - - - @misc{Li_SamMath, - Author={Li, H.}, - title={Mathematical Notes on SAMtools Algorithms}, - url = {http://www.broadinstitute.org/gatk/media/docs/Samtools.pdf},} - - - @misc{SamTools_github, - title={SAMTools GitHub page}, - url = {https://github.com/samtools/samtools},} - + 10.1093/gigascience/giab008 diff -r a95f78faca04 -r 8fecc86e574a samtools_stats.xml --- a/samtools_stats.xml Wed Jun 22 07:47:48 2022 +0000 +++ b/samtools_stats.xml Sun Sep 08 03:23:56 2024 +0000 @@ -1,4 +1,4 @@ - + generate statistics for BAM dataset macros.xml @@ -45,7 +45,7 @@ #if $use_ref: --ref-seq "\$reffa" #end if - ## TODO currently not implemented in Galaxy + ## currently not implemented in Galaxy ## generates STR_VALUE.bamstat where STR is given by -P and VALUE is a value of the TAG given by -S ## needs some discover data sets action... ## -P, --split-prefix STR @@ -84,7 +84,7 @@ - + @@ -131,16 +131,16 @@ - + - - - + + + @@ -153,18 +153,17 @@ - - + split_output_cond['split_output_selector'] == 'no' - + split_output_cond['split_output_selector'] == 'yes' - + @@ -173,7 +172,7 @@ - + @@ -185,7 +184,7 @@ - + @@ -199,7 +198,7 @@ - + @@ -212,7 +211,7 @@ - + @@ -227,7 +226,7 @@ - + @@ -240,7 +239,7 @@ - + @@ -255,7 +254,7 @@ - + @@ -267,7 +266,7 @@ - + @@ -280,7 +279,7 @@ - + @@ -292,7 +291,7 @@ - + @@ -305,7 +304,7 @@ - + @@ -313,7 +312,7 @@ - + diff -r a95f78faca04 -r 8fecc86e574a test-data/1.stats.expected --- a/test-data/1.stats.expected Wed Jun 22 07:47:48 2022 +0000 +++ b/test-data/1.stats.expected Sun Sep 08 03:23:56 2024 +0000 @@ -1,4 +1,4 @@ -# This file was produced by samtools stats (1.12+htslib-1.12) and can be plotted using plot-bamstats +# This file was produced by samtools stats (1.20+htslib-1.20) and can be plotted using plot-bamstats # This file contains statistics for all reads. # The command line was: stats --ref-seq reference.fa -@ 0 infile # CHK, Checksum [2]Read Names [3]Sequences [4]Qualities @@ -419,6 +419,8 @@ FRL 35 1 # Read lengths - last fragments. Use `grep ^LRL | cut -f 2-` to extract this part. The columns are: read length, count LRL 35 1 +# Mapping qualities for reads !(UNMAP|SECOND|SUPPL|QCFAIL|DUP). Use `grep ^MAPQ | cut -f 2-` to extract this part. The columns are: mapq, count +MAPQ 40 2 # Indel distribution. Use `grep ^ID | cut -f 2-` to extract this part. The columns are: length, number of insertions, number of deletions # Indels per cycle. Use `grep ^IC | cut -f 2-` to extract this part. The columns are: cycle, number of insertions (fwd), .. (rev) , number of deletions (fwd), .. (rev) # Coverage distribution. Use `grep ^COV | cut -f 2-` to extract this part. diff -r a95f78faca04 -r 8fecc86e574a test-data/11.stats.expected --- a/test-data/11.stats.expected Wed Jun 22 07:47:48 2022 +0000 +++ b/test-data/11.stats.expected Sun Sep 08 03:23:56 2024 +0000 @@ -1,4 +1,4 @@ -# This file was produced by samtools stats (1.12+htslib-1.12) and can be plotted using plot-bamstats +# This file was produced by samtools stats (1.20+htslib-1.20) and can be plotted using plot-bamstats # This file contains statistics for all reads. # The command line was: stats -@ 0 infile ref1:10-24 ref1:30-46 ref1:39-56 # CHK, Checksum [2]Read Names [3]Sequences [4]Qualities @@ -183,6 +183,32 @@ FRL 10 14 # Read lengths - last fragments. Use `grep ^LRL | cut -f 2-` to extract this part. The columns are: read length, count LRL 10 12 +# Mapping qualities for reads !(UNMAP|SECOND|SUPPL|QCFAIL|DUP). Use `grep ^MAPQ | cut -f 2-` to extract this part. The columns are: mapq, count +MAPQ 0 1 +MAPQ 1 1 +MAPQ 2 1 +MAPQ 3 1 +MAPQ 4 1 +MAPQ 5 1 +MAPQ 6 1 +MAPQ 7 1 +MAPQ 8 1 +MAPQ 9 1 +MAPQ 10 1 +MAPQ 11 1 +MAPQ 12 1 +MAPQ 13 1 +MAPQ 14 1 +MAPQ 15 1 +MAPQ 16 1 +MAPQ 17 1 +MAPQ 18 1 +MAPQ 19 1 +MAPQ 20 1 +MAPQ 21 1 +MAPQ 22 1 +MAPQ 23 1 +MAPQ 50 2 # Indel distribution. Use `grep ^ID | cut -f 2-` to extract this part. The columns are: length, number of insertions, number of deletions # Indels per cycle. Use `grep ^IC | cut -f 2-` to extract this part. The columns are: cycle, number of insertions (fwd), .. (rev) , number of deletions (fwd), .. (rev) # Coverage distribution. Use `grep ^COV | cut -f 2-` to extract this part. diff -r a95f78faca04 -r 8fecc86e574a test-data/11.stats.g4.expected --- a/test-data/11.stats.g4.expected Wed Jun 22 07:47:48 2022 +0000 +++ b/test-data/11.stats.g4.expected Sun Sep 08 03:23:56 2024 +0000 @@ -1,4 +1,4 @@ -# This file was produced by samtools stats (1.12+htslib-1.12) and can be plotted using plot-bamstats +# This file was produced by samtools stats (1.20+htslib-1.20) and can be plotted using plot-bamstats # This file contains statistics for all reads. # The command line was: stats -g 4 -@ 0 infile ref1:10-24 ref1:30-46 ref1:39-56 # CHK, Checksum [2]Read Names [3]Sequences [4]Qualities @@ -183,6 +183,32 @@ FRL 10 14 # Read lengths - last fragments. Use `grep ^LRL | cut -f 2-` to extract this part. The columns are: read length, count LRL 10 12 +# Mapping qualities for reads !(UNMAP|SECOND|SUPPL|QCFAIL|DUP). Use `grep ^MAPQ | cut -f 2-` to extract this part. The columns are: mapq, count +MAPQ 0 1 +MAPQ 1 1 +MAPQ 2 1 +MAPQ 3 1 +MAPQ 4 1 +MAPQ 5 1 +MAPQ 6 1 +MAPQ 7 1 +MAPQ 8 1 +MAPQ 9 1 +MAPQ 10 1 +MAPQ 11 1 +MAPQ 12 1 +MAPQ 13 1 +MAPQ 14 1 +MAPQ 15 1 +MAPQ 16 1 +MAPQ 17 1 +MAPQ 18 1 +MAPQ 19 1 +MAPQ 20 1 +MAPQ 21 1 +MAPQ 22 1 +MAPQ 23 1 +MAPQ 50 2 # Indel distribution. Use `grep ^ID | cut -f 2-` to extract this part. The columns are: length, number of insertions, number of deletions # Indels per cycle. Use `grep ^IC | cut -f 2-` to extract this part. The columns are: cycle, number of insertions (fwd), .. (rev) , number of deletions (fwd), .. (rev) # Coverage distribution. Use `grep ^COV | cut -f 2-` to extract this part. diff -r a95f78faca04 -r 8fecc86e574a test-data/12.2reads.nooverlap.expected --- a/test-data/12.2reads.nooverlap.expected Wed Jun 22 07:47:48 2022 +0000 +++ b/test-data/12.2reads.nooverlap.expected Sun Sep 08 03:23:56 2024 +0000 @@ -1,6 +1,6 @@ -# This file was produced by samtools stats (1.12+htslib-1.12) and can be plotted using plot-bamstats +# This file was produced by samtools stats (1.20+htslib-1.20) and can be plotted using plot-bamstats # This file contains statistics for all reads. -# The command line was: stats -t /tmp/tmp0r5zs075/files/b/2/1/dataset_b2175431-044e-449d-8f60-1bfd33679b61.dat -p -@ 0 infile +# The command line was: stats -t /tmp/tmp5q_kwqc0/files/5/e/6/dataset_5e6cda5d-79e8-4b8f-9e14-97f3be9f87de.dat -p -@ 0 infile # CHK, Checksum [2]Read Names [3]Sequences [4]Qualities # CHK, CRC32 of reads which passed filtering followed by addition (32bit overflow) CHK 5b31676a b0edee94 471895da @@ -855,6 +855,9 @@ FRL 100 1 # Read lengths - last fragments. Use `grep ^LRL | cut -f 2-` to extract this part. The columns are: read length, count LRL 100 1 +# Mapping qualities for reads !(UNMAP|SECOND|SUPPL|QCFAIL|DUP). Use `grep ^MAPQ | cut -f 2-` to extract this part. The columns are: mapq, count +MAPQ 7 1 +MAPQ 37 1 # Indel distribution. Use `grep ^ID | cut -f 2-` to extract this part. The columns are: length, number of insertions, number of deletions ID 2 0 1 ID 3 1 0 diff -r a95f78faca04 -r 8fecc86e574a test-data/12.2reads.overlap.expected --- a/test-data/12.2reads.overlap.expected Wed Jun 22 07:47:48 2022 +0000 +++ b/test-data/12.2reads.overlap.expected Sun Sep 08 03:23:56 2024 +0000 @@ -1,6 +1,6 @@ -# This file was produced by samtools stats (1.12+htslib-1.12) and can be plotted using plot-bamstats +# This file was produced by samtools stats (1.20+htslib-1.20) and can be plotted using plot-bamstats # This file contains statistics for all reads. -# The command line was: stats -t /tmp/tmp0r5zs075/files/2/a/2/dataset_2a2a3ee9-3133-4880-a37c-50b6354c9000.dat -@ 0 infile +# The command line was: stats -t /tmp/tmp5q_kwqc0/files/4/0/f/dataset_40fb1106-2fe0-491f-8790-2139e9b1b3bd.dat -@ 0 infile # CHK, Checksum [2]Read Names [3]Sequences [4]Qualities # CHK, CRC32 of reads which passed filtering followed by addition (32bit overflow) CHK 5b31676a b0edee94 471895da @@ -855,6 +855,9 @@ FRL 100 1 # Read lengths - last fragments. Use `grep ^LRL | cut -f 2-` to extract this part. The columns are: read length, count LRL 100 1 +# Mapping qualities for reads !(UNMAP|SECOND|SUPPL|QCFAIL|DUP). Use `grep ^MAPQ | cut -f 2-` to extract this part. The columns are: mapq, count +MAPQ 7 1 +MAPQ 37 1 # Indel distribution. Use `grep ^ID | cut -f 2-` to extract this part. The columns are: length, number of insertions, number of deletions ID 2 0 1 ID 3 1 0 diff -r a95f78faca04 -r 8fecc86e574a test-data/12.3reads.nooverlap.expected --- a/test-data/12.3reads.nooverlap.expected Wed Jun 22 07:47:48 2022 +0000 +++ b/test-data/12.3reads.nooverlap.expected Sun Sep 08 03:23:56 2024 +0000 @@ -1,6 +1,6 @@ -# This file was produced by samtools stats (1.12+htslib-1.12) and can be plotted using plot-bamstats +# This file was produced by samtools stats (1.20+htslib-1.20) and can be plotted using plot-bamstats # This file contains statistics for all reads. -# The command line was: stats -t /tmp/tmp0r5zs075/files/1/3/0/dataset_13082855-efe1-437e-8a91-ff7d013770db.dat -p -@ 0 infile +# The command line was: stats -t /tmp/tmp5q_kwqc0/files/b/2/6/dataset_b262e070-b3ea-4ab2-970d-29726b177ea2.dat -p -@ 0 infile # CHK, Checksum [2]Read Names [3]Sequences [4]Qualities # CHK, CRC32 of reads which passed filtering followed by addition (32bit overflow) CHK 09f8b87f 140798ec 2b989f07 @@ -872,6 +872,9 @@ FRL 100 1 # Read lengths - last fragments. Use `grep ^LRL | cut -f 2-` to extract this part. The columns are: read length, count LRL 100 2 +# Mapping qualities for reads !(UNMAP|SECOND|SUPPL|QCFAIL|DUP). Use `grep ^MAPQ | cut -f 2-` to extract this part. The columns are: mapq, count +MAPQ 37 1 +MAPQ 60 2 # Indel distribution. Use `grep ^ID | cut -f 2-` to extract this part. The columns are: length, number of insertions, number of deletions ID 1 1 2 # Indels per cycle. Use `grep ^IC | cut -f 2-` to extract this part. The columns are: cycle, number of insertions (fwd), .. (rev) , number of deletions (fwd), .. (rev) diff -r a95f78faca04 -r 8fecc86e574a test-data/12.3reads.overlap.expected --- a/test-data/12.3reads.overlap.expected Wed Jun 22 07:47:48 2022 +0000 +++ b/test-data/12.3reads.overlap.expected Sun Sep 08 03:23:56 2024 +0000 @@ -1,6 +1,6 @@ -# This file was produced by samtools stats (1.12+htslib-1.12) and can be plotted using plot-bamstats +# This file was produced by samtools stats (1.20+htslib-1.20) and can be plotted using plot-bamstats # This file contains statistics for all reads. -# The command line was: stats -t /tmp/tmp0r5zs075/files/6/5/e/dataset_65ea4e4d-a70d-4001-911d-9d81ff2829a6.dat -@ 0 infile +# The command line was: stats -t /tmp/tmp5q_kwqc0/files/8/f/b/dataset_8fbfc56f-17f3-4728-895b-34544e586ee7.dat -@ 0 infile # CHK, Checksum [2]Read Names [3]Sequences [4]Qualities # CHK, CRC32 of reads which passed filtering followed by addition (32bit overflow) CHK 09f8b87f 140798ec 2b989f07 @@ -872,6 +872,9 @@ FRL 100 1 # Read lengths - last fragments. Use `grep ^LRL | cut -f 2-` to extract this part. The columns are: read length, count LRL 100 2 +# Mapping qualities for reads !(UNMAP|SECOND|SUPPL|QCFAIL|DUP). Use `grep ^MAPQ | cut -f 2-` to extract this part. The columns are: mapq, count +MAPQ 37 1 +MAPQ 60 2 # Indel distribution. Use `grep ^ID | cut -f 2-` to extract this part. The columns are: length, number of insertions, number of deletions ID 1 1 2 # Indels per cycle. Use `grep ^IC | cut -f 2-` to extract this part. The columns are: cycle, number of insertions (fwd), .. (rev) , number of deletions (fwd), .. (rev) diff -r a95f78faca04 -r 8fecc86e574a test-data/2.stats.expected --- a/test-data/2.stats.expected Wed Jun 22 07:47:48 2022 +0000 +++ b/test-data/2.stats.expected Sun Sep 08 03:23:56 2024 +0000 @@ -1,4 +1,4 @@ -# This file was produced by samtools stats (1.12+htslib-1.12) and can be plotted using plot-bamstats +# This file was produced by samtools stats (1.20+htslib-1.20) and can be plotted using plot-bamstats # This file contains statistics for all reads. # The command line was: stats --ref-seq reference.fa -@ 0 infile # CHK, Checksum [2]Read Names [3]Sequences [4]Qualities @@ -419,6 +419,8 @@ FRL 35 1 # Read lengths - last fragments. Use `grep ^LRL | cut -f 2-` to extract this part. The columns are: read length, count LRL 35 1 +# Mapping qualities for reads !(UNMAP|SECOND|SUPPL|QCFAIL|DUP). Use `grep ^MAPQ | cut -f 2-` to extract this part. The columns are: mapq, count +MAPQ 40 2 # Indel distribution. Use `grep ^ID | cut -f 2-` to extract this part. The columns are: length, number of insertions, number of deletions # Indels per cycle. Use `grep ^IC | cut -f 2-` to extract this part. The columns are: cycle, number of insertions (fwd), .. (rev) , number of deletions (fwd), .. (rev) # Coverage distribution. Use `grep ^COV | cut -f 2-` to extract this part. diff -r a95f78faca04 -r 8fecc86e574a test-data/6.stats.expected --- a/test-data/6.stats.expected Wed Jun 22 07:47:48 2022 +0000 +++ b/test-data/6.stats.expected Sun Sep 08 03:23:56 2024 +0000 @@ -1,4 +1,4 @@ -# This file was produced by samtools stats (1.12+htslib-1.12) and can be plotted using plot-bamstats +# This file was produced by samtools stats (1.20+htslib-1.20) and can be plotted using plot-bamstats # This file contains statistics for all reads. # The command line was: stats --insert-size 0 --ref-seq reference.fa -@ 0 infile # CHK, Checksum [2]Read Names [3]Sequences [4]Qualities @@ -419,6 +419,8 @@ FRL 35 1 # Read lengths - last fragments. Use `grep ^LRL | cut -f 2-` to extract this part. The columns are: read length, count LRL 35 1 +# Mapping qualities for reads !(UNMAP|SECOND|SUPPL|QCFAIL|DUP). Use `grep ^MAPQ | cut -f 2-` to extract this part. The columns are: mapq, count +MAPQ 40 2 # Indel distribution. Use `grep ^ID | cut -f 2-` to extract this part. The columns are: length, number of insertions, number of deletions ID 1 1 0 # Indels per cycle. Use `grep ^IC | cut -f 2-` to extract this part. The columns are: cycle, number of insertions (fwd), .. (rev) , number of deletions (fwd), .. (rev) diff -r a95f78faca04 -r 8fecc86e574a test-data/samtools_stats_out1.tab --- a/test-data/samtools_stats_out1.tab Wed Jun 22 07:47:48 2022 +0000 +++ b/test-data/samtools_stats_out1.tab Sun Sep 08 03:23:56 2024 +0000 @@ -1,4 +1,4 @@ -# This file was produced by samtools stats (1.12+htslib-1.12) and can be plotted using plot-bamstats +# This file was produced by samtools stats (1.20+htslib-1.20) and can be plotted using plot-bamstats # This file contains statistics for all reads. # The command line was: stats --ref-seq reference.fa -@ 0 infile # CHK, Checksum [2]Read Names [3]Sequences [4]Qualities @@ -1862,6 +1862,14 @@ FRL 251 100 # Read lengths - last fragments. Use `grep ^LRL | cut -f 2-` to extract this part. The columns are: read length, count LRL 251 100 +# Mapping qualities for reads !(UNMAP|SECOND|SUPPL|QCFAIL|DUP). Use `grep ^MAPQ | cut -f 2-` to extract this part. The columns are: mapq, count +MAPQ 0 6 +MAPQ 3 6 +MAPQ 8 3 +MAPQ 23 2 +MAPQ 24 4 +MAPQ 40 2 +MAPQ 42 2 # Indel distribution. Use `grep ^ID | cut -f 2-` to extract this part. The columns are: length, number of insertions, number of deletions ID 1 1 0 ID 2 1 0