# HG changeset patch
# User iuc
# Date 1632845302 0
# Node ID ab75c7ea49a5d9bba174414a7df8f7e01ae3fecb
# Parent df598009c8210cfe75e461aae93b55dbd8fbb6ce
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tool_collections/samtools/samtools_stats commit 0f75269223c0821c6c82acf98fde947d0f816f2b"
diff -r df598009c821 -r ab75c7ea49a5 macros.xml
--- a/macros.xml Thu Oct 10 03:39:57 2019 -0400
+++ b/macros.xml Tue Sep 28 16:08:22 2021 +0000
@@ -5,10 +5,16 @@
- 1.9
- #set $flags = sum(map(int, str($filter).split(',')))
+ 1.13
+ 20.05
+
+
+
+
+
+ @HELP@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ @HELP@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
+
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
-
+
generate statistics for BAM dataset
macros.xml
@@ -16,16 +16,12 @@
#end if
${remove_dups}
#if str( $filter_by_flags.filter_flags ) == "filter":
- #if $filter_by_flags.require_flags:
- #set $filter = $filter_by_flags.require_flags
- @FLAGS@
- --required-flag $flags
- #end if
- #if $filter_by_flags.exclude_flags:
- #set $filter = $filter_by_flags.exclude_flags
- @FLAGS@
- --filtering-flag $flags
- #end if
+ #set $filter = $filter_by_flags.require_flags
+ @FLAGS@
+ --required-flag $flags
+ #set $filter = $filter_by_flags.exclude_flags
+ @FLAGS@
+ --filtering-flag $flags
#end if
#if str($gc_depth):
--GC-depth ${gc_depth}
@@ -33,12 +29,9 @@
#if str($insert_size):
--insert-size ${insert_size}
#end if
- ## The code below is commented out because using -I/--id options causes
- ## in samtools up to 1.9 the following exception
- ## Samtools-htslib: init_group_id() header parsing not yet implemented
- ##if str($read_group) != "":
- ## -I "${read_group}"
- ##end if
+ ## #if $read_group
+ ## -I '$read_group'
+ ## #end if
#if str($read_length):
--read-length ${read_length}
#end if
@@ -63,7 +56,7 @@
#if str($cov_threshold):
-g $cov_threshold
#end if
- -@ \$addthreads
+ -@ \$addthreads
infile
@REGIONS_MANUAL@
> '$output'
@@ -139,40 +132,19 @@
-
+
+
+
+
+
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
-
+
@@ -198,7 +170,7 @@
-
+
@@ -207,7 +179,7 @@
-
+
@@ -220,7 +192,7 @@
-
+
@@ -236,7 +208,7 @@
-
+
@@ -251,7 +223,7 @@
-
+
@@ -265,7 +237,7 @@
-
+
@@ -280,7 +252,7 @@
-
+
@@ -292,7 +264,7 @@
-
+
@@ -305,7 +277,7 @@
-
+
@@ -317,7 +289,7 @@
-
+
@@ -330,7 +302,7 @@
-
+
@@ -357,6 +329,18 @@
+
+
-
diff -r df598009c821 -r ab75c7ea49a5 test-data/1.stats.expected
--- a/test-data/1.stats.expected Thu Oct 10 03:39:57 2019 -0400
+++ b/test-data/1.stats.expected Tue Sep 28 16:08:22 2021 +0000
@@ -1,8 +1,11 @@
+# This file was produced by samtools stats (1.12+htslib-1.12) and can be plotted using plot-bamstats
+# This file contains statistics for all reads.
+# The command line was: stats --ref-seq reference.fa -@ 0 infile
# CHK, Checksum [2]Read Names [3]Sequences [4]Qualities
# CHK, CRC32 of reads which passed filtering followed by addition (32bit overflow)
CHK 1a1c1362 29c426ae 7bab45da
# Summary Numbers. Use `grep ^SN | cut -f 2-` to extract this part.
-SN raw total sequences: 2
+SN raw total sequences: 2 # excluding supplementary and secondary reads
SN filtered sequences: 0
SN sequences: 2
SN is sorted: 1
@@ -17,6 +20,7 @@
SN reads MQ0: 0 # mapped and MQ=0
SN reads QC failed: 0
SN non-primary alignments: 0
+SN supplementary alignments: 0
SN total length: 70 # ignores clipping
SN total first fragment length: 35 # ignores clipping
SN total last fragment length: 35 # ignores clipping
@@ -195,6 +199,42 @@
GCC 33 50.00 0.00 0.00 50.00 0.00 0.00
GCC 34 50.00 0.00 50.00 0.00 0.00 0.00
GCC 35 0.00 0.00 50.00 50.00 0.00 0.00
+# ACGT content per cycle, read oriented. Use `grep ^GCT | cut -f 2-` to extract this part. The columns are: cycle; A,C,G,T base counts as a percentage of all A/C/G/T bases [%]
+GCT 1 0.00 50.00 0.00 50.00
+GCT 2 50.00 0.00 50.00 0.00
+GCT 3 0.00 0.00 50.00 50.00
+GCT 4 0.00 0.00 100.00 0.00
+GCT 5 0.00 0.00 50.00 50.00
+GCT 6 0.00 50.00 0.00 50.00
+GCT 7 0.00 0.00 50.00 50.00
+GCT 8 0.00 0.00 0.00 100.00
+GCT 9 0.00 50.00 50.00 0.00
+GCT 10 50.00 0.00 50.00 0.00
+GCT 11 50.00 0.00 0.00 50.00
+GCT 12 50.00 0.00 50.00 0.00
+GCT 13 50.00 0.00 50.00 0.00
+GCT 14 0.00 0.00 0.00 100.00
+GCT 15 100.00 0.00 0.00 0.00
+GCT 16 50.00 0.00 0.00 50.00
+GCT 17 0.00 0.00 50.00 50.00
+GCT 18 0.00 50.00 50.00 0.00
+GCT 19 0.00 100.00 0.00 0.00
+GCT 20 0.00 0.00 50.00 50.00
+GCT 21 0.00 0.00 100.00 0.00
+GCT 22 0.00 50.00 0.00 50.00
+GCT 23 50.00 0.00 0.00 50.00
+GCT 24 50.00 0.00 50.00 0.00
+GCT 25 50.00 0.00 50.00 0.00
+GCT 26 0.00 0.00 100.00 0.00
+GCT 27 50.00 0.00 0.00 50.00
+GCT 28 0.00 0.00 50.00 50.00
+GCT 29 0.00 50.00 0.00 50.00
+GCT 30 0.00 50.00 0.00 50.00
+GCT 31 0.00 50.00 50.00 0.00
+GCT 32 0.00 0.00 100.00 0.00
+GCT 33 100.00 0.00 0.00 0.00
+GCT 34 0.00 0.00 50.00 50.00
+GCT 35 50.00 0.00 50.00 0.00
# ACGT content per cycle for first fragments. Use `grep ^FBC | cut -f 2-` to extract this part. The columns are: cycle; A,C,G,T base counts as a percentage of all A/C/G/T bases [%]; and N and O counts as a percentage of all A/C/G/T bases [%]
FBC 1 0.00 0.00 0.00 100.00 0.00 0.00
FBC 2 0.00 0.00 100.00 0.00 0.00 0.00
@@ -231,6 +271,8 @@
FBC 33 100.00 0.00 0.00 0.00 0.00 0.00
FBC 34 0.00 0.00 100.00 0.00 0.00 0.00
FBC 35 0.00 0.00 100.00 0.00 0.00 0.00
+# ACGT raw counters for first fragments. Use `grep ^FTC | cut -f 2-` to extract this part. The columns are: A,C,G,T,N base counters
+FTC 6 5 15 9 0
# ACGT content per cycle for last fragments. Use `grep ^LBC | cut -f 2-` to extract this part. The columns are: cycle; A,C,G,T base counts as a percentage of all A/C/G/T bases [%]; and N and O counts as a percentage of all A/C/G/T bases [%]
LBC 1 0.00 0.00 100.00 0.00 0.00 0.00
LBC 2 0.00 0.00 0.00 100.00 0.00 0.00
@@ -267,6 +309,8 @@
LBC 33 0.00 0.00 0.00 100.00 0.00 0.00
LBC 34 100.00 0.00 0.00 0.00 0.00 0.00
LBC 35 0.00 0.00 0.00 100.00 0.00 0.00
+# ACGT raw counters for last fragments. Use `grep ^LTC | cut -f 2-` to extract this part. The columns are: A,C,G,T,N base counters
+LTC 11 10 5 9 0
# Insert sizes. Use `grep ^IS | cut -f 2-` to extract this part. The columns are: insert size, pairs total, inward oriented pairs, outward oriented pairs, other pairs
IS 0 0 0 0 0
IS 1 0 0 0 0
diff -r df598009c821 -r ab75c7ea49a5 test-data/11.stats.expected
--- a/test-data/11.stats.expected Thu Oct 10 03:39:57 2019 -0400
+++ b/test-data/11.stats.expected Tue Sep 28 16:08:22 2021 +0000
@@ -1,8 +1,11 @@
+# This file was produced by samtools stats (1.12+htslib-1.12) and can be plotted using plot-bamstats
+# This file contains statistics for all reads.
+# The command line was: stats -@ 0 infile ref1:10-24 ref1:30-46 ref1:39-56
# CHK, Checksum [2]Read Names [3]Sequences [4]Qualities
# CHK, CRC32 of reads which passed filtering followed by addition (32bit overflow)
CHK cb2d2d82 bcd83869 62ec814e
# Summary Numbers. Use `grep ^SN | cut -f 2-` to extract this part.
-SN raw total sequences: 26
+SN raw total sequences: 26 # excluding supplementary and secondary reads
SN filtered sequences: 0
SN sequences: 26
SN is sorted: 1
@@ -17,6 +20,7 @@
SN reads MQ0: 1 # mapped and MQ=0
SN reads QC failed: 0
SN non-primary alignments: 0
+SN supplementary alignments: 0
SN total length: 260 # ignores clipping
SN total first fragment length: 140 # ignores clipping
SN total last fragment length: 120 # ignores clipping
@@ -89,6 +93,17 @@
GCC 8 26.92 23.08 38.46 11.54 0.00 0.00
GCC 9 23.08 26.92 26.92 23.08 0.00 0.00
GCC 10 23.08 23.08 38.46 15.38 0.00 0.00
+# ACGT content per cycle, read oriented. Use `grep ^GCT | cut -f 2-` to extract this part. The columns are: cycle; A,C,G,T base counts as a percentage of all A/C/G/T bases [%]
+GCT 1 26.92 26.92 30.77 15.38
+GCT 2 7.69 38.46 26.92 26.92
+GCT 3 26.92 26.92 30.77 15.38
+GCT 4 11.54 34.62 26.92 26.92
+GCT 5 23.08 26.92 38.46 11.54
+GCT 6 11.54 34.62 23.08 30.77
+GCT 7 19.23 23.08 38.46 19.23
+GCT 8 11.54 38.46 23.08 26.92
+GCT 9 23.08 19.23 34.62 23.08
+GCT 10 11.54 34.62 26.92 26.92
# ACGT content per cycle for first fragments. Use `grep ^FBC | cut -f 2-` to extract this part. The columns are: cycle; A,C,G,T base counts as a percentage of all A/C/G/T bases [%]; and N and O counts as a percentage of all A/C/G/T bases [%]
FBC 1 21.43 42.86 21.43 14.29 0.00 0.00
FBC 2 7.14 28.57 42.86 21.43 0.00 0.00
@@ -100,6 +115,8 @@
FBC 8 21.43 28.57 28.57 21.43 0.00 0.00
FBC 9 21.43 21.43 35.71 21.43 0.00 0.00
FBC 10 14.29 28.57 35.71 21.43 0.00 0.00
+# ACGT raw counters for first fragments. Use `grep ^FTC | cut -f 2-` to extract this part. The columns are: A,C,G,T,N base counters
+FTC 24 43 45 28 0
# ACGT content per cycle for last fragments. Use `grep ^LBC | cut -f 2-` to extract this part. The columns are: cycle; A,C,G,T base counts as a percentage of all A/C/G/T bases [%]; and N and O counts as a percentage of all A/C/G/T bases [%]
LBC 1 16.67 41.67 8.33 33.33 0.00 0.00
LBC 2 33.33 8.33 50.00 8.33 0.00 0.00
@@ -111,6 +128,19 @@
LBC 8 33.33 16.67 50.00 0.00 0.00 0.00
LBC 9 25.00 33.33 16.67 25.00 0.00 0.00
LBC 10 33.33 16.67 41.67 8.33 0.00 0.00
+# ACGT raw counters for last fragments. Use `grep ^LTC | cut -f 2-` to extract this part. The columns are: A,C,G,T,N base counters
+LTC 30 33 36 21 0
+# ACGT content per cycle for barcodes. Use `grep ^BCC | cut -f 2-` to extract this part. The columns are: cycle; A,C,G,T base counts as a percentage of all A/C/G/T bases [%]; and N counts as a percentage of all A/C/G/T bases [%]
+BCC1 1 50.00 0.00 0.00 50.00 0.00
+BCC1 2 0.00 50.00 50.00 0.00 0.00
+BCC1 3 0.00 50.00 50.00 0.00 0.00
+BCC1 4 50.00 0.00 0.00 50.00 0.00
+# Barcode Qualities. Use `grep ^QTQ | cut -f 2-` to extract this part.
+# Columns correspond to qualities and rows to barcode cycles. First column is the cycle number.
+QTQ1 1
+QTQ1 2
+QTQ1 3
+QTQ1 4
# Insert sizes. Use `grep ^IS | cut -f 2-` to extract this part. The columns are: insert size, pairs total, inward oriented pairs, outward oriented pairs, other pairs
IS 0 0 0 0 0
IS 1 0 0 0 0
diff -r df598009c821 -r ab75c7ea49a5 test-data/11.stats.g4.expected
--- a/test-data/11.stats.g4.expected Thu Oct 10 03:39:57 2019 -0400
+++ b/test-data/11.stats.g4.expected Tue Sep 28 16:08:22 2021 +0000
@@ -1,8 +1,11 @@
+# This file was produced by samtools stats (1.12+htslib-1.12) and can be plotted using plot-bamstats
+# This file contains statistics for all reads.
+# The command line was: stats -g 4 -@ 0 infile ref1:10-24 ref1:30-46 ref1:39-56
# CHK, Checksum [2]Read Names [3]Sequences [4]Qualities
# CHK, CRC32 of reads which passed filtering followed by addition (32bit overflow)
CHK cb2d2d82 bcd83869 62ec814e
# Summary Numbers. Use `grep ^SN | cut -f 2-` to extract this part.
-SN raw total sequences: 26
+SN raw total sequences: 26 # excluding supplementary and secondary reads
SN filtered sequences: 0
SN sequences: 26
SN is sorted: 1
@@ -17,6 +20,7 @@
SN reads MQ0: 1 # mapped and MQ=0
SN reads QC failed: 0
SN non-primary alignments: 0
+SN supplementary alignments: 0
SN total length: 260 # ignores clipping
SN total first fragment length: 140 # ignores clipping
SN total last fragment length: 120 # ignores clipping
@@ -89,6 +93,17 @@
GCC 8 26.92 23.08 38.46 11.54 0.00 0.00
GCC 9 23.08 26.92 26.92 23.08 0.00 0.00
GCC 10 23.08 23.08 38.46 15.38 0.00 0.00
+# ACGT content per cycle, read oriented. Use `grep ^GCT | cut -f 2-` to extract this part. The columns are: cycle; A,C,G,T base counts as a percentage of all A/C/G/T bases [%]
+GCT 1 26.92 26.92 30.77 15.38
+GCT 2 7.69 38.46 26.92 26.92
+GCT 3 26.92 26.92 30.77 15.38
+GCT 4 11.54 34.62 26.92 26.92
+GCT 5 23.08 26.92 38.46 11.54
+GCT 6 11.54 34.62 23.08 30.77
+GCT 7 19.23 23.08 38.46 19.23
+GCT 8 11.54 38.46 23.08 26.92
+GCT 9 23.08 19.23 34.62 23.08
+GCT 10 11.54 34.62 26.92 26.92
# ACGT content per cycle for first fragments. Use `grep ^FBC | cut -f 2-` to extract this part. The columns are: cycle; A,C,G,T base counts as a percentage of all A/C/G/T bases [%]; and N and O counts as a percentage of all A/C/G/T bases [%]
FBC 1 21.43 42.86 21.43 14.29 0.00 0.00
FBC 2 7.14 28.57 42.86 21.43 0.00 0.00
@@ -100,6 +115,8 @@
FBC 8 21.43 28.57 28.57 21.43 0.00 0.00
FBC 9 21.43 21.43 35.71 21.43 0.00 0.00
FBC 10 14.29 28.57 35.71 21.43 0.00 0.00
+# ACGT raw counters for first fragments. Use `grep ^FTC | cut -f 2-` to extract this part. The columns are: A,C,G,T,N base counters
+FTC 24 43 45 28 0
# ACGT content per cycle for last fragments. Use `grep ^LBC | cut -f 2-` to extract this part. The columns are: cycle; A,C,G,T base counts as a percentage of all A/C/G/T bases [%]; and N and O counts as a percentage of all A/C/G/T bases [%]
LBC 1 16.67 41.67 8.33 33.33 0.00 0.00
LBC 2 33.33 8.33 50.00 8.33 0.00 0.00
@@ -111,6 +128,19 @@
LBC 8 33.33 16.67 50.00 0.00 0.00 0.00
LBC 9 25.00 33.33 16.67 25.00 0.00 0.00
LBC 10 33.33 16.67 41.67 8.33 0.00 0.00
+# ACGT raw counters for last fragments. Use `grep ^LTC | cut -f 2-` to extract this part. The columns are: A,C,G,T,N base counters
+LTC 30 33 36 21 0
+# ACGT content per cycle for barcodes. Use `grep ^BCC | cut -f 2-` to extract this part. The columns are: cycle; A,C,G,T base counts as a percentage of all A/C/G/T bases [%]; and N counts as a percentage of all A/C/G/T bases [%]
+BCC1 1 50.00 0.00 0.00 50.00 0.00
+BCC1 2 0.00 50.00 50.00 0.00 0.00
+BCC1 3 0.00 50.00 50.00 0.00 0.00
+BCC1 4 50.00 0.00 0.00 50.00 0.00
+# Barcode Qualities. Use `grep ^QTQ | cut -f 2-` to extract this part.
+# Columns correspond to qualities and rows to barcode cycles. First column is the cycle number.
+QTQ1 1
+QTQ1 2
+QTQ1 3
+QTQ1 4
# Insert sizes. Use `grep ^IS | cut -f 2-` to extract this part. The columns are: insert size, pairs total, inward oriented pairs, outward oriented pairs, other pairs
IS 0 0 0 0 0
IS 1 0 0 0 0
diff -r df598009c821 -r ab75c7ea49a5 test-data/12.2reads.nooverlap.expected
--- a/test-data/12.2reads.nooverlap.expected Thu Oct 10 03:39:57 2019 -0400
+++ b/test-data/12.2reads.nooverlap.expected Tue Sep 28 16:08:22 2021 +0000
@@ -1,8 +1,11 @@
+# This file was produced by samtools stats (1.12+htslib-1.12) and can be plotted using plot-bamstats
+# This file contains statistics for all reads.
+# The command line was: stats -t /tmp/tmp0r5zs075/files/b/2/1/dataset_b2175431-044e-449d-8f60-1bfd33679b61.dat -p -@ 0 infile
# CHK, Checksum [2]Read Names [3]Sequences [4]Qualities
# CHK, CRC32 of reads which passed filtering followed by addition (32bit overflow)
CHK 5b31676a b0edee94 471895da
# Summary Numbers. Use `grep ^SN | cut -f 2-` to extract this part.
-SN raw total sequences: 2
+SN raw total sequences: 2 # excluding supplementary and secondary reads
SN filtered sequences: 0
SN sequences: 2
SN is sorted: 1
@@ -17,6 +20,7 @@
SN reads MQ0: 0 # mapped and MQ=0
SN reads QC failed: 0
SN non-primary alignments: 0
+SN supplementary alignments: 0
SN total length: 200 # ignores clipping
SN total first fragment length: 100 # ignores clipping
SN total last fragment length: 100 # ignores clipping
@@ -353,6 +357,107 @@
GCC 98 50.00 50.00 0.00 0.00 0.00 0.00
GCC 99 50.00 50.00 0.00 0.00 0.00 0.00
GCC 100 100.00 0.00 0.00 0.00 0.00 0.00
+# ACGT content per cycle, read oriented. Use `grep ^GCT | cut -f 2-` to extract this part. The columns are: cycle; A,C,G,T base counts as a percentage of all A/C/G/T bases [%]
+GCT 1 50.00 0.00 50.00 0.00
+GCT 2 0.00 50.00 50.00 0.00
+GCT 3 0.00 50.00 0.00 50.00
+GCT 4 0.00 50.00 50.00 0.00
+GCT 5 0.00 0.00 50.00 50.00
+GCT 6 0.00 0.00 50.00 50.00
+GCT 7 0.00 50.00 0.00 50.00
+GCT 8 0.00 0.00 50.00 50.00
+GCT 9 0.00 50.00 0.00 50.00
+GCT 10 50.00 0.00 50.00 0.00
+GCT 11 0.00 0.00 100.00 0.00
+GCT 12 50.00 0.00 0.00 50.00
+GCT 13 0.00 50.00 50.00 0.00
+GCT 14 50.00 0.00 0.00 50.00
+GCT 15 0.00 0.00 50.00 50.00
+GCT 16 0.00 50.00 50.00 0.00
+GCT 17 0.00 50.00 0.00 50.00
+GCT 18 100.00 0.00 0.00 0.00
+GCT 19 50.00 0.00 0.00 50.00
+GCT 20 0.00 50.00 50.00 0.00
+GCT 21 50.00 0.00 0.00 50.00
+GCT 22 0.00 50.00 0.00 50.00
+GCT 23 0.00 0.00 0.00 100.00
+GCT 24 0.00 50.00 50.00 0.00
+GCT 25 0.00 0.00 0.00 100.00
+GCT 26 0.00 0.00 50.00 50.00
+GCT 27 0.00 100.00 0.00 0.00
+GCT 28 0.00 0.00 0.00 100.00
+GCT 29 50.00 0.00 50.00 0.00
+GCT 30 0.00 50.00 0.00 50.00
+GCT 31 0.00 50.00 0.00 50.00
+GCT 32 0.00 0.00 50.00 50.00
+GCT 33 0.00 50.00 0.00 50.00
+GCT 34 50.00 0.00 0.00 50.00
+GCT 35 0.00 50.00 0.00 50.00
+GCT 36 0.00 100.00 0.00 0.00
+GCT 37 0.00 50.00 0.00 50.00
+GCT 38 50.00 50.00 0.00 0.00
+GCT 39 100.00 0.00 0.00 0.00
+GCT 40 0.00 0.00 100.00 0.00
+GCT 41 0.00 50.00 0.00 50.00
+GCT 42 50.00 50.00 0.00 0.00
+GCT 43 0.00 0.00 50.00 50.00
+GCT 44 50.00 0.00 50.00 0.00
+GCT 45 50.00 0.00 50.00 0.00
+GCT 46 50.00 0.00 50.00 0.00
+GCT 47 50.00 0.00 50.00 0.00
+GCT 48 0.00 50.00 50.00 0.00
+GCT 49 50.00 0.00 0.00 50.00
+GCT 50 0.00 0.00 50.00 50.00
+GCT 51 0.00 0.00 100.00 0.00
+GCT 52 50.00 0.00 50.00 0.00
+GCT 53 0.00 50.00 0.00 50.00
+GCT 54 50.00 0.00 50.00 0.00
+GCT 55 50.00 0.00 0.00 50.00
+GCT 56 0.00 50.00 50.00 0.00
+GCT 57 100.00 0.00 0.00 0.00
+GCT 58 50.00 0.00 50.00 0.00
+GCT 59 100.00 0.00 0.00 0.00
+GCT 60 50.00 0.00 50.00 0.00
+GCT 61 0.00 0.00 0.00 100.00
+GCT 62 0.00 50.00 50.00 0.00
+GCT 63 50.00 0.00 0.00 50.00
+GCT 64 0.00 0.00 100.00 0.00
+GCT 65 50.00 50.00 0.00 0.00
+GCT 66 0.00 50.00 0.00 50.00
+GCT 67 0.00 0.00 0.00 100.00
+GCT 68 0.00 50.00 0.00 50.00
+GCT 69 0.00 50.00 0.00 50.00
+GCT 70 50.00 0.00 0.00 50.00
+GCT 71 0.00 0.00 0.00 100.00
+GCT 72 0.00 50.00 0.00 50.00
+GCT 73 0.00 0.00 0.00 100.00
+GCT 74 0.00 50.00 0.00 50.00
+GCT 75 50.00 0.00 0.00 50.00
+GCT 76 0.00 50.00 0.00 50.00
+GCT 77 50.00 0.00 0.00 50.00
+GCT 78 50.00 0.00 0.00 50.00
+GCT 79 50.00 0.00 0.00 50.00
+GCT 80 50.00 0.00 0.00 50.00
+GCT 81 50.00 0.00 0.00 50.00
+GCT 82 50.00 0.00 0.00 50.00
+GCT 83 50.00 0.00 0.00 50.00
+GCT 84 50.00 0.00 0.00 50.00
+GCT 85 50.00 0.00 0.00 50.00
+GCT 86 50.00 0.00 0.00 50.00
+GCT 87 50.00 0.00 0.00 50.00
+GCT 88 50.00 0.00 0.00 50.00
+GCT 89 50.00 0.00 0.00 50.00
+GCT 90 50.00 0.00 0.00 50.00
+GCT 91 50.00 0.00 50.00 0.00
+GCT 92 50.00 0.00 50.00 0.00
+GCT 93 50.00 0.00 50.00 0.00
+GCT 94 50.00 0.00 0.00 50.00
+GCT 95 50.00 0.00 50.00 0.00
+GCT 96 50.00 0.00 50.00 0.00
+GCT 97 50.00 0.00 50.00 0.00
+GCT 98 50.00 0.00 50.00 0.00
+GCT 99 50.00 0.00 50.00 0.00
+GCT 100 50.00 0.00 0.00 50.00
# ACGT content per cycle for first fragments. Use `grep ^FBC | cut -f 2-` to extract this part. The columns are: cycle; A,C,G,T base counts as a percentage of all A/C/G/T bases [%]; and N and O counts as a percentage of all A/C/G/T bases [%]
FBC 1 0.00 0.00 0.00 100.00 0.00 0.00
FBC 2 0.00 0.00 100.00 0.00 0.00 0.00
@@ -454,6 +559,8 @@
FBC 98 0.00 100.00 0.00 0.00 0.00 0.00
FBC 99 0.00 100.00 0.00 0.00 0.00 0.00
FBC 100 100.00 0.00 0.00 0.00 0.00 0.00
+# ACGT raw counters for first fragments. Use `grep ^FTC | cut -f 2-` to extract this part. The columns are: A,C,G,T,N base counters
+FTC 50 21 14 15 0
# ACGT content per cycle for last fragments. Use `grep ^LBC | cut -f 2-` to extract this part. The columns are: cycle; A,C,G,T base counts as a percentage of all A/C/G/T bases [%]; and N and O counts as a percentage of all A/C/G/T bases [%]
LBC 1 0.00 0.00 100.00 0.00 0.00 0.00
LBC 2 0.00 0.00 100.00 0.00 0.00 0.00
@@ -555,6 +662,26 @@
LBC 98 100.00 0.00 0.00 0.00 0.00 0.00
LBC 99 100.00 0.00 0.00 0.00 0.00 0.00
LBC 100 100.00 0.00 0.00 0.00 0.00 0.00
+# ACGT raw counters for last fragments. Use `grep ^LTC | cut -f 2-` to extract this part. The columns are: A,C,G,T,N base counters
+LTC 41 20 23 16 0
+# ACGT content per cycle for barcodes. Use `grep ^BCC | cut -f 2-` to extract this part. The columns are: cycle; A,C,G,T base counts as a percentage of all A/C/G/T bases [%]; and N counts as a percentage of all A/C/G/T bases [%]
+BCC1 2 0.00 0.00 100.00 0.00 0.00
+BCC1 3 0.00 0.00 0.00 100.00 0.00
+BCC1 4 0.00 100.00 0.00 0.00 0.00
+BCC1 5 0.00 0.00 0.00 100.00 0.00
+BCC1 6 100.00 0.00 0.00 0.00 0.00
+BCC1 7 0.00 0.00 0.00 100.00 0.00
+BCC1 8 0.00 100.00 0.00 0.00 0.00
+# Barcode Qualities. Use `grep ^QTQ | cut -f 2-` to extract this part.
+# Columns correspond to qualities and rows to barcode cycles. First column is the cycle number.
+QTQ1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+QTQ1 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+QTQ1 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0
+QTQ1 4 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0
+QTQ1 5 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0
+QTQ1 6 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
+QTQ1 7 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
+QTQ1 8 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0
# Insert sizes. Use `grep ^IS | cut -f 2-` to extract this part. The columns are: insert size, pairs total, inward oriented pairs, outward oriented pairs, other pairs
IS 0 0 0 0 0
IS 1 0 0 0 0
diff -r df598009c821 -r ab75c7ea49a5 test-data/12.2reads.overlap.expected
--- a/test-data/12.2reads.overlap.expected Thu Oct 10 03:39:57 2019 -0400
+++ b/test-data/12.2reads.overlap.expected Tue Sep 28 16:08:22 2021 +0000
@@ -1,8 +1,11 @@
+# This file was produced by samtools stats (1.12+htslib-1.12) and can be plotted using plot-bamstats
+# This file contains statistics for all reads.
+# The command line was: stats -t /tmp/tmp0r5zs075/files/2/a/2/dataset_2a2a3ee9-3133-4880-a37c-50b6354c9000.dat -@ 0 infile
# CHK, Checksum [2]Read Names [3]Sequences [4]Qualities
# CHK, CRC32 of reads which passed filtering followed by addition (32bit overflow)
CHK 5b31676a b0edee94 471895da
# Summary Numbers. Use `grep ^SN | cut -f 2-` to extract this part.
-SN raw total sequences: 2
+SN raw total sequences: 2 # excluding supplementary and secondary reads
SN filtered sequences: 0
SN sequences: 2
SN is sorted: 1
@@ -17,6 +20,7 @@
SN reads MQ0: 0 # mapped and MQ=0
SN reads QC failed: 0
SN non-primary alignments: 0
+SN supplementary alignments: 0
SN total length: 200 # ignores clipping
SN total first fragment length: 100 # ignores clipping
SN total last fragment length: 100 # ignores clipping
@@ -353,6 +357,107 @@
GCC 98 50.00 50.00 0.00 0.00 0.00 0.00
GCC 99 50.00 50.00 0.00 0.00 0.00 0.00
GCC 100 100.00 0.00 0.00 0.00 0.00 0.00
+# ACGT content per cycle, read oriented. Use `grep ^GCT | cut -f 2-` to extract this part. The columns are: cycle; A,C,G,T base counts as a percentage of all A/C/G/T bases [%]
+GCT 1 50.00 0.00 50.00 0.00
+GCT 2 0.00 50.00 50.00 0.00
+GCT 3 0.00 50.00 0.00 50.00
+GCT 4 0.00 50.00 50.00 0.00
+GCT 5 0.00 0.00 50.00 50.00
+GCT 6 0.00 0.00 50.00 50.00
+GCT 7 0.00 50.00 0.00 50.00
+GCT 8 0.00 0.00 50.00 50.00
+GCT 9 0.00 50.00 0.00 50.00
+GCT 10 50.00 0.00 50.00 0.00
+GCT 11 0.00 0.00 100.00 0.00
+GCT 12 50.00 0.00 0.00 50.00
+GCT 13 0.00 50.00 50.00 0.00
+GCT 14 50.00 0.00 0.00 50.00
+GCT 15 0.00 0.00 50.00 50.00
+GCT 16 0.00 50.00 50.00 0.00
+GCT 17 0.00 50.00 0.00 50.00
+GCT 18 100.00 0.00 0.00 0.00
+GCT 19 50.00 0.00 0.00 50.00
+GCT 20 0.00 50.00 50.00 0.00
+GCT 21 50.00 0.00 0.00 50.00
+GCT 22 0.00 50.00 0.00 50.00
+GCT 23 0.00 0.00 0.00 100.00
+GCT 24 0.00 50.00 50.00 0.00
+GCT 25 0.00 0.00 0.00 100.00
+GCT 26 0.00 0.00 50.00 50.00
+GCT 27 0.00 100.00 0.00 0.00
+GCT 28 0.00 0.00 0.00 100.00
+GCT 29 50.00 0.00 50.00 0.00
+GCT 30 0.00 50.00 0.00 50.00
+GCT 31 0.00 50.00 0.00 50.00
+GCT 32 0.00 0.00 50.00 50.00
+GCT 33 0.00 50.00 0.00 50.00
+GCT 34 50.00 0.00 0.00 50.00
+GCT 35 0.00 50.00 0.00 50.00
+GCT 36 0.00 100.00 0.00 0.00
+GCT 37 0.00 50.00 0.00 50.00
+GCT 38 50.00 50.00 0.00 0.00
+GCT 39 100.00 0.00 0.00 0.00
+GCT 40 0.00 0.00 100.00 0.00
+GCT 41 0.00 50.00 0.00 50.00
+GCT 42 50.00 50.00 0.00 0.00
+GCT 43 0.00 0.00 50.00 50.00
+GCT 44 50.00 0.00 50.00 0.00
+GCT 45 50.00 0.00 50.00 0.00
+GCT 46 50.00 0.00 50.00 0.00
+GCT 47 50.00 0.00 50.00 0.00
+GCT 48 0.00 50.00 50.00 0.00
+GCT 49 50.00 0.00 0.00 50.00
+GCT 50 0.00 0.00 50.00 50.00
+GCT 51 0.00 0.00 100.00 0.00
+GCT 52 50.00 0.00 50.00 0.00
+GCT 53 0.00 50.00 0.00 50.00
+GCT 54 50.00 0.00 50.00 0.00
+GCT 55 50.00 0.00 0.00 50.00
+GCT 56 0.00 50.00 50.00 0.00
+GCT 57 100.00 0.00 0.00 0.00
+GCT 58 50.00 0.00 50.00 0.00
+GCT 59 100.00 0.00 0.00 0.00
+GCT 60 50.00 0.00 50.00 0.00
+GCT 61 0.00 0.00 0.00 100.00
+GCT 62 0.00 50.00 50.00 0.00
+GCT 63 50.00 0.00 0.00 50.00
+GCT 64 0.00 0.00 100.00 0.00
+GCT 65 50.00 50.00 0.00 0.00
+GCT 66 0.00 50.00 0.00 50.00
+GCT 67 0.00 0.00 0.00 100.00
+GCT 68 0.00 50.00 0.00 50.00
+GCT 69 0.00 50.00 0.00 50.00
+GCT 70 50.00 0.00 0.00 50.00
+GCT 71 0.00 0.00 0.00 100.00
+GCT 72 0.00 50.00 0.00 50.00
+GCT 73 0.00 0.00 0.00 100.00
+GCT 74 0.00 50.00 0.00 50.00
+GCT 75 50.00 0.00 0.00 50.00
+GCT 76 0.00 50.00 0.00 50.00
+GCT 77 50.00 0.00 0.00 50.00
+GCT 78 50.00 0.00 0.00 50.00
+GCT 79 50.00 0.00 0.00 50.00
+GCT 80 50.00 0.00 0.00 50.00
+GCT 81 50.00 0.00 0.00 50.00
+GCT 82 50.00 0.00 0.00 50.00
+GCT 83 50.00 0.00 0.00 50.00
+GCT 84 50.00 0.00 0.00 50.00
+GCT 85 50.00 0.00 0.00 50.00
+GCT 86 50.00 0.00 0.00 50.00
+GCT 87 50.00 0.00 0.00 50.00
+GCT 88 50.00 0.00 0.00 50.00
+GCT 89 50.00 0.00 0.00 50.00
+GCT 90 50.00 0.00 0.00 50.00
+GCT 91 50.00 0.00 50.00 0.00
+GCT 92 50.00 0.00 50.00 0.00
+GCT 93 50.00 0.00 50.00 0.00
+GCT 94 50.00 0.00 0.00 50.00
+GCT 95 50.00 0.00 50.00 0.00
+GCT 96 50.00 0.00 50.00 0.00
+GCT 97 50.00 0.00 50.00 0.00
+GCT 98 50.00 0.00 50.00 0.00
+GCT 99 50.00 0.00 50.00 0.00
+GCT 100 50.00 0.00 0.00 50.00
# ACGT content per cycle for first fragments. Use `grep ^FBC | cut -f 2-` to extract this part. The columns are: cycle; A,C,G,T base counts as a percentage of all A/C/G/T bases [%]; and N and O counts as a percentage of all A/C/G/T bases [%]
FBC 1 0.00 0.00 0.00 100.00 0.00 0.00
FBC 2 0.00 0.00 100.00 0.00 0.00 0.00
@@ -454,6 +559,8 @@
FBC 98 0.00 100.00 0.00 0.00 0.00 0.00
FBC 99 0.00 100.00 0.00 0.00 0.00 0.00
FBC 100 100.00 0.00 0.00 0.00 0.00 0.00
+# ACGT raw counters for first fragments. Use `grep ^FTC | cut -f 2-` to extract this part. The columns are: A,C,G,T,N base counters
+FTC 50 21 14 15 0
# ACGT content per cycle for last fragments. Use `grep ^LBC | cut -f 2-` to extract this part. The columns are: cycle; A,C,G,T base counts as a percentage of all A/C/G/T bases [%]; and N and O counts as a percentage of all A/C/G/T bases [%]
LBC 1 0.00 0.00 100.00 0.00 0.00 0.00
LBC 2 0.00 0.00 100.00 0.00 0.00 0.00
@@ -555,6 +662,26 @@
LBC 98 100.00 0.00 0.00 0.00 0.00 0.00
LBC 99 100.00 0.00 0.00 0.00 0.00 0.00
LBC 100 100.00 0.00 0.00 0.00 0.00 0.00
+# ACGT raw counters for last fragments. Use `grep ^LTC | cut -f 2-` to extract this part. The columns are: A,C,G,T,N base counters
+LTC 41 20 23 16 0
+# ACGT content per cycle for barcodes. Use `grep ^BCC | cut -f 2-` to extract this part. The columns are: cycle; A,C,G,T base counts as a percentage of all A/C/G/T bases [%]; and N counts as a percentage of all A/C/G/T bases [%]
+BCC1 2 0.00 0.00 100.00 0.00 0.00
+BCC1 3 0.00 0.00 0.00 100.00 0.00
+BCC1 4 0.00 100.00 0.00 0.00 0.00
+BCC1 5 0.00 0.00 0.00 100.00 0.00
+BCC1 6 100.00 0.00 0.00 0.00 0.00
+BCC1 7 0.00 0.00 0.00 100.00 0.00
+BCC1 8 0.00 100.00 0.00 0.00 0.00
+# Barcode Qualities. Use `grep ^QTQ | cut -f 2-` to extract this part.
+# Columns correspond to qualities and rows to barcode cycles. First column is the cycle number.
+QTQ1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+QTQ1 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+QTQ1 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0
+QTQ1 4 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0
+QTQ1 5 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0
+QTQ1 6 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
+QTQ1 7 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
+QTQ1 8 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0
# Insert sizes. Use `grep ^IS | cut -f 2-` to extract this part. The columns are: insert size, pairs total, inward oriented pairs, outward oriented pairs, other pairs
IS 0 0 0 0 0
IS 1 0 0 0 0
diff -r df598009c821 -r ab75c7ea49a5 test-data/12.3reads.nooverlap.expected
--- a/test-data/12.3reads.nooverlap.expected Thu Oct 10 03:39:57 2019 -0400
+++ b/test-data/12.3reads.nooverlap.expected Tue Sep 28 16:08:22 2021 +0000
@@ -1,8 +1,11 @@
+# This file was produced by samtools stats (1.12+htslib-1.12) and can be plotted using plot-bamstats
+# This file contains statistics for all reads.
+# The command line was: stats -t /tmp/tmp0r5zs075/files/1/3/0/dataset_13082855-efe1-437e-8a91-ff7d013770db.dat -p -@ 0 infile
# CHK, Checksum [2]Read Names [3]Sequences [4]Qualities
# CHK, CRC32 of reads which passed filtering followed by addition (32bit overflow)
CHK 09f8b87f 140798ec 2b989f07
# Summary Numbers. Use `grep ^SN | cut -f 2-` to extract this part.
-SN raw total sequences: 3
+SN raw total sequences: 3 # excluding supplementary and secondary reads
SN filtered sequences: 0
SN sequences: 3
SN is sorted: 1
@@ -17,6 +20,7 @@
SN reads MQ0: 0 # mapped and MQ=0
SN reads QC failed: 0
SN non-primary alignments: 0
+SN supplementary alignments: 0
SN total length: 300 # ignores clipping
SN total first fragment length: 100 # ignores clipping
SN total last fragment length: 200 # ignores clipping
@@ -355,6 +359,107 @@
GCC 98 0.00 66.67 0.00 33.33 0.00 0.00
GCC 99 0.00 100.00 0.00 0.00 0.00 0.00
GCC 100 0.00 0.00 0.00 100.00 0.00 0.00
+# ACGT content per cycle, read oriented. Use `grep ^GCT | cut -f 2-` to extract this part. The columns are: cycle; A,C,G,T base counts as a percentage of all A/C/G/T bases [%]
+GCT 1 33.33 0.00 0.00 66.67
+GCT 2 0.00 33.33 33.33 33.33
+GCT 3 0.00 33.33 33.33 33.33
+GCT 4 33.33 66.67 0.00 0.00
+GCT 5 0.00 66.67 33.33 0.00
+GCT 6 33.33 0.00 0.00 66.67
+GCT 7 33.33 0.00 0.00 66.67
+GCT 8 0.00 33.33 33.33 33.33
+GCT 9 0.00 66.67 33.33 0.00
+GCT 10 33.33 33.33 0.00 33.33
+GCT 11 33.33 33.33 0.00 33.33
+GCT 12 0.00 33.33 33.33 33.33
+GCT 13 0.00 0.00 33.33 66.67
+GCT 14 0.00 66.67 33.33 0.00
+GCT 15 33.33 66.67 0.00 0.00
+GCT 16 33.33 0.00 0.00 66.67
+GCT 17 33.33 0.00 0.00 66.67
+GCT 18 0.00 33.33 33.33 33.33
+GCT 19 0.00 33.33 33.33 33.33
+GCT 20 0.00 66.67 33.33 0.00
+GCT 21 33.33 66.67 0.00 0.00
+GCT 22 33.33 0.00 0.00 66.67
+GCT 23 0.00 0.00 33.33 66.67
+GCT 24 0.00 33.33 33.33 33.33
+GCT 25 33.33 66.67 0.00 0.00
+GCT 26 33.33 33.33 0.00 33.33
+GCT 27 0.00 33.33 33.33 33.33
+GCT 28 0.00 33.33 33.33 33.33
+GCT 29 0.00 0.00 33.33 66.67
+GCT 30 33.33 66.67 0.00 0.00
+GCT 31 33.33 66.67 0.00 0.00
+GCT 32 33.33 0.00 0.00 66.67
+GCT 33 0.00 0.00 33.33 66.67
+GCT 34 0.00 33.33 33.33 33.33
+GCT 35 33.33 33.33 0.00 33.33
+GCT 36 0.00 66.67 33.33 0.00
+GCT 37 33.33 66.67 0.00 0.00
+GCT 38 33.33 0.00 0.00 66.67
+GCT 39 0.00 0.00 33.33 66.67
+GCT 40 0.00 33.33 33.33 33.33
+GCT 41 33.33 66.67 0.00 0.00
+GCT 42 33.33 33.33 0.00 33.33
+GCT 43 0.00 33.33 33.33 33.33
+GCT 44 0.00 33.33 33.33 33.33
+GCT 45 0.00 33.33 33.33 33.33
+GCT 46 33.33 66.67 0.00 0.00
+GCT 47 33.33 33.33 0.00 33.33
+GCT 48 33.33 0.00 0.00 66.67
+GCT 49 0.00 0.00 33.33 66.67
+GCT 50 0.00 66.67 33.33 0.00
+GCT 51 33.33 66.67 0.00 0.00
+GCT 52 0.00 66.67 33.33 0.00
+GCT 53 33.33 0.00 0.00 66.67
+GCT 54 33.33 0.00 0.00 66.67
+GCT 55 0.00 33.33 33.33 33.33
+GCT 56 0.00 66.67 33.33 0.00
+GCT 57 33.33 33.33 0.00 33.33
+GCT 58 33.33 33.33 0.00 33.33
+GCT 59 0.00 33.33 33.33 33.33
+GCT 60 0.00 0.00 33.33 66.67
+GCT 61 0.00 66.67 33.33 0.00
+GCT 62 33.33 66.67 0.00 0.00
+GCT 63 33.33 0.00 0.00 66.67
+GCT 64 33.33 0.00 0.00 66.67
+GCT 65 0.00 33.33 33.33 33.33
+GCT 66 0.00 33.33 33.33 33.33
+GCT 67 33.33 66.67 0.00 0.00
+GCT 68 0.00 66.67 33.33 0.00
+GCT 69 33.33 0.00 0.00 66.67
+GCT 70 33.33 0.00 0.00 66.67
+GCT 71 0.00 33.33 33.33 33.33
+GCT 72 0.00 66.67 33.33 0.00
+GCT 73 33.33 33.33 0.00 33.33
+GCT 74 33.33 33.33 0.00 33.33
+GCT 75 0.00 33.33 33.33 33.33
+GCT 76 0.00 0.00 33.33 66.67
+GCT 77 0.00 66.67 33.33 0.00
+GCT 78 33.33 66.67 0.00 0.00
+GCT 79 33.33 0.00 0.00 66.67
+GCT 80 33.33 0.00 0.00 66.67
+GCT 81 0.00 33.33 33.33 33.33
+GCT 82 0.00 66.67 33.33 0.00
+GCT 83 0.00 66.67 33.33 0.00
+GCT 84 33.33 33.33 0.00 33.33
+GCT 85 33.33 0.00 0.00 66.67
+GCT 86 0.00 0.00 33.33 66.67
+GCT 87 0.00 66.67 33.33 0.00
+GCT 88 33.33 66.67 0.00 0.00
+GCT 89 33.33 33.33 0.00 33.33
+GCT 90 0.00 0.00 33.33 66.67
+GCT 91 0.00 33.33 33.33 33.33
+GCT 92 0.00 66.67 33.33 0.00
+GCT 93 33.33 66.67 0.00 0.00
+GCT 94 33.33 33.33 0.00 33.33
+GCT 95 33.33 0.00 0.00 66.67
+GCT 96 0.00 33.33 33.33 33.33
+GCT 97 33.33 0.00 33.33 33.33
+GCT 98 33.33 66.67 0.00 0.00
+GCT 99 0.00 66.67 33.33 0.00
+GCT 100 33.33 0.00 0.00 66.67
# ACGT content per cycle for first fragments. Use `grep ^FBC | cut -f 2-` to extract this part. The columns are: cycle; A,C,G,T base counts as a percentage of all A/C/G/T bases [%]; and N and O counts as a percentage of all A/C/G/T bases [%]
FBC 1 0.00 0.00 0.00 100.00 0.00 0.00
FBC 2 0.00 100.00 0.00 0.00 0.00 0.00
@@ -456,6 +561,8 @@
FBC 98 0.00 0.00 0.00 100.00 0.00 0.00
FBC 99 0.00 100.00 0.00 0.00 0.00 0.00
FBC 100 0.00 0.00 0.00 100.00 0.00 0.00
+# ACGT raw counters for first fragments. Use `grep ^FTC | cut -f 2-` to extract this part. The columns are: A,C,G,T,N base counters
+FTC 0 51 0 49 0
# ACGT content per cycle for last fragments. Use `grep ^LBC | cut -f 2-` to extract this part. The columns are: cycle; A,C,G,T base counts as a percentage of all A/C/G/T bases [%]; and N and O counts as a percentage of all A/C/G/T bases [%]
LBC 1 0.00 0.00 0.00 100.00 0.00 0.00
LBC 2 0.00 50.00 0.00 50.00 0.00 0.00
@@ -557,6 +664,26 @@
LBC 98 0.00 100.00 0.00 0.00 0.00 0.00
LBC 99 0.00 100.00 0.00 0.00 0.00 0.00
LBC 100 0.00 0.00 0.00 100.00 0.00 0.00
+# ACGT raw counters for last fragments. Use `grep ^LTC | cut -f 2-` to extract this part. The columns are: A,C,G,T,N base counters
+LTC 1 102 0 97 0
+# ACGT content per cycle for barcodes. Use `grep ^BCC | cut -f 2-` to extract this part. The columns are: cycle; A,C,G,T base counts as a percentage of all A/C/G/T bases [%]; and N counts as a percentage of all A/C/G/T bases [%]
+BCC1 2 0.00 0.00 100.00 0.00 0.00
+BCC1 3 0.00 0.00 0.00 100.00 0.00
+BCC1 4 0.00 100.00 0.00 0.00 0.00
+BCC1 5 0.00 0.00 0.00 100.00 0.00
+BCC1 6 100.00 0.00 0.00 0.00 0.00
+BCC1 7 0.00 0.00 0.00 100.00 0.00
+BCC1 8 0.00 100.00 0.00 0.00 0.00
+# Barcode Qualities. Use `grep ^QTQ | cut -f 2-` to extract this part.
+# Columns correspond to qualities and rows to barcode cycles. First column is the cycle number.
+QTQ1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+QTQ1 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+QTQ1 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0
+QTQ1 4 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0
+QTQ1 5 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
+QTQ1 6 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
+QTQ1 7 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
+QTQ1 8 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0
# Insert sizes. Use `grep ^IS | cut -f 2-` to extract this part. The columns are: insert size, pairs total, inward oriented pairs, outward oriented pairs, other pairs
IS 0 0 0 0 0
IS 1 0 0 0 0
diff -r df598009c821 -r ab75c7ea49a5 test-data/12.3reads.overlap.expected
--- a/test-data/12.3reads.overlap.expected Thu Oct 10 03:39:57 2019 -0400
+++ b/test-data/12.3reads.overlap.expected Tue Sep 28 16:08:22 2021 +0000
@@ -1,8 +1,11 @@
+# This file was produced by samtools stats (1.12+htslib-1.12) and can be plotted using plot-bamstats
+# This file contains statistics for all reads.
+# The command line was: stats -t /tmp/tmp0r5zs075/files/6/5/e/dataset_65ea4e4d-a70d-4001-911d-9d81ff2829a6.dat -@ 0 infile
# CHK, Checksum [2]Read Names [3]Sequences [4]Qualities
# CHK, CRC32 of reads which passed filtering followed by addition (32bit overflow)
CHK 09f8b87f 140798ec 2b989f07
# Summary Numbers. Use `grep ^SN | cut -f 2-` to extract this part.
-SN raw total sequences: 3
+SN raw total sequences: 3 # excluding supplementary and secondary reads
SN filtered sequences: 0
SN sequences: 3
SN is sorted: 1
@@ -17,6 +20,7 @@
SN reads MQ0: 0 # mapped and MQ=0
SN reads QC failed: 0
SN non-primary alignments: 0
+SN supplementary alignments: 0
SN total length: 300 # ignores clipping
SN total first fragment length: 100 # ignores clipping
SN total last fragment length: 200 # ignores clipping
@@ -355,6 +359,107 @@
GCC 98 0.00 66.67 0.00 33.33 0.00 0.00
GCC 99 0.00 100.00 0.00 0.00 0.00 0.00
GCC 100 0.00 0.00 0.00 100.00 0.00 0.00
+# ACGT content per cycle, read oriented. Use `grep ^GCT | cut -f 2-` to extract this part. The columns are: cycle; A,C,G,T base counts as a percentage of all A/C/G/T bases [%]
+GCT 1 33.33 0.00 0.00 66.67
+GCT 2 0.00 33.33 33.33 33.33
+GCT 3 0.00 33.33 33.33 33.33
+GCT 4 33.33 66.67 0.00 0.00
+GCT 5 0.00 66.67 33.33 0.00
+GCT 6 33.33 0.00 0.00 66.67
+GCT 7 33.33 0.00 0.00 66.67
+GCT 8 0.00 33.33 33.33 33.33
+GCT 9 0.00 66.67 33.33 0.00
+GCT 10 33.33 33.33 0.00 33.33
+GCT 11 33.33 33.33 0.00 33.33
+GCT 12 0.00 33.33 33.33 33.33
+GCT 13 0.00 0.00 33.33 66.67
+GCT 14 0.00 66.67 33.33 0.00
+GCT 15 33.33 66.67 0.00 0.00
+GCT 16 33.33 0.00 0.00 66.67
+GCT 17 33.33 0.00 0.00 66.67
+GCT 18 0.00 33.33 33.33 33.33
+GCT 19 0.00 33.33 33.33 33.33
+GCT 20 0.00 66.67 33.33 0.00
+GCT 21 33.33 66.67 0.00 0.00
+GCT 22 33.33 0.00 0.00 66.67
+GCT 23 0.00 0.00 33.33 66.67
+GCT 24 0.00 33.33 33.33 33.33
+GCT 25 33.33 66.67 0.00 0.00
+GCT 26 33.33 33.33 0.00 33.33
+GCT 27 0.00 33.33 33.33 33.33
+GCT 28 0.00 33.33 33.33 33.33
+GCT 29 0.00 0.00 33.33 66.67
+GCT 30 33.33 66.67 0.00 0.00
+GCT 31 33.33 66.67 0.00 0.00
+GCT 32 33.33 0.00 0.00 66.67
+GCT 33 0.00 0.00 33.33 66.67
+GCT 34 0.00 33.33 33.33 33.33
+GCT 35 33.33 33.33 0.00 33.33
+GCT 36 0.00 66.67 33.33 0.00
+GCT 37 33.33 66.67 0.00 0.00
+GCT 38 33.33 0.00 0.00 66.67
+GCT 39 0.00 0.00 33.33 66.67
+GCT 40 0.00 33.33 33.33 33.33
+GCT 41 33.33 66.67 0.00 0.00
+GCT 42 33.33 33.33 0.00 33.33
+GCT 43 0.00 33.33 33.33 33.33
+GCT 44 0.00 33.33 33.33 33.33
+GCT 45 0.00 33.33 33.33 33.33
+GCT 46 33.33 66.67 0.00 0.00
+GCT 47 33.33 33.33 0.00 33.33
+GCT 48 33.33 0.00 0.00 66.67
+GCT 49 0.00 0.00 33.33 66.67
+GCT 50 0.00 66.67 33.33 0.00
+GCT 51 33.33 66.67 0.00 0.00
+GCT 52 0.00 66.67 33.33 0.00
+GCT 53 33.33 0.00 0.00 66.67
+GCT 54 33.33 0.00 0.00 66.67
+GCT 55 0.00 33.33 33.33 33.33
+GCT 56 0.00 66.67 33.33 0.00
+GCT 57 33.33 33.33 0.00 33.33
+GCT 58 33.33 33.33 0.00 33.33
+GCT 59 0.00 33.33 33.33 33.33
+GCT 60 0.00 0.00 33.33 66.67
+GCT 61 0.00 66.67 33.33 0.00
+GCT 62 33.33 66.67 0.00 0.00
+GCT 63 33.33 0.00 0.00 66.67
+GCT 64 33.33 0.00 0.00 66.67
+GCT 65 0.00 33.33 33.33 33.33
+GCT 66 0.00 33.33 33.33 33.33
+GCT 67 33.33 66.67 0.00 0.00
+GCT 68 0.00 66.67 33.33 0.00
+GCT 69 33.33 0.00 0.00 66.67
+GCT 70 33.33 0.00 0.00 66.67
+GCT 71 0.00 33.33 33.33 33.33
+GCT 72 0.00 66.67 33.33 0.00
+GCT 73 33.33 33.33 0.00 33.33
+GCT 74 33.33 33.33 0.00 33.33
+GCT 75 0.00 33.33 33.33 33.33
+GCT 76 0.00 0.00 33.33 66.67
+GCT 77 0.00 66.67 33.33 0.00
+GCT 78 33.33 66.67 0.00 0.00
+GCT 79 33.33 0.00 0.00 66.67
+GCT 80 33.33 0.00 0.00 66.67
+GCT 81 0.00 33.33 33.33 33.33
+GCT 82 0.00 66.67 33.33 0.00
+GCT 83 0.00 66.67 33.33 0.00
+GCT 84 33.33 33.33 0.00 33.33
+GCT 85 33.33 0.00 0.00 66.67
+GCT 86 0.00 0.00 33.33 66.67
+GCT 87 0.00 66.67 33.33 0.00
+GCT 88 33.33 66.67 0.00 0.00
+GCT 89 33.33 33.33 0.00 33.33
+GCT 90 0.00 0.00 33.33 66.67
+GCT 91 0.00 33.33 33.33 33.33
+GCT 92 0.00 66.67 33.33 0.00
+GCT 93 33.33 66.67 0.00 0.00
+GCT 94 33.33 33.33 0.00 33.33
+GCT 95 33.33 0.00 0.00 66.67
+GCT 96 0.00 33.33 33.33 33.33
+GCT 97 33.33 0.00 33.33 33.33
+GCT 98 33.33 66.67 0.00 0.00
+GCT 99 0.00 66.67 33.33 0.00
+GCT 100 33.33 0.00 0.00 66.67
# ACGT content per cycle for first fragments. Use `grep ^FBC | cut -f 2-` to extract this part. The columns are: cycle; A,C,G,T base counts as a percentage of all A/C/G/T bases [%]; and N and O counts as a percentage of all A/C/G/T bases [%]
FBC 1 0.00 0.00 0.00 100.00 0.00 0.00
FBC 2 0.00 100.00 0.00 0.00 0.00 0.00
@@ -456,6 +561,8 @@
FBC 98 0.00 0.00 0.00 100.00 0.00 0.00
FBC 99 0.00 100.00 0.00 0.00 0.00 0.00
FBC 100 0.00 0.00 0.00 100.00 0.00 0.00
+# ACGT raw counters for first fragments. Use `grep ^FTC | cut -f 2-` to extract this part. The columns are: A,C,G,T,N base counters
+FTC 0 51 0 49 0
# ACGT content per cycle for last fragments. Use `grep ^LBC | cut -f 2-` to extract this part. The columns are: cycle; A,C,G,T base counts as a percentage of all A/C/G/T bases [%]; and N and O counts as a percentage of all A/C/G/T bases [%]
LBC 1 0.00 0.00 0.00 100.00 0.00 0.00
LBC 2 0.00 50.00 0.00 50.00 0.00 0.00
@@ -557,6 +664,26 @@
LBC 98 0.00 100.00 0.00 0.00 0.00 0.00
LBC 99 0.00 100.00 0.00 0.00 0.00 0.00
LBC 100 0.00 0.00 0.00 100.00 0.00 0.00
+# ACGT raw counters for last fragments. Use `grep ^LTC | cut -f 2-` to extract this part. The columns are: A,C,G,T,N base counters
+LTC 1 102 0 97 0
+# ACGT content per cycle for barcodes. Use `grep ^BCC | cut -f 2-` to extract this part. The columns are: cycle; A,C,G,T base counts as a percentage of all A/C/G/T bases [%]; and N counts as a percentage of all A/C/G/T bases [%]
+BCC1 2 0.00 0.00 100.00 0.00 0.00
+BCC1 3 0.00 0.00 0.00 100.00 0.00
+BCC1 4 0.00 100.00 0.00 0.00 0.00
+BCC1 5 0.00 0.00 0.00 100.00 0.00
+BCC1 6 100.00 0.00 0.00 0.00 0.00
+BCC1 7 0.00 0.00 0.00 100.00 0.00
+BCC1 8 0.00 100.00 0.00 0.00 0.00
+# Barcode Qualities. Use `grep ^QTQ | cut -f 2-` to extract this part.
+# Columns correspond to qualities and rows to barcode cycles. First column is the cycle number.
+QTQ1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+QTQ1 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+QTQ1 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0
+QTQ1 4 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0
+QTQ1 5 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
+QTQ1 6 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
+QTQ1 7 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
+QTQ1 8 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0
# Insert sizes. Use `grep ^IS | cut -f 2-` to extract this part. The columns are: insert size, pairs total, inward oriented pairs, outward oriented pairs, other pairs
IS 0 0 0 0 0
IS 1 0 0 0 0
diff -r df598009c821 -r ab75c7ea49a5 test-data/2.stats.expected
--- a/test-data/2.stats.expected Thu Oct 10 03:39:57 2019 -0400
+++ b/test-data/2.stats.expected Tue Sep 28 16:08:22 2021 +0000
@@ -1,8 +1,11 @@
+# This file was produced by samtools stats (1.12+htslib-1.12) and can be plotted using plot-bamstats
+# This file contains statistics for all reads.
+# The command line was: stats --ref-seq reference.fa -@ 0 infile
# CHK, Checksum [2]Read Names [3]Sequences [4]Qualities
# CHK, CRC32 of reads which passed filtering followed by addition (32bit overflow)
CHK 1a1c1362 29c426ae 7bab45da
# Summary Numbers. Use `grep ^SN | cut -f 2-` to extract this part.
-SN raw total sequences: 2
+SN raw total sequences: 2 # excluding supplementary and secondary reads
SN filtered sequences: 0
SN sequences: 2
SN is sorted: 1
@@ -17,6 +20,7 @@
SN reads MQ0: 0 # mapped and MQ=0
SN reads QC failed: 0
SN non-primary alignments: 0
+SN supplementary alignments: 0
SN total length: 70 # ignores clipping
SN total first fragment length: 35 # ignores clipping
SN total last fragment length: 35 # ignores clipping
@@ -195,6 +199,42 @@
GCC 33 50.00 0.00 0.00 50.00 0.00 0.00
GCC 34 50.00 0.00 50.00 0.00 0.00 0.00
GCC 35 0.00 0.00 50.00 50.00 0.00 0.00
+# ACGT content per cycle, read oriented. Use `grep ^GCT | cut -f 2-` to extract this part. The columns are: cycle; A,C,G,T base counts as a percentage of all A/C/G/T bases [%]
+GCT 1 0.00 50.00 0.00 50.00
+GCT 2 50.00 0.00 50.00 0.00
+GCT 3 0.00 0.00 50.00 50.00
+GCT 4 0.00 0.00 100.00 0.00
+GCT 5 0.00 0.00 50.00 50.00
+GCT 6 0.00 50.00 0.00 50.00
+GCT 7 0.00 0.00 50.00 50.00
+GCT 8 0.00 0.00 0.00 100.00
+GCT 9 0.00 50.00 50.00 0.00
+GCT 10 50.00 0.00 50.00 0.00
+GCT 11 50.00 0.00 0.00 50.00
+GCT 12 50.00 0.00 50.00 0.00
+GCT 13 50.00 0.00 50.00 0.00
+GCT 14 0.00 0.00 0.00 100.00
+GCT 15 100.00 0.00 0.00 0.00
+GCT 16 50.00 0.00 0.00 50.00
+GCT 17 0.00 0.00 50.00 50.00
+GCT 18 0.00 50.00 50.00 0.00
+GCT 19 0.00 100.00 0.00 0.00
+GCT 20 0.00 0.00 50.00 50.00
+GCT 21 0.00 0.00 100.00 0.00
+GCT 22 0.00 50.00 0.00 50.00
+GCT 23 50.00 0.00 0.00 50.00
+GCT 24 50.00 0.00 50.00 0.00
+GCT 25 50.00 0.00 50.00 0.00
+GCT 26 0.00 0.00 100.00 0.00
+GCT 27 50.00 0.00 0.00 50.00
+GCT 28 0.00 0.00 50.00 50.00
+GCT 29 0.00 50.00 0.00 50.00
+GCT 30 0.00 50.00 0.00 50.00
+GCT 31 0.00 50.00 50.00 0.00
+GCT 32 0.00 0.00 100.00 0.00
+GCT 33 100.00 0.00 0.00 0.00
+GCT 34 0.00 0.00 50.00 50.00
+GCT 35 50.00 0.00 50.00 0.00
# ACGT content per cycle for first fragments. Use `grep ^FBC | cut -f 2-` to extract this part. The columns are: cycle; A,C,G,T base counts as a percentage of all A/C/G/T bases [%]; and N and O counts as a percentage of all A/C/G/T bases [%]
FBC 1 0.00 0.00 0.00 100.00 0.00 0.00
FBC 2 0.00 0.00 100.00 0.00 0.00 0.00
@@ -231,6 +271,8 @@
FBC 33 100.00 0.00 0.00 0.00 0.00 0.00
FBC 34 0.00 0.00 100.00 0.00 0.00 0.00
FBC 35 0.00 0.00 100.00 0.00 0.00 0.00
+# ACGT raw counters for first fragments. Use `grep ^FTC | cut -f 2-` to extract this part. The columns are: A,C,G,T,N base counters
+FTC 6 5 15 9 0
# ACGT content per cycle for last fragments. Use `grep ^LBC | cut -f 2-` to extract this part. The columns are: cycle; A,C,G,T base counts as a percentage of all A/C/G/T bases [%]; and N and O counts as a percentage of all A/C/G/T bases [%]
LBC 1 0.00 0.00 100.00 0.00 0.00 0.00
LBC 2 0.00 0.00 0.00 100.00 0.00 0.00
@@ -267,6 +309,8 @@
LBC 33 0.00 0.00 0.00 100.00 0.00 0.00
LBC 34 100.00 0.00 0.00 0.00 0.00 0.00
LBC 35 0.00 0.00 0.00 100.00 0.00 0.00
+# ACGT raw counters for last fragments. Use `grep ^LTC | cut -f 2-` to extract this part. The columns are: A,C,G,T,N base counters
+LTC 11 10 5 9 0
# Insert sizes. Use `grep ^IS | cut -f 2-` to extract this part. The columns are: insert size, pairs total, inward oriented pairs, outward oriented pairs, other pairs
IS 0 0 0 0 0
IS 1 0 0 0 0
diff -r df598009c821 -r ab75c7ea49a5 test-data/6.stats.expected
--- a/test-data/6.stats.expected Thu Oct 10 03:39:57 2019 -0400
+++ b/test-data/6.stats.expected Tue Sep 28 16:08:22 2021 +0000
@@ -1,8 +1,11 @@
+# This file was produced by samtools stats (1.12+htslib-1.12) and can be plotted using plot-bamstats
+# This file contains statistics for all reads.
+# The command line was: stats --insert-size 0 --ref-seq reference.fa -@ 0 infile
# CHK, Checksum [2]Read Names [3]Sequences [4]Qualities
# CHK, CRC32 of reads which passed filtering followed by addition (32bit overflow)
CHK 1a1c1362 32507d92 7bab45da
# Summary Numbers. Use `grep ^SN | cut -f 2-` to extract this part.
-SN raw total sequences: 2
+SN raw total sequences: 2 # excluding supplementary and secondary reads
SN filtered sequences: 0
SN sequences: 2
SN is sorted: 1
@@ -17,6 +20,7 @@
SN reads MQ0: 0 # mapped and MQ=0
SN reads QC failed: 0
SN non-primary alignments: 0
+SN supplementary alignments: 0
SN total length: 70 # ignores clipping
SN total first fragment length: 35 # ignores clipping
SN total last fragment length: 35 # ignores clipping
@@ -195,6 +199,42 @@
GCC 33 0.00 0.00 50.00 50.00 0.00 0.00
GCC 34 100.00 0.00 0.00 0.00 0.00 0.00
GCC 35 0.00 0.00 50.00 50.00 0.00 0.00
+# ACGT content per cycle, read oriented. Use `grep ^GCT | cut -f 2-` to extract this part. The columns are: cycle; A,C,G,T base counts as a percentage of all A/C/G/T bases [%]
+GCT 1 0.00 50.00 0.00 50.00
+GCT 2 50.00 0.00 50.00 0.00
+GCT 3 0.00 0.00 50.00 50.00
+GCT 4 0.00 0.00 100.00 0.00
+GCT 5 0.00 0.00 50.00 50.00
+GCT 6 0.00 50.00 50.00 0.00
+GCT 7 0.00 0.00 0.00 100.00
+GCT 8 0.00 0.00 50.00 50.00
+GCT 9 0.00 0.00 50.00 50.00
+GCT 10 0.00 50.00 50.00 0.00
+GCT 11 100.00 0.00 0.00 0.00
+GCT 12 0.00 0.00 50.00 50.00
+GCT 13 100.00 0.00 0.00 0.00
+GCT 14 0.00 0.00 50.00 50.00
+GCT 15 50.00 0.00 0.00 50.00
+GCT 16 50.00 0.00 0.00 50.00
+GCT 17 50.00 0.00 50.00 0.00
+GCT 18 0.00 0.00 50.00 50.00
+GCT 19 0.00 100.00 0.00 0.00
+GCT 20 0.00 50.00 0.00 50.00
+GCT 21 0.00 0.00 100.00 0.00
+GCT 22 0.00 50.00 50.00 0.00
+GCT 23 50.00 0.00 0.00 50.00
+GCT 24 50.00 0.00 0.00 50.00
+GCT 25 50.00 0.00 50.00 0.00
+GCT 26 0.00 0.00 100.00 0.00
+GCT 27 0.00 0.00 50.00 50.00
+GCT 28 50.00 0.00 0.00 50.00
+GCT 29 0.00 50.00 50.00 0.00
+GCT 30 0.00 0.00 0.00 100.00
+GCT 31 0.00 50.00 50.00 0.00
+GCT 32 0.00 50.00 50.00 0.00
+GCT 33 50.00 0.00 50.00 0.00
+GCT 34 50.00 0.00 0.00 50.00
+GCT 35 50.00 0.00 50.00 0.00
# ACGT content per cycle for first fragments. Use `grep ^FBC | cut -f 2-` to extract this part. The columns are: cycle; A,C,G,T base counts as a percentage of all A/C/G/T bases [%]; and N and O counts as a percentage of all A/C/G/T bases [%]
FBC 1 0.00 0.00 0.00 100.00 0.00 0.00
FBC 2 0.00 0.00 100.00 0.00 0.00 0.00
@@ -231,6 +271,8 @@
FBC 33 0.00 0.00 100.00 0.00 0.00 0.00
FBC 34 100.00 0.00 0.00 0.00 0.00 0.00
FBC 35 0.00 0.00 100.00 0.00 0.00 0.00
+# ACGT raw counters for first fragments. Use `grep ^FTC | cut -f 2-` to extract this part. The columns are: A,C,G,T,N base counters
+FTC 6 5 15 9 0
# ACGT content per cycle for last fragments. Use `grep ^LBC | cut -f 2-` to extract this part. The columns are: cycle; A,C,G,T base counts as a percentage of all A/C/G/T bases [%]; and N and O counts as a percentage of all A/C/G/T bases [%]
LBC 1 0.00 0.00 100.00 0.00 0.00 0.00
LBC 2 0.00 0.00 0.00 100.00 0.00 0.00
@@ -267,6 +309,8 @@
LBC 33 0.00 0.00 0.00 100.00 0.00 0.00
LBC 34 100.00 0.00 0.00 0.00 0.00 0.00
LBC 35 0.00 0.00 0.00 100.00 0.00 0.00
+# ACGT raw counters for last fragments. Use `grep ^LTC | cut -f 2-` to extract this part. The columns are: A,C,G,T,N base counters
+LTC 11 10 5 9 0
# Insert sizes. Use `grep ^IS | cut -f 2-` to extract this part. The columns are: insert size, pairs total, inward oriented pairs, outward oriented pairs, other pairs
IS 0 0 0 0 0
IS 1 0 0 0 0
diff -r df598009c821 -r ab75c7ea49a5 test-data/samtools_stats_out1.tab
--- a/test-data/samtools_stats_out1.tab Thu Oct 10 03:39:57 2019 -0400
+++ b/test-data/samtools_stats_out1.tab Tue Sep 28 16:08:22 2021 +0000
@@ -1,11 +1,11 @@
-# This file was produced by samtools stats (1.9+htslib-1.9) and can be plotted using plot-bamstats
+# This file was produced by samtools stats (1.12+htslib-1.12) and can be plotted using plot-bamstats
# This file contains statistics for all reads.
-# The command line was: stats --ref-seq reference.fa infile
+# The command line was: stats --ref-seq reference.fa -@ 0 infile
# CHK, Checksum [2]Read Names [3]Sequences [4]Qualities
# CHK, CRC32 of reads which passed filtering followed by addition (32bit overflow)
CHK 1bd20fd8 58ad2167 29883386
# Summary Numbers. Use `grep ^SN | cut -f 2-` to extract this part.
-SN raw total sequences: 200
+SN raw total sequences: 200 # excluding supplementary and secondary reads
SN filtered sequences: 0
SN sequences: 200
SN is sorted: 1
@@ -20,6 +20,7 @@
SN reads MQ0: 6 # mapped and MQ=0
SN reads QC failed: 0
SN non-primary alignments: 0
+SN supplementary alignments: 0
SN total length: 50200 # ignores clipping
SN total first fragment length: 25100 # ignores clipping
SN total last fragment length: 25100 # ignores clipping
@@ -1094,6 +1095,258 @@
GCC 249 25.00 23.00 21.00 31.00 0.00 0.00
GCC 250 27.50 22.50 17.50 32.50 0.00 0.00
GCC 251 13.50 20.50 36.50 29.50 0.00 0.00
+# ACGT content per cycle, read oriented. Use `grep ^GCT | cut -f 2-` to extract this part. The columns are: cycle; A,C,G,T base counts as a percentage of all A/C/G/T bases [%]
+GCT 1 22.45 32.65 31.63 13.27
+GCT 2 26.00 17.00 10.00 47.00
+GCT 3 34.00 25.00 9.00 32.00
+GCT 4 13.00 32.00 10.00 45.00
+GCT 5 37.00 24.00 10.00 29.00
+GCT 6 44.00 26.00 17.00 13.00
+GCT 7 28.00 13.00 32.00 27.00
+GCT 8 49.00 16.00 18.00 17.00
+GCT 9 19.00 35.00 16.00 30.00
+GCT 10 35.00 13.00 22.00 30.00
+GCT 11 18.00 15.00 29.00 38.00
+GCT 12 28.00 24.00 22.00 26.00
+GCT 13 29.00 17.00 19.00 35.00
+GCT 14 21.21 24.24 24.24 30.30
+GCT 15 23.00 20.00 25.00 32.00
+GCT 16 35.00 15.00 20.00 30.00
+GCT 17 29.00 26.00 13.00 32.00
+GCT 18 31.00 30.00 18.00 21.00
+GCT 19 30.00 26.00 21.00 23.00
+GCT 20 23.00 21.00 20.00 36.00
+GCT 21 36.00 18.00 19.00 27.00
+GCT 22 39.00 31.00 13.00 17.00
+GCT 23 30.30 21.21 11.11 37.37
+GCT 24 36.00 14.00 20.00 30.00
+GCT 25 38.00 23.00 17.00 22.00
+GCT 26 36.00 21.00 13.00 30.00
+GCT 27 32.00 18.00 22.00 28.00
+GCT 28 30.00 13.00 25.00 32.00
+GCT 29 30.00 28.00 23.00 19.00
+GCT 30 34.00 21.00 13.00 32.00
+GCT 31 33.00 29.00 14.00 24.00
+GCT 32 38.00 20.00 20.00 22.00
+GCT 33 27.00 23.00 15.00 35.00
+GCT 34 43.00 17.00 20.00 20.00
+GCT 35 35.00 23.00 22.00 20.00
+GCT 36 41.00 25.00 16.00 18.00
+GCT 37 38.00 18.00 18.00 26.00
+GCT 38 23.00 26.00 25.00 26.00
+GCT 39 22.00 41.00 19.00 18.00
+GCT 40 29.00 14.00 23.00 34.00
+GCT 41 29.00 27.00 20.00 24.00
+GCT 42 29.00 27.00 18.00 26.00
+GCT 43 38.00 28.00 16.00 18.00
+GCT 44 21.00 26.00 23.00 30.00
+GCT 45 38.00 25.00 18.00 19.00
+GCT 46 27.00 21.00 22.00 30.00
+GCT 47 40.00 28.00 18.00 14.00
+GCT 48 25.00 22.00 20.00 33.00
+GCT 49 26.00 33.00 10.00 31.00
+GCT 50 49.00 20.00 10.00 21.00
+GCT 51 29.00 29.00 13.00 29.00
+GCT 52 43.00 22.00 22.00 13.00
+GCT 53 19.00 27.00 27.00 27.00
+GCT 54 25.00 18.00 25.00 32.00
+GCT 55 32.00 22.00 17.00 29.00
+GCT 56 36.00 20.00 16.00 28.00
+GCT 57 28.00 24.00 23.00 25.00
+GCT 58 23.00 39.00 17.00 21.00
+GCT 59 33.00 21.00 26.00 20.00
+GCT 60 30.00 26.00 19.00 25.00
+GCT 61 23.00 23.00 25.00 29.00
+GCT 62 35.00 19.00 20.00 26.00
+GCT 63 26.00 25.00 29.00 20.00
+GCT 64 30.00 20.00 27.00 23.00
+GCT 65 38.00 17.00 21.00 24.00
+GCT 66 33.00 26.00 14.00 27.00
+GCT 67 35.00 21.00 21.00 23.00
+GCT 68 39.00 28.00 15.00 18.00
+GCT 69 38.00 18.00 22.00 22.00
+GCT 70 40.00 13.00 22.00 25.00
+GCT 71 29.00 41.00 19.00 11.00
+GCT 72 33.00 26.00 28.00 13.00
+GCT 73 31.00 18.00 24.00 27.00
+GCT 74 31.00 32.00 13.00 24.00
+GCT 75 46.00 10.00 21.00 23.00
+GCT 76 23.00 28.00 25.00 24.00
+GCT 77 35.00 20.00 19.00 26.00
+GCT 78 29.00 30.00 26.00 15.00
+GCT 79 24.00 25.00 27.00 24.00
+GCT 80 40.00 18.00 29.00 13.00
+GCT 81 18.00 26.00 26.00 30.00
+GCT 82 30.00 25.00 28.00 17.00
+GCT 83 36.00 25.00 14.00 25.00
+GCT 84 22.00 39.00 23.00 16.00
+GCT 85 42.00 20.00 21.00 17.00
+GCT 86 12.00 47.00 16.00 25.00
+GCT 87 45.00 15.00 19.00 21.00
+GCT 88 26.00 24.00 31.00 19.00
+GCT 89 26.00 25.00 29.00 20.00
+GCT 90 24.00 22.00 26.00 28.00
+GCT 91 29.00 30.00 24.00 17.00
+GCT 92 22.00 13.00 32.00 33.00
+GCT 93 29.00 40.00 13.00 18.00
+GCT 94 30.00 16.00 25.00 29.00
+GCT 95 31.31 35.35 17.17 16.16
+GCT 96 34.00 27.00 15.00 24.00
+GCT 97 32.00 27.00 25.00 16.00
+GCT 98 21.21 31.31 24.24 23.23
+GCT 99 27.00 37.00 21.00 15.00
+GCT 100 29.00 15.00 26.00 30.00
+GCT 101 27.00 34.00 25.00 14.00
+GCT 102 20.00 14.00 36.00 30.00
+GCT 103 32.00 26.00 16.00 26.00
+GCT 104 30.00 28.00 22.00 20.00
+GCT 105 35.00 35.00 19.00 11.00
+GCT 106 22.00 32.00 25.00 21.00
+GCT 107 19.19 33.33 26.26 21.21
+GCT 108 27.27 33.33 21.21 18.18
+GCT 109 19.00 29.00 24.00 28.00
+GCT 110 20.20 31.31 28.28 20.20
+GCT 111 25.25 33.33 22.22 19.19
+GCT 112 39.39 28.28 16.16 16.16
+GCT 113 21.21 24.24 31.31 23.23
+GCT 114 19.00 31.00 35.00 15.00
+GCT 115 32.00 23.00 25.00 20.00
+GCT 116 20.00 29.00 35.00 16.00
+GCT 117 31.00 21.00 21.00 27.00
+GCT 118 25.00 14.00 35.00 26.00
+GCT 119 21.00 31.00 24.00 24.00
+GCT 120 19.00 24.00 27.00 30.00
+GCT 121 24.24 36.36 20.20 19.19
+GCT 122 18.00 25.00 23.00 34.00
+GCT 123 25.00 48.00 10.00 17.00
+GCT 124 10.00 31.00 35.00 24.00
+GCT 125 27.00 24.00 22.00 27.00
+GCT 126 24.00 19.00 26.00 31.00
+GCT 127 26.00 16.00 22.00 36.00
+GCT 128 16.00 35.00 30.00 19.00
+GCT 129 24.00 27.00 26.00 23.00
+GCT 130 25.00 19.00 23.00 33.00
+GCT 131 17.00 40.00 24.00 19.00
+GCT 132 20.00 27.00 24.00 29.00
+GCT 133 15.00 34.00 33.00 18.00
+GCT 134 22.00 19.00 31.00 28.00
+GCT 135 17.00 28.00 26.00 29.00
+GCT 136 25.00 33.00 14.00 28.00
+GCT 137 21.00 22.00 30.00 27.00
+GCT 138 15.00 30.00 29.00 26.00
+GCT 139 19.00 32.00 22.00 27.00
+GCT 140 19.00 24.00 29.00 28.00
+GCT 141 21.00 24.00 25.00 30.00
+GCT 142 17.00 32.00 35.00 16.00
+GCT 143 23.00 23.00 28.00 26.00
+GCT 144 20.00 33.00 24.00 23.00
+GCT 145 20.00 26.00 24.00 30.00
+GCT 146 26.00 15.00 40.00 19.00
+GCT 147 23.00 23.00 30.00 24.00
+GCT 148 12.00 32.00 33.00 23.00
+GCT 149 27.00 24.00 23.00 26.00
+GCT 150 12.00 32.00 21.00 35.00
+GCT 151 22.00 29.00 34.00 15.00
+GCT 152 16.00 21.00 26.00 37.00
+GCT 153 24.00 20.00 34.00 22.00
+GCT 154 25.00 22.00 28.00 25.00
+GCT 155 21.00 28.00 17.00 34.00
+GCT 156 16.00 23.00 36.00 25.00
+GCT 157 23.00 34.00 20.00 23.00
+GCT 158 21.00 33.00 21.00 25.00
+GCT 159 19.00 15.00 44.00 22.00
+GCT 160 24.00 31.00 22.00 23.00
+GCT 161 13.00 28.00 40.00 19.00
+GCT 162 33.00 19.00 20.00 28.00
+GCT 163 23.00 26.00 31.00 20.00
+GCT 164 33.00 19.00 20.00 28.00
+GCT 165 13.00 16.00 37.00 34.00
+GCT 166 20.00 36.00 30.00 14.00
+GCT 167 17.00 28.00 25.00 30.00
+GCT 168 16.00 24.00 31.00 29.00
+GCT 169 21.00 23.00 21.00 35.00
+GCT 170 22.00 24.00 27.00 27.00
+GCT 171 23.00 30.00 31.00 16.00
+GCT 172 24.00 22.00 30.00 24.00
+GCT 173 24.00 17.00 33.00 26.00
+GCT 174 21.00 24.00 33.00 22.00
+GCT 175 34.00 19.00 34.00 13.00
+GCT 176 17.00 37.00 24.00 22.00
+GCT 177 24.00 24.00 25.00 27.00
+GCT 178 19.00 26.00 30.00 25.00
+GCT 179 24.00 20.00 20.00 36.00
+GCT 180 22.00 25.00 31.00 22.00
+GCT 181 21.00 17.00 38.00 24.00
+GCT 182 35.00 21.00 20.00 24.00
+GCT 183 28.00 15.00 27.00 30.00
+GCT 184 17.00 30.00 29.00 24.00
+GCT 185 16.00 32.00 33.00 19.00
+GCT 186 21.00 22.00 36.00 21.00
+GCT 187 24.00 13.00 41.00 22.00
+GCT 188 29.00 19.00 30.00 22.00
+GCT 189 19.00 23.00 33.00 25.00
+GCT 190 24.00 28.00 25.00 23.00
+GCT 191 28.00 21.00 29.00 22.00
+GCT 192 37.00 25.00 13.00 25.00
+GCT 193 18.00 26.00 25.00 31.00
+GCT 194 21.00 34.00 24.00 21.00
+GCT 195 16.00 28.00 21.00 35.00
+GCT 196 23.00 23.00 27.00 27.00
+GCT 197 18.00 20.00 24.00 38.00
+GCT 198 15.00 19.00 39.00 27.00
+GCT 199 23.00 17.00 27.00 33.00
+GCT 200 25.00 27.00 23.00 25.00
+GCT 201 26.00 19.00 25.00 30.00
+GCT 202 23.00 23.00 21.00 33.00
+GCT 203 21.00 19.00 32.00 28.00
+GCT 204 26.00 29.00 24.00 21.00
+GCT 205 16.00 23.00 21.00 40.00
+GCT 206 28.00 25.00 21.00 26.00
+GCT 207 22.00 22.00 19.00 37.00
+GCT 208 12.00 32.00 32.00 24.00
+GCT 209 18.00 20.00 15.00 47.00
+GCT 210 18.00 15.00 45.00 22.00
+GCT 211 28.00 14.00 28.00 30.00
+GCT 212 23.00 27.00 22.00 28.00
+GCT 213 22.00 12.00 34.00 32.00
+GCT 214 16.00 24.00 26.00 34.00
+GCT 215 28.00 22.00 25.00 25.00
+GCT 216 18.00 17.00 26.00 39.00
+GCT 217 27.00 30.00 16.00 27.00
+GCT 218 18.00 24.00 24.00 34.00
+GCT 219 19.00 18.00 24.00 39.00
+GCT 220 28.00 23.00 24.00 25.00
+GCT 221 20.00 24.00 20.00 36.00
+GCT 222 22.00 21.00 19.00 38.00
+GCT 223 19.00 22.00 33.00 26.00
+GCT 224 19.00 20.00 27.00 34.00
+GCT 225 14.00 32.00 34.00 20.00
+GCT 226 20.00 20.00 29.00 31.00
+GCT 227 17.00 22.00 26.00 35.00
+GCT 228 34.00 20.00 25.00 21.00
+GCT 229 22.00 27.00 16.00 35.00
+GCT 230 23.00 33.00 15.00 29.00
+GCT 231 20.00 27.00 31.00 22.00
+GCT 232 25.00 23.00 11.00 41.00
+GCT 233 22.00 25.00 27.00 26.00
+GCT 234 14.00 27.00 23.00 36.00
+GCT 235 17.00 11.00 32.00 40.00
+GCT 236 29.00 20.00 24.00 27.00
+GCT 237 16.00 19.00 23.00 42.00
+GCT 238 22.00 25.00 29.00 24.00
+GCT 239 18.00 8.00 17.00 57.00
+GCT 240 27.00 26.00 14.00 33.00
+GCT 241 21.00 22.00 17.00 40.00
+GCT 242 26.00 28.00 10.00 36.00
+GCT 243 25.00 15.00 24.00 36.00
+GCT 244 15.00 24.00 16.00 45.00
+GCT 245 24.00 23.00 19.00 34.00
+GCT 246 18.00 25.00 29.00 28.00
+GCT 247 30.00 13.00 14.00 43.00
+GCT 248 22.00 17.00 26.00 35.00
+GCT 249 25.00 22.00 22.00 31.00
+GCT 250 25.00 20.00 20.00 35.00
+GCT 251 13.00 17.00 40.00 30.00
# ACGT content per cycle for first fragments. Use `grep ^FBC | cut -f 2-` to extract this part. The columns are: cycle; A,C,G,T base counts as a percentage of all A/C/G/T bases [%]; and N and O counts as a percentage of all A/C/G/T bases [%]
FBC 1 21.43 27.55 36.73 14.29 2.04 0.00
FBC 2 34.00 15.00 12.00 39.00 0.00 0.00
@@ -1346,6 +1599,8 @@
FBC 249 25.00 24.00 20.00 31.00 0.00 0.00
FBC 250 30.00 25.00 15.00 30.00 0.00 0.00
FBC 251 14.00 24.00 33.00 29.00 0.00 0.00
+# ACGT raw counters for first fragments. Use `grep ^FTC | cut -f 2-` to extract this part. The columns are: A,C,G,T,N base counters
+FTC 6178 6247 5821 6841 13
# ACGT content per cycle for last fragments. Use `grep ^LBC | cut -f 2-` to extract this part. The columns are: cycle; A,C,G,T base counts as a percentage of all A/C/G/T bases [%]; and N and O counts as a percentage of all A/C/G/T bases [%]
LBC 1 22.45 32.65 31.63 13.27 2.04 0.00
LBC 2 26.00 17.00 10.00 47.00 0.00 0.00
@@ -1598,6 +1853,8 @@
LBC 249 25.00 22.00 22.00 31.00 0.00 0.00
LBC 250 25.00 20.00 20.00 35.00 0.00 0.00
LBC 251 13.00 17.00 40.00 30.00 0.00 0.00
+# ACGT raw counters for last fragments. Use `grep ^LTC | cut -f 2-` to extract this part. The columns are: A,C,G,T,N base counters
+LTC 6413 6102 5966 6606 13
# Insert sizes. Use `grep ^IS | cut -f 2-` to extract this part. The columns are: insert size, pairs total, inward oriented pairs, outward oriented pairs, other pairs
# Read lengths. Use `grep ^RL | cut -f 2-` to extract this part. The columns are: read length, count
RL 251 200
diff -r df598009c821 -r ab75c7ea49a5 test-data/samtools_stats_out1__sn.tab
--- a/test-data/samtools_stats_out1__sn.tab Thu Oct 10 03:39:57 2019 -0400
+++ b/test-data/samtools_stats_out1__sn.tab Tue Sep 28 16:08:22 2021 +0000
@@ -1,5 +1,5 @@
# Summary Numbers.
-raw total sequences: 200
+raw total sequences: 200 # excluding supplementary and secondary reads
filtered sequences: 0
sequences: 200
is sorted: 1
@@ -14,6 +14,7 @@
reads MQ0: 6 # mapped and MQ=0
reads QC failed: 0
non-primary alignments: 0
+supplementary alignments: 0
total length: 50200 # ignores clipping
total first fragment length: 25100 # ignores clipping
total last fragment length: 25100 # ignores clipping