Mercurial > repos > estrain > sum_fastqc
changeset 0:cce90961c022 draft
Uploaded
| author | estrain |
|---|---|
| date | Tue, 16 Oct 2018 17:15:52 -0400 |
| parents | |
| children | 52c14941b49f |
| files | sum_fastqc.pl |
| diffstat | 1 files changed, 63 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
#!/usr/bin/perl

####################################################
##
## sum_fastqc.pl
##
## Errol Strain (estrain@gmail.com)
##
## Description: Takes raw FASTQC output and produces
## simple table summary
##
## Usage: sum_fastqc.pl <fastqc_data_file> [input_label]
##
##   fastqc_data_file  raw FASTQC text report to summarise
##   input_label       free text echoed in the "Input Data" column
##
## Output: a two-line, tab-separated table on STDOUT
## (header line followed by one data row).
##
####################################################

use strict;
use warnings;

use Scalar::Util qw(looks_like_number);

# Run the report only when executed as a script. When the file is loaded
# with require/do, the subroutines can be called without touching @ARGV
# or the filesystem.
main(@ARGV) unless caller;

# Entry point: read the report named by the first argument and print the
# summary table. The second argument is only echoed back in the output.
sub main {
    my ($infile, $galfile) = @_;

    die "Usage: $0 <fastqc_data_file> [input_label]\n"
        unless defined $infile && length $infile;
    $galfile = '' unless defined $galfile;

    my ($summary, $qlines) = read_fastqc($infile);

    print "Input Data\tFile\tFastQC\tPass-Fail\tReads\tPoor_Reads\tGC\tQ30\n";

    my @row = (
        $galfile,
        $summary->{file},
        $summary->{version},
        $summary->{status},
        $summary->{reads},
        $summary->{poor},
        $summary->{gc},
        qcal($summary->{reads}, 30, $qlines),
    );

    # Every field, including the last, is followed by a tab. That trailing
    # tab is kept so the row stays byte-identical to earlier output.
    print join("\t", @row), "\t\n";

    return;
}

# Parse a raw FASTQC report in a single pass.
#
# Arguments:
#   $path - file to read (opened directly, never passed through a shell)
#
# Returns a two-element list:
#   \%summary - keys version, status, file, reads, poor, gc; each is the
#               second tab-separated field of the matching line, or ''
#               when that line is absent
#   \@qlines  - the "quality<TAB>count" lines found between the
#               "#Quality<TAB>Count" header and the next ">>END_MODULE"
#
# Dies if the file cannot be opened.
sub read_fastqc {
    my ($path) = @_;

    open(my $fh, '<', $path) or die "Cannot open '$path': $!\n";

    my %summary = map { $_ => '' } qw(version status file reads poor gc);

    # Basic-statistics rows are looked up by measure name rather than by
    # line position, so an added or reordered row cannot shift the values.
    my %field_for = (
        'Filename'                          => 'file',
        'Total Sequences'                   => 'reads',
        'Sequences flagged as poor quality' => 'poor',
        '%GC'                               => 'gc',
    );

    my @qlines;
    my $section = '';    # '', 'basic' or 'quality'

    while (my $line = <$fh>) {
        $line =~ s/\r?\n\z//;    # tolerate CRLF line endings as well as LF

        my ($key, $value) = split /\t/, $line;
        next unless defined $key;
        $value = '' unless defined $value;

        if ($key eq '##FastQC') {
            $summary{version} = $value;
        }
        elsif ($line =~ /^>>END_MODULE/) {
            $section = '';
        }
        elsif ($key eq '>>Basic Statistics') {
            $summary{status} = $value;
            $section = 'basic';
        }
        elsif ($line =~ /^#Quality\tCount/) {
            $section = 'quality';
        }
        elsif ($section eq 'basic' && exists $field_for{$key}) {
            $summary{ $field_for{$key} } = $value;
        }
        elsif ($section eq 'quality') {
            push @qlines, $line;
        }
    }

    close($fh);

    return (\%summary, \@qlines);
}

# Sum reads w/ Q scores >= cutoff and divide by number of reads.
#
# Arguments:
#   $nreads - total number of reads (the denominator)
#   $cutoff - minimum quality value to count (inclusive)
#   $qlines - array ref of "quality<TAB>count" strings
#
# Returns the percentage as a string with two decimals. Returns "0.00"
# when $nreads is missing, non-numeric or not positive, instead of dying
# on a division by zero. Lines whose two fields are not both numeric are
# ignored.
sub qcal {
    my ($nreads, $cutoff, $qlines) = @_;

    return '0.00'
        unless defined $nreads && looks_like_number($nreads) && $nreads > 0;

    my $sum = 0;
    for my $item (@{ $qlines || [] }) {
        my ($qval, $count) = split /\t/, $item;
        next unless defined $qval  && looks_like_number($qval);
        next unless defined $count && looks_like_number($count);
        $sum += $count if $qval >= $cutoff;
    }

    return sprintf('%.2f', 100 * $sum / $nreads);
}

1;
