Mercurial > repos > estrain > sum_fastqc
diff sum_fastqc.pl @ 5:a2c2dc7bc724 draft
Uploaded
| author | estrain |
|---|---|
| date | Tue, 16 Oct 2018 21:59:38 -0400 |
| parents | 5e1f199bd289 |
| children | b769c810924e |
line wrap: on
line diff
```diff
--- a/sum_fastqc.pl Tue Oct 16 21:59:23 2018 -0400
+++ b/sum_fastqc.pl Tue Oct 16 21:59:38 2018 -0400
@@ -11,37 +11,46 @@
 ##
 ####################################################
 
-$infile=@ARGV[0];
+my($inname)=shift(@ARGV);
+my($qscore)=shift(@ARGV);
+
+print "Input\tFile\tFastQC\tPass-Fail\tReads\tPoor_Reads\tGC\tQ".$qscore."\n";
+
+foreach (@ARGV) {
+  print_stats($_);
+}
 
-# First 10 lines of raw FASTQC contain basic overview
-@sumlines=`head -n 10 $infile`;
-chomp(@sumlines);
+sub print_stats {
+  $infile = shift;
+  # First 10 lines of raw FASTQC contain basic overview
+  @sumlines=`head -n 10 $infile`;
+  chomp(@sumlines);
 
-# Sequence level Q scores are buried in the middle of the file
-@qlines=`awk '/#Quality\tCount/,/>>END_MODULE/' $infile | head -n -1 | tail -n +2`;
-chomp(@qlines);
+  # Sequence level Q scores are buried in the middle of the file
+  @qlines=`awk '/#Quality\tCount/,/>>END_MODULE/' $infile | head -n -1 | tail -n +2`;
+  chomp(@qlines);
 
-@fastqc = split(/[\n\t]/,shift(@sumlines));
-@pass = split(/\t/,shift(@sumlines));
-shift(@sumlines);
-@fn = split(/\t/,shift(@sumlines));
-shift(@sumlines);
-shift(@sumlines);
-@nreads = split(/\t/,shift(@sumlines));
-@npoor = split(/\t/,shift(@sumlines));
-shift(@sumlines);
-@gc = split(/\t/,shift(@sumlines));
+  @fastqc = split(/[\n\t]/,shift(@sumlines));
+  @pass = split(/\t/,shift(@sumlines));
+  shift(@sumlines);
+  @fn = split(/\t/,shift(@sumlines));
+  shift(@sumlines);
+  shift(@sumlines);
+  @nreads = split(/\t/,shift(@sumlines));
+  @npoor = split(/\t/,shift(@sumlines));
+  shift(@sumlines);
+  @gc = split(/\t/,shift(@sumlines));
 
-print "File\tFastQC\tPass-Fail\tReads\tPoor_Reads\tGC\tQ30\n";
-
-print $fn[1]."\t";
-print $fastqc[1]."\t";
-print $pass[1]."\t";
-print $nreads[1]."\t";
-print $npoor[1]."\t";
-print $gc[1]."\t";
-print qcal($nreads[1],30,\@qlines);
-print "\n";
+  print $inname."\t";
+  print $fn[1]."\t";
+  print $fastqc[1]."\t";
+  print $pass[1]."\t";
+  print $nreads[1]."\t";
+  print $npoor[1]."\t";
+  print $gc[1]."\t";
+  print qcal($nreads[1],$qscore,\@qlines);
+  print "\n";
+}
 
 # Sum reads w/ Q scores > cutoff and divide by number of reads
 sub qcal {
@@ -51,9 +60,9 @@
   $sum = 0;
 
   foreach $item (@qarray) {
-    ($qval,$qscore)=split(/\t/,$item);
+    my($qval,$q)=split(/\t/,$item);
     if($qval>=$cutoff) {
-      $sum += $qscore;
+      $sum += $q;
     }
   }
   $qmean = sprintf("%.2f", 100 * $sum / $nreads);
```
