annotate pileup.pl @ 2:9f09ba6a5f2c draft

Uploaded
author modencode-dcc
date Mon, 21 Jan 2013 13:24:18 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
2
9f09ba6a5f2c Uploaded
modencode-dcc
parents:
diff changeset
1 #!/usr/bin/perl
9f09ba6a5f2c Uploaded
modencode-dcc
parents:
diff changeset
2
9f09ba6a5f2c Uploaded
modencode-dcc
parents:
diff changeset
3 #purpose: script which calculates the percent of genome coverage and average coverage of bases
9f09ba6a5f2c Uploaded
modencode-dcc
parents:
diff changeset
4 #author: Ziru Zhou
9f09ba6a5f2c Uploaded
modencode-dcc
parents:
diff changeset
5 #date: October, 2012
9f09ba6a5f2c Uploaded
modencode-dcc
parents:
diff changeset
6
9f09ba6a5f2c Uploaded
modencode-dcc
parents:
diff changeset
7
9f09ba6a5f2c Uploaded
modencode-dcc
parents:
diff changeset
8 use strict;
9f09ba6a5f2c Uploaded
modencode-dcc
parents:
diff changeset
9 use warnings;
9f09ba6a5f2c Uploaded
modencode-dcc
parents:
diff changeset
10 use File::Basename;
9f09ba6a5f2c Uploaded
modencode-dcc
parents:
diff changeset
11
9f09ba6a5f2c Uploaded
modencode-dcc
parents:
diff changeset
12 #globals
9f09ba6a5f2c Uploaded
modencode-dcc
parents:
diff changeset
13 #======================================================================
9f09ba6a5f2c Uploaded
modencode-dcc
parents:
diff changeset
14 my $bamfile = $ARGV[0];
9f09ba6a5f2c Uploaded
modencode-dcc
parents:
diff changeset
15 my $reffile = $ARGV[1];
9f09ba6a5f2c Uploaded
modencode-dcc
parents:
diff changeset
16 my $outputfile = $ARGV[2];
9f09ba6a5f2c Uploaded
modencode-dcc
parents:
diff changeset
17 my $bamname = $ARGV[3];
9f09ba6a5f2c Uploaded
modencode-dcc
parents:
diff changeset
18 my $refname = $ARGV[4];
9f09ba6a5f2c Uploaded
modencode-dcc
parents:
diff changeset
19
9f09ba6a5f2c Uploaded
modencode-dcc
parents:
diff changeset
20 my $genomesize = 0;
9f09ba6a5f2c Uploaded
modencode-dcc
parents:
diff changeset
21 my $pileupwc = 0;
9f09ba6a5f2c Uploaded
modencode-dcc
parents:
diff changeset
22 my $pileupc4 = 0;
9f09ba6a5f2c Uploaded
modencode-dcc
parents:
diff changeset
23
9f09ba6a5f2c Uploaded
modencode-dcc
parents:
diff changeset
24 my $percentofcover = 0;
9f09ba6a5f2c Uploaded
modencode-dcc
parents:
diff changeset
25 my $averagebasecoverage = 0;
9f09ba6a5f2c Uploaded
modencode-dcc
parents:
diff changeset
26
9f09ba6a5f2c Uploaded
modencode-dcc
parents:
diff changeset
27
9f09ba6a5f2c Uploaded
modencode-dcc
parents:
diff changeset
28 #function declarations
9f09ba6a5f2c Uploaded
modencode-dcc
parents:
diff changeset
29 #======================================================================
9f09ba6a5f2c Uploaded
modencode-dcc
parents:
diff changeset
30 #get the pileupwc value
9f09ba6a5f2c Uploaded
modencode-dcc
parents:
diff changeset
31 sub Getpileupwc()
9f09ba6a5f2c Uploaded
modencode-dcc
parents:
diff changeset
32 {
9f09ba6a5f2c Uploaded
modencode-dcc
parents:
diff changeset
33 #system ("samtools pileup -f ${reffile} ${bamfile} | cut --fields=4 > tmp");
9f09ba6a5f2c Uploaded
modencode-dcc
parents:
diff changeset
34 #$pileupwc = `cat tmp | wc -l`;
9f09ba6a5f2c Uploaded
modencode-dcc
parents:
diff changeset
35 $pileupwc = `samtools pileup -f ${reffile} ${bamfile} | cut --fields=4 | wc -l`;
9f09ba6a5f2c Uploaded
modencode-dcc
parents:
diff changeset
36 }
9f09ba6a5f2c Uploaded
modencode-dcc
parents:
diff changeset
37
9f09ba6a5f2c Uploaded
modencode-dcc
parents:
diff changeset
38 #get the pileupc4 value
9f09ba6a5f2c Uploaded
modencode-dcc
parents:
diff changeset
39 sub Getpileupc4()
9f09ba6a5f2c Uploaded
modencode-dcc
parents:
diff changeset
40 {
9f09ba6a5f2c Uploaded
modencode-dcc
parents:
diff changeset
41 $pileupc4 = `samtools pileup -f ${reffile} ${bamfile} | cut --fields=4 | awk '{ total += \$1 } END { print total }'`;
9f09ba6a5f2c Uploaded
modencode-dcc
parents:
diff changeset
42 }
9f09ba6a5f2c Uploaded
modencode-dcc
parents:
diff changeset
43
9f09ba6a5f2c Uploaded
modencode-dcc
parents:
diff changeset
44 #get the genomesize value
9f09ba6a5f2c Uploaded
modencode-dcc
parents:
diff changeset
45 sub Getgenomesize()
9f09ba6a5f2c Uploaded
modencode-dcc
parents:
diff changeset
46 {
9f09ba6a5f2c Uploaded
modencode-dcc
parents:
diff changeset
47 $genomesize = `tail -n 1 "$reffile.fai"| cut --fields=3`;
9f09ba6a5f2c Uploaded
modencode-dcc
parents:
diff changeset
48 }
9f09ba6a5f2c Uploaded
modencode-dcc
parents:
diff changeset
49
9f09ba6a5f2c Uploaded
modencode-dcc
parents:
diff changeset
50 #function to calculate the final 2 values for output
9f09ba6a5f2c Uploaded
modencode-dcc
parents:
diff changeset
51 sub Calculate()
9f09ba6a5f2c Uploaded
modencode-dcc
parents:
diff changeset
52 {
9f09ba6a5f2c Uploaded
modencode-dcc
parents:
diff changeset
53 $percentofcover = ( int($pileupwc) / int($genomesize) ) * 100;
9f09ba6a5f2c Uploaded
modencode-dcc
parents:
diff changeset
54 $averagebasecoverage = int($pileupc4) / int($pileupwc);
9f09ba6a5f2c Uploaded
modencode-dcc
parents:
diff changeset
55 }
9f09ba6a5f2c Uploaded
modencode-dcc
parents:
diff changeset
56
9f09ba6a5f2c Uploaded
modencode-dcc
parents:
diff changeset
57 #function to write to output file
9f09ba6a5f2c Uploaded
modencode-dcc
parents:
diff changeset
58 sub Output()
9f09ba6a5f2c Uploaded
modencode-dcc
parents:
diff changeset
59 {
9f09ba6a5f2c Uploaded
modencode-dcc
parents:
diff changeset
60 open FILE, '>'.$outputfile or die "unable to create $outputfile\n";
9f09ba6a5f2c Uploaded
modencode-dcc
parents:
diff changeset
61
9f09ba6a5f2c Uploaded
modencode-dcc
parents:
diff changeset
62 print FILE "\n#";
9f09ba6a5f2c Uploaded
modencode-dcc
parents:
diff changeset
63 print FILE "\n# Generated by BAMEdit. Please send your questions/comments to modENCODE DCC at help\@modencode.org.";
9f09ba6a5f2c Uploaded
modencode-dcc
parents:
diff changeset
64 print FILE "\n#";
9f09ba6a5f2c Uploaded
modencode-dcc
parents:
diff changeset
65 print FILE "\n# coverage: % of bases in genome covered";
9f09ba6a5f2c Uploaded
modencode-dcc
parents:
diff changeset
66 print FILE "\n# avg coverage: average coverage of bases covered in genome ";
9f09ba6a5f2c Uploaded
modencode-dcc
parents:
diff changeset
67 print FILE "\n#\n";
9f09ba6a5f2c Uploaded
modencode-dcc
parents:
diff changeset
68 print FILE "input file\t$bamname\n";
9f09ba6a5f2c Uploaded
modencode-dcc
parents:
diff changeset
69 print FILE "genome file\t$refname\n";
9f09ba6a5f2c Uploaded
modencode-dcc
parents:
diff changeset
70 print FILE "genome length\t$genomesize";
9f09ba6a5f2c Uploaded
modencode-dcc
parents:
diff changeset
71 printf FILE "coverage\t%.2f\n", $percentofcover;
9f09ba6a5f2c Uploaded
modencode-dcc
parents:
diff changeset
72 printf FILE "avg coverage\t%.2f\n", $averagebasecoverage;
9f09ba6a5f2c Uploaded
modencode-dcc
parents:
diff changeset
73
9f09ba6a5f2c Uploaded
modencode-dcc
parents:
diff changeset
74 close FILE;
9f09ba6a5f2c Uploaded
modencode-dcc
parents:
diff changeset
75 }
9f09ba6a5f2c Uploaded
modencode-dcc
parents:
diff changeset
76
9f09ba6a5f2c Uploaded
modencode-dcc
parents:
diff changeset
77 #function to clean up, the tmp file and the .fai file
9f09ba6a5f2c Uploaded
modencode-dcc
parents:
diff changeset
78 sub Cleanup()
9f09ba6a5f2c Uploaded
modencode-dcc
parents:
diff changeset
79 {
9f09ba6a5f2c Uploaded
modencode-dcc
parents:
diff changeset
80 system ("sudo rm ${reffile}.dat.fai");
9f09ba6a5f2c Uploaded
modencode-dcc
parents:
diff changeset
81 }
9f09ba6a5f2c Uploaded
modencode-dcc
parents:
diff changeset
82
9f09ba6a5f2c Uploaded
modencode-dcc
parents:
diff changeset
83 #function calls
9f09ba6a5f2c Uploaded
modencode-dcc
parents:
diff changeset
84 #======================================================================
9f09ba6a5f2c Uploaded
modencode-dcc
parents:
diff changeset
85 Getpileupwc();
9f09ba6a5f2c Uploaded
modencode-dcc
parents:
diff changeset
86 Getpileupc4();
9f09ba6a5f2c Uploaded
modencode-dcc
parents:
diff changeset
87 Getgenomesize();
9f09ba6a5f2c Uploaded
modencode-dcc
parents:
diff changeset
88 Calculate();
9f09ba6a5f2c Uploaded
modencode-dcc
parents:
diff changeset
89 Output();
9f09ba6a5f2c Uploaded
modencode-dcc
parents:
diff changeset
90 Cleanup();