annotate snpFreq2.pl @ 1:1e488c26e9d8 draft default tip

planemo upload commit 33927a87ba2eee9bf0ecdd376a66241b17b3d734
author devteam
date Tue, 13 Oct 2015 12:52:33 -0400
parents afdfa7335319
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
1 #!/usr/bin/env perl
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
2
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
3 use strict;
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
4 use warnings;
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
5
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
6 #using large SNP tables (~1G) may require large memory ~15G in R
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
7
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
8 #expected input: path to file, cols of counts (2 sets of 3), threshold
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
9 if (!@ARGV or scalar @ARGV != 11) {
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
10 if (!@ARGV or scalar @ARGV != 6) { #snpTable usage
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
11 print "usage for tab separated allele counts\n",
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
12 "snpFreq.pl inputType #threshold /path/to/snps.txt outfile <6 column numbers(1 based) with counts for alleles, first one group then another>\n";
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
13 print "OR for SNP tables\n";
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
14 print "usage snpFreq.pl inputType #threshold /path/to/snpTable.txt outfile group1File group2File\n";
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
15 exit 1;
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
16 }
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
17 }
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
18
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
19 #get and verify inputs
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
20 my ($file, $a1, $a2, $a3, $b1, $b2, $b3, $thresh, $outfile);
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
21 if ($ARGV[0] eq 'tab') {
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
22 shift @ARGV;
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
23 $thresh = shift @ARGV;
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
24 if ($thresh !~ /^\d*\.?\d+$/) {
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
25 print "Error the threshold must be a number. Got $thresh\n";
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
26 exit 1;
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
27 }elsif ($thresh > .3) {
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
28 print "Error the threshold can not be greater than 0.3 got $thresh\n";
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
29 exit 1;
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
30 }
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
31 $file = shift @ARGV;
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
32 $outfile = shift @ARGV;
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
33 $a1 = shift @ARGV;
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
34 if ($a1 =~ /\D/ or $a1 < 1) {
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
35 print "Error the column number, must be an integer greater than or equal to 1. Got $a1\n";
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
36 exit 1;
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
37 }
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
38 $a2 = shift @ARGV;
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
39 if ($a2 =~ /\D/ or $a2 < 1) {
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
40 print "Error the column number, must be an integer greater than or equal to 1. Got $a2\n";
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
41 exit 1;
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
42 }
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
43 $a3 = shift @ARGV;
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
44 if ($a3 =~ /\D/ or $a3 < 1) {
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
45 print "Error the column number, must be an integer greater than or equal to 1. Got $a3\n";
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
46 exit 1;
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
47 }
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
48 $b1 = shift @ARGV;
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
49 if ($b1 =~ /\D/ or $b1 < 1) {
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
50 print "Error the column number, must be an integer greater than or equal to 1. Got $b1\n";
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
51 exit 1;
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
52 }
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
53 $b2 = shift @ARGV;
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
54 if ($b2 =~ /\D/ or $b2 < 1) {
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
55 print "Error the column number, must be an integer greater than or equal to 1. Got $b2\n";
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
56 exit 1;
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
57 }
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
58 $b3 = shift @ARGV;
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
59 if ($b3 =~ /\D/ or $b3 < 1) {
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
60 print "Error the column number, must be an integer greater than or equal to 1. Got $b3\n";
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
61 exit 1;
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
62 }
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
63 }else { #snp table convert and assign variables
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
64 #snpTable.txt #threshold outfile workingdir
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
65 shift @ARGV;
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
66 $thresh = shift @ARGV;
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
67 if ($thresh !~ /^\d*\.?\d+$/) {
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
68 print "Error the threshold must be a number. Got $thresh\n";
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
69 exit 1;
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
70 }elsif ($thresh > .3) {
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
71 print "Error the threshold can not be greater than 0.3 got $thresh\n";
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
72 exit 1;
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
73 }
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
74 $file = shift @ARGV;
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
75 $outfile = shift @ARGV;
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
76 my $grpFile = shift @ARGV;
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
77 my @g1;
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
78 open(FH, $grpFile) or die "Couldn't open $grpFile, $!\n";
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
79 while (<FH>) {
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
80 chomp;
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
81 if (/^(\d+)\s/) { push(@g1, $1); }
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
82 }
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
83 close FH or die "Couldn't close $grpFile, $!\n";
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
84 $grpFile = shift @ARGV;
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
85 my @g2;
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
86 open(FH, $grpFile) or die "Couldn't open $grpFile, $!\n";
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
87 while (<FH>) {
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
88 chomp;
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
89 if (/^(\d+)\s/) { push(@g2, $1); }
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
90 }
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
91 close FH or die "Couldn't close $grpFile, $!\n";
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
92 if ($file =~ /.gz$/) {
1
1e488c26e9d8 planemo upload commit 33927a87ba2eee9bf0ecdd376a66241b17b3d734
devteam
parents: 0
diff changeset
93 open(FH, "zcat < $file |") or die "Couldn't read $file, $!\n";
0
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
94 }else {
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
95 open(FH, $file) or die "Couldn't read $file, $!\n";
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
96 }
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
97 open(OUT, ">", "snpTable.txt") or die "Couldn't open snpTable.txt, $!\n";
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
98 my $size;
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
99 while (<FH>) {
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
100 chomp;
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
101 if (/^#/) { next; } #header
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
102 my @f = split(/\t/);
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
103 $size = scalar @f;
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
104 my @gc1 = (0, 0, 0);
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
105 my @gc2 = (0, 0, 0);
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
106 foreach my $g (@g1) {
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
107 my $i = $g+1; #g is 1 based first col want 0 based snp call column
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
108 if ($i > $#f) { die "ERROR looking for index $i which is greater than the list $#f\n"; }
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
109 if ($f[$i] == -1 or $f[$i] == 2) { #treat unknown as ref
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
110 $gc1[0]++;
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
111 }elsif ($f[$i] == 1) {
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
112 $gc1[2]++;
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
113 }elsif ($f[$i] == 0) {
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
114 $gc1[1]++;
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
115 }else { die "Unexpected value for genotype $f[$i] in ", join(" ", @f), "\n"; }
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
116 }
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
117 foreach my $g (@g2) {
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
118 my $i = $g+1; #g is 1 based first col want 0 based snp call column
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
119 if ($f[$i] == -1 or $f[$i] == 2) { #treat unknown as ref
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
120 $gc2[0]++;
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
121 }elsif ($f[$i] == 1) {
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
122 $gc2[2]++;
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
123 }elsif ($f[$i] == 0) {
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
124 $gc2[1]++;
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
125 }else { die "Unexpected value for genotype $f[$i] in ", join(" ", @f), "\n"; }
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
126 }
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
127 print OUT join("\t", @f), "\t", join("\t", @gc1), "\t", join("\t", @gc2),
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
128 "\n";
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
129 }
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
130 close FH or die "Couldn't close $file, $!\n";
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
131 close OUT or die "Couldn't close snpTable.txt, $!\n";
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
132 my $i = $size + 1; #next 1 based column after input data
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
133 $a1 = $i++;
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
134 $a2 = $i++;
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
135 $a3 = $i++;
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
136 $b1 = $i++;
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
137 $b2 = $i++;
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
138 $b3 = $i++;
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
139 $file = "snpTable.txt";
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
140 }
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
141
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
142 #run a fishers exact test (using R) on whole table
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
143 my $cmd = qq|options(warn=-1)
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
144 tab <- read.table('$file', sep="\t")
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
145 size <- length(tab[,1])
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
146 width <- length(tab[1,])
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
147 x <- 1:size
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
148 y <- matrix(data=0, nr=size, nc=6)
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
149 for(i in 1:size) {
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
150 m <- matrix(c(tab[i,$a1], tab[i,$b1], tab[i,$a2], tab[i,$b2], tab[i,$a3], tab[i,$b3]), nrow=2)
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
151 t <- fisher.test(m)
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
152 x[i] <- t\$p.value
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
153 if (x[i] >= 1) {
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
154 x[i] <- .999
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
155 }
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
156 n <- (tab[i,$a1] + tab[i,$a2] + tab[i,$a3] + tab[i,$b1] + tab[i,$b2] + tab[i,$b3])
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
157 n_a <- (tab[i,$a1] + tab[i,$a2] + tab[i,$a3])
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
158 y[i,1] <- ((tab[i,$a1] + tab[i,$b1])*(n_a))/n
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
159 y[i,1] <- round(y[i,1],3)
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
160 y[i,2] <- ((tab[i,$a2] + tab[i,$b2])*(n_a))/n
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
161 y[i,2] <- round(y[i,2],3)
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
162 y[i,3] <- ((tab[i,$a3] + tab[i,$b3])*(n_a))/n
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
163 y[i,3] <- round(y[i,3],3)
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
164 n_b <- (tab[i,$b1] + tab[i,$b2] + tab[i,$b3])
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
165 y[i,4] <- ((tab[i,$a1] + tab[i,$b1])*(n_b))/n
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
166 y[i,4] <- round(y[i,4],3)
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
167 y[i,5] <- ((tab[i,$a2] + tab[i,$b2])*(n_b))/n
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
168 y[i,5] <- round(y[i,5],3)
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
169 y[i,6] <- ((tab[i,$a3] + tab[i,$b3])*(n_b))/n
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
170 y[i,6] <- round(y[i,6],3)
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
171 }|;
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
172 #results <- data.frame(tab[1:size,1:width], x[1:size])
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
173 #write.table(results, file="$outfile", row.names = FALSE ,col.names = FALSE,quote = FALSE, sep="\t")
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
174 #q()|;
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
175
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
176 #my $cmd2 = qq|suppressPackageStartupMessages(library(lib.loc='/afs/bx.psu.edu/home/giardine/lib/R', qvalue))
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
177 my $cmd2 = qq|suppressPackageStartupMessages(library(qvalue))
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
178 qobj <- qvalue(x[1:size], lambda=seq(0,0.90,$thresh), pi0.method="bootstrap", fdr.level=0.1, robust=FALSE, smooth.log.pi0 = FALSE)
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
179 q <- qobj\$qvalues
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
180 results <- data.frame(tab[1:size,1:width], y[1:size,1:6], x[1:size], q[1:size])
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
181 write.table(results, file="$outfile", row.names = FALSE ,col.names = FALSE,quote = FALSE, sep="\t")
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
182 q()|;
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
183
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
184 #for TESTING
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
185 my $pr = qq|results <- data.frame(tab[1:size,1:width], y[1:size,1:6], x[1:size])
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
186 write.table(results, file="$outfile", row.names = FALSE ,col.names = FALSE,quote = FALSE, sep="\t")
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
187 q()|;
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
188
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
189 open(FT, "| R --slave --vanilla")
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
190 or die "Couldn't call fisher.text, $!\n";
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
191 print FT $cmd, "\n"; #fisher test
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
192 print FT $cmd2, "\n"; #qvalues and results
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
193 #print FT $pr, "\n";
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
194 close FT or die "Couldn't finish fisher.test, $!\n";
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
195
afdfa7335319 Imported from capsule None
devteam
parents:
diff changeset
196 exit;