annotate scripts/variations2circos.sh @ 0:46f7f689b929 draft default tip

Uploaded
author saskia-hiltemann
date Tue, 17 Sep 2013 11:29:11 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
46f7f689b929 Uploaded
saskia-hiltemann
parents:
diff changeset
1 #! /bin/bash
46f7f689b929 Uploaded
saskia-hiltemann
parents:
diff changeset
2
46f7f689b929 Uploaded
saskia-hiltemann
parents:
diff changeset
3 #convert LV output to circos input
46f7f689b929 Uploaded
saskia-hiltemann
parents:
diff changeset
4 # LV out: variantId chromosome begin end varType reference alleleSeq xRef
46f7f689b929 Uploaded
saskia-hiltemann
parents:
diff changeset
5
46f7f689b929 Uploaded
saskia-hiltemann
parents:
diff changeset
6 # bin results and get snp density (value 0-1, numvars/binsize)
46f7f689b929 Uploaded
saskia-hiltemann
parents:
diff changeset
7 # determine highest value seen (written as "max = <maxval>" to maxval_orig.txt) and write the capped "max = <maxval>" to maxval.txt
46f7f689b929 Uploaded
saskia-hiltemann
parents:
diff changeset
8
46f7f689b929 Uploaded
saskia-hiltemann
parents:
diff changeset
9 infile=$1
46f7f689b929 Uploaded
saskia-hiltemann
parents:
diff changeset
10 outfile_wg=$2
46f7f689b929 Uploaded
saskia-hiltemann
parents:
diff changeset
11 outfile_chr=$3
46f7f689b929 Uploaded
saskia-hiltemann
parents:
diff changeset
12
46f7f689b929 Uploaded
saskia-hiltemann
parents:
diff changeset
13
46f7f689b929 Uploaded
saskia-hiltemann
parents:
diff changeset
14 echo "converting variations file"
46f7f689b929 Uploaded
saskia-hiltemann
parents:
diff changeset
15
46f7f689b929 Uploaded
saskia-hiltemann
parents:
diff changeset
16 #whole-chromosome file (bigger bins)
46f7f689b929 Uploaded
saskia-hiltemann
parents:
diff changeset
17 awk 'BEGIN{
46f7f689b929 Uploaded
saskia-hiltemann
parents:
diff changeset
18 FS="\t";
46f7f689b929 Uploaded
saskia-hiltemann
parents:
diff changeset
19 OFS=" ";
46f7f689b929 Uploaded
saskia-hiltemann
parents:
diff changeset
20 maxval=0;
46f7f689b929 Uploaded
saskia-hiltemann
parents:
diff changeset
21 binsize=5120000
46f7f689b929 Uploaded
saskia-hiltemann
parents:
diff changeset
22 }{
46f7f689b929 Uploaded
saskia-hiltemann
parents:
diff changeset
23 if (FNR>1 && index($1,"#")==0 && $0!="" && index($1,">")==0){
46f7f689b929 Uploaded
saskia-hiltemann
parents:
diff changeset
24 density[$2,int($3/binsize)]++;
46f7f689b929 Uploaded
saskia-hiltemann
parents:
diff changeset
25 }
46f7f689b929 Uploaded
saskia-hiltemann
parents:
diff changeset
26
46f7f689b929 Uploaded
saskia-hiltemann
parents:
diff changeset
27
46f7f689b929 Uploaded
saskia-hiltemann
parents:
diff changeset
28 }END{
46f7f689b929 Uploaded
saskia-hiltemann
parents:
diff changeset
29 for (i in density){
46f7f689b929 Uploaded
saskia-hiltemann
parents:
diff changeset
30 numbins++;
46f7f689b929 Uploaded
saskia-hiltemann
parents:
diff changeset
31 avg+=(density[i]/binsize);
46f7f689b929 Uploaded
saskia-hiltemann
parents:
diff changeset
32 split(i, separate, "\034") # separate[1] contains chr, separate[2] contains bin
46f7f689b929 Uploaded
saskia-hiltemann
parents:
diff changeset
33 print separate[1], separate[2]*binsize, (separate[2]+1)*binsize, density[i]/binsize
46f7f689b929 Uploaded
saskia-hiltemann
parents:
diff changeset
34 if(density[i]/binsize > maxval)
46f7f689b929 Uploaded
saskia-hiltemann
parents:
diff changeset
35 maxval=density[i]/binsize
46f7f689b929 Uploaded
saskia-hiltemann
parents:
diff changeset
36
46f7f689b929 Uploaded
saskia-hiltemann
parents:
diff changeset
37 }
46f7f689b929 Uploaded
saskia-hiltemann
parents:
diff changeset
38 avg/=numbins
46f7f689b929 Uploaded
saskia-hiltemann
parents:
diff changeset
39 print "max = " maxval > "maxval_orig.txt"
46f7f689b929 Uploaded
saskia-hiltemann
parents:
diff changeset
40 if(avg*5 < maxval)
46f7f689b929 Uploaded
saskia-hiltemann
parents:
diff changeset
41 maxval = avg*5
46f7f689b929 Uploaded
saskia-hiltemann
parents:
diff changeset
42 print "avg = " avg > "avg.txt"
46f7f689b929 Uploaded
saskia-hiltemann
parents:
diff changeset
43 print "max = " maxval > "maxval.txt"
46f7f689b929 Uploaded
saskia-hiltemann
parents:
diff changeset
44 print maxval > "maxtmp"
46f7f689b929 Uploaded
saskia-hiltemann
parents:
diff changeset
45
46f7f689b929 Uploaded
saskia-hiltemann
parents:
diff changeset
46
46f7f689b929 Uploaded
saskia-hiltemann
parents:
diff changeset
47 }' $infile > $outfile_wg.tmp2
46f7f689b929 Uploaded
saskia-hiltemann
parents:
diff changeset
48
46f7f689b929 Uploaded
saskia-hiltemann
parents:
diff changeset
49 maxval=`cat maxtmp`
46f7f689b929 Uploaded
saskia-hiltemann
parents:
diff changeset
50 echo "maxval: $maxval"
46f7f689b929 Uploaded
saskia-hiltemann
parents:
diff changeset
51
46f7f689b929 Uploaded
saskia-hiltemann
parents:
diff changeset
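52 #second pass, anything greater than maxval is set to maxval (NOTE(review): FS is tab below but the first pass writes space-separated fields, so $4 is empty and nothing is clamped -- confirm)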
46f7f689b929 Uploaded
saskia-hiltemann
parents:
diff changeset
53 awk 'BEGIN{
46f7f689b929 Uploaded
saskia-hiltemann
parents:
diff changeset
54 FS="\t";
46f7f689b929 Uploaded
saskia-hiltemann
parents:
diff changeset
55 OFS=" ";
46f7f689b929 Uploaded
saskia-hiltemann
parents:
diff changeset
56 maxval="'"$maxval"'"
46f7f689b929 Uploaded
saskia-hiltemann
parents:
diff changeset
57
46f7f689b929 Uploaded
saskia-hiltemann
parents:
diff changeset
58 }{
46f7f689b929 Uploaded
saskia-hiltemann
parents:
diff changeset
59 if (FNR>1 && $4 > maxval){
46f7f689b929 Uploaded
saskia-hiltemann
parents:
diff changeset
60 print $1,$2,$3,maxval
46f7f689b929 Uploaded
saskia-hiltemann
parents:
diff changeset
61 }
46f7f689b929 Uploaded
saskia-hiltemann
parents:
diff changeset
62 else print $0
46f7f689b929 Uploaded
saskia-hiltemann
parents:
diff changeset
63
46f7f689b929 Uploaded
saskia-hiltemann
parents:
diff changeset
64
46f7f689b929 Uploaded
saskia-hiltemann
parents:
diff changeset
65 }END{
46f7f689b929 Uploaded
saskia-hiltemann
parents:
diff changeset
66 }' $outfile_wg.tmp2 > $outfile_wg.tmp
46f7f689b929 Uploaded
saskia-hiltemann
parents:
diff changeset
67
46f7f689b929 Uploaded
saskia-hiltemann
parents:
diff changeset
68
46f7f689b929 Uploaded
saskia-hiltemann
parents:
diff changeset
69 sed -i 's/chr/hs/g' $outfile_wg.tmp
46f7f689b929 Uploaded
saskia-hiltemann
parents:
diff changeset
70 sed -i '/hsM/d' $outfile_wg.tmp
46f7f689b929 Uploaded
saskia-hiltemann
parents:
diff changeset
71 sort -d -k1,2 $outfile_wg.tmp > $outfile_wg
46f7f689b929 Uploaded
saskia-hiltemann
parents:
diff changeset
72
46f7f689b929 Uploaded
saskia-hiltemann
parents:
diff changeset
73
46f7f689b929 Uploaded
saskia-hiltemann
parents:
diff changeset
74
46f7f689b929 Uploaded
saskia-hiltemann
parents:
diff changeset
75
46f7f689b929 Uploaded
saskia-hiltemann
parents:
diff changeset
76 #per-chromosome file (smaller bins)
46f7f689b929 Uploaded
saskia-hiltemann
parents:
diff changeset
77 awk 'BEGIN{
46f7f689b929 Uploaded
saskia-hiltemann
parents:
diff changeset
78 FS="\t";
46f7f689b929 Uploaded
saskia-hiltemann
parents:
diff changeset
79 OFS=" ";
46f7f689b929 Uploaded
saskia-hiltemann
parents:
diff changeset
80 binsize=512000
46f7f689b929 Uploaded
saskia-hiltemann
parents:
diff changeset
81 }{
46f7f689b929 Uploaded
saskia-hiltemann
parents:
diff changeset
82 if (FNR>1){
46f7f689b929 Uploaded
saskia-hiltemann
parents:
diff changeset
83 density[$2,int($3/binsize)]++;
46f7f689b929 Uploaded
saskia-hiltemann
parents:
diff changeset
84 }
46f7f689b929 Uploaded
saskia-hiltemann
parents:
diff changeset
85
46f7f689b929 Uploaded
saskia-hiltemann
parents:
diff changeset
86
46f7f689b929 Uploaded
saskia-hiltemann
parents:
diff changeset
87 }END{
46f7f689b929 Uploaded
saskia-hiltemann
parents:
diff changeset
88 for (i in density){
46f7f689b929 Uploaded
saskia-hiltemann
parents:
diff changeset
89 numbins["all"]++;
46f7f689b929 Uploaded
saskia-hiltemann
parents:
diff changeset
90 avg["all"]+=(density[i]/binsize);
46f7f689b929 Uploaded
saskia-hiltemann
parents:
diff changeset
91
46f7f689b929 Uploaded
saskia-hiltemann
parents:
diff changeset
92 split(i, separate, "\034") # separate[1] contains chr, separate[2] contains bin
46f7f689b929 Uploaded
saskia-hiltemann
parents:
diff changeset
93 chr=separate[1]
46f7f689b929 Uploaded
saskia-hiltemann
parents:
diff changeset
94 avg[chr]+= (density[i]/binsize);
46f7f689b929 Uploaded
saskia-hiltemann
parents:
diff changeset
95 numbins[chr]++
46f7f689b929 Uploaded
saskia-hiltemann
parents:
diff changeset
96
46f7f689b929 Uploaded
saskia-hiltemann
parents:
diff changeset
97 print separate[1], separate[2]*binsize, (separate[2]+1)*binsize, density[i]/binsize
46f7f689b929 Uploaded
saskia-hiltemann
parents:
diff changeset
98
46f7f689b929 Uploaded
saskia-hiltemann
parents:
diff changeset
99 if(density[i]/binsize > maxval["all"])
46f7f689b929 Uploaded
saskia-hiltemann
parents:
diff changeset
100 maxval["all"]=density[i]/binsize
46f7f689b929 Uploaded
saskia-hiltemann
parents:
diff changeset
101 if(density[i]/binsize > maxval[chr])
46f7f689b929 Uploaded
saskia-hiltemann
parents:
diff changeset
102 maxval[chr]=density[i]/binsize
46f7f689b929 Uploaded
saskia-hiltemann
parents:
diff changeset
103
46f7f689b929 Uploaded
saskia-hiltemann
parents:
diff changeset
104 }
46f7f689b929 Uploaded
saskia-hiltemann
parents:
diff changeset
105
46f7f689b929 Uploaded
saskia-hiltemann
parents:
diff changeset
106
46f7f689b929 Uploaded
saskia-hiltemann
parents:
diff changeset
107
46f7f689b929 Uploaded
saskia-hiltemann
parents:
diff changeset
108 for (i in avg){
46f7f689b929 Uploaded
saskia-hiltemann
parents:
diff changeset
109 avg[i]/=numbins[i]
46f7f689b929 Uploaded
saskia-hiltemann
parents:
diff changeset
110 if(avg[i]*3 < maxval[i])
46f7f689b929 Uploaded
saskia-hiltemann
parents:
diff changeset
111 maxval[i] = avg[i]*3
46f7f689b929 Uploaded
saskia-hiltemann
parents:
diff changeset
112 print "avg = " avg[i] > "snp_avg_"i".txt"
46f7f689b929 Uploaded
saskia-hiltemann
parents:
diff changeset
113 print "max = " maxval[i] > "snp_maxval_"i".txt"
46f7f689b929 Uploaded
saskia-hiltemann
parents:
diff changeset
114
46f7f689b929 Uploaded
saskia-hiltemann
parents:
diff changeset
115 }
46f7f689b929 Uploaded
saskia-hiltemann
parents:
diff changeset
116
46f7f689b929 Uploaded
saskia-hiltemann
parents:
diff changeset
117
46f7f689b929 Uploaded
saskia-hiltemann
parents:
diff changeset
118
46f7f689b929 Uploaded
saskia-hiltemann
parents:
diff changeset
119 }' $infile > $outfile_chr.tmp
46f7f689b929 Uploaded
saskia-hiltemann
parents:
diff changeset
120
46f7f689b929 Uploaded
saskia-hiltemann
parents:
diff changeset
121
46f7f689b929 Uploaded
saskia-hiltemann
parents:
diff changeset
122
46f7f689b929 Uploaded
saskia-hiltemann
parents:
diff changeset
123 sed -i 's/chr/hs/g' $outfile_chr.tmp
46f7f689b929 Uploaded
saskia-hiltemann
parents:
diff changeset
124 sed -i '/hsM/d' $outfile_chr.tmp
46f7f689b929 Uploaded
saskia-hiltemann
parents:
diff changeset
125 sort -d -k1,2 $outfile_chr.tmp > $outfile_chr
46f7f689b929 Uploaded
saskia-hiltemann
parents:
diff changeset
126
46f7f689b929 Uploaded
saskia-hiltemann
parents:
diff changeset
127
46f7f689b929 Uploaded
saskia-hiltemann
parents:
diff changeset
128
46f7f689b929 Uploaded
saskia-hiltemann
parents:
diff changeset
129