Mercurial > repos > saskia-hiltemann > cgtag_circos
comparison scripts/variations2circos.sh @ 0:46f7f689b929 draft default tip
Uploaded
author | saskia-hiltemann |
---|---|
date | Tue, 17 Sep 2013 11:29:11 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:46f7f689b929 |
---|---|
1 #! /bin/bash | |
2 | |
3 #convert LV output to circos input | |
4 # LV out: variantId chromosome begin end varType reference alleleSeq xRef | |
5 | |
6 # bin results and get snp density (value 0-1, numvars/binsize) | |
7 # determine highest value seen and write "max = <maxval>" to max.txt | |
8 | |
9 infile=$1 | |
10 outfile_wg=$2 | |
11 outfile_chr=$3 | |
12 | |
13 | |
14 echo "converting variations file" | |
15 | |
16 #whole-chromosome file (bigger bins) | |
17 awk 'BEGIN{ | |
18 FS="\t"; | |
19 OFS=" "; | |
20 maxval=0; | |
21 binsize=5120000 | |
22 }{ | |
23 if (FNR>1 && index($1,"#")==0 && $0!="" && index($1,">")==0){ | |
24 density[$2,int($3/binsize)]++; | |
25 } | |
26 | |
27 | |
28 }END{ | |
29 for (i in density){ | |
30 numbins++; | |
31 avg+=(density[i]/binsize); | |
32 split(i, separate, "\034") # separate[1] contains chr, separate[2] contains bin | |
33 print separate[1], separate[2]*binsize, (separate[2]+1)*binsize, density[i]/binsize | |
34 if(density[i]/binsize > maxval) | |
35 maxval=density[i]/binsize | |
36 | |
37 } | |
38 avg/=numbins | |
39 print "max = " maxval > "maxval_orig.txt" | |
40 if(avg*5 < maxval) | |
41 maxval = avg*5 | |
42 print "avg = " avg > "avg.txt" | |
43 print "max = " maxval > "maxval.txt" | |
44 print maxval > "maxtmp" | |
45 | |
46 | |
47 }' $infile > $outfile_wg.tmp2 | |
48 | |
49 maxval=`cat maxtmp` | |
50 echo "maxval: $maxval" | |
51 | |
52 #second pass, anything greater than maxval is set to maxval | |
53 awk 'BEGIN{ | |
54 FS="\t"; | |
55 OFS=" "; | |
56 maxval="'"$maxval"'" | |
57 | |
58 }{ | |
59 if (FNR>1 && $4 > maxval){ | |
60 print $1,$2,$3,maxval | |
61 } | |
62 else print $0 | |
63 | |
64 | |
65 }END{ | |
66 }' $outfile_wg.tmp2 > $outfile_wg.tmp | |
67 | |
68 | |
69 sed -i 's/chr/hs/g' $outfile_wg.tmp | |
70 sed -i '/hsM/d' $outfile_wg.tmp | |
71 sort -d -k1,2 $outfile_wg.tmp > $outfile_wg | |
72 | |
73 | |
74 | |
75 | |
76 #per-chromosome file (smaller bins) | |
77 awk 'BEGIN{ | |
78 FS="\t"; | |
79 OFS=" "; | |
80 binsize=512000 | |
81 }{ | |
82 if (FNR>1){ | |
83 density[$2,int($3/binsize)]++; | |
84 } | |
85 | |
86 | |
87 }END{ | |
88 for (i in density){ | |
89 numbins["all"]++; | |
90 avg["all"]+=(density[i]/binsize); | |
91 | |
92 split(i, separate, "\034") # separate[1] contains chr, separate[2] contains bin | |
93 chr=separate[1] | |
94 avg[chr]+= (density[i]/binsize); | |
95 numbins[chr]++ | |
96 | |
97 print separate[1], separate[2]*binsize, (separate[2]+1)*binsize, density[i]/binsize | |
98 | |
99 if(density[i]/binsize > maxval["all"]) | |
100 maxval["all"]=density[i]/binsize | |
101 if(density[i]/binsize > maxval[chr]) | |
102 maxval[chr]=density[i]/binsize | |
103 | |
104 } | |
105 | |
106 | |
107 | |
108 for (i in avg){ | |
109 avg[i]/=numbins[i] | |
110 if(avg[i]*3 < maxval[i]) | |
111 maxval[i] = avg[i]*3 | |
112 print "avg = " avg[i] > "snp_avg_"i".txt" | |
113 print "max = " maxval[i] > "snp_maxval_"i".txt" | |
114 | |
115 } | |
116 | |
117 | |
118 | |
119 }' $infile > $outfile_chr.tmp | |
120 | |
121 | |
122 | |
123 sed -i 's/chr/hs/g' $outfile_chr.tmp | |
124 sed -i '/hsM/d' $outfile_chr.tmp | |
125 sort -d -k1,2 $outfile_chr.tmp > $outfile_chr | |
126 | |
127 | |
128 | |
129 |