Mercurial > repos > saskia-hiltemann > cgtag_circos
view scripts/variations2circos.sh @ 0:46f7f689b929 draft default tip
Uploaded
author | saskia-hiltemann |
---|---|
date | Tue, 17 Sep 2013 11:29:11 -0400 |
parents | |
children |
line wrap: on
line source
#! /bin/bash #convert LV output to circos input # LV out: variantId chromosome begin end varType reference alleleSeq xRef # bin results and get snp density (value 0-1, numvars/binsize) # determine highest value seen and write "max = <maxval>" to max.txt infile=$1 outfile_wg=$2 outfile_chr=$3 echo "converting variations file" #whole-chromosome file (bigger bins) awk 'BEGIN{ FS="\t"; OFS=" "; maxval=0; binsize=5120000 }{ if (FNR>1 && index($1,"#")==0 && $0!="" && index($1,">")==0){ density[$2,int($3/binsize)]++; } }END{ for (i in density){ numbins++; avg+=(density[i]/binsize); split(i, separate, "\034") # separate[1] contains chr, separate[2] contains bin print separate[1], separate[2]*binsize, (separate[2]+1)*binsize, density[i]/binsize if(density[i]/binsize > maxval) maxval=density[i]/binsize } avg/=numbins print "max = " maxval > "maxval_orig.txt" if(avg*5 < maxval) maxval = avg*5 print "avg = " avg > "avg.txt" print "max = " maxval > "maxval.txt" print maxval > "maxtmp" }' $infile > $outfile_wg.tmp2 maxval=`cat maxtmp` echo "maxval: $maxval" #second pass, anything greater than maxval is set to maxval awk 'BEGIN{ FS="\t"; OFS=" "; maxval="'"$maxval"'" }{ if (FNR>1 && $4 > maxval){ print $1,$2,$3,maxval } else print $0 }END{ }' $outfile_wg.tmp2 > $outfile_wg.tmp sed -i 's/chr/hs/g' $outfile_wg.tmp sed -i '/hsM/d' $outfile_wg.tmp sort -d -k1,2 $outfile_wg.tmp > $outfile_wg #per-chromosome file (smaller bins) awk 'BEGIN{ FS="\t"; OFS=" "; binsize=512000 }{ if (FNR>1){ density[$2,int($3/binsize)]++; } }END{ for (i in density){ numbins["all"]++; avg["all"]+=(density[i]/binsize); split(i, separate, "\034") # separate[1] contains chr, separate[2] contains bin chr=separate[1] avg[chr]+= (density[i]/binsize); numbins[chr]++ print separate[1], separate[2]*binsize, (separate[2]+1)*binsize, density[i]/binsize if(density[i]/binsize > maxval["all"]) maxval["all"]=density[i]/binsize if(density[i]/binsize > maxval[chr]) maxval[chr]=density[i]/binsize } for (i in avg){ avg[i]/=numbins[i] if(avg[i]*3 < maxval[i]) maxval[i] = avg[i]*3 print "avg = " avg[i] > "snp_avg_"i".txt" print "max = " maxval[i] > "snp_maxval_"i".txt" } }' $infile > $outfile_chr.tmp sed -i 's/chr/hs/g' $outfile_chr.tmp sed -i '/hsM/d' $outfile_chr.tmp sort -d -k1,2 $outfile_chr.tmp > $outfile_chr