comparison dnp-compute-composite.sh @ 0:b45de206654d draft default tip

"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
author erinija
date Fri, 01 May 2020 12:08:23 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:b45de206654d
#!/bin/sh
#
# dnp-compute-composite.sh - append composite dinucleotide columns to a table.
#
# Usage:
#   sh dnp-compute-composite.sh INPUT.tabular OUTPUT.tabular
#
# INPUT is a tab-separated table whose first line is a header naming the
# dinucleotide columns. OUTPUT receives every input line unchanged, followed
# by four extra tab-separated columns:
#   WW = AA+AT+TA+TT    SS = CC+CG+GC+GG
#   RR = AA+AG+GA+GG    YY = CC+CT+TC+TT
#
# Exit status: 0 on success; 1 on wrong argument count, unreadable or empty
# input, missing required columns, or a failure to write OUTPUT.
#
# NOTE: INPUT and OUTPUT must be different files; OUTPUT is opened for
# writing while INPUT is still being read.

# Print the help text on stdout.
usage() {
  printf '%s\n' \
    "" \
    " CALL " \
    " sh dnp-compute-composite.sh compute-composite-input.tabular compute-composite-output.tabular" \
    "" \
    " INPUT " \
    " compute-composite-input.tabular - dinucleotide frequency profiles containing all 16 dinucleotides" \
    "" \
    " OUTPUT " \
    " compute-composite-output.tabular - original input with WW SS RR YY columns added to the output" \
    "" \
    " DESCRIPTION" \
    " Composite dinucleotides weak/weak WW (A or T) , strong/strong SS (G or C), purine/purine RR (A or G), " \
    " and pyrimidine/pyrimidine YY (C or T) are generalized dinucleotide frequency patterns in nucleosome sequences." \
    " Given a tabular input file with all 16 dinucleotides the composite patterns are computed as follows" \
    " WW=AA+AT+TA+TT, SS=CC+CG+GC+GG, RR=AG+GA+AA+GG, YY=CC+TT+CT+TC and their columns added to the original table." \
    "" \
    " Example columns of an input table" \
    " pos AA AC AG AT ..." \
    " -73 0.08616 0.08034 0.07146 0.05934 ..." \
    " -72 0.11976 0.04966 0.03412 0.07274 ..." \
    " -71 0.07202 0.08882 0.18912 0.0462 ..." \
    " ... " \
    " Example columns of computed composites " \
    " ... WW SS RR" \
    " ... 0.27644 0.1614 0.29494" \
    " ... 0.36788 0.1091 0.29428" \
    " ... 0.21406 0.12566 0.34432" \
    " ..."
}

# Report a fatal error on stderr and stop.
die() {
  printf '%s\n' "dnp-compute-composite.sh: $*" >&2
  exit 1
}

if test "$#" -ne 2; then
  usage
  exit 1
fi

name=$1
out=$2

[ -r "$name" ] || die "cannot read input file: $name"

# One awk pass does everything: locate the needed columns in the header,
# refuse to continue if any is absent, then emit each line plus the four sums.
#  - The input is supplied on stdin so that a file name containing '=' or
#    starting with '-' is never misread by awk as an assignment or option.
#  - The output path travels through the environment (not -v) because awk
#    would otherwise expand backslash escapes in it.
#  - awk opens the output only after the header has been validated, so a
#    bad input never truncates or creates the output file.
COMPOSITE_OUT="$out" awk -F '\t' '
  BEGIN {
    OFS = "\t"
    dest = ENVIRON["COMPOSITE_OUT"]
    n_required = split("AA AT TA TT CC CG GC GG AG GA CT TC", required, " ")
  }

  # Header line: map every column label to its position and validate.
  NR == 1 {
    # Walk right to left so the leftmost of any duplicated label wins.
    for (i = NF; i >= 1; i--) {
      label = $i                      # work on a copy; $0 must stay intact
      gsub(/^ +| +$/, "", label)      # tolerate space-padded header cells
      col[label] = i
    }

    missing = ""
    for (k = 1; k <= n_required; k++)
      if (!(required[k] in col))
        missing = missing " " required[k]
    if (missing != "") {
      print "dnp-compute-composite.sh: missing required column(s):" missing | "cat 1>&2"
      exit 1
    }

    aa = col["AA"]; at = col["AT"]; ta = col["TA"]; tt = col["TT"]
    cc = col["CC"]; cg = col["CG"]; gc = col["GC"]; gg = col["GG"]
    ag = col["AG"]; ga = col["GA"]; ct = col["CT"]; tc = col["TC"]

    print $0, "WW", "SS", "RR", "YY" > dest
    next
  }

  # Data lines: original line followed by WW, SS, RR and YY.
  {
    print $0,
          $aa + $at + $ta + $tt,
          $cc + $cg + $gc + $gg,
          $aa + $ag + $ga + $gg,
          $cc + $ct + $tc + $tt > dest
  }

  END {
    # No header at all means there is nothing to validate or compute.
    if (NR == 0) {
      print "dnp-compute-composite.sh: input file is empty" | "cat 1>&2"
      exit 1
    }
  }
' < "$name" || exit 1