annotate dnp-compute-composite.sh @ 0:b45de206654d draft default tip

"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
author erinija
date Fri, 01 May 2020 12:08:23 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
b45de206654d "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
#!/bin/sh
#
# dnp-compute-composite.sh - append composite dinucleotide columns to a table.
#
# Usage:
#   sh dnp-compute-composite.sh INPUT.tabular OUTPUT.tabular
#
# INPUT is a tab-separated table whose first line is a header naming the
# dinucleotide columns. OUTPUT is INPUT with four columns appended:
#   WW = AA+AT+TA+TT   SS = CC+CG+GC+GG
#   RR = AA+AG+GA+GG   YY = CC+CT+TC+TT
#
# Exit status: 0 on success; 1 on wrong argument count (usage is printed),
# unreadable or empty input, or when a required header column is missing.

if test "$#" -ne 2; then
  echo ""
  echo " CALL "
  echo " sh dnp-compute-composite.sh compute-composite-input.tabular compute-composite-output.tabular"
  echo ""
  echo " INPUT "
  echo " compute-composite-input.tabular - dinucleotide frequency profiles containing all 16 dinucleotides"
  echo ""
  echo " OUTPUT "
  echo " compute-composite-output.tabular - original input with WW SS RR YY columns added to the output"
  echo ""
  echo " DESCRIPTION"
  echo " Composite dinucleotides weak/weak WW (A or T) , strong/strong SS (G or C), purine/purine RR (A or G), "
  echo " and pyrimidine/pyrimidine YY (C or T) are generalized dinucleotide frequency patterns in nucleosome sequences."
  echo " Given a tabular innput file with all 16 dinucleotides the composite patterns are computed as follows"
  echo " WW=AA+AT+TA+TT, SS=CC+CG+GC+GG, RR=AG+GA+AA+GG, YY=CC+TT+CT+TC and their columns added to the original table."
  echo ""
  echo " Example columns of an input table"
  echo " pos AA AC AG AT ..."
  echo " -73 0.08616 0.08034 0.07146 0.05934 ..."
  echo " -72 0.11976 0.04966 0.03412 0.07274 ..."
  echo " -71 0.07202 0.08882 0.18912 0.0462 ..."
  echo " ... "
  echo " Example columns of computed composites "
  echo " ... WW SS RR"
  echo " ... 0.27644 0.1614 0.29494"
  echo " ... 0.36788 0.1091 0.29428"
  echo " ... 0.21406 0.12566 0.34432"
  echo " ..."

  exit 1
fi

name=$1
out=$2

if test ! -r "$name"; then
  echo "Error: cannot read input file: $name" >&2
  exit 1
fi

# Build the result in a private scratch file so that (a) nothing is created
# under predictable names in the working directory, (b) OUTPUT is left
# untouched when validation fails, and (c) INPUT and OUTPUT may be the same
# path without the input being truncated before it is read.
tmp=$(mktemp "${TMPDIR:-/tmp}/dnp-composite.XXXXXX") || exit 1
trap 'rm -f "$tmp"' EXIT
# Turn fatal signals into a normal exit so the EXIT trap above still runs.
trap 'exit 1' HUP INT TERM

# Single pass over the table. Fields are split on TAB only, matching the
# tabular format of the output. The input is fed on stdin so that a file
# name containing "=" or starting with "-" is never interpreted by awk.
awk -F '\t' '
BEGIN { OFS = "\t" }

# Header line: map column name -> index (first occurrence wins) and make
# sure every dinucleotide used by the four composites is present.
NR == 1 {
  for (i = 1; i <= NF; i++)
    if (!($i in col))
      col[$i] = i

  n = split("AA AG AT CC CG CT GA GC GG TA TC TT", need, " ")
  missing = ""
  for (k = 1; k <= n; k++)
    if (!(need[k] in col))
      missing = missing " " need[k]

  if (missing != "") {
    print "Error: input header lacks required column(s):" missing | "cat 1>&2"
    failed = 1
    exit 1
  }

  print $0, "WW", "SS", "RR", "YY"
  next
}

# Data lines: the original line is passed through unchanged and the four
# sums are appended as extra tab-separated fields.
{
  print $0,
    $(col["AA"]) + $(col["AT"]) + $(col["TA"]) + $(col["TT"]),
    $(col["CC"]) + $(col["CG"]) + $(col["GC"]) + $(col["GG"]),
    $(col["AA"]) + $(col["AG"]) + $(col["GA"]) + $(col["GG"]),
    $(col["CC"]) + $(col["CT"]) + $(col["TC"]) + $(col["TT"])
}

END {
  if (failed)
    exit 1
  if (NR == 0) {
    print "Error: input file is empty" | "cat 1>&2"
    exit 1
  }
}
' < "$name" > "$tmp" || exit 1

# Copy (rather than move) so an already existing OUTPUT keeps its inode and
# permissions.
cat "$tmp" > "$out" || exit 1