Mercurial > repos > erinija > dnp_correlation_between_profiles
annotate dnp-compute-composite.sh @ 0:b45de206654d draft default tip
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
author | erinija |
---|---|
date | Fri, 01 May 2020 12:08:23 +0000 |
parents | |
children |
rev | line source |
---|---|
0
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
1 #!/bin/sh |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
2 |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
3 if test "$#" -ne 2; then |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
4 echo "" |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
5 echo " CALL " |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
6 echo " sh dnp-compute-composite.sh compute-composite-input.tabular compute-composite-output.tabular" |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
7 echo "" |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
8 echo " INPUT " |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
9 echo " compute-composite-input.tabular - dinucleotide frequency profiles containing all 16 dinucleotides" |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
10 echo "" |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
11 echo " OUTPUT " |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
12 echo " compute-composite-output.tabular - original input with WW SS RR YY columns added to the output" |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
13 echo "" |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
14 echo " DESCRIPTION" |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
15 echo " Composite dinucleotides weak/weak WW (A or T) , strong/strong SS (G or C), purine/purine RR (A or G), " |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
16 echo " and pyrimidine/pyrimidine YY (C or T) are generalized dinucleotide frequency patterns in nucleosome sequences." |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
17 echo " Given a tabular input file with all 16 dinucleotides the composite patterns are computed as follows" |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
18 echo " WW=AA+AT+TA+TT, SS=CC+CG+GC+GG, RR=AG+GA+AA+GG, YY=CC+TT+CT+TC and their columns added to the original table." |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
19 echo "" |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
20 echo " Example columns of an input table" |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
21 echo " pos AA AC AG AT ..." |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
22 echo " -73 0.08616 0.08034 0.07146 0.05934 ..." |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
23 echo " -72 0.11976 0.04966 0.03412 0.07274 ..." |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
24 echo " -71 0.07202 0.08882 0.18912 0.0462 ..." |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
25 echo " ... " |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
26 echo " Example columns of computed composites " |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
27 echo " ... WW SS RR" |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
28 echo " ... 0.27644 0.1614 0.29494" |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
29 echo " ... 0.36788 0.1091 0.29428" |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
30 echo " ... 0.21406 0.12566 0.34432" |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
31 echo " ..." |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
32 |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
33 exit 1 |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
34 fi |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
35 |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
36 name=$1 |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
37 out=$2 |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
38 |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
39 ## TODO: test that all required dinucleotide columns are in the file |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
40 ## if not, stop the execution |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
41 |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
42 # add the columns of the composite profiles (WW, SS, RR, YY) at the end |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
43 ## WW = AA+TT+AT+TA |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
44 i1=`awk '{ for (i=1; i<=NF; i++) if($i=="AA") print i; exit}' ${name}` |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
45 i2=`awk '{ for (i=1; i<=NF; i++) if($i=="AT") print i; exit}' ${name}` |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
46 i3=`awk '{ for (i=1; i<=NF; i++) if($i=="TA") print i; exit}' ${name}` |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
47 i4=`awk '{ for (i=1; i<=NF; i++) if($i=="TT") print i; exit}' ${name}` |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
48 echo WW > ww |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
49 cut -f${i1},${i2},${i3},${i4} ${name}| tail -n +2 | awk '{print $1+$2+$3+$4}' >> ww |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
50 |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
51 ## SS = CC+CG+GC+GG |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
52 i1=`awk '{ for (i=1; i<=NF; i++) if($i=="CC") print i; exit}' ${name}` |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
53 i2=`awk '{ for (i=1; i<=NF; i++) if($i=="CG") print i; exit}' ${name}` |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
54 i3=`awk '{ for (i=1; i<=NF; i++) if($i=="GC") print i; exit}' ${name}` |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
55 i4=`awk '{ for (i=1; i<=NF; i++) if($i=="GG") print i; exit}' ${name}` |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
56 echo SS > ss |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
57 cut -f${i1},${i2},${i3},${i4} ${name}| tail -n +2 | awk '{print $1+$2+$3+$4}' >> ss |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
58 |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
59 ## RR = AA+AG+GA+GG |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
60 i1=`awk '{ for (i=1; i<=NF; i++) if($i=="AA") print i; exit}' ${name}` |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
61 i2=`awk '{ for (i=1; i<=NF; i++) if($i=="AG") print i; exit}' ${name}` |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
62 i3=`awk '{ for (i=1; i<=NF; i++) if($i=="GA") print i; exit}' ${name}` |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
63 i4=`awk '{ for (i=1; i<=NF; i++) if($i=="GG") print i; exit}' ${name}` |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
64 echo RR > rr |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
65 cut -f${i1},${i2},${i3},${i4} ${name}| tail -n +2 | awk '{print $1+$2+$3+$4}' >> rr |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
66 |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
67 ## YY = CC+CT+TC+TT |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
68 i1=`awk '{ for (i=1; i<=NF; i++) if($i=="CC") print i; exit}' ${name}` |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
69 i2=`awk '{ for (i=1; i<=NF; i++) if($i=="CT") print i; exit}' ${name}` |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
70 i3=`awk '{ for (i=1; i<=NF; i++) if($i=="TC") print i; exit}' ${name}` |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
71 i4=`awk '{ for (i=1; i<=NF; i++) if($i=="TT") print i; exit}' ${name}` |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
72 echo YY > yy |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
73 cut -f${i1},${i2},${i3},${i4} ${name}| tail -n +2 |awk '{print $1+$2+$3+$4}' >> yy |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
74 |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
75 paste ${name} ww ss rr yy > ${out} |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
76 |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
77 rm ww ss rr yy |