Mercurial > repos > erinija > dnp_correlation_between_profiles
comparison dnp-compute-composite.sh @ 0:b45de206654d draft default tip
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
author | erinija |
---|---|
date | Fri, 01 May 2020 12:08:23 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:b45de206654d |
---|---|
#!/bin/sh
#
# dnp-compute-composite.sh - append composite dinucleotide columns to a table.
#
# Usage:
#   sh dnp-compute-composite.sh INPUT.tabular OUTPUT.tabular
#
# INPUT is a tab-separated table whose first line is a header naming the
# dinucleotide columns. The columns are located by header name, so their
# order in the file does not matter. OUTPUT is the input table with four
# columns appended, in this order:
#   WW = AA+AT+TA+TT    SS = CC+CG+GC+GG
#   RR = AA+AG+GA+GG    YY = CC+CT+TC+TT
#
# Exit status: 0 on success; 1 on wrong argument count, unreadable or empty
# input, a missing required column, or a failure to write the output.

set -u

# Print the help text to stdout (same channel the script has always used).
usage() {
  printf '%s\n' \
    "" \
    " CALL " \
    " sh dnp-compute-composite.sh compute-composite-input.tabular compute-composite-output.tabular" \
    "" \
    " INPUT " \
    " compute-composite-input.tabular - dinucleotide frequency profiles containing all 16 dinucleotides" \
    "" \
    " OUTPUT " \
    " compute-composite-output.tabular - original input with WW SS RR YY columns added to the output" \
    "" \
    " DESCRIPTION" \
    " Composite dinucleotides weak/weak WW (A or T) , strong/strong SS (G or C), purine/purine RR (A or G), " \
    " and pyrimidine/pyrimidine YY (C or T) are generalized dinucleotide frequency patterns in nucleosome sequences." \
    " Given a tabular input file with all 16 dinucleotides the composite patterns are computed as follows" \
    " WW=AA+AT+TA+TT, SS=CC+CG+GC+GG, RR=AG+GA+AA+GG, YY=CC+TT+CT+TC and their columns added to the original table." \
    "" \
    " Example columns of an input table" \
    " pos AA AC AG AT ..." \
    " -73 0.08616 0.08034 0.07146 0.05934 ..." \
    " -72 0.11976 0.04966 0.03412 0.07274 ..." \
    " -71 0.07202 0.08882 0.18912 0.0462 ..." \
    " ... " \
    " Example columns of computed composites " \
    " ... WW SS RR" \
    " ... 0.27644 0.1614 0.29494" \
    " ... 0.36788 0.1091 0.29428" \
    " ... 0.21406 0.12566 0.34432" \
    " ..."
}

# Report a fatal error on stderr and stop.
die() {
  printf '%s\n' "dnp-compute-composite.sh: $*" >&2
  exit 1
}

if test "$#" -ne 2; then
  usage
  exit 1
fi

name=$1
out=$2

test -f "$name" || die "input file not found: $name"
test -r "$name" || die "input file is not readable: $name"

# Build the result in a private temporary file. This keeps the working
# directory clean, is safe when several instances run at once, and allows
# OUTPUT to be the same path as INPUT (the input is fully read before the
# output is opened for writing).
tmp=$(mktemp "${TMPDIR:-/tmp}/dnp-composite.XXXXXX") || die "cannot create temporary file"
trap 'rm -f -- "$tmp"' EXIT
trap 'exit 1' HUP INT TERM

# Single pass over the table:
#   line 1  - map header names to column numbers, verify that every
#             dinucleotide needed by the four composites is present, and
#             emit the header extended with WW SS RR YY;
#   others  - emit the line extended with the four sums.
# Fields are split on tabs only, so the column numbers always agree with the
# tab-separated layout of the data. If a header name occurs more than once,
# its first occurrence is used.
# The input is passed on stdin so that a file name containing "=" is never
# mistaken by awk for a variable assignment.
awk '
  function fail(msg) {
    print "dnp-compute-composite.sh: " msg | "cat 1>&2"
    close("cat 1>&2")
    status = 1
  }

  BEGIN { FS = "\t"; OFS = "\t"; status = 0 }

  NR == 1 {
    for (i = 1; i <= NF; i++)
      if (!($i in col))
        col[$i] = i

    n = split("AA AT TA TT CC CG GC GG AG GA CT TC", need, " ")
    missing = ""
    for (k = 1; k <= n; k++)
      if (!(need[k] in col))
        missing = missing " " need[k]

    if (missing != "") {
      fail("missing required column(s):" missing)
      exit 1
    }

    aa = col["AA"]; at = col["AT"]; ta = col["TA"]; tt = col["TT"]
    cc = col["CC"]; cg = col["CG"]; gc = col["GC"]; gg = col["GG"]
    ag = col["AG"]; ga = col["GA"]; ct = col["CT"]; tc = col["TC"]

    print $0, "WW", "SS", "RR", "YY"
    next
  }

  {
    print $0, ($aa + $at + $ta + $tt), ($cc + $cg + $gc + $gg), ($aa + $ag + $ga + $gg), ($cc + $ct + $tc + $tt)
  }

  END {
    if (NR == 0 && status == 0)
      fail("input file is empty")
    exit status
  }
' < "$name" > "$tmp" || exit 1

# Copy (rather than move) so that the redirection semantics of the original
# script are kept: OUTPUT may be any writable path, including a device file.
cat -- "$tmp" > "$out" || die "cannot write output file: $out"