Mercurial > repos > erinija > dnp_correlation_between_profiles
comparison dnp-symmetrize.sh @ 0:b45de206654d draft default tip
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
author | erinija |
---|---|
date | Fri, 01 May 2020 12:08:23 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:b45de206654d |
---|---|
#!/bin/sh
#
# dnp-symmetrize.sh - symmetrize dinucleotide frequency profiles.
#
# Usage:
#   sh dnp-symmetrize.sh symmetrize-input.tabular symmetrize-output.tabular
#
# Arguments:
#   $1  input table; whitespace-separated, first line is a header holding a
#       "pos" column plus "<name>.f" / "<name>.r" column pairs
#   $2  output table; a "pos" column followed by one column per <name>
#
# For every <name>, output row i is the mean of row i of the "<name>.f"
# column and row (n - i + 1) of the "<name>.r" column, i.e. the forward
# profile averaged with the complementary profile read bottom-up.
#
# Exit status: 0 on success, 1 on wrong argument count or unusable input.
#
# All scratch files live in a private temporary directory that is removed
# on exit, so nothing in the caller's working directory is read or deleted.

set -eu

# Print the help text to stdout.
usage() {
  echo ""
  echo " CALL "
  echo " sh dnp-symmetrize.sh symmetrize-input.tabular symmetrize-output.tabular"
  echo ""
  echo " INPUT "
  echo " symmetrize-input.tabular - selected length dinucleotide frequency profiles from forward and complementary sequences"
  echo ""
  echo " OUTPUT "
  echo " symmetrize-output.tabular - symmetrized output"
  echo ""
  echo " DESCRIPTION"
  echo " Symmetrization of dinucleotide profiles reveals patterns with respect to nucleosome's dyad position."
  echo " Frequency profiles of each dinucleotide derived from forward and complementary sequences are superimposed "
  echo " with respect to a center of the dyad by averaging forward and reverse complement profiles at an identified"
  echo " nucleosome's location. A first column contains position number relative to the dyad position."
  echo ""
  echo " Example of an input table"
  echo " pos AA.f AC.f AG.f AT.f CA.f CC.f CG.f CT.f ..."
  echo " 20 0.100200 0.084720 0.077200 0.072480 0.066160 0.044160 0.004560 0.060720"
  echo " 21 0.172440 0.024800 0.002080 0.101240 0.131840 0.007200 0.000320 0.095920"
  echo " 22 0.077160 0.096240 0.314320 0.047360 0.012040 0.028560 0.011840 0.013680"
  echo " ..."
  echo ""
  echo " Example of a few columns of an output table"
  echo " pos AA AC AG AT ..."
  echo " -73 0.08616 0.08034 0.07146 0.05934 ..."
  echo " -72 0.11976 0.04966 0.03412 0.07274 ..."
  echo " -71 0.07202 0.08882 0.18912 0.0462 ..."
  echo " ... "
}

# Print a diagnostic to stderr and abort with status 1.
die() {
  printf 'dnp-symmetrize.sh: %s\n' "$*" >&2
  exit 1
}

# Print the 1-based index of the header field equal to $1 in file $2.
# Prints nothing when the header has no such field.
column_index() {
  awk -v name="$1" '
    {
      for (i = 1; i <= NF; i++)
        if ($i == name) {
          print i
          break
        }
      exit    # only the header line is inspected
    }
  ' "$2"
}

if test "$#" -ne 2; then
  usage
  exit 1
fi

# input file name
name=$1
# output file
out=$2

[ -r "$name" ] || die "cannot read input file: $name"

tmpdir=$(mktemp -d) || die "cannot create temporary directory"
trap 'rm -rf -- "$tmpdir"' EXIT
# Make sure the EXIT trap also runs when the script is interrupted.
trap 'exit 1' HUP INT TERM

# Compute centered sequence positions.
# The first data row is mapped to -73 and every later row keeps its distance
# from the first one, so a 147-row table runs from -73 to 73.
posi=$(column_index pos "$name")
[ -n "$posi" ] || die "no 'pos' column in header of $name"

awk -v k="$posi" '
  NR == 1 { print "pos"; next }
  NF == 0 { next }                       # ignore blank lines
  !have_first { first = $k; have_first = 1 }
  { print $k - (first - 1) - 74 }
' "$name" > "$tmpdir/positions"

# Collect each dinucleotide name once, taken from the "<name>.f" header fields.
awk '
  {
    for (i = 1; i <= NF; i++)
      if ($i ~ /\.f$/) {
        di = substr($i, 1, length($i) - 2)
        if (di != "" && !(di in seen)) {
          seen[di] = 1
          print di
        }
      }
    exit
  }
' "$name" > "$tmpdir/names"
[ -s "$tmpdir/names" ] || die "no '<dinucleotide>.f' columns in header of $name"

# Symmetrize all dinucleotides that are in the table.
while IFS= read -r di; do
  # column numbers of the forward and complementary profiles
  i1=$(column_index "$di.f" "$name")
  i2=$(column_index "$di.r" "$name")
  [ -n "$i2" ] || die "column '$di.r' missing from header of $name"

  {
    printf '%s\n' "$di"
    # Average forward row i with complementary row n-i+1 (reversed order).
    awk -v f="$i1" -v r="$i2" '
      NR == 1 || NF == 0 { next }
      { n++; fwd[n] = $f; rev[n] = $r }
      END {
        for (i = 1; i <= n; i++) {
          avg = (fwd[i] + rev[n - i + 1]) / 2
          print avg
        }
      }
    ' "$name"
  } > "$tmpdir/cp.$di"
done < "$tmpdir/names"

# Columns are joined in glob (sorted-name) order of the cp.* files.
paste "$tmpdir/positions" "$tmpdir"/cp.* > "$out"