comparison dnp-symmetrize.sh @ 0:b45de206654d draft default tip

"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
author erinija
date Fri, 01 May 2020 12:08:23 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:b45de206654d
#!/bin/sh
#
# dnp-symmetrize.sh - symmetrize dinucleotide frequency profiles.
#
# Usage:
#   sh dnp-symmetrize.sh symmetrize-input.tabular symmetrize-output.tabular
#
# Input:  a whitespace-separated table whose header row names a "pos" column
#         and, for every dinucleotide XX, a forward column "XX.f" and a
#         complementary column "XX.r".
# Output: a tab-separated table (written with paste) holding a recomputed
#         "pos" column followed by one column per dinucleotide, ordered by
#         the shell's glob sorting of the dinucleotide names.
#
# For each dinucleotide the forward column is averaged, row by row, with the
# complementary column read from the last data row up to the first.
#
# Exit status: 0 on success, 1 on a usage error or malformed input.

# Data row (1-based) that is assigned position 0 in the output; with this
# value the first data row is reported as position -73.
center=74

# die MESSAGE... - print a diagnostic on stderr and stop with status 1.
die() {
  printf 'dnp-symmetrize.sh: %s\n' "$*" >&2
  exit 1
}

# usage - print the help text on stdout.
usage() {
  printf '%s\n' \
    "" \
    " CALL " \
    " sh dnp-symmetrize.sh symmetrize-input.tabular symmetrize-output.tabular" \
    "" \
    " INPUT " \
    " symmetrize-input.tabular - selected length dinucleotide frequency profiles from forward and complementary sequences" \
    "" \
    " OUTPUT " \
    " symmetrize-output.tabular - symmetrized output" \
    "" \
    " DESCRIPTION" \
    " Symmetrization of dinucleotide profiles reveals patterns with respect to nucleosome's dyad position." \
    " Frequency profiles of each dinucleotide derived from forward and complementary sequences are superimposed " \
    " with respect to a center of the dyad by averaging forward and reverse complement profiles at an identified" \
    " nucleosome's location. A first column contains position number relative to the dyad position." \
    "" \
    " Example of an input table" \
    " pos AA.f AC.f AG.f AT.f CA.f CC.f CG.f CT.f ..." \
    " 20 0.100200 0.084720 0.077200 0.072480 0.066160 0.044160 0.004560 0.060720" \
    " 21 0.172440 0.024800 0.002080 0.101240 0.131840 0.007200 0.000320 0.095920" \
    " 22 0.077160 0.096240 0.314320 0.047360 0.012040 0.028560 0.011840 0.013680" \
    " ..." \
    "" \
    " Example of a few columns of an output table" \
    " pos AA AC AG AT ..." \
    " -73 0.08616 0.08034 0.07146 0.05934 ..." \
    " -72 0.11976 0.04966 0.03412 0.07274 ..." \
    " -71 0.07202 0.08882 0.18912 0.0462 ..." \
    " ... "
}

# column_index FILE NAME - print the 1-based index of the first header field
# of FILE equal to NAME; prints nothing when no header field matches.
column_index() {
  awk -v name="$2" '
    NR == 1 {
      for (i = 1; i <= NF; i++)
        if ($i == name) {
          print i
          exit
        }
      exit
    }
  ' "$1"
}

if test "$#" -ne 2; then
  usage
  exit 1
fi

# input file name
name=$1
# output file
out=$2

[ -r "$name" ] || die "cannot read input file: $name"

# All scratch files live in a private directory so that unrelated temp*/cp.*
# files in the working directory are neither read nor deleted.
tmpdir=$(mktemp -d) || die "cannot create a temporary directory"
trap 'rm -rf -- "$tmpdir"' EXIT
trap 'exit 1' HUP INT TERM

# compute centered sequence positions: the first data row becomes 1 - center
posi=$(column_index "$name" pos)
[ -n "$posi" ] || die "no \"pos\" column in the header of $name"

{
  printf '%s\n' "pos"
  awk -v k="$posi" -v center="$center" '
    NR == 2 { first = $k }
    NR > 1  { print $k - (first - 1) - center }
  ' "$name"
} > "$tmpdir/positions"

# get the dinucleotides from the first line: every header field that ends in
# a literal ".f" or ".r", with the suffix removed and duplicates dropped
dinucleotides=$(
  awk '
    NR == 1 {
      for (i = 1; i <= NF; i++)
        if ($i ~ /\.[fr]$/)
          print substr($i, 1, length($i) - 2)
      exit
    }
  ' "$name" | sort -u
)
[ -n "$dinucleotides" ] || die "no .f/.r profile columns in the header of $name"

# Word splitting of the newline-separated list is intentional here.
for di in $dinucleotides; do
  # column numbers of the forward and complementary profiles
  i1=$(column_index "$name" "$di.f")
  i2=$(column_index "$name" "$di.r")
  [ -n "$i1" ] || die "missing column $di.f in $name"
  [ -n "$i2" ] || die "missing column $di.r in $name"

  {
    printf '%s\n' "$di"
    # Data rows are 2..NR; row i of the forward profile is paired with row
    # NR - i + 2 of the complementary profile, i.e. that column reversed.
    awk -v f="$i1" -v r="$i2" '
      NR > 1 { fwd[NR] = $f; rev[NR] = $r }
      END {
        for (i = 2; i <= NR; i++)
          print (fwd[i] + rev[NR - i + 2]) / 2
      }
    ' "$name"
  } > "$tmpdir/cp.$di"
done

paste "$tmpdir/positions" "$tmpdir"/cp.* > "$out" || die "cannot write output file: $out"