annotate dnp-fourier-transform.sh @ 0:b45de206654d draft default tip

"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
author erinija
date Fri, 01 May 2020 12:08:23 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
b45de206654d "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
1 #!/bin/sh
b45de206654d "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
2
b45de206654d "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
# Show the built-in help on stdout and stop with status 1 unless exactly five
# arguments were supplied. A single printf call is used so that every help
# line is emitted verbatim, independent of how the shell's echo treats its
# arguments.
if [ "$#" -ne 5 ]; then
  printf '%s\n' \
    "" \
    " CALL " \
    " sh dnp-fourier-transform.sh difreq.profiles.tabular difreq.periodogram.tabular normalization winsize trim" \
    "" \
    " INPUT " \
    " difreq.profiles.tabular - dinucleotide frequency profiles-patterns" \
    " normalization - type of normalization, suggested is quadratic =2" \
    " winsize - size of averaging window, suggested optimal value =3" \
    " trim - how many noisy points to remove from both ends of the profile suggested =4" \
    "" \
    " OUTPUT " \
    " difreq.periodogram.tabular - periodogram " \
    "" \
    " DESCRIPTION" \
    " This is a shell wrapper of the call to dnp-fourier which computes a periodogram " \
    " of a series given as a numerical column and has parameters:" \
    " dnp-fourier -f input -o output -n {normalization 0|1|2} -l length_of_smoothing_window -t {type_of_output 1|2|3}" \
    "" \
    " The parameters control a type of normalization and output:" \
    " Normalization" \
    " 0 base normalization subtracts mean" \
    " 1 linear normalization removes linear trend " \
    " 2 quadratic normalization removes quadratic trend" \
    " Output type" \
    " 1 normalization outputs normalized original series" \
    " 2 smoothing outputs smoothed original series" \
    " 3 Fourier transform outputs periodogram" \
    "" \
    " Fourier transform has to be applied on symmetrized, nonsmoothed data, since it performs" \
    " smoothing internally. Dinucleotide frequency profiles usually have a quadratic gradient," \
    " therefore a quadratic normalization is used by default. Fourier transform is applied to every " \
    " column of the input table. In the output the first column contains period as a number of base pairs." \
    "" \
    " Example of input table" \
    " pos AA AC AG AT CA CC CG CT GA GC GG GT TA TC TG TT WW SS RR YY" \
    " -73 0.05664 0.0657 0.06966 0.03644 0.08026 0.09484 0.0362 0.09086 0.07084 0.04032 0.07318 0.06466 0.03862 0.06838 0.05722 0.05602 0.18772 0.24454 0.27032 0.3101" \
    " -72 0.0668 0.06476 0.0753 0.04282 0.07022 0.08034 0.03534 0.081 0.07222 0.03512 0.06774 0.0598 0.03934 0.07496 0.06628 0.06784 0.2168 0.21854 0.28206 0.30414" \
    " -71 0.063 0.0621 0.07668 0.04316 0.06926 0.07264 0.03316 0.07992 0.07546 0.03498 0.07306 0.06406 0.04182 0.07374 0.06874 0.06812 0.2161 0.21384 0.2882 0.29442" \
    " -70 0.0624 0.0643 0.07214 0.04424 0.0642 0.06998 0.03472 0.07718 0.0723 0.03982 0.07472 0.06818 0.04282 0.07674 0.06864 0.06754 0.217 0.21924 0.28156 0.29144" \
    " -69 0.0622 0.06456 0.074 0.0426 0.0661 0.07114 0.03414 0.08016 0.0703 0.03786 0.07118 0.06754 0.0421 0.07712 0.06988 0.06904 0.21594 0.21432 0.27768 0.29746" \
    " ..." \
    "" \
    " Example of output" \
    " period AA AC AG ..." \
    " 2.100000 0.055962 0.061351 0.059462 ..." \
    " 2.200000 0.031410 0.027762 0.030298 ..." \
    " ..." \
    "" \
    " REQUIREMENT" \
    " dnp-fourier installed" \
    " conda install -c bioconda dnp-fourier" \
    ""

  exit 1
fi
b45de206654d "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
60
b45de206654d "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
# ---------------------------------------------------------------------------
# Positional parameters
#   $1 input          table of profiles; its first line is a header. The
#                     "pos" label is dropped from it and every remaining
#                     name is treated as a column to transform.
#   $2 output         file that receives the combined result table
#   $3 normalization  handed to dnp-fourier as -n
#   $4 smoothingw     handed to dnp-fourier as -l
#   $5 trim           number of data rows discarded from each end of a column
# ---------------------------------------------------------------------------
input=$1
output=$2
normalization=$3
smoothingw=$4
trim=$5

# Fail early with a clear message instead of letting head/awk report a
# missing file once per column later on.
if [ ! -r "${input}" ]; then
  echo "dnp-fourier-transform: cannot read input file '${input}'" >&2
  exit 1
fi

# trim is used in shell arithmetic and as a line count, so it has to be a
# plain non-negative decimal integer. A leading zero is rejected because
# $((...)) would interpret such a value as octal.
case ${trim} in
  '' | *[!0-9]* | 0?*)
    echo "dnp-fourier-transform: trim must be a non-negative integer, got '${trim}'" >&2
    exit 1
    ;;
esac

# 1-based number of the first data row that is kept (for `tail -n +N`).
trim1=$((trim + 1))

# Output type 3 selects the Fourier transform (periodogram) output.
outputtype=3

# Name of the executable that performs the transform.
call=dnp-fourier

# Column names taken from the header line, with the "pos" label removed.
dinucleotides=$(head -n 1 < "${input}" | sed 's/pos//')
if [ -z "${dinucleotides}" ]; then
  echo "dnp-fourier-transform: no column names found in the header of '${input}'" >&2
  exit 1
fi
b45de206654d "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
79
b45de206654d "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
# extract_series TABLE COLUMN TRIM
#   Writes to stdout the values of the column whose name in the first line of
#   TABLE equals COLUMN. The header row and the first and last TRIM data rows
#   are left out. Every value is preceded by its running number in the
#   "%6d<TAB>" layout that `cat -n` produces.
#   Returns non-zero when the first line of TABLE has no field named COLUMN.
#   The trimming is done inside awk, so no negative line count for head is
#   needed (that form is a GNU extension).
extract_series() {
  awk -v name="$2" -v trim="$3" '
    NR == 1 {
      # Locate the requested column in the header row.
      for (i = 1; i <= NF; i++) {
        if ($i == name) {
          col = i
          break
        }
      }
      if (!col) {
        exit 2
      }
      next
    }
    { row[++n] = $col }
    END {
      # Also reached after the exit above; keep the failure status.
      if (!col) {
        exit 2
      }
      for (r = trim + 1; r <= n - trim; r++) {
        printf "%6d\t%s\n", r - trim, row[r]
      }
    }
  ' < "$1"
}

# Run dnp-fourier once per column named in ${dinucleotides}.
# The variable is left unquoted on purpose: it holds a whitespace-separated
# list of column names that must be split into words.
for di in ${dinucleotides}; do
  # Numbered, trimmed series for this column.
  if ! extract_series "${input}" "${di}" "${trim}" > "temp.${di}"; then
    echo "dnp-fourier-transform: column '${di}' not found in '${input}'" >&2
    exit 1
  fi

  # Remove a stale result first so the check below only sees fresh output.
  rm -f "ft_output.${di}"

  # Submit the call and parameters
  "${call}" -f "temp.${di}" -o "ft_output.${di}" \
    -n "${normalization}" -l "${smoothingw}" -t "${outputtype}"

  # A missing or empty result file means the transform did not work; stop
  # rather than assemble a table with an empty column.
  if [ ! -s "ft_output.${di}" ]; then
    echo "dnp-fourier-transform: ${call} produced no output for column '${di}'" >&2
    exit 1
  fi

  # First field of the result becomes the shared "period" column; it is
  # rewritten on every pass, so the last column processed provides it.
  {
    echo "period"
    awk '{print $1}' "ft_output.${di}"
  } > period

  # Second field of the result becomes this column, labelled with its name.
  {
    printf '%s\n' "${di}"
    awk '{print $2}' "ft_output.${di}"
  } > "ft.${di}"
done
b45de206654d "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
# Join the period column and every per-column result side by side into the
# output table. The ft.* files are taken in the order the shell expands the
# glob, i.e. sorted by name.
paste period ft.* > "${output}"
paste_status=$?

# Remove the intermediate files. -f keeps rm quiet when one of the patterns
# matches nothing.
rm -f temp.* ft.* period ft_output.*

# Report a failed paste instead of letting the status of rm hide it.
if [ "${paste_status}" -ne 0 ]; then
  echo "dnp-fourier-transform: could not write '${output}'" >&2
  exit 1
fi