Mercurial > repos > erinija > plot_selected
annotate dnp-smooth.sh @ 0:448204d12325 draft default tip
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
author | erinija |
---|---|
date | Fri, 01 May 2020 12:13:00 +0000 |
parents | |
children |
rev | line source |
---|---|
0
448204d12325
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
#!/bin/sh
#
# dnp-smooth.sh - smooth every dinucleotide column of a frequency-profile
# table with a moving average.
#
# Usage:
#   sh dnp-smooth.sh smoothing-input.tabular winsize trim smoothed-output.tabular
#
# Arguments:
#   $1  input table; the first row is a header that contains a "pos" column
#       and one column per profile
#   $2  window size, handed to "dnp-fourier -l"
#   $3  number of points to trim (non-negative integer)
#   $4  output table (overwritten)
#
# Exit status:
#   0 on success; 1 on wrong usage or on any failure (message on stderr).
#
# Requires dnp-fourier on PATH. All scratch files live in a private
# temporary directory that is removed on exit, so concurrent runs in the
# same working directory do not interfere with each other and leftovers
# from earlier runs cannot leak into the output.

# Print the help text (to stdout, as the script has always done).
usage() {
  printf '%s\n' \
    '' \
    ' CALL ' \
    ' sh dnp-smooth.sh smoothing-input.tabular winsize trim smoothed-output.tabular' \
    '' \
    ' INPUT ' \
    ' smoothing-input.tabular - dinucleotide frequency profiles-patterns to smooth' \
    ' winsize - size of averaging window, suggested optimal value =3' \
    ' trim - how many noisy points to remove from both ends of the profile suggested =4' \
    '' \
    ' OUTPUT ' \
    ' smoothed-output.tabular - original series smoothed ' \
    '' \
    ' DESCRIPRION' \
    ' Applies smoothing on dinucleotide profiles. Smoothing reduces noise and enhances' \
    ' a representation of the dinucleotide frequency profiles. Smoothing is performed by' \
    ' moving average with a chosen window size (optimal winsize=3). Smoothing script is based on' \
    ' a shell wrapper of the call to dnp-fourier tool which computes a periodogram' \
    ' of a series given as a numerical column and has parameters:' \
    ' dnp-fourier -f input -o output -n {normalization 0|1|2} -l length_of_smoothing_window -t {type_of_output 1|2|3}' \
    ' The parameters control a type of normalization and output as follows:' \
    ' Normalization' \
    ' 0 base normalization subtracts mean' \
    ' 1 linear normalization removes linear trand ' \
    ' 2 quadratic normalization removes quadratic trend' \
    ' Output type' \
    ' 1 normalization outputs normalized original series' \
    ' 2 smoothing outputs smoothed original series' \
    ' 3 Fourier transform outputs periofogram' \
    '' \
    ' Example of input table' \
    ' pos AA AC AG AT CA CC CG CT GA GC GG GT TA TC TG TT WW SS RR YY' \
    ' -73 0.05664 0.0657 0.06966 0.03644 0.08026 0.09484 0.0362 0.09086 0.07084 0.04032 0.07318 0.06466 0.03862 0.06838 0.05722 0.05602 0.18772 0.24454 0.27032 0.3101' \
    ' -72 0.0668 0.06476 0.0753 0.04282 0.07022 0.08034 0.03534 0.081 0.07222 0.03512 0.06774 0.0598 0.03934 0.07496 0.06628 0.06784 0.2168 0.21854 0.28206 0.30414' \
    ' -71 0.063 0.0621 0.07668 0.04316 0.06926 0.07264 0.03316 0.07992 0.07546 0.03498 0.07306 0.06406 0.04182 0.07374 0.06874 0.06812 0.2161 0.21384 0.2882 0.29442' \
    ' -70 0.0624 0.0643 0.07214 0.04424 0.0642 0.06998 0.03472 0.07718 0.0723 0.03982 0.07472 0.06818 0.04282 0.07674 0.06864 0.06754 0.217 0.21924 0.28156 0.29144' \
    ' -69 0.0622 0.06456 0.074 0.0426 0.0661 0.07114 0.03414 0.08016 0.0703 0.03786 0.07118 0.06754 0.0421 0.07712 0.06988 0.06904 0.21594 0.21432 0.27768 0.29746' \
    ' ...' \
    '' \
    ' Output table is an original input table but smoothed by moving average with given window size.' \
    '' \
    ' REQUIREMENT' \
    ' dnp-fourier installed' \
    ' conda install -c bioconda dnp-fourier' \
    ''
}

# Report a fatal error on stderr and stop.
die() {
  printf 'dnp-smooth.sh: %s\n' "$*" >&2
  exit 1
}

if test "$#" -ne 4; then
  usage
  exit 1
fi

# input file name
name=$1
swindow=$2
trim=$3
out=$4

call=dnp-fourier

# Fail early and loudly instead of producing a silently broken table.
[ -r "$name" ] || die "cannot read input file: $name"
case $trim in
  '' | *[!0-9]*) die "trim must be a non-negative integer, got: $trim" ;;
esac
command -v "$call" >/dev/null 2>&1 \
  || die "$call not found on PATH (conda install -c bioconda dnp-fourier)"

# Private scratch directory, removed on every exit path.
workdir=$(mktemp -d "${TMPDIR:-/tmp}/dnp-smooth.XXXXXX") \
  || die "cannot create temporary directory"
trap 'rm -rf -- "$workdir"' EXIT
# Some shells skip the EXIT trap when killed by a signal; turn the signal
# into a normal exit so the cleanup above still runs.
trap 'exit 1' HUP INT TERM

# Print the 1-based index of the first header field of the input table that
# equals $1; print nothing when the header has no such field.
column_of() {
  awk -v want="$1" '
    {
      for (i = 1; i <= NF; i++)
        if ($i == want) { print i; exit }
      exit    # only the header row is inspected
    }' "$name"
}

## get the nucleotides from the header
dinucleotides=$(head -n1 "$name" | sed 's/pos//')

## save the position information
posi=$(column_of pos)
[ -n "$posi" ] || die "no 'pos' column in the header of $name"
# NR > 1 skips the header row by position instead of filtering on its text.
awk -v k="$posi" 'NR > 1 { print $k }' "$name" > "$workdir/posfile" \
  || die "cannot extract the pos column from $name"

# Word splitting of the header into column names is intentional here.
for di in $dinucleotides; do
  # column number of dinucleotide
  i1=$(column_of "$di")
  [ -n "$i1" ] || die "column '$di' not found in the header of $name"
  awk -v k="$i1" 'NR > 1 { print $k }' "$name" > "$workdir/temp.$di" \
    || die "cannot extract column $di from $name"

  # add centered sequence position
  paste "$workdir/posfile" "$workdir/temp.$di" | tr "\t" " " \
    > "$workdir/smoothprep"

  # perform smoothing; run inside the scratch directory so the tool is given
  # plain relative file names
  (cd "$workdir" && "$call" -f smoothprep -t 2 -l "$swindow" -o "cps.$di") \
    || die "$call failed for column $di"

  # Keep rows trim+1 .. N-(trim+1): the first `trim` rows and the last
  # `trim + 1` rows are dropped.
  # NOTE(review): the help text says trim is removed "from both ends", yet
  # one extra row is dropped at the end -- confirm against the output format
  # of dnp-fourier before changing this.
  awk -v t="$trim" '
    { row[NR] = $0 }
    END { for (i = t + 1; i <= NR - (t + 1); i++) print row[i] }
  ' "$workdir/cps.$di" > "$workdir/temp.$di" \
    || die "cannot trim smoothed column $di"

  # The position column is rewritten on every pass; the copy produced by the
  # last column is the one that ends up in the output.
  {
    echo "pos"
    awk '{ print $1 }' "$workdir/temp.$di"
  } > "$workdir/positions"
  {
    printf '%s\n' "$di"
    awk '{ print $2 }' "$workdir/temp.$di"
  } > "$workdir/cp.$di"
done

[ -e "$workdir/positions" ] || die "no data columns found in the header of $name"

# Columns are joined in glob (alphabetical) order of their names, which may
# differ from the input order; every column carries its own header cell.
(cd "$workdir" && paste positions cp.*) > "$out" \
  || die "cannot write output file: $out"