Mercurial > repos > erinija > dnp_correlation_between_profiles
annotate dnp-correlation-between-profiles.sh @ 0:b45de206654d draft default tip
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
author | erinija |
---|---|
date | Fri, 01 May 2020 12:08:23 +0000 |
parents | |
children |
rev | line source |
---|---|
0
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
1 #!/bin/sh |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
2 |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
3 if test "$#" -ne 4; then |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
4 |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
5 echo "" |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
6 echo " CALL " |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
7 echo " sh dnp-correlation-between-profiles.sh difreq.tabular winsize dinucleotides correlations.tabular" |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
8 echo "" |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
9 echo " INPUT" |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
10 echo " difreq.tabular - tabular file of dinucleotide frequencies on forward and complementary fasta sequences " |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
11 echo " winsize - size of sliding window within which correlation is computed (default 146)" |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
12 echo " dinucleotides - 'AA AC AG AT ...'" |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
13 echo "" |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
14 echo " OUTPUT" |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
15 echo " correlations.tabular - tabular file of Pearson correlations at each position for given dinucleotides" |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
16 echo "" |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
17 echo " DESCRIPTION" |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
18 echo " To find a position of highest symmetry between the" |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
19 echo " dinucleotides positional frequency profiles on" |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
20 echo " forward and complementary sequences a Pearson" |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
21 echo " correlation coefficient is computed at each position" |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
22 echo " between forward and complementary dinucleotide profiles" |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
23 echo " within a sliding window." |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
24 echo " Input file contains dinucleotide frequencies arranged in columns" |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
25 echo " for input dinucleotides named by *.f and *.r corresponding to" |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
26 echo " forward and complementary profiles as:" |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
27 echo "" |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
28 echo " AA.f AA.r AC.f AC.r AG.f AG.r" |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
29 echo " 0.076320 0.067920 0.057800 0.078120 0.081600 0.061960" |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
30 echo " 0.072540 0.069520 0.041800 0.079820 0.076300 0.055190" |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
31 echo " ... ... ... ... ... ..." |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
32 echo "" |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
33 echo " The input.tabular file is output of a dnp-subset-dinuc-profile.sh script." |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
34 echo " A tabular output file contains columns of correlation coefficients " |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
35 echo " along the positions of the frequency profile. A first column labelled 0 contains " |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
36 echo " average of all correlations at each position:" |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
37 echo "" |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
38 echo " 0 AA AC AG" |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
39 echo " 0.042205 0.0882716 0.030175 0.126967 " |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
40 echo " ... ... ... ..." |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
41 echo "" |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
42 echo " Position and value of maximum correlations" |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
43 echo " of each dinucleotide are printed to a standard output." |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
44 echo "" |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
45 echo " REQUIREMENT" |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
46 echo " dnp-corrprofile installed" |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
47 echo " conda install -c bioconda dnp-corrprofile" |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
48 echo "" |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
49 exit 1 |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
50 |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
51 fi |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
52 |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
53 name=$1 |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
54 window=$2 |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
55 dinucleotides=$3 |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
56 out=$4 |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
57 |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
58 call=dnp-corrprofile |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
59 |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
60 # compute length of the file |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
61 len=`wc ${name} |awk '{print $1}'` |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
62 len=$((len-1)) |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
63 |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
64 for di in ${dinucleotides} |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
65 do |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
66 #extract column numbers of required dinucleotide |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
67 i=`awk -v name=$di'.f' '{ for (i=1; i<=NF; i++) if($i==name) print i; exit}' $name` |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
68 |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
69 # create a file of two columns in temp |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
70 awk -v k1=${i} -v k2=$((i+1)) '{print $k1 "\t" $k2}' $name | sed -n "2,${len}p" > temp.${di} |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
71 |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
72 # put the header of the column |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
73 echo ${di} > cp${i} |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
74 |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
75 # correlation dinucleotide profile along the sequence |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
76 # ./corrprofile aa -n 219 -w 146 |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
77 # output in the temp column |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
78 ${call} temp.${di} -n $((len-1)) -w ${window} >> cp${i} |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
79 # print the maximum |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
80 posval=`cat cp${i} | grep -v ${di} | cat -n | sort -k2,2n | tail -n1` |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
81 echo ${di} ${posval} >> maxpos |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
82 |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
83 done |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
84 |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
85 paste cp[1-9]* > temp |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
86 |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
87 # compute average correlation profile |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
88 awk 'NR==2{next} |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
89 {T=0; |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
90 for(N=1; N<=NF; N++) T+=$N; |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
91 T/=NF |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
92 print T "\t" $0}' temp > ${out} |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
93 |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
94 # find a position of the maximum and output it |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
95 posval=`cat ${out}| awk '{print $1} ' | cat -n | sort -k2,2n | tail -n1` |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
96 echo "avg " ${posval} >> maxpos |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
97 cat maxpos | tr "\n" , |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
98 |
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
99 rm temp* cp[1-9]* maxpos |