0
|
1 #!usr/bin/bash
|
|
2
|
|
3
|
|
4 :<<'hey'
|
|
5 Command line looks like this :
|
|
6 ahopro_wrapper.sh ${ahopro_config} ${motif_file} ${function.function_selector} $outfile $function['seq_name'] $function['nbr_motif'] $actualN
|
|
7 # if function = p-value :, commad line will looks like this :
|
|
8 ahopro_wrapper.sh ${ahopro_config} ${motif_file} ${function.function_selector} $outfile $function['seq_name'] $function['nbr_motif'] $actualN ${letter_freq_file}
|
|
9
|
|
10
|
|
11 hey
|
|
12
|
|
13 #sort out arguments
|
|
14
|
|
15 configfile=$1
|
|
16 motif_file=$2
|
|
17 function=$3
|
|
18 #if function is "p-value", get the freqFile pramater
|
|
19 if [ "$function" == "p-value" ] ; then
|
|
20 letterFreqFile=$8
|
|
21 fi
|
|
22 output=$4
|
|
23 SEQ_NAME=$5
|
|
24 NBR_MOTIFS=$6
|
|
25 actual_NBR=$7
|
|
26
|
|
27
|
|
28
|
|
29 PATH_AHOPRO="/usr/bin/ahopro/AhoPro.1.3/"
|
|
30
|
|
31
|
|
32 # create tmp workindg dir and cd
|
|
33
|
|
34 OUTDIR=`mktemp -d`
|
|
35
|
|
36 #hard code the creation of OUTDIR
|
|
37 if [ -d $OUTDIR ]; then
|
|
38 chmod -R 777 $OUTDIR
|
|
39 else
|
|
40 mkdir $OUTDIR
|
|
41 fi
|
|
42
|
|
43 chmod -R 777 $OUTDIR
|
|
44
|
|
45 cd $OUTDIR
|
|
46
|
|
47
|
|
48
|
|
49 #echo "######configfile out of XML form :" > $output
|
|
50 #cat $configfile >> $output
|
|
51 #echo "motifFile :" >> $output
|
|
52 #cat $motif_file > $output
|
|
53
|
|
54
|
|
55
|
|
56
|
|
57 ## ************ First, check if the number of actual motif is met in the entries. If not, warn, and quit ******************************************************
|
|
58
|
|
59 if [[ $NBR_MOTIFS != $actual_NBR ]]; then
|
|
60
|
|
61 cat <<doc >$output
|
|
62 AhoPro error :
|
|
63
|
|
64 The number of motifs set in "Number of motifs" does not match the the actual number of motif entry in "Motifs" section. They should be the same.
|
|
65
|
|
66 Number of motifs : $NBR_MOTIFS
|
|
67 Number of actual motif entered : $actual_NBR
|
|
68
|
|
69 Please recheck these parameters.
|
|
70
|
|
71 doc
|
|
72
|
|
73 exit 0
|
|
74
|
|
75 fi
|
|
76
|
|
77 ## ************ END check ******************************************************
|
|
78
|
|
79
|
|
80
|
|
81 ### **************** Deal with motif entery ****************************************************************************************************
|
|
82
|
|
83
|
|
84 if [[ $NBR_MOTIFS = 1 ]]; then
|
|
85
|
|
86 #NO PARSING REQUIRED
|
|
87
|
|
88 #remove blank lines, tabs, spaces..
|
|
89 cat $motif_file | awk '/^[0-9\-ACTGatgc]/ {print}' > motif_tmp_0.txt
|
|
90 # reverse if the motif is a pmw, reverse it if necessary
|
|
91 type=`cat motif_tmp_0.txt | awk ' END { if ($1 ~ /[acATGC]/) {print "words"} else {print "pmw"} }'`
|
|
92
|
|
93 if [[ "$type" == "pmw" ]]; then
|
|
94
|
|
95 NF=`cat motif_tmp_0.txt | awk ' END {print NF}'`
|
|
96
|
|
97 if [[ $NF != 4 ]]; then #if it's not vertical, reverse it
|
|
98
|
|
99 for ((c=1 ; c <= $NF; c++))
|
|
100 do
|
|
101 cat motif_tmp_0.txt | awk -v c=$c '{ printf $c" " } END {printf"\n"}' >> motif_0.txt
|
|
102 done
|
|
103
|
|
104 rm motif_tmp_0.txt
|
|
105
|
|
106
|
|
107 elif [[ $NF == 0 ]]; then
|
|
108 echo "Please check the motif entries.Try removing tabulations, spaces." > $output
|
|
109 exit 0
|
|
110
|
|
111 else #if vertical
|
|
112 mv motif_tmp_0.txt motif_0.txt
|
|
113 fi
|
|
114
|
|
115 else #if words
|
|
116 mv motif_tmp_0.txt motif_0.txt
|
|
117
|
|
118 fi
|
|
119
|
|
120
|
|
121 else # IF there is more than one motif
|
|
122
|
|
123 #check if it's true + parse motifs + warn
|
|
124 N=`cat $motif_file | awk '/^[0-9\-ACTGatgc*]/ {print}' | awk ' BEGIN{RS="*"; n=0} {printf $0 > "motif_tmp_"n++} END {print n}'`
|
|
125
|
|
126 # reverse pmw (or not if it's vertical)
|
|
127
|
|
128 for ((i=0 ; i< $N ; i++))
|
|
129 do
|
|
130 # remove empty lines + check if it's a pmw, motif words.
|
|
131 cat motif_tmp_$i | awk '/^[0-9\-ACTGatgc]/ {print}' > motif_tmp_tmp_$i
|
|
132 type=`cat motif_tmp_tmp_$i | awk ' END { if ($1 ~ /[acATGC]/) {print "words"} else {print "pmw"} }'`
|
|
133
|
|
134 rm motif_tmp_$i
|
|
135 # if pmw : check if it is horizontal, if so, reverse it (ahopro takes only vertical pmws !)
|
|
136
|
|
137 if [[ "$type" == "pmw" ]]; then
|
|
138
|
|
139 NF=`cat motif_tmp_tmp_$i | awk ' END {print NF}'`
|
|
140
|
|
141 if [[ $NF != 4 ]]; then #if it's not vertical, reverse it
|
|
142
|
|
143 for ((c=1 ; c <= $NF; c++))
|
|
144 do
|
|
145 cat motif_tmp_tmp_$i | awk -v c=$c '{ printf $c" " } END {printf"\n"}' >> motif_$i.txt
|
|
146 done
|
|
147 rm motif_tmp_tmp_$i
|
|
148
|
|
149
|
|
150 elif [[ $NF == 0 ]]; then
|
|
151 echo "Please check the motif entries, try removing tabulations, spaces." > $ouput
|
|
152 exit 0
|
|
153
|
|
154 else #if vertical
|
|
155 mv motif_tmp_tmp_$i motif_$i.txt
|
|
156 fi
|
|
157
|
|
158 else #if words
|
|
159 mv motif_tmp_tmp_$i motif_$i.txt
|
|
160
|
|
161 fi
|
|
162
|
|
163 #cat motif_$i.txt
|
|
164 done
|
|
165 fi
|
|
166
|
|
167 ## ****************************************** END motif entery treatment *********************************************************************************************************************
|
|
168
|
|
169 ###motif_0
|
|
170 # next could be improved in bash (sed..)
|
|
171
|
|
172 #Add motif filenames to the configfile byt writing a new_config.txt (in CWD)
|
|
173 ##but first, check what kind of correction ? basic : motifnames only ? or LetterFreqFile also ?
|
|
174
|
|
175 if [ "$function" == "p-value" ] ; then
|
|
176
|
|
177 python $PATH_AHOPRO/replaceLine.py $configfile "name_freq" $letterFreqFile
|
|
178
|
|
179 else
|
|
180
|
|
181 python $PATH_AHOPRO/replaceLine.py $configfile "name"
|
|
182
|
|
183 fi
|
|
184
|
|
185 ## get the new config file
|
|
186 #new_config="$OUTDIR/new_config.txt"
|
|
187 new_config="new_config.txt"
|
|
188
|
|
189 #echo "******** AhoPro Parameters ******** " >> $output
|
|
190 #cat $new_config >> $output
|
|
191 #call ahopro with config file
|
|
192
|
|
193
|
|
194 #echo "******** AhoPro Results ******** " >> $output
|
|
195
|
|
196 $PATH_AHOPRO/ahokocc $new_config >> $output 2>&1
|
|
197
|
|
198 # improve line "Search in sequence .." with user entered sequence name (for more clarity). Do only if seq_name != 'X'
|
|
199
|
|
200 if [ "$SEQ_NAME" != "X" ] ; then
|
|
201
|
|
202 #remove sequence (if it exists)
|
|
203 sed -i '/^Search/d' $output
|
|
204 # add new line
|
|
205 line="\nSearch in sequence : $SEQ_NAME\n"
|
|
206 sed -i "1s/^/$line/" $output
|
|
207
|
|
208 fi
|
|
209
|
|
210 rm $new_config
|
|
211
|