comparison ahopro_wrapper.sh @ 0:2e121ae0a2eb draft

Uploaded
author jbrayet
date Thu, 11 Feb 2016 08:14:45 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:2e121ae0a2eb
#!/usr/bin/env bash
#
# ahopro_wrapper.sh - wrapper that prepares the motif files and configuration
# for AhoPro and then runs it.
#
# Command line looks like this:
#   ahopro_wrapper.sh ${ahopro_config} ${motif_file} ${function.function_selector} $outfile $function['seq_name'] $function['nbr_motif'] $actualN
#
# If the function is "p-value", the command line looks like this
# (one extra trailing argument, the letter frequency file):
#   ahopro_wrapper.sh ${ahopro_config} ${motif_file} ${function.function_selector} $outfile $function['seq_name'] $function['nbr_motif'] $actualN ${letter_freq_file}
12
# Positional arguments, in the order given on the command line.
configfile=${1}
motif_file=${2}
function=${3}
output=${4}
SEQ_NAME=${5}
NBR_MOTIFS=${6}
actual_NBR=${7}

# The letter frequency file is only read (from the 8th argument) when the
# selected function is "p-value"; otherwise letterFreqFile stays unset.
case "$function" in
  p-value) letterFreqFile=${8} ;;
esac

# Directory holding the AhoPro executable and helper script used below.
PATH_AHOPRO="/usr/bin/ahopro/AhoPro.1.3/"
30
31
# Create a temporary working directory and cd into it. All intermediate
# files (motif_*.txt, new_config.txt) are written relative to this directory.
OUTDIR=$(mktemp -d)

# Stop early if the directory could not be created: continuing with an empty
# OUTDIR would make the later cd land in $HOME and scatter files there.
if [[ -z "$OUTDIR" || ! -d "$OUTDIR" ]]; then
  echo "ahopro_wrapper: could not create a temporary working directory" >&2
  exit 1
fi

# Remove the scratch directory on every exit path so runs do not pile up
# temporary directories.
cleanup_outdir() {
  cd / && rm -rf -- "${OUTDIR:?}"
}
trap cleanup_outdir EXIT

# NOTE(review): mode 777 is kept from the original script; mktemp already
# creates a directory private to the current user - confirm whether
# world-writable access is really required.
chmod -R 777 "$OUTDIR"

if ! cd "$OUTDIR"; then
  echo "ahopro_wrapper: cannot enter working directory $OUTDIR" >&2
  exit 1
fi
46
47
48
49 #echo "######configfile out of XML form :" > $output
50 #cat $configfile >> $output
51 #echo "motifFile :" >> $output
52 #cat $motif_file > $output
53
54
55
56
## ************ First, check that the declared number of motifs matches the number of motif entries. If not, warn and quit ************

# Compare NBR_MOTIFS with actual_NBR as plain strings. When they differ,
# write an explanatory message to $output and terminate the script.
# The exit status is 0 on purpose (kept from the original): the message is
# delivered through the output file rather than as a failure status.
check_motif_count() {
  # Both sides are quoted so that neither value is interpreted as a glob
  # pattern by [[ ]].
  if [[ "$NBR_MOTIFS" == "$actual_NBR" ]]; then
    return 0
  fi

  cat <<doc >"$output"
AhoPro error :

The number of motifs set in "Number of motifs" does not match the the actual number of motif entry in "Motifs" section. They should be the same.

Number of motifs : $NBR_MOTIFS
Number of actual motif entered : $actual_NBR

Please recheck these parameters.

doc

  exit 0
}

check_motif_count

## ************ END check ******************************************************
78
79
80
### **************** Deal with motif entry ****************
#
# Turns the user-supplied motif file into motif_0.txt, motif_1.txt, ... in the
# current directory. Each motif is either a list of words or a matrix ("pmw").
# Per the original author's note, ahopro takes only vertical pmws, so a matrix
# whose last row does not have exactly 4 fields is transposed.

# Copy to stdout only the stdin lines that start with a digit, '-' or one of
# ACTGatgc; this drops blank lines and lines starting with spaces or tabs.
keep_motif_lines() {
  awk '/^[0-9ACTGatgc-]/'
}

# Print "words" when the first field of the last line of file $1 contains a
# nucleotide letter (either case), otherwise print "pmw".
motif_kind() {
  awk '{ first = $1 }
       END { if (first ~ /[ACGTacgt]/) print "words"; else print "pmw" }' < "$1"
}

# Print the number of fields on the last line of file $1 (0 for an empty file).
last_line_fields() {
  awk '{ n = NF } END { print n + 0 }' < "$1"
}

# Print the transpose of the whitespace-separated matrix in file $1.
# The column count is taken from the last row; every value is followed by a
# single space, one output line per input column.
transpose_matrix() {
  awk '
    { for (c = 1; c <= NF; c++) cell[NR, c] = $c; cols = NF }
    END {
      for (c = 1; c <= cols; c++) {
        for (r = 1; r <= NR; r++) printf "%s ", cell[r, c]
        printf "\n"
      }
    }' < "$1"
}

# Convert the already-filtered motif file $1 into its final form $2.
#   words, or a matrix with 4 fields per row : moved unchanged
#   any other matrix                         : transposed, source removed
# Returns 1 (leaving $1 in place) when the motif has no usable content.
normalize_motif() {
  local src=$1 dest=$2
  local cols

  if [[ "$(motif_kind "$src")" == "pmw" ]]; then
    cols=$(last_line_fields "$src")

    # The emptiness test must come before the "not 4 columns" test,
    # otherwise it can never be reached.
    if (( cols == 0 )); then
      return 1
    fi

    if (( cols != 4 )); then
      transpose_matrix "$src" > "$dest"
      rm -- "$src"
      return 0
    fi
  fi

  mv -- "$src" "$dest"
}

if [[ "$NBR_MOTIFS" = 1 ]]; then

  # Single motif: no splitting required, only filtering and orientation.
  keep_motif_lines < "$motif_file" > motif_tmp_0.txt

  if ! normalize_motif motif_tmp_0.txt motif_0.txt; then
    echo "Please check the motif entries.Try removing tabulations, spaces." > "$output"
    exit 0
  fi

else # more than one motif

  # Motifs are separated by '*'. Write each chunk to motif_tmp_<n> and
  # capture the number of chunks. The text is passed as an argument to
  # printf, never as its format string.
  N=$(awk '/^[0-9ACTGatgc*-]/' < "$motif_file" \
    | awk 'BEGIN { RS = "*"; n = 0 }
           { chunk = "motif_tmp_" n++; printf "%s", $0 > chunk; close(chunk) }
           END { print n }')

  for ((i = 0; i < N; i++)); do
    # Drop empty lines and the separator residue from the chunk.
    keep_motif_lines < "motif_tmp_$i" > "motif_tmp_tmp_$i"
    rm -- "motif_tmp_$i"

    if ! normalize_motif "motif_tmp_tmp_$i" "motif_$i.txt"; then
      # A separator after the last motif yields one empty final chunk; it is
      # skipped silently, which is what the original script effectively did.
      if (( i == N - 1 )); then
        rm -f -- "motif_tmp_tmp_$i"
        continue
      fi
      echo "Please check the motif entries, try removing tabulations, spaces." > "$output"
      exit 0
    fi
  done
fi

## ****************** END motif entry treatment ******************
168
### Build the final configuration, run AhoPro and tidy up its report.

# Replace the "Search..." line(s) of report file $1 with a header that shows
# the user-supplied sequence name $2: a blank line followed by
# "Search in sequence : <name>" is inserted in front of the first line.
# The name is escaped first so that '/', '&' and '\' in it are inserted
# literally instead of being interpreted by sed.
label_search_line() {
  local report=$1 seq_name=$2
  local escaped

  escaped=$(printf '%s' "$seq_name" | sed 's|[/&\\]|\\&|g')

  # remove the existing line (if any)
  sed -i '/^Search/d' "$report"
  # add the new line
  sed -i "1s/^/\\nSearch in sequence : ${escaped}\\n/" "$report"
}

# Per the original author's notes, replaceLine.py adds the motif file names
# to the configuration by writing new_config.txt in the current directory;
# in "p-value" mode it is additionally given the letter frequency file.
if [[ "$function" == "p-value" ]]; then
  python "$PATH_AHOPRO/replaceLine.py" "$configfile" "name_freq" "$letterFreqFile"
else
  python "$PATH_AHOPRO/replaceLine.py" "$configfile" "name"
fi

## get the new config file
new_config="new_config.txt"

# Call ahopro with the config file; stdout and stderr are both appended to
# the output file.
"$PATH_AHOPRO/ahokocc" "$new_config" >> "$output" 2>&1

# Improve the line "Search in sequence .." with the user-entered sequence
# name (for more clarity). Done only if seq_name != 'X'.
if [[ "$SEQ_NAME" != "X" ]]; then
  label_search_line "$output" "$SEQ_NAME"
fi

rm -- "$new_config"
211