2
|
1 #! /usr/bin/python
|
|
2 # -*- coding: utf8 -*-
|
|
3 """#Peak Motifs - developed by Jocelyn Brayet <jocelyn.brayet@curie.fr>
|
|
4 #Copyright (C) 2015 Institut Curie
|
|
5 #
|
|
6 #This program is free software: you can redistribute it and/or modify
|
|
7 #it under the terms of the GNU General Public License as published by
|
|
8 #the Free Software Foundation, either version 3 of the License, or
|
|
9 #(at your option) any later version.
|
|
10 #
|
|
11 #This program is distributed in the hope that it will be useful,
|
|
12 #but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
13 #MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
14 #GNU General Public License for more details.
|
|
15 #
|
|
16 #You should have received a copy of the GNU General Public License
|
|
17 #along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
18 #
|
|
19 ###########################################################'
|
|
20 #
|
|
21 #Client to download peak-motifs results from RSAT server.
|
|
22 #
|
|
23 #
|
|
24 #usage: peak-motifs_soap.py [-h] -test <TEST_FILE> [-control <CONTROL_FILE>]
|
|
25 # [-max_seq_length <MAX_SEQ_LENGTH>]
|
|
26 # [-max_motif_number <MAX_MOTIF_NUMBER>]
|
|
27 # [-top_peaks <TOP_PEAKS>] [-min_length <MIN_LENGTH>]
|
|
28 # [-max_length <MAX_LENGTH>] [-markov <MARKOV_MODEL>]
|
|
29 # [-min_markov <MIN_MARKOV>]
|
|
30 # [-max_markov <MAX_MARKOV>] [-noov <NOOV_DETECTION>]
|
|
31 # [-class_int <CLASS_INT>] [-str <STR_SUMMED>]
|
|
32 # [-graph_title <GRAPH_TITLE>]
|
|
33 # [-image_format <IMAGE_FORMAT>]
|
|
34 # [-disco [<DISCO_ALGORITHM> [<DISCO_ALGORITHM> ...]]]
|
|
35 # [-source <SOURCE_FILE>] [-verb <VERBOSITY>]
|
|
36 # [-ref_motif <REF_MOTIF>] -server <SERVEUR>
|
|
37 #
|
|
38 #optional arguments:
|
|
39 # -h, --help show this help message and exit
|
|
40 # -test <TEST_FILE>, --test_file <TEST_FILE>
|
|
41 # Input test peak sequence in fasta format.
|
|
42 # -control <CONTROL_FILE>, --control_file <CONTROL_FILE>
|
|
43 # Input control peak sequence in fasta format.
|
|
44 # -max_seq_length <MAX_SEQ_LENGTH>, --maxSeqLength <MAX_SEQ_LENGTH>
|
|
45 # Maximal sequence length.
|
|
46 # -max_motif_number <MAX_MOTIF_NUMBER>, --maxMotifNumber <MAX_MOTIF_NUMBER>
|
|
47 # Maximal number of motifs (matrices) to return for
|
|
48 # pattern discovery algorithms.
|
|
49 # -top_peaks <TOP_PEAKS>, --topPeaks <TOP_PEAKS>
|
|
50 # Restrict the analysis to the N peaks at the top of the
|
|
51 # input sequence file.
|
|
52 # -min_length <MIN_LENGTH>, --minLength <MIN_LENGTH>
|
|
53 # Minimal oligonucleotide length.
|
|
54 # -max_length <MAX_LENGTH>, --maxLength <MAX_LENGTH>
|
|
55 # Maximal oligonucleotide length.
|
|
56 # -markov <MARKOV_MODEL>, --markovModel <MARKOV_MODEL>
|
|
57 # Order of the Markov model used to estimate expected
|
|
58 # oligonucleotide frequencies for oligo-analysis and
|
|
59 # local-word-analysis.
|
|
60 # -min_markov <MIN_MARKOV>, --minMarkov <MIN_MARKOV>
|
|
61 # Minimal value for markov order. Use in combination
|
|
62 # with the next option (max_markov).
|
|
63 # -max_markov <MAX_MARKOV>, --maxMarkov <MAX_MARKOV>
|
|
64 # Maximal value for markov order. Use in combination
|
|
65 # with the previous option (min_markov).
|
|
66 # -noov <NOOV_DETECTION>, --noovDetection <NOOV_DETECTION>
|
|
67 # No overlapping of oligos allowed if value = 1.
|
|
68 # -class_int <CLASS_INT>, --classInt <CLASS_INT>
|
|
69 # Class interval for position-analysis. The width of the
|
|
70 # position classes, in number of bases (default: 20).
|
|
71 # -str <STR_SUMMED>, --strSummed <STR_SUMMED>
|
|
72 # Oligonucleotide occurrences found on both strands are
|
|
73 # summed (2) or not (1). Default is 2.
|
|
74 # -graph_title <GRAPH_TITLE>, --graphTitle <GRAPH_TITLE>
|
|
75 # Title displayed on top of the graphs.
|
|
76 # -image_format <IMAGE_FORMAT>, --imageFormat <IMAGE_FORMAT>
|
|
77 # Image format. All the formats supported by XYgraph can
|
|
78 # be used.
|
|
79 # -disco [<DISCO_ALGORITHM> [<DISCO_ALGORITHM> ...]], --discoAlgorithm [<DISCO_ALGORITHM> [<DISCO_ALGORITHM> ...]]
|
|
80 # Specify the software tool(s) that will be used for
|
|
81 # motif discovery
|
|
82 # (oligos|dyads|positions|local_words|merged_words).
|
|
83 # Several algorithms can be specified either by using a
|
|
84 # comma-separated list of algorithms: -disco
|
|
85 # oligos,dyads
|
|
86 # -source <SOURCE_FILE>, --sourceFile <SOURCE_FILE>
|
|
87 # Enter the source of the fasta sequence file. Supported
|
|
88 # source: galaxy
|
|
89 # -verb <VERBOSITY>, --verbosity <VERBOSITY>
|
|
90 # Verbosity.
|
|
91 # -ref_motif <REF_MOTIF>, --ref_motif <REF_MOTIF>
|
|
92 # Motif annotated in some transcription factor database
|
|
93 # (e.g. RegulonDB, Jaspar, TRANSFAC) for the
|
|
94 # transcription factor of interest.
|
|
95 # -server <SERVEUR>, --server <SERVEUR>
|
|
96 # RSAT server
|
|
97 # -outGalaxy <OUT_GALAXY>, --outGalaxy <OUT_GALAXY>
|
|
98 #
|
51
|
99 #Version 2.0 - 30/01/2015 - Adapted from Jocelyn Brayet, France Genomique team
|
2
|
100 #
|
|
101 ###########################################################"""
|
|
102 __author__ = 'Jocelyn Brayet'
|
|
103
|
|
104 ###########################################################'
|
|
105 ## Import
|
|
106
|
|
107 import argparse
|
|
108 import os
|
|
109 import urllib
|
|
110 import zipfile
|
|
111 import time
|
|
112 import platform
|
|
113 from suds.client import Client
|
|
114
|
|
115 ################################ functions ############################################################
|
|
116 ## Define a function to make a service perform the desired request using provided arguments
|
|
def call_run_service(service, args):
    """
    Run a peak-motifs job on the RSAT server.

    service -> RSAT web service interface (object exposing peak_motifs())
    args -> web service request (dictionary of peak-motifs parameters)

    Returns whatever service.peak_motifs(args) returns.
    """

    # Use the service handed in by the caller instead of relying on a
    # module-level global, so the function also works when imported.
    return service.peak_motifs(args)
|
|
126
|
|
127 def testNone(argument):
|
|
128 """
|
|
129 Test if argument is None or not.
|
|
130 argument -> argument give by user
|
|
131 """
|
|
132
|
|
133 if not argument is None:
|
|
134 variable = argument[0]
|
|
135 else:
|
|
136 variable = ""
|
|
137 return variable
|
|
138
|
|
139
|
|
140 ###########################################################'
|
50
|
141 ## Functions to retrieve results
|
2
|
142
|
|
def buildZipUrl(algoResults):
    """
    Extract the URL of the result archive from the RSAT server response.

    algoResults -> response given by the RSAT server; it is converted to
                   text and the URL is read from its fifth line

    Returns the content of that line without tabs or surrounding whitespace.
    Raises IndexError, with the received text in the message, when the
    response has fewer than five lines.
    """

    urlLineIndex = 4
    responseLines = str(algoResults).split("\n")

    if len(responseLines) <= urlLineIndex:
        raise IndexError(
            "Unexpected RSAT server response (expected at least %d lines, got %d): %r"
            % (urlLineIndex + 1, len(responseLines), str(algoResults))
        )

    # Tabs are removed as before; strip() additionally drops a trailing
    # carriage return or spaces that would otherwise corrupt the URL.
    return responseLines[urlLineIndex].replace("\t", "").strip()
|
|
154
|
50
|
def _extractZipMember(zfile, member):
    """Write one archive member (directory or file) below the current directory."""

    target = os.path.normpath(member)

    # Refuse entries that would be written outside the working directory
    # (absolute paths or paths climbing out with "..").
    if os.path.isabs(target) or target == os.pardir or target.startswith(os.pardir + os.sep):
        raise ValueError("Unsafe path in archive: %r" % member)

    if member.endswith('/'):
        # Directory entry: create it unless it is already there.
        if not os.path.isdir(target):
            os.makedirs(target)
        return

    # Archives do not always list parent directories explicitly.
    parentDir = os.path.dirname(target)
    if parentDir and not os.path.isdir(parentDir):
        os.makedirs(parentDir)

    fp = open(target, "wb")
    try:
        fp.write(zfile.read(member))
    finally:
        fp.close()


def recupRSATResult(urlResult, nameFile):
    """
    Download the result archive from the RSAT server and unpack it.

    urlResult -> URL given by the RSAT server
    nameFile -> name of the local zip file to create

    The archive is extracted relative to the current working directory.
    Returns the name of the first entry of the archive ("" if the archive
    is empty). Raises ValueError if an entry would be written outside the
    working directory.
    """

    # urlretrieve lives in different modules in Python 2 and Python 3.
    try:
        from urllib.request import urlretrieve
    except ImportError:
        from urllib import urlretrieve

    ###########################################################
    ## Download RSAT results
    urlretrieve(urlResult, nameFile)

    ###########################################################
    ## Decompress results
    zfile = zipfile.ZipFile(nameFile, 'r')
    try:
        members = zfile.namelist()
        folderName = members[0] if members else ""
        for member in members:
            _extractZipMember(zfile, member)
    finally:
        # Always release the archive handle, even if extraction fails.
        zfile.close()

    return folderName
|
2
|
194
|
|
## Tested with python 2.6.6
peakMotifsVersion = '2.0 - 30/01/2015'

###########################################################
# Server dictionary: short server key -> WSDL address of the RSAT web services
serverDict = {
    "fr_ens": "http://rsat01.biologie.ens.fr/rsat/web_services/RSATWS.wsdl",
    "fr_mrs": "http://rsat-tagc.univ-mrs.fr/rsat/web_services/RSATWS.wsdl",
    "fr_ro": "http://rsat.sb-roscoff.fr/web_services/RSATWS.wsdl",
    "fr_mrs_2": "http://pedagogix-tagc.univ-mrs.fr/rsat/web_services/RSATWS.wsdl",
    "es": "http://floresta.eead.csic.es/rsat/web_services/RSATWS.wsdl",
    "mx": "http://embnet.ccg.unam.mx/rsa-tools/web_services/RSATWS.wsdl",
}

# Inactive alternative mapping (rsat.eu host names), kept for reference.
# It is a bare string literal and has no effect at runtime.
"""
serverDict = {

    "fr_ens":"http://protists.rsat.eu/rsat/web_services/RSATWS.wsdl",
    "fr_mrs":"http://fungi.rsat.eu/rsat/web_services/RSATWS.wsdl",
    "fr_ro":"http://metazoa.rsat.eu/web_services/RSATWS.wsdl",
    "fr_mrs_2":"http://teaching.rsat.eu/rsat/web_services/RSATWS.wsdl",
    "es":"http://plants.rsat.eu/rsat/web_services/RSATWS.wsdl",
    "mx":"http://prokaryotes.rsat.eu/rsa-tools/web_services/RSATWS.wsdl"

}
"""
|
|
223
|
|
def _readInputFile(fileArgument):
    """Return the content of the file opened by argparse, or "" if the option is absent."""

    if fileArgument is None:
        return ""
    handle = fileArgument[0]
    try:
        return handle.read()
    finally:
        # argparse.FileType leaves the handle open; release it once read.
        handle.close()


def _copyTreeMerge(source, destination):
    """Copy a file or a directory tree to destination, merging into existing directories."""

    import shutil

    if os.path.isdir(source):
        if not os.path.isdir(destination):
            os.makedirs(destination)
        for entryName in os.listdir(source):
            _copyTreeMerge(os.path.join(source, entryName), os.path.join(destination, entryName))
    else:
        shutil.copy(source, destination)


if __name__ == '__main__':

    # Only the command-line entry point needs these modules.
    import glob
    import shutil
    import sys

    ########### peak motifs arguments ####################
    parser = argparse.ArgumentParser(description='Client to download peak-motifs results from RSAT server.', epilog='Version '+peakMotifsVersion)

    parser.add_argument('-test', '--test_file', metavar='<TEST_FILE>', type=argparse.FileType('r'), nargs=1,
                        help='Input test peak sequence in fasta format.', required=True)
    parser.add_argument('-control', '--control_file', metavar='<CONTROL_FILE>', type=argparse.FileType('r'), nargs=1,
                        help='Input control peak sequence in fasta format.', required=False)
    parser.add_argument('-max_seq_length', '--maxSeqLength', metavar='<MAX_SEQ_LENGTH>', type=int, nargs=1,
                        help='Maximal sequence length.', required=False)
    parser.add_argument('-max_motif_number', '--maxMotifNumber', metavar='<MAX_MOTIF_NUMBER>', type=int, nargs=1,
                        help='Maximal number of motifs (matrices) to return for pattern discovery algorithms.', required=False)
    parser.add_argument('-top_peaks', '--topPeaks', metavar='<TOP_PEAKS>', type=int, nargs=1,
                        help='Restrict the analysis to the N peaks at the top of the input sequence file.', required=False)
    parser.add_argument('-min_length', '--minLength', metavar='<MIN_LENGTH>', type=int, nargs=1,
                        help='Minimal oligonucleotide length.', required=False)
    parser.add_argument('-max_length', '--maxLength', metavar='<MAX_LENGTH>', type=int, nargs=1,
                        help='Maximal oligonucleotide length.', required=False)
    parser.add_argument('-markov', '--markovModel', metavar='<MARKOV_MODEL>', type=int, nargs=1,
                        help='Order of the Markov model used to estimatd expected oligonucleotide frequencies for oligo-analysis and local-word-analysis.', required=False)
    parser.add_argument('-min_markov', '--minMarkov', metavar='<MIN_MARKOV>', type=int, nargs=1,
                        help='Minimal value for markov order. Use in combination with the next option (max_markov).', required=False)
    parser.add_argument('-max_markov', '--maxMarkov', metavar='<MAX_MARKOV>', type=int, nargs=1,
                        help='Maximal value for markov order. Use in combination with the previous option (min_markov).', required=False)
    parser.add_argument('-noov', '--noovDetection', metavar='<NOOV_DETECTION>', type=int, nargs=1,
                        help='No overlapping of oligos allowed if value = 1.', required=False)
    parser.add_argument('-class_int', '--classInt', metavar='<CLASS_INT>', type=int, nargs=1,
                        help='Class interval for position-analysis. The width of the position classes, in number of bases (default: 20).', required=False)
    parser.add_argument('-str', '--strSummed', metavar='<STR_SUMMED>', type=int, nargs=1,
                        help='Oligonucleotide occurrences found on both stands are summed (2) or not (1). Default is 2.', required=False)
    parser.add_argument('-graph_title', '--graphTitle', metavar='<GRAPH_TITLE>', type=str, nargs=1,
                        help='Title displayed on top of the graphs.', required=False)
    parser.add_argument('-image_format', '--imageFormat', metavar='<IMAGE_FORMAT>', type=str, nargs=1,
                        help='Image format. All the formats supported by XYgraph can be used.', required=False)
    parser.add_argument('-disco', '--discoAlgorithm', metavar='<DISCO_ALGORITHM>', type=str, nargs='*',
                        help='Specify the software tool(s) that will be used for motif discovery (oligos|dyads|positions|local_words|merged_words). Several algorithms can be specified either by using a comma-separated list of algorithms: -disco oligos,dyads', required=False)
    parser.add_argument('-source', '--sourceFile', metavar='<SOURCE_FILE>', type=str, nargs=1,
                        help='Enter the source of the fasta sequence file. Supported source: galaxy', required=False)
    parser.add_argument('-verb', '--verbosity', metavar='<VERBOSITY>', type=int, nargs=1,
                        help='Verbosity.', required=False)
    parser.add_argument('-ref_motif', '--ref_motif', metavar='<REF_MOTIF>', type=argparse.FileType('r'), nargs=1,
                        help='Motif annotated in some transcription factor database (e.g. RegulonDB, Jaspar, TRANSFAC) for the transcription factor of interest.', required=False)
    parser.add_argument('-motif_db', '--motif_db', metavar='<MOTIF_DB>', type=str, nargs=1,
                        help='Name of motif database.', required=False)

    ################################ galaxy arguments ############################################################
    parser.add_argument('-outGalaxy', '--outGalaxy', metavar='<OUT_GALAXY>', type=str, nargs=1, required=True)
    parser.add_argument('-outGalaxy2', '--outGalaxy2', metavar='<OUT_GALAXY2>', type=str, nargs=1, required=False)
    parser.add_argument('-server', '--server', metavar='<SERVEUR>', type=str, nargs=1, help='RSAT server', required=True)
    ###########################################################

    args = parser.parse_args()

    ###########################################################
    ## Test arguments

    # Input files are read entirely and closed; absent options give "".
    fasta_test_file = _readInputFile(args.test_file)
    fasta_control_file = _readInputFile(args.control_file)
    refMotifValue = _readInputFile(args.ref_motif)

    maxSeqLengthValue = testNone(args.maxSeqLength)
    maxMotifNumberValue = testNone(args.maxMotifNumber)
    topPeaksNumber = testNone(args.topPeaks)
    minLengthNumber = testNone(args.minLength)
    maxLengthNumber = testNone(args.maxLength)
    markovModelValue = testNone(args.markovModel)
    minMarkovValue = testNone(args.minMarkov)
    maxMarkovValue = testNone(args.maxMarkov)
    noovValue = testNone(args.noovDetection)
    classIntValue = testNone(args.classInt)
    strSummedValue = testNone(args.strSummed)
    graphTitleValue = testNone(args.graphTitle)
    imageFormatValue = testNone(args.imageFormat)
    discoAlgorithmValue = testNone(args.discoAlgorithm)
    sourceFileValue = testNone(args.sourceFile)
    verbosityValue = testNone(args.verbosity)
    motifdbValue = testNone(args.motif_db)
    outGalaxyValue = testNone(args.outGalaxy)
    outGalaxyValue2 = testNone(args.outGalaxy2)
    serverValue = testNone(args.server)

    # Report an unknown server key as a usage error instead of a KeyError.
    if serverValue not in serverDict:
        parser.error("Unknown RSAT server '%s'. Supported servers: %s"
                     % (serverValue, ", ".join(sorted(serverDict))))

    ###########################################################
    ## Create the SOAP client to request the RSAT service

    # Define URL for RSAT services
    url = serverDict[serverValue]
    print(url)

    # Create the client
    client = Client(url)

    # Need service interface to perform requests
    rsat_service = client.service

    # Define client header
    userAgent = 'RSAT-Client/v%s (%s; Python %s; %s)' % (
        peakMotifsVersion,
        os.path.basename(__file__),
        platform.python_version(),
        platform.system()
    )

    httpHeaders = {'User-agent': userAgent}
    client.set_options(headers=httpHeaders)
    client.set_options(timeout=300)

    ###########################################################
    ## Create request
    peakMotifsRequest = {
        'test': fasta_test_file,
        'control': fasta_control_file,
        'max_seq_length': maxSeqLengthValue,
        'max_motif_number': maxMotifNumberValue,
        'top_peaks': topPeaksNumber,
        'min_length': minLengthNumber,
        'max_length': maxLengthNumber,
        'markov': markovModelValue,
        'min_markov': minMarkovValue,
        'max_markov': maxMarkovValue,
        'noov': noovValue,
        'class_int': classIntValue,
        'str': strSummedValue,
        'graph_title': graphTitleValue,
        'image_format': imageFormatValue,
        'disco': discoAlgorithmValue,
        'source': sourceFileValue,
        'ref_motif': refMotifValue,
        'verbosity': verbosityValue,
        'motif_db': motifdbValue
        #'output' : 'blablabla'
    }

    ###########################################################
    ## Run job in RSAT server
    result = call_run_service(rsat_service, peakMotifsRequest)

    print("###############################################\n")
    print("Command performed on server\n")
    print(result.command)
    print("\n")
    print("###############################################\n")
    print("Result\n")
    print(result.server)

    ###########################################################
    ## Build result URL

    # Inactive mapping of server keys to base URLs, kept for reference:
    #   "fr_ens":   "http://protists.rsat.eu/rsat/"
    #   "fr_mrs":   "http://fungi.rsat.eu/rsat/"
    #   "fr_ro":    "http://metazoa.rsat.eu/"
    #   "fr_mrs_2": "http://teaching.rsat.eu/rsat/"
    #   "es":       "http://plants.rsat.eu/rsat/"
    #   "mx":       "http://prokaryotes.rsat.eu/rsa-tools/"

    nameFile = "peak-motifs_archive.zip"
    urlResult = buildZipUrl(result.server)
    print(urlResult)

    ###########################################################
    ## Wait RSAT server
    # Poll every 5 seconds until the archive URL answers with HTTP 200.
    # NOTE(review): there is no upper bound on the waiting time.
    while urllib.urlopen(urlResult).getcode() != 200:
        time.sleep(5)

    synthesisName = "peak-motifs_synthesis.html"
    folderName = recupRSATResult(urlResult, nameFile)

    # The archive may be fetched before it is complete: as long as the
    # synthesis page is missing, discard what was extracted and retry.
    while not os.path.exists(folderName + synthesisName):
        if os.path.isdir(folderName):
            shutil.rmtree(folderName, ignore_errors=True)
        elif os.path.exists(folderName):
            os.remove(folderName)
        # Pause between attempts instead of hammering the server.
        time.sleep(5)
        folderName = recupRSATResult(urlResult, nameFile)

    shutil.copy(folderName + synthesisName, outGalaxyValue)

    ###########################################################
    ## Create results folder name

    # Create results folder
    outGalaxyValueDir = outGalaxyValue.replace(".dat", "_files")
    if outGalaxyValueDir == outGalaxyValue:
        # No ".dat" in the output path: avoid clashing with the output file.
        outGalaxyValueDir = outGalaxyValue + "_files"
    if not os.path.isdir(outGalaxyValueDir):
        os.makedirs(outGalaxyValueDir)

    # Copy results files in results folder
    shutil.copy(nameFile, os.path.join(outGalaxyValueDir, nameFile))
    for resultEntry in glob.glob(folderName + "*"):
        _copyTreeMerge(resultEntry, os.path.join(outGalaxyValueDir, os.path.basename(resultEntry)))

    if outGalaxyValue2 != "":
        bedFile = folderName + "results/sites/peak-motifs_all_motifs_seqcoord.bed"
        if os.path.exists(bedFile):
            shutil.copy(bedFile, outGalaxyValue2)
        else:
            # Missing BED file is reported but does not abort the run.
            sys.stderr.write("Warning: %s not found, %s was not written\n" % (bedFile, outGalaxyValue2))
|
|
408
|
52
|
409
|