comparison peakMotifs_wrapper.py @ 2:dce9495ac542 draft

Uploaded
author jbrayet
date Tue, 22 Sep 2015 05:15:21 -0400
parents
children a21c2253cafc
comparison
equal deleted inserted replaced
1:e851d9021494 2:dce9495ac542
1 #! /usr/bin/python
2 # -*- coding: utf8 -*-
3 """#Peak Motifs - developed by Jocelyn Brayet <jocelyn.brayet@curie.fr>
4 #Copyright (C) 2015 Institut Curie
5 #
6 #This program is free software: you can redistribute it and/or modify
7 #it under the terms of the GNU General Public License as published by
8 #the Free Software Foundation, either version 3 of the License, or
9 #(at your option) any later version.
10 #
11 #This program is distributed in the hope that it will be useful,
12 #but WITHOUT ANY WARRANTY; without even the implied warranty of
13 #MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 #GNU General Public License for more details.
15 #
16 #You should have received a copy of the GNU General Public License
17 #along with this program. If not, see <http://www.gnu.org/licenses/>.
18 #
19 ###########################################################'
20 #
21 #Client to download peak-motifs results from RSAT server.
22 #
23 #
24 #usage: peak-motifs_soap.py [-h] -test <TEST_FILE> [-control <CONTROL_FILE>]
25 # [-max_seq_length <MAX_SEQ_LENGTH>]
26 # [-max_motif_number <MAX_MOTIF_NUMBER>]
27 # [-top_peaks <TOP_PEAKS>] [-min_length <MIN_LENGTH>]
28 # [-max_length <MAX_LENGTH>] [-markov <MARKOV_MODEL>]
29 # [-min_markov <MIN_MARKOV>]
30 # [-max_markov <MAX_MARKOV>] [-noov <NOOV_DETECTION>]
31 # [-class_int <CLASS_INT>] [-str <STR_SUMMED>]
32 # [-graph_title <GRAPH_TITLE>]
33 # [-image_format <IMAGE_FORMAT>]
34 # [-disco [<DISCO_ALGORITHM> [<DISCO_ALGORITHM> ...]]]
35 # [-source <SOURCE_FILE>] [-verb <VERBOSITY>]
36 # [-ref_motif <REF_MOTIF>] -server <SERVEUR>
37 #
38 #optional arguments:
39 # -h, --help show this help message and exit
40 # -test <TEST_FILE>, --test_file <TEST_FILE>
41 # Input test peak sequence in fasta format.
42 # -control <CONTROL_FILE>, --control_file <CONTROL_FILE>
43 # Input control peak sequence in fasta format.
44 # -max_seq_length <MAX_SEQ_LENGTH>, --maxSeqLength <MAX_SEQ_LENGTH>
45 # Maximal sequence length.
46 # -max_motif_number <MAX_MOTIF_NUMBER>, --maxMotifNumber <MAX_MOTIF_NUMBER>
47 # Maximal number of motifs (matrices) to return for
48 # pattern discovery algorithms.
49 # -top_peaks <TOP_PEAKS>, --topPeaks <TOP_PEAKS>
50 # Restrict the analysis to the N peaks at the top of the
51 # input sequence file.
52 # -min_length <MIN_LENGTH>, --minLength <MIN_LENGTH>
53 # Minimal oligonucleotide length.
54 # -max_length <MAX_LENGTH>, --maxLength <MAX_LENGTH>
55 # Maximal oligonucleotide length.
56 # -markov <MARKOV_MODEL>, --markovModel <MARKOV_MODEL>
57 # Order of the Markov model used to estimatd expected
58 # oligonucleotide frequencies for oligo-analysis and
59 # local-word-analysis.
60 # -min_markov <MIN_MARKOV>, --minMarkov <MIN_MARKOV>
61 # Minimal value for markov order. Use in combination
62 # with the next option (max_markov).
63 # -max_markov <MAX_MARKOV>, --maxMarkov <MAX_MARKOV>
64 # Maximal value for markov order. Use in combination
65 # with the previous option (min_markov).
66 # -noov <NOOV_DETECTION>, --noovDetection <NOOV_DETECTION>
67 # No overlapping of oligos allowed if value = 1.
68 # -class_int <CLASS_INT>, --classInt <CLASS_INT>
69 # Class interval for position-analysis. The width of the
70 # position classes, in number of bases (default: 20).
71 # -str <STR_SUMMED>, --strSummed <STR_SUMMED>
72 # Oligonucleotide occurrences found on both stands are
73 # summed (2) or not (1). Default is 2.
74 # -graph_title <GRAPH_TITLE>, --graphTitle <GRAPH_TITLE>
75 # Title displayed on top of the graphs.
76 # -image_format <IMAGE_FORMAT>, --imageFormat <IMAGE_FORMAT>
77 # Image format. All the formats supported by XYgraph can
78 # be used.
79 # -disco [<DISCO_ALGORITHM> [<DISCO_ALGORITHM> ...]], --discoAlgorithm [<DISCO_ALGORITHM> [<DISCO_ALGORITHM> ...]]
80 # Specify the software tool(s) that will be used for
81 # motif discovery
82 # (oligos|dyads|positions|local_words|merged_words).
83 # Several algorithms can be specified either by using a
84 # comma-separated list of algorithms: -disco
85 # oligos,dyads
86 # -source <SOURCE_FILE>, --sourceFile <SOURCE_FILE>
87 # Enter the source of the fasta sequence file. Supported
88 # source: galaxy
89 # -verb <VERBOSITY>, --verbosity <VERBOSITY>
90 # Verbosity.
91 # -ref_motif <REF_MOTIF>, --ref_motif <REF_MOTIF>
92 # Motif annotated in some transcription factor database
93 # (e.g. RegulonDB, Jaspar, TRANSFAC) for the
94 # transcription factor of interest.
95 # -server <SERVEUR>, --server <SERVEUR>
96 # RSAT server
97 # -outGalaxy <OUT_GALAXY>, --outGalaxy <OUT_GALAXY>
98 #
99 #Version 0.1 - 30/01/2015 - Adapted from Jocelyn Brayet, France Genomique team
100 #
101 ###########################################################"""
102 __author__ = 'Jocelyn Brayet'
103
104 ###########################################################'
105 ## Import
106
107 import argparse
108 import os
109 import urllib
110 import zipfile
111 import time
112 import platform
113 from suds.client import Client
114
115 ###########################################################'
116
117 ###########################################################'
118 ## Define log options for suds
119
120 # Import log package
121 #import logging
122
123 # Import log package
124 #import logging
125 # création de l'objet logger qui va nous servir à écrire dans les logs
126 #logger = logging.getLogger()
127 # on met le niveau du logger à DEBUG, comme ça il écrit tout
128 #logger.setLevel(logging.DEBUG)
129 # Configure log of suds clients to DEBUG for verbose output concerning Client request
130 #logging.getLogger('suds.client').setLevel(logging.ERROR)
131 #logging.getLogger('suds.transport').setLevel(logging.ERROR)
132 #logging.getLogger('suds.xsd.schema').setLevel(logging.ERROR)
133 #logging.getLogger('suds.wsdl').setLevel(logging.ERROR)
134
135
136 # création d'un second handler qui va rediriger chaque écriture de log
137 # sur la console
138 #steam_handler = logging.StreamHandler()
139 #steam_handler.setLevel(logging.DEBUG)
140 #logger.addHandler(steam_handler)
141
142 #logger.info('Hello')
143
144 #print(client.factory.create('peak_motifs'))
145
146 # (PeakMotifsRequest){
147 # output = None -> ok
148 # verbosity = None
149 # test = None -> ok
150 # tmp_test_infile = None
151 # control = None
152 # tmp_control_infile = None
153 # max_seq_length = None
154 # max_motif_number = None
155 # ref_motif = None
156 # top_peaks = None
157 # min_length = None
158 # max_length = None
159 # markov = None
160 # min_markov = None
161 # max_markov = None
162 # noov = None
163 # class_int = None
164 # str = None
165 # graph_title = None
166 # image_format = None
167 # disco = None
168 # source = None
169 # task = None
170 # }
171 # }
172
173
174 ################################ functions ############################################################
175 ## Define a function to make a service perform the desired request using provided arguments
176 def call_run_service(service, args):
177 """
178 Run job in RSAT server.
179 service -> RSAT web service
180 args -> web service request
181 """
182
183 result = rsat_service.peak_motifs(args)
184 return result
185
186 def testNone(argument):
187 """
188 Test if argument is None or not.
189 argument -> argument give by user
190 """
191
192 if not argument is None:
193 variable = argument[0]
194 else:
195 variable = ""
196 return variable
197
198
199 ###########################################################'
200 ## Function to recup results
201
202 def buildZipUrl(algoResults):
203 """
204 Recup results give by RSAT server.
205 algoResults -> result give by RSAT server
206 """
207
208 recupResult = str(algoResults)
209 tabResults=recupResult.split("\n")
210 urlZip = tabResults[4].replace("\t","")
211
212 return urlZip
213
214
215 ## Tested with python 2.6.6
216 peakMotifsVersion = '0.1 - 30/01/2015'
217
218 ###########################################################'
219 # server dictionary
220 serverDict = {
221
222 "fr_ens":"http://rsat01.biologie.ens.fr/rsat/web_services/RSATWS.wsdl",
223 "fr_mrs":"http://rsat-tagc.univ-mrs.fr/rsat/web_services/RSATWS.wsdl",
224 "fr_ro":"http://rsat.sb-roscoff.fr/web_services/RSATWS.wsdl",
225 "fr_mrs_2":"http://pedagogix-tagc.univ-mrs.fr/rsat/web_services/RSATWS.wsdl",
226 "es":"http://floresta.eead.csic.es/rsat/web_services/RSATWS.wsdl",
227 "mx":"http://embnet.ccg.unam.mx/rsa-tools/web_services/RSATWS.wsdl"
228
229 }
230
231 """
232 serverDict = {
233
234 "fr_ens":"http://protists.rsat.eu/rsat/web_services/RSATWS.wsdl",
235 "fr_mrs":"http://fungi.rsat.eu/rsat/web_services/RSATWS.wsdl",
236 "fr_ro":"http://metazoa.rsat.eu/web_services/RSATWS.wsdl",
237 "fr_mrs_2":"http://teaching.rsat.eu/rsat/web_services/RSATWS.wsdl",
238 "es":"http://plants.rsat.eu/rsat/web_services/RSATWS.wsdl",
239 "mx":"http://prokaryotes.rsat.eu/rsa-tools/web_services/RSATWS.wsdl"
240
241 }
242 """
243
244 if __name__ == '__main__':
245
246 ########### peak motifs arguments ####################
247 parser = argparse.ArgumentParser(description='Client to download peak-motifs results from RSAT server.', epilog='Version '+peakMotifsVersion)
248
249 parser.add_argument('-test', '--test_file', metavar='<TEST_FILE>', type=argparse.FileType('r'), nargs=1, help='Input test peak sequence in fasta format.', required=True)
250 parser.add_argument('-control', '--control_file', metavar='<CONTROL_FILE>', type=argparse.FileType('r'), nargs=1, help='Input control peak sequence in fasta format.', required=False)
251 parser.add_argument('-max_seq_length', '--maxSeqLength', metavar='<MAX_SEQ_LENGTH>', type=int, nargs=1, help='Maximal sequence length.', required=False)
252 parser.add_argument('-max_motif_number', '--maxMotifNumber', metavar='<MAX_MOTIF_NUMBER>', type=int, nargs=1, help='Maximal number of motifs (matrices) to return for pattern discovery algorithms.', required=False)
253 parser.add_argument('-top_peaks', '--topPeaks', metavar='<TOP_PEAKS>', type=int, nargs=1, help='Restrict the analysis to the N peaks at the top of the input sequence file.', required=False)
254 parser.add_argument('-min_length', '--minLength', metavar='<MIN_LENGTH>', type=int, nargs=1, help='Minimal oligonucleotide length.', required=False)
255 parser.add_argument('-max_length', '--maxLength', metavar='<MAX_LENGTH>', type=int, nargs=1, help='Maximal oligonucleotide length.', required=False)
256 parser.add_argument('-markov', '--markovModel', metavar='<MARKOV_MODEL>', type=int, nargs=1, help='Order of the Markov model used to estimatd expected oligonucleotide frequencies for oligo-analysis and local-word-analysis.', required=False)
257 parser.add_argument('-min_markov', '--minMarkov', metavar='<MIN_MARKOV>', type=int, nargs=1, help='Minimal value for markov order. Use in combination with the next option (max_markov).', required=False)
258 parser.add_argument('-max_markov', '--maxMarkov', metavar='<MAX_MARKOV>', type=int, nargs=1, help='Maximal value for markov order. Use in combination with the previous option (min_markov).', required=False)
259 parser.add_argument('-noov', '--noovDetection', metavar='<NOOV_DETECTION>', type=int, nargs=1, help='No overlapping of oligos allowed if value = 1.', required=False)
260 parser.add_argument('-class_int', '--classInt', metavar='<CLASS_INT>', type=int, nargs=1, help='Class interval for position-analysis. The width of the position classes, in number of bases (default: 20).', required=False)
261 parser.add_argument('-str', '--strSummed', metavar='<STR_SUMMED>', type=int, nargs=1, help='Oligonucleotide occurrences found on both stands are summed (2) or not (1). Default is 2.', required=False)
262 parser.add_argument('-graph_title', '--graphTitle', metavar='<GRAPH_TITLE>', type=str, nargs=1, help='Title displayed on top of the graphs.', required=False)
263 parser.add_argument('-image_format', '--imageFormat', metavar='<IMAGE_FORMAT>', type=str, nargs=1, help='Image format. All the formats supported by XYgraph can be used.', required=False)
264 parser.add_argument('-disco', '--discoAlgorithm', metavar='<DISCO_ALGORITHM>', type=str, nargs='*', help='Specify the software tool(s) that will be used for motif discovery (oligos|dyads|positions|local_words|merged_words). Several algorithms can be specified either by using a comma-separated list of algorithms: -disco oligos,dyads', required=False)
265 parser.add_argument('-source', '--sourceFile', metavar='<SOURCE_FILE>', type=str, nargs=1, help='Enter the source of the fasta sequence file. Supported source: galaxy', required=False)
266 parser.add_argument('-verb', '--verbosity', metavar='<VERBOSITY>', type=int, nargs=1, help='Verbosity.', required=False)
267 parser.add_argument('-ref_motif', '--ref_motif', metavar='<REF_MOTIF>', type=argparse.FileType('r'), nargs=1, help='Motif annotated in some transcription factor database (e.g. RegulonDB, Jaspar, TRANSFAC) for the transcription factor of interest.', required=False)
268 parser.add_argument('-motif_db', '--motif_db', metavar='<MOTIF_DB>', type=str, nargs=1, help='Name of motif database.', required=False)
269
270 ################################ galaxy arguments ############################################################
271 parser.add_argument('-outGalaxy', '--outGalaxy', metavar='<OUT_GALAXY>', type=str, nargs=1, required=True)
272 parser.add_argument('-outGalaxy2', '--outGalaxy2', metavar='<OUT_GALAXY2>', type=str, nargs=1, required=False)
273 parser.add_argument('-server', '--server', metavar='<SERVEUR>', type=str, nargs=1, help='RSAT server', required=True)
274 ###########################################################'
275
276 args = parser.parse_args()
277
278 ###########################################################
279 ## Test arguments
280
281 fasta_test_file = args.test_file[0].read()
282
283 if not args.control_file is None :
284 fasta_control_file = args.control_file[0].read()
285 else :
286 fasta_control_file =""
287
288 if not args.ref_motif is None :
289 refMotifValue = args.ref_motif[0].read()
290 else :
291 refMotifValue =""
292
293 maxSeqLengthValue = testNone(args.maxSeqLength)
294 maxMotifNumberValue = testNone(args.maxMotifNumber)
295 topPeaksNumber = testNone(args.topPeaks)
296 minLengthNumber = testNone(args.minLength)
297 maxLengthNumber = testNone(args.maxLength)
298 markovModelValue = testNone(args.markovModel)
299 minMarkovValue = testNone(args.minMarkov)
300 maxMarkovValue = testNone(args.maxMarkov)
301 noovValue = testNone(args.noovDetection)
302 classIntValue = testNone(args.classInt)
303 strSummedValue = testNone(args.strSummed)
304 graphTitleValue = testNone(args.graphTitle)
305 imageFormatValue = testNone(args.imageFormat)
306 discoAlgorithmValue = testNone(args.discoAlgorithm)
307 sourceFileValue = testNone(args.sourceFile)
308 verbosityValue = testNone(args.verbosity)
309 motifdbValue = testNone(args.motif_db)
310 outGalaxyValue = testNone(args.outGalaxy)
311 outGalaxyValue2 = testNone(args.outGalaxy2)
312 serverValue = testNone(args.server)
313
314 ###########################################################'
315 ## Create the SOAP client to request the RSAT service
316
317 # Define URL for RSAT services
318 url = serverDict[serverValue]
319 print url
320
321 # Create the client
322 client = Client(url)
323
324 # Need service interface to perform requests
325 rsat_service = client.service
326
327 # Define client header
328 userAgent = 'RSAT-Client/v%s (%s; Python %s; %s)' % (
329 peakMotifsVersion,
330 os.path.basename( __file__ ),
331 platform.python_version(),
332 platform.system()
333 )
334
335 httpHeaders = {'User-agent': userAgent}
336 client.set_options(headers=httpHeaders)
337 client.set_options(timeout=300)
338
339
340 ###########################################################'
341 ## Create request
342 peakMotifsRequest = {
343
344 'test' : fasta_test_file,
345 'control' : fasta_control_file,
346 'max_seq_length' : maxSeqLengthValue,
347 'max_motif_number' : maxMotifNumberValue,
348 'top_peaks' : topPeaksNumber,
349 'min_length' : minLengthNumber,
350 'max_length' : maxLengthNumber,
351 'markov' : markovModelValue,
352 'min_markov' : minMarkovValue,
353 'max_markov' : maxMarkovValue,
354 'noov' : noovValue,
355 'class_int' : classIntValue,
356 'str' : strSummedValue,
357 'graph_title' : graphTitleValue,
358 'image_format' : imageFormatValue,
359 'disco' : discoAlgorithmValue,
360 'source' : sourceFileValue,
361 'ref_motif' : refMotifValue,
362 'verbosity' : verbosityValue,
363 'motif_db' : motifdbValue
364 #'output' : 'blablabla'
365
366 }
367
368
369 ###########################################################'
370 ## Run job in RSAT server
371 result = call_run_service(rsat_service, peakMotifsRequest)
372
373 #logFile = open("/bioinfo/users/jbrayet/Bureau/peak_motifs.log","w")
374
375 #logFile.write("###############################################\n")
376 #logFile.write("Command performed on server\n")
377 #logFile.write(result.command)
378 #logFile.write("\n")
379 #logFile.write("###############################################\n")
380 #logFile.write("Result\n")
381 #logFile.write(result.server)
382
383 print("###############################################\n")
384 print("Command performed on server\n")
385 print(result.command)
386 print("\n")
387 print("###############################################\n")
388 print("Result\n")
389 print(result.server)
390
391 ###########################################################'
392 ## Build result URL
393
394 """
395 zipFileDict = {
396
397 "fr_ens":"http://protists.rsat.eu/rsat/",
398 "fr_mrs":"http://fungi.rsat.eu/rsat/",
399 "fr_ro":"http://metazoa.rsat.eu/",
400 "fr_mrs_2":"http://teaching.rsat.eu/rsat/",
401 "es":"http://plants.rsat.eu/rsat/",
402 "mx":"http://prokaryotes.rsat.eu/rsa-tools/"
403
404 }
405 """
406
407 nameFile = "peak-motifs_results.zip"
408 urlResult=buildZipUrl(result.server)
409 print urlResult
410
411 #ogFile.write("\n"+urlResult)
412
413 ###########################################################'
414 ## Wait RSAT server
415 while urllib.urlopen(urlResult).getcode() != 200:
416 #logFile.write(str(urllib.urlopen(urlResult).getcode())+"\n")
417 time.sleep(5)
418
419 #logFile.write(str(nameFile)+"\n")
420
421 #while urllib.urlretrieve(urlResult, nameFile)
422 #try:
423 ###########################################################'
424 ## Download RSAT results
425 urllib.urlretrieve(urlResult, nameFile)
426 #except IOError:
427 #logFile.write("\nResult URL is false")
428 #Logger.error("Result URL is false")
429
430
431 #logFile.write("\n"+nameFile+"\n")
432
433 ###########################################################'
434 ## Decompress results
435 #try:
436 zfile = zipfile.ZipFile(nameFile, 'r')
437 #except IOError:
438 #logFile.write("No zip file")
439 #Logger.error("No zip file")
440
441 tempflag = 0
442 folderName =""
443
444 for i in zfile.namelist(): ## On parcourt l'ensemble des fichiers de l'archive
445
446 #logFile.write(i+"\n")
447 ###############################
448 if tempflag ==0:
449 folderName = i
450
451 tempflag = 1
452 ###############################
453
454 if i.endswith('/'): ## S'il s'agit d'un repertoire, on se contente de creer le dossier
455 os.makedirs(i)
456 else:
457 data = zfile.read(i) ## lecture du fichier compresse
458 fp = open(i, "wb") ## creation en local du nouveau fichier
459 fp.write(data) ## ajout des donnees du fichier compresse dans le fichier local
460 fp.close()
461 zfile.close()
462
463 #logFile.write("\n"+folderName+"\n")
464 #logFile.write("\n"+outGalaxyValue+"\n")
465
466 os.popen("cp "+folderName+"peak-motifs_synthesis.html "+outGalaxyValue)
467 os.popen("cp "+nameFile+" "+folderName)
468 #logFile.close()
469 #os.popen("sed -i \"1iHHEELLLOOO\" "+outGalaxyValue)
470 #os.popen("sed -i \"1i<style type=\'text/css\'></style>\" "+outGalaxyValue)
471
472 ###########################################################'
473 ##Create results folder name
474 outGalaxyValueDir = outGalaxyValue.replace(".dat","_files")
475
476 #logFile.write("\noutGalaxyValueDir : " +outGalaxyValueDir)
477
478 #logFile.close()
479
480 # Create results folder
481 os.popen("mkdir "+outGalaxyValueDir)
482
483 # Copy results files in results folder
484 os.popen("cp -R "+folderName+"data " + outGalaxyValueDir+"/data")
485 os.popen("cp -R "+folderName+"reports " + outGalaxyValueDir+"/reports")
486 os.popen("cp -R "+folderName+"results " + outGalaxyValueDir+"/results")
487
488 if not outGalaxyValue2 =="":
489 os.popen("cp "+folderName+"results/sites/peak-motifs_all_motifs_seqcoord.bed "+outGalaxyValue2)
490
491
492
493
494
495
496