Mercurial > repos > jbrayet > rsat
changeset 2:dce9495ac542 draft
Uploaded
| author | jbrayet | 
|---|---|
| date | Tue, 22 Sep 2015 05:15:21 -0400 | 
| parents | e851d9021494 | 
| children | 9ed6c10d2c5f | 
| files | peakMotifs_wrapper.py | 
| diffstat | 1 files changed, 496 insertions(+), 0 deletions(-) [+] | 
line wrap: on
 line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/peakMotifs_wrapper.py Tue Sep 22 05:15:21 2015 -0400 @@ -0,0 +1,496 @@ +#! /usr/bin/python +# -*- coding: utf8 -*- +"""#Peak Motifs - developed by Jocelyn Brayet <jocelyn.brayet@curie.fr> +#Copyright (C) 2015 Institut Curie +# +#This program is free software: you can redistribute it and/or modify +#it under the terms of the GNU General Public License as published by +#the Free Software Foundation, either version 3 of the License, or +#(at your option) any later version. +# +#This program is distributed in the hope that it will be useful, +#but WITHOUT ANY WARRANTY; without even the implied warranty of +#MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +#GNU General Public License for more details. +# +#You should have received a copy of the GNU General Public License +#along with this program. If not, see <http://www.gnu.org/licenses/>. +# +###########################################################' +# +#Client to download peak-motifs results from RSAT server. +# +# +#usage: peak-motifs_soap.py [-h] -test <TEST_FILE> [-control <CONTROL_FILE>] +# [-max_seq_length <MAX_SEQ_LENGTH>] +# [-max_motif_number <MAX_MOTIF_NUMBER>] +# [-top_peaks <TOP_PEAKS>] [-min_length <MIN_LENGTH>] +# [-max_length <MAX_LENGTH>] [-markov <MARKOV_MODEL>] +# [-min_markov <MIN_MARKOV>] +# [-max_markov <MAX_MARKOV>] [-noov <NOOV_DETECTION>] +# [-class_int <CLASS_INT>] [-str <STR_SUMMED>] +# [-graph_title <GRAPH_TITLE>] +# [-image_format <IMAGE_FORMAT>] +# [-disco [<DISCO_ALGORITHM> [<DISCO_ALGORITHM> ...]]] +# [-source <SOURCE_FILE>] [-verb <VERBOSITY>] +# [-ref_motif <REF_MOTIF>] -server <SERVEUR> +# +#optional arguments: +# -h, --help show this help message and exit +# -test <TEST_FILE>, --test_file <TEST_FILE> +# Input test peak sequence in fasta format. +# -control <CONTROL_FILE>, --control_file <CONTROL_FILE> +# Input control peak sequence in fasta format. +# -max_seq_length <MAX_SEQ_LENGTH>, --maxSeqLength <MAX_SEQ_LENGTH> +# Maximal sequence length. +# -max_motif_number <MAX_MOTIF_NUMBER>, --maxMotifNumber <MAX_MOTIF_NUMBER> +# Maximal number of motifs (matrices) to return for +# pattern discovery algorithms. +# -top_peaks <TOP_PEAKS>, --topPeaks <TOP_PEAKS> +# Restrict the analysis to the N peaks at the top of the +# input sequence file. +# -min_length <MIN_LENGTH>, --minLength <MIN_LENGTH> +# Minimal oligonucleotide length. +# -max_length <MAX_LENGTH>, --maxLength <MAX_LENGTH> +# Maximal oligonucleotide length. +# -markov <MARKOV_MODEL>, --markovModel <MARKOV_MODEL> +# Order of the Markov model used to estimatd expected +# oligonucleotide frequencies for oligo-analysis and +# local-word-analysis. +# -min_markov <MIN_MARKOV>, --minMarkov <MIN_MARKOV> +# Minimal value for markov order. Use in combination +# with the next option (max_markov). +# -max_markov <MAX_MARKOV>, --maxMarkov <MAX_MARKOV> +# Maximal value for markov order. Use in combination +# with the previous option (min_markov). +# -noov <NOOV_DETECTION>, --noovDetection <NOOV_DETECTION> +# No overlapping of oligos allowed if value = 1. +# -class_int <CLASS_INT>, --classInt <CLASS_INT> +# Class interval for position-analysis. The width of the +# position classes, in number of bases (default: 20). +# -str <STR_SUMMED>, --strSummed <STR_SUMMED> +# Oligonucleotide occurrences found on both stands are +# summed (2) or not (1). Default is 2. +# -graph_title <GRAPH_TITLE>, --graphTitle <GRAPH_TITLE> +# Title displayed on top of the graphs. +# -image_format <IMAGE_FORMAT>, --imageFormat <IMAGE_FORMAT> +# Image format. All the formats supported by XYgraph can +# be used. +# -disco [<DISCO_ALGORITHM> [<DISCO_ALGORITHM> ...]], --discoAlgorithm [<DISCO_ALGORITHM> [<DISCO_ALGORITHM> ...]] +# Specify the software tool(s) that will be used for +# motif discovery +# (oligos|dyads|positions|local_words|merged_words). +# Several algorithms can be specified either by using a +# comma-separated list of algorithms: -disco +# oligos,dyads +# -source <SOURCE_FILE>, --sourceFile <SOURCE_FILE> +# Enter the source of the fasta sequence file. Supported +# source: galaxy +# -verb <VERBOSITY>, --verbosity <VERBOSITY> +# Verbosity. +# -ref_motif <REF_MOTIF>, --ref_motif <REF_MOTIF> +# Motif annotated in some transcription factor database +# (e.g. RegulonDB, Jaspar, TRANSFAC) for the +# transcription factor of interest. +# -server <SERVEUR>, --server <SERVEUR> +# RSAT server +# -outGalaxy <OUT_GALAXY>, --outGalaxy <OUT_GALAXY> +# +#Version 0.1 - 30/01/2015 - Adapted from Jocelyn Brayet, France Genomique team +# +###########################################################""" +__author__ = 'Jocelyn Brayet' + +###########################################################' +## Import + +import argparse +import os +import urllib +import zipfile +import time +import platform +from suds.client import Client + +###########################################################' + +###########################################################' +## Define log options for suds + +# Import log package +#import logging + +# Import log package +#import logging +# création de l'objet logger qui va nous servir à écrire dans les logs +#logger = logging.getLogger() +# on met le niveau du logger à DEBUG, comme ça il écrit tout +#logger.setLevel(logging.DEBUG) +# Configure log of suds clients to DEBUG for verbose output concerning Client request +#logging.getLogger('suds.client').setLevel(logging.ERROR) +#logging.getLogger('suds.transport').setLevel(logging.ERROR) +#logging.getLogger('suds.xsd.schema').setLevel(logging.ERROR) +#logging.getLogger('suds.wsdl').setLevel(logging.ERROR) + + +# création d'un second handler qui va rediriger chaque écriture de log +# sur la console +#steam_handler = logging.StreamHandler() +#steam_handler.setLevel(logging.DEBUG) +#logger.addHandler(steam_handler) + +#logger.info('Hello') + +#print(client.factory.create('peak_motifs')) + +# (PeakMotifsRequest){ +# output = None -> ok +# verbosity = None +# test = None -> ok +# tmp_test_infile = None +# control = None +# tmp_control_infile = None +# max_seq_length = None +# max_motif_number = None +# ref_motif = None +# top_peaks = None +# min_length = None +# max_length = None +# markov = None +# min_markov = None +# max_markov = None +# noov = None +# class_int = None +# str = None +# graph_title = None +# image_format = None +# disco = None +# source = None +# task = None +# } +# } + + +################################ functions ############################################################ +## Define a function to make a service perform the desired request using provided arguments +def call_run_service(service, args): + """ + Run job in RSAT server. + service -> RSAT web service + args -> web service request + """ + + result = rsat_service.peak_motifs(args) + return result + +def testNone(argument): + """ + Test if argument is None or not. + argument -> argument give by user + """ + + if not argument is None: + variable = argument[0] + else: + variable = "" + return variable + + +###########################################################' +## Function to recup results + +def buildZipUrl(algoResults): + """ + Recup results give by RSAT server. + algoResults -> result give by RSAT server + """ + + recupResult = str(algoResults) + tabResults=recupResult.split("\n") + urlZip = tabResults[4].replace("\t","") + + return urlZip + + +## Tested with python 2.6.6 +peakMotifsVersion = '0.1 - 30/01/2015' + +###########################################################' +# server dictionary +serverDict = { + + "fr_ens":"http://rsat01.biologie.ens.fr/rsat/web_services/RSATWS.wsdl", + "fr_mrs":"http://rsat-tagc.univ-mrs.fr/rsat/web_services/RSATWS.wsdl", + "fr_ro":"http://rsat.sb-roscoff.fr/web_services/RSATWS.wsdl", + "fr_mrs_2":"http://pedagogix-tagc.univ-mrs.fr/rsat/web_services/RSATWS.wsdl", + "es":"http://floresta.eead.csic.es/rsat/web_services/RSATWS.wsdl", + "mx":"http://embnet.ccg.unam.mx/rsa-tools/web_services/RSATWS.wsdl" + + } + +""" +serverDict = { + + "fr_ens":"http://protists.rsat.eu/rsat/web_services/RSATWS.wsdl", + "fr_mrs":"http://fungi.rsat.eu/rsat/web_services/RSATWS.wsdl", + "fr_ro":"http://metazoa.rsat.eu/web_services/RSATWS.wsdl", + "fr_mrs_2":"http://teaching.rsat.eu/rsat/web_services/RSATWS.wsdl", + "es":"http://plants.rsat.eu/rsat/web_services/RSATWS.wsdl", + "mx":"http://prokaryotes.rsat.eu/rsa-tools/web_services/RSATWS.wsdl" + + } +""" + +if __name__ == '__main__': + + ########### peak motifs arguments #################### + parser = argparse.ArgumentParser(description='Client to download peak-motifs results from RSAT server.', epilog='Version '+peakMotifsVersion) + + parser.add_argument('-test', '--test_file', metavar='<TEST_FILE>', type=argparse.FileType('r'), nargs=1, help='Input test peak sequence in fasta format.', required=True) + parser.add_argument('-control', '--control_file', metavar='<CONTROL_FILE>', type=argparse.FileType('r'), nargs=1, help='Input control peak sequence in fasta format.', required=False) + parser.add_argument('-max_seq_length', '--maxSeqLength', metavar='<MAX_SEQ_LENGTH>', type=int, nargs=1, help='Maximal sequence length.', required=False) + parser.add_argument('-max_motif_number', '--maxMotifNumber', metavar='<MAX_MOTIF_NUMBER>', type=int, nargs=1, help='Maximal number of motifs (matrices) to return for pattern discovery algorithms.', required=False) + parser.add_argument('-top_peaks', '--topPeaks', metavar='<TOP_PEAKS>', type=int, nargs=1, help='Restrict the analysis to the N peaks at the top of the input sequence file.', required=False) + parser.add_argument('-min_length', '--minLength', metavar='<MIN_LENGTH>', type=int, nargs=1, help='Minimal oligonucleotide length.', required=False) + parser.add_argument('-max_length', '--maxLength', metavar='<MAX_LENGTH>', type=int, nargs=1, help='Maximal oligonucleotide length.', required=False) + parser.add_argument('-markov', '--markovModel', metavar='<MARKOV_MODEL>', type=int, nargs=1, help='Order of the Markov model used to estimatd expected oligonucleotide frequencies for oligo-analysis and local-word-analysis.', required=False) + parser.add_argument('-min_markov', '--minMarkov', metavar='<MIN_MARKOV>', type=int, nargs=1, help='Minimal value for markov order. Use in combination with the next option (max_markov).', required=False) + parser.add_argument('-max_markov', '--maxMarkov', metavar='<MAX_MARKOV>', type=int, nargs=1, help='Maximal value for markov order. Use in combination with the previous option (min_markov).', required=False) + parser.add_argument('-noov', '--noovDetection', metavar='<NOOV_DETECTION>', type=int, nargs=1, help='No overlapping of oligos allowed if value = 1.', required=False) + parser.add_argument('-class_int', '--classInt', metavar='<CLASS_INT>', type=int, nargs=1, help='Class interval for position-analysis. The width of the position classes, in number of bases (default: 20).', required=False) + parser.add_argument('-str', '--strSummed', metavar='<STR_SUMMED>', type=int, nargs=1, help='Oligonucleotide occurrences found on both stands are summed (2) or not (1). Default is 2.', required=False) + parser.add_argument('-graph_title', '--graphTitle', metavar='<GRAPH_TITLE>', type=str, nargs=1, help='Title displayed on top of the graphs.', required=False) + parser.add_argument('-image_format', '--imageFormat', metavar='<IMAGE_FORMAT>', type=str, nargs=1, help='Image format. All the formats supported by XYgraph can be used.', required=False) + parser.add_argument('-disco', '--discoAlgorithm', metavar='<DISCO_ALGORITHM>', type=str, nargs='*', help='Specify the software tool(s) that will be used for motif discovery (oligos|dyads|positions|local_words|merged_words). Several algorithms can be specified either by using a comma-separated list of algorithms: -disco oligos,dyads', required=False) + parser.add_argument('-source', '--sourceFile', metavar='<SOURCE_FILE>', type=str, nargs=1, help='Enter the source of the fasta sequence file. Supported source: galaxy', required=False) + parser.add_argument('-verb', '--verbosity', metavar='<VERBOSITY>', type=int, nargs=1, help='Verbosity.', required=False) + parser.add_argument('-ref_motif', '--ref_motif', metavar='<REF_MOTIF>', type=argparse.FileType('r'), nargs=1, help='Motif annotated in some transcription factor database (e.g. RegulonDB, Jaspar, TRANSFAC) for the transcription factor of interest.', required=False) + parser.add_argument('-motif_db', '--motif_db', metavar='<MOTIF_DB>', type=str, nargs=1, help='Name of motif database.', required=False) + + ################################ galaxy arguments ############################################################ + parser.add_argument('-outGalaxy', '--outGalaxy', metavar='<OUT_GALAXY>', type=str, nargs=1, required=True) + parser.add_argument('-outGalaxy2', '--outGalaxy2', metavar='<OUT_GALAXY2>', type=str, nargs=1, required=False) + parser.add_argument('-server', '--server', metavar='<SERVEUR>', type=str, nargs=1, help='RSAT server', required=True) + ###########################################################' + + args = parser.parse_args() + + ########################################################### + ## Test arguments + + fasta_test_file = args.test_file[0].read() + + if not args.control_file is None : + fasta_control_file = args.control_file[0].read() + else : + fasta_control_file ="" + + if not args.ref_motif is None : + refMotifValue = args.ref_motif[0].read() + else : + refMotifValue ="" + + maxSeqLengthValue = testNone(args.maxSeqLength) + maxMotifNumberValue = testNone(args.maxMotifNumber) + topPeaksNumber = testNone(args.topPeaks) + minLengthNumber = testNone(args.minLength) + maxLengthNumber = testNone(args.maxLength) + markovModelValue = testNone(args.markovModel) + minMarkovValue = testNone(args.minMarkov) + maxMarkovValue = testNone(args.maxMarkov) + noovValue = testNone(args.noovDetection) + classIntValue = testNone(args.classInt) + strSummedValue = testNone(args.strSummed) + graphTitleValue = testNone(args.graphTitle) + imageFormatValue = testNone(args.imageFormat) + discoAlgorithmValue = testNone(args.discoAlgorithm) + sourceFileValue = testNone(args.sourceFile) + verbosityValue = testNone(args.verbosity) + motifdbValue = testNone(args.motif_db) + outGalaxyValue = testNone(args.outGalaxy) + outGalaxyValue2 = testNone(args.outGalaxy2) + serverValue = testNone(args.server) + + ###########################################################' + ## Create the SOAP client to request the RSAT service + + # Define URL for RSAT services + url = serverDict[serverValue] + print url + + # Create the client + client = Client(url) + + # Need service interface to perform requests + rsat_service = client.service + + # Define client header + userAgent = 'RSAT-Client/v%s (%s; Python %s; %s)' % ( + peakMotifsVersion, + os.path.basename( __file__ ), + platform.python_version(), + platform.system() + ) + + httpHeaders = {'User-agent': userAgent} + client.set_options(headers=httpHeaders) + client.set_options(timeout=300) + + + ###########################################################' + ## Create request + peakMotifsRequest = { + + 'test' : fasta_test_file, + 'control' : fasta_control_file, + 'max_seq_length' : maxSeqLengthValue, + 'max_motif_number' : maxMotifNumberValue, + 'top_peaks' : topPeaksNumber, + 'min_length' : minLengthNumber, + 'max_length' : maxLengthNumber, + 'markov' : markovModelValue, + 'min_markov' : minMarkovValue, + 'max_markov' : maxMarkovValue, + 'noov' : noovValue, + 'class_int' : classIntValue, + 'str' : strSummedValue, + 'graph_title' : graphTitleValue, + 'image_format' : imageFormatValue, + 'disco' : discoAlgorithmValue, + 'source' : sourceFileValue, + 'ref_motif' : refMotifValue, + 'verbosity' : verbosityValue, + 'motif_db' : motifdbValue + #'output' : 'blablabla' + + } + + + ###########################################################' + ## Run job in RSAT server + result = call_run_service(rsat_service, peakMotifsRequest) + + #logFile = open("/bioinfo/users/jbrayet/Bureau/peak_motifs.log","w") + + #logFile.write("###############################################\n") + #logFile.write("Command performed on server\n") + #logFile.write(result.command) + #logFile.write("\n") + #logFile.write("###############################################\n") + #logFile.write("Result\n") + #logFile.write(result.server) + + print("###############################################\n") + print("Command performed on server\n") + print(result.command) + print("\n") + print("###############################################\n") + print("Result\n") + print(result.server) + + ###########################################################' + ## Build result URL + + """ + zipFileDict = { + + "fr_ens":"http://protists.rsat.eu/rsat/", + "fr_mrs":"http://fungi.rsat.eu/rsat/", + "fr_ro":"http://metazoa.rsat.eu/", + "fr_mrs_2":"http://teaching.rsat.eu/rsat/", + "es":"http://plants.rsat.eu/rsat/", + "mx":"http://prokaryotes.rsat.eu/rsa-tools/" + + } + """ + + nameFile = "peak-motifs_results.zip" + urlResult=buildZipUrl(result.server) + print urlResult + + #ogFile.write("\n"+urlResult) + + ###########################################################' + ## Wait RSAT server + while urllib.urlopen(urlResult).getcode() != 200: + #logFile.write(str(urllib.urlopen(urlResult).getcode())+"\n") + time.sleep(5) + + #logFile.write(str(nameFile)+"\n") + + #while urllib.urlretrieve(urlResult, nameFile) + #try: + ###########################################################' + ## Download RSAT results + urllib.urlretrieve(urlResult, nameFile) + #except IOError: + #logFile.write("\nResult URL is false") + #Logger.error("Result URL is false") + + + #logFile.write("\n"+nameFile+"\n") + + ###########################################################' + ## Decompress results + #try: + zfile = zipfile.ZipFile(nameFile, 'r') + #except IOError: + #logFile.write("No zip file") + #Logger.error("No zip file") + + tempflag = 0 + folderName ="" + + for i in zfile.namelist(): ## On parcourt l'ensemble des fichiers de l'archive + + #logFile.write(i+"\n") + ############################### + if tempflag ==0: + folderName = i + + tempflag = 1 + ############################### + + if i.endswith('/'): ## S'il s'agit d'un repertoire, on se contente de creer le dossier + os.makedirs(i) + else: + data = zfile.read(i) ## lecture du fichier compresse + fp = open(i, "wb") ## creation en local du nouveau fichier + fp.write(data) ## ajout des donnees du fichier compresse dans le fichier local + fp.close() + zfile.close() + + #logFile.write("\n"+folderName+"\n") + #logFile.write("\n"+outGalaxyValue+"\n") + + os.popen("cp "+folderName+"peak-motifs_synthesis.html "+outGalaxyValue) + os.popen("cp "+nameFile+" "+folderName) + #logFile.close() + #os.popen("sed -i \"1iHHEELLLOOO\" "+outGalaxyValue) + #os.popen("sed -i \"1i<style type=\'text/css\'></style>\" "+outGalaxyValue) + + ###########################################################' + ##Create results folder name + outGalaxyValueDir = outGalaxyValue.replace(".dat","_files") + + #logFile.write("\noutGalaxyValueDir : " +outGalaxyValueDir) + + #logFile.close() + + # Create results folder + os.popen("mkdir "+outGalaxyValueDir) + + # Copy results files in results folder + os.popen("cp -R "+folderName+"data " + outGalaxyValueDir+"/data") + os.popen("cp -R "+folderName+"reports " + outGalaxyValueDir+"/reports") + os.popen("cp -R "+folderName+"results " + outGalaxyValueDir+"/results") + + if not outGalaxyValue2 =="": + os.popen("cp "+folderName+"results/sites/peak-motifs_all_motifs_seqcoord.bed "+outGalaxyValue2) + + + + + + +
