Mercurial > repos > jbrayet > rsat
view peakMotifs_wrapper.py @ 52:a1d369ead6d7 draft default tip
Uploaded
author | jbrayet |
---|---|
date | Tue, 29 Sep 2015 08:25:39 -0400 |
parents | 9e193aa2b9d2 |
children |
line wrap: on
line source
#! /usr/bin/python # -*- coding: utf8 -*- """#Peak Motifs - developed by Jocelyn Brayet <jocelyn.brayet@curie.fr> #Copyright (C) 2015 Institut Curie # #This program is free software: you can redistribute it and/or modify #it under the terms of the GNU General Public License as published by #the Free Software Foundation, either version 3 of the License, or #(at your option) any later version. # #This program is distributed in the hope that it will be useful, #but WITHOUT ANY WARRANTY; without even the implied warranty of #MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #GNU General Public License for more details. # #You should have received a copy of the GNU General Public License #along with this program. If not, see <http://www.gnu.org/licenses/>. # ###########################################################' # #Client to download peak-motifs results from RSAT server. # # #usage: peak-motifs_soap.py [-h] -test <TEST_FILE> [-control <CONTROL_FILE>] # [-max_seq_length <MAX_SEQ_LENGTH>] # [-max_motif_number <MAX_MOTIF_NUMBER>] # [-top_peaks <TOP_PEAKS>] [-min_length <MIN_LENGTH>] # [-max_length <MAX_LENGTH>] [-markov <MARKOV_MODEL>] # [-min_markov <MIN_MARKOV>] # [-max_markov <MAX_MARKOV>] [-noov <NOOV_DETECTION>] # [-class_int <CLASS_INT>] [-str <STR_SUMMED>] # [-graph_title <GRAPH_TITLE>] # [-image_format <IMAGE_FORMAT>] # [-disco [<DISCO_ALGORITHM> [<DISCO_ALGORITHM> ...]]] # [-source <SOURCE_FILE>] [-verb <VERBOSITY>] # [-ref_motif <REF_MOTIF>] -server <SERVEUR> # #optional arguments: # -h, --help show this help message and exit # -test <TEST_FILE>, --test_file <TEST_FILE> # Input test peak sequence in fasta format. # -control <CONTROL_FILE>, --control_file <CONTROL_FILE> # Input control peak sequence in fasta format. # -max_seq_length <MAX_SEQ_LENGTH>, --maxSeqLength <MAX_SEQ_LENGTH> # Maximal sequence length. # -max_motif_number <MAX_MOTIF_NUMBER>, --maxMotifNumber <MAX_MOTIF_NUMBER> # Maximal number of motifs (matrices) to return for # pattern discovery algorithms. # -top_peaks <TOP_PEAKS>, --topPeaks <TOP_PEAKS> # Restrict the analysis to the N peaks at the top of the # input sequence file. # -min_length <MIN_LENGTH>, --minLength <MIN_LENGTH> # Minimal oligonucleotide length. # -max_length <MAX_LENGTH>, --maxLength <MAX_LENGTH> # Maximal oligonucleotide length. # -markov <MARKOV_MODEL>, --markovModel <MARKOV_MODEL> # Order of the Markov model used to estimatd expected # oligonucleotide frequencies for oligo-analysis and # local-word-analysis. # -min_markov <MIN_MARKOV>, --minMarkov <MIN_MARKOV> # Minimal value for markov order. Use in combination # with the next option (max_markov). # -max_markov <MAX_MARKOV>, --maxMarkov <MAX_MARKOV> # Maximal value for markov order. Use in combination # with the previous option (min_markov). # -noov <NOOV_DETECTION>, --noovDetection <NOOV_DETECTION> # No overlapping of oligos allowed if value = 1. # -class_int <CLASS_INT>, --classInt <CLASS_INT> # Class interval for position-analysis. The width of the # position classes, in number of bases (default: 20). # -str <STR_SUMMED>, --strSummed <STR_SUMMED> # Oligonucleotide occurrences found on both stands are # summed (2) or not (1). Default is 2. # -graph_title <GRAPH_TITLE>, --graphTitle <GRAPH_TITLE> # Title displayed on top of the graphs. # -image_format <IMAGE_FORMAT>, --imageFormat <IMAGE_FORMAT> # Image format. All the formats supported by XYgraph can # be used. # -disco [<DISCO_ALGORITHM> [<DISCO_ALGORITHM> ...]], --discoAlgorithm [<DISCO_ALGORITHM> [<DISCO_ALGORITHM> ...]] # Specify the software tool(s) that will be used for # motif discovery # (oligos|dyads|positions|local_words|merged_words). # Several algorithms can be specified either by using a # comma-separated list of algorithms: -disco # oligos,dyads # -source <SOURCE_FILE>, --sourceFile <SOURCE_FILE> # Enter the source of the fasta sequence file. Supported # source: galaxy # -verb <VERBOSITY>, --verbosity <VERBOSITY> # Verbosity. # -ref_motif <REF_MOTIF>, --ref_motif <REF_MOTIF> # Motif annotated in some transcription factor database # (e.g. RegulonDB, Jaspar, TRANSFAC) for the # transcription factor of interest. # -server <SERVEUR>, --server <SERVEUR> # RSAT server # -outGalaxy <OUT_GALAXY>, --outGalaxy <OUT_GALAXY> # #Version 2.0 - 30/01/2015 - Adapted from Jocelyn Brayet, France Genomique team # ###########################################################""" __author__ = 'Jocelyn Brayet' ###########################################################' ## Import import argparse import os import urllib import zipfile import time import platform from suds.client import Client ################################ functions ############################################################ ## Define a function to make a service perform the desired request using provided arguments def call_run_service(service, args): """ Run job in RSAT server. service -> RSAT web service args -> web service request """ result = rsat_service.peak_motifs(args) return result def testNone(argument): """ Test if argument is None or not. argument -> argument give by user """ if not argument is None: variable = argument[0] else: variable = "" return variable ###########################################################' ## Functions to recup results def buildZipUrl(algoResults): """ Recup results give by RSAT server. algoResults -> response gave by RSAT server """ recupResult = str(algoResults) tabResults=recupResult.split("\n") urlZip = tabResults[4].replace("\t","") return urlZip def recupRSATResult(urlResult,nameFile): """ Recup results give by RSAT server. urlResult -> URL gave by RSAT server nameFile -> name of zip file in Galaxy path """ ###########################################################' ## Download RSAT results urllib.urlretrieve(urlResult, nameFile) ###########################################################' ## Decompress results zfile = zipfile.ZipFile(nameFile, 'r') tempflag = 0 folderName ="" for i in zfile.namelist(): ## On parcourt l'ensemble des fichiers de l'archive #logFile.write(i+"\n") ############################### if tempflag ==0: folderName = i tempflag = 1 ############################### if i.endswith('/'): ## S'il s'agit d'un repertoire, on se contente de creer le dossier os.makedirs(i) else: data = zfile.read(i) ## lecture du fichier compresse fp = open(i, "wb") ## creation en local du nouveau fichier fp.write(data) ## ajout des donnees du fichier compresse dans le fichier local fp.close() zfile.close() return folderName ## Tested with python 2.6.6 peakMotifsVersion = '2.0 - 30/01/2015' ###########################################################' # server dictionary serverDict = { "fr_ens":"http://rsat01.biologie.ens.fr/rsat/web_services/RSATWS.wsdl", "fr_mrs":"http://rsat-tagc.univ-mrs.fr/rsat/web_services/RSATWS.wsdl", "fr_ro":"http://rsat.sb-roscoff.fr/web_services/RSATWS.wsdl", "fr_mrs_2":"http://pedagogix-tagc.univ-mrs.fr/rsat/web_services/RSATWS.wsdl", "es":"http://floresta.eead.csic.es/rsat/web_services/RSATWS.wsdl", "mx":"http://embnet.ccg.unam.mx/rsa-tools/web_services/RSATWS.wsdl" } """ serverDict = { "fr_ens":"http://protists.rsat.eu/rsat/web_services/RSATWS.wsdl", "fr_mrs":"http://fungi.rsat.eu/rsat/web_services/RSATWS.wsdl", "fr_ro":"http://metazoa.rsat.eu/web_services/RSATWS.wsdl", "fr_mrs_2":"http://teaching.rsat.eu/rsat/web_services/RSATWS.wsdl", "es":"http://plants.rsat.eu/rsat/web_services/RSATWS.wsdl", "mx":"http://prokaryotes.rsat.eu/rsa-tools/web_services/RSATWS.wsdl" } """ if __name__ == '__main__': ########### peak motifs arguments #################### parser = argparse.ArgumentParser(description='Client to download peak-motifs results from RSAT server.', epilog='Version '+peakMotifsVersion) parser.add_argument('-test', '--test_file', metavar='<TEST_FILE>', type=argparse.FileType('r'), nargs=1, help='Input test peak sequence in fasta format.', required=True) parser.add_argument('-control', '--control_file', metavar='<CONTROL_FILE>', type=argparse.FileType('r'), nargs=1, help='Input control peak sequence in fasta format.', required=False) parser.add_argument('-max_seq_length', '--maxSeqLength', metavar='<MAX_SEQ_LENGTH>', type=int, nargs=1, help='Maximal sequence length.', required=False) parser.add_argument('-max_motif_number', '--maxMotifNumber', metavar='<MAX_MOTIF_NUMBER>', type=int, nargs=1, help='Maximal number of motifs (matrices) to return for pattern discovery algorithms.', required=False) parser.add_argument('-top_peaks', '--topPeaks', metavar='<TOP_PEAKS>', type=int, nargs=1, help='Restrict the analysis to the N peaks at the top of the input sequence file.', required=False) parser.add_argument('-min_length', '--minLength', metavar='<MIN_LENGTH>', type=int, nargs=1, help='Minimal oligonucleotide length.', required=False) parser.add_argument('-max_length', '--maxLength', metavar='<MAX_LENGTH>', type=int, nargs=1, help='Maximal oligonucleotide length.', required=False) parser.add_argument('-markov', '--markovModel', metavar='<MARKOV_MODEL>', type=int, nargs=1, help='Order of the Markov model used to estimatd expected oligonucleotide frequencies for oligo-analysis and local-word-analysis.', required=False) parser.add_argument('-min_markov', '--minMarkov', metavar='<MIN_MARKOV>', type=int, nargs=1, help='Minimal value for markov order. Use in combination with the next option (max_markov).', required=False) parser.add_argument('-max_markov', '--maxMarkov', metavar='<MAX_MARKOV>', type=int, nargs=1, help='Maximal value for markov order. Use in combination with the previous option (min_markov).', required=False) parser.add_argument('-noov', '--noovDetection', metavar='<NOOV_DETECTION>', type=int, nargs=1, help='No overlapping of oligos allowed if value = 1.', required=False) parser.add_argument('-class_int', '--classInt', metavar='<CLASS_INT>', type=int, nargs=1, help='Class interval for position-analysis. The width of the position classes, in number of bases (default: 20).', required=False) parser.add_argument('-str', '--strSummed', metavar='<STR_SUMMED>', type=int, nargs=1, help='Oligonucleotide occurrences found on both stands are summed (2) or not (1). Default is 2.', required=False) parser.add_argument('-graph_title', '--graphTitle', metavar='<GRAPH_TITLE>', type=str, nargs=1, help='Title displayed on top of the graphs.', required=False) parser.add_argument('-image_format', '--imageFormat', metavar='<IMAGE_FORMAT>', type=str, nargs=1, help='Image format. All the formats supported by XYgraph can be used.', required=False) parser.add_argument('-disco', '--discoAlgorithm', metavar='<DISCO_ALGORITHM>', type=str, nargs='*', help='Specify the software tool(s) that will be used for motif discovery (oligos|dyads|positions|local_words|merged_words). Several algorithms can be specified either by using a comma-separated list of algorithms: -disco oligos,dyads', required=False) parser.add_argument('-source', '--sourceFile', metavar='<SOURCE_FILE>', type=str, nargs=1, help='Enter the source of the fasta sequence file. Supported source: galaxy', required=False) parser.add_argument('-verb', '--verbosity', metavar='<VERBOSITY>', type=int, nargs=1, help='Verbosity.', required=False) parser.add_argument('-ref_motif', '--ref_motif', metavar='<REF_MOTIF>', type=argparse.FileType('r'), nargs=1, help='Motif annotated in some transcription factor database (e.g. RegulonDB, Jaspar, TRANSFAC) for the transcription factor of interest.', required=False) parser.add_argument('-motif_db', '--motif_db', metavar='<MOTIF_DB>', type=str, nargs=1, help='Name of motif database.', required=False) ################################ galaxy arguments ############################################################ parser.add_argument('-outGalaxy', '--outGalaxy', metavar='<OUT_GALAXY>', type=str, nargs=1, required=True) parser.add_argument('-outGalaxy2', '--outGalaxy2', metavar='<OUT_GALAXY2>', type=str, nargs=1, required=False) parser.add_argument('-server', '--server', metavar='<SERVEUR>', type=str, nargs=1, help='RSAT server', required=True) ###########################################################' args = parser.parse_args() ########################################################### ## Test arguments fasta_test_file = args.test_file[0].read() if not args.control_file is None : fasta_control_file = args.control_file[0].read() else : fasta_control_file ="" if not args.ref_motif is None : refMotifValue = args.ref_motif[0].read() else : refMotifValue ="" maxSeqLengthValue = testNone(args.maxSeqLength) maxMotifNumberValue = testNone(args.maxMotifNumber) topPeaksNumber = testNone(args.topPeaks) minLengthNumber = testNone(args.minLength) maxLengthNumber = testNone(args.maxLength) markovModelValue = testNone(args.markovModel) minMarkovValue = testNone(args.minMarkov) maxMarkovValue = testNone(args.maxMarkov) noovValue = testNone(args.noovDetection) classIntValue = testNone(args.classInt) strSummedValue = testNone(args.strSummed) graphTitleValue = testNone(args.graphTitle) imageFormatValue = testNone(args.imageFormat) discoAlgorithmValue = testNone(args.discoAlgorithm) sourceFileValue = testNone(args.sourceFile) verbosityValue = testNone(args.verbosity) motifdbValue = testNone(args.motif_db) outGalaxyValue = testNone(args.outGalaxy) outGalaxyValue2 = testNone(args.outGalaxy2) serverValue = testNone(args.server) ###########################################################' ## Create the SOAP client to request the RSAT service # Define URL for RSAT services url = serverDict[serverValue] print url # Create the client client = Client(url) # Need service interface to perform requests rsat_service = client.service # Define client header userAgent = 'RSAT-Client/v%s (%s; Python %s; %s)' % ( peakMotifsVersion, os.path.basename( __file__ ), platform.python_version(), platform.system() ) httpHeaders = {'User-agent': userAgent} client.set_options(headers=httpHeaders) client.set_options(timeout=300) ###########################################################' ## Create request peakMotifsRequest = { 'test' : fasta_test_file, 'control' : fasta_control_file, 'max_seq_length' : maxSeqLengthValue, 'max_motif_number' : maxMotifNumberValue, 'top_peaks' : topPeaksNumber, 'min_length' : minLengthNumber, 'max_length' : maxLengthNumber, 'markov' : markovModelValue, 'min_markov' : minMarkovValue, 'max_markov' : maxMarkovValue, 'noov' : noovValue, 'class_int' : classIntValue, 'str' : strSummedValue, 'graph_title' : graphTitleValue, 'image_format' : imageFormatValue, 'disco' : discoAlgorithmValue, 'source' : sourceFileValue, 'ref_motif' : refMotifValue, 'verbosity' : verbosityValue, 'motif_db' : motifdbValue #'output' : 'blablabla' } ###########################################################' ## Run job in RSAT server result = call_run_service(rsat_service, peakMotifsRequest) print("###############################################\n") print("Command performed on server\n") print(result.command) print("\n") print("###############################################\n") print("Result\n") print(result.server) ###########################################################' ## Build result URL """ zipFileDict = { "fr_ens":"http://protists.rsat.eu/rsat/", "fr_mrs":"http://fungi.rsat.eu/rsat/", "fr_ro":"http://metazoa.rsat.eu/", "fr_mrs_2":"http://teaching.rsat.eu/rsat/", "es":"http://plants.rsat.eu/rsat/", "mx":"http://prokaryotes.rsat.eu/rsa-tools/" } """ nameFile = "peak-motifs_archive.zip" urlResult=buildZipUrl(result.server) print urlResult ###########################################################' ## Wait RSAT server while urllib.urlopen(urlResult).getcode() != 200: time.sleep(5) folderName=recupRSATResult(urlResult,nameFile) while(not(os.path.exists(folderName+"peak-motifs_synthesis.html"))): os.popen("rm -rf "+folderName) recupRSATResult(urlResult,nameFile) os.popen("cp "+folderName+"peak-motifs_synthesis.html "+outGalaxyValue) ###########################################################' ##Create results folder name # Create results folder outGalaxyValueDir = outGalaxyValue.replace(".dat","_files") os.popen("mkdir "+outGalaxyValueDir) # Copy results files in results folder os.popen("cp "+nameFile+" "+outGalaxyValueDir+"/"+nameFile) os.popen("cp -R "+folderName+"* " + outGalaxyValueDir+"/") if not outGalaxyValue2 =="": os.popen("cp "+folderName+"results/sites/peak-motifs_all_motifs_seqcoord.bed "+outGalaxyValue2)