view peakMotifs_wrapper.py @ 52:a1d369ead6d7 draft default tip

Uploaded
author jbrayet
date Tue, 29 Sep 2015 08:25:39 -0400
parents 9e193aa2b9d2
children
line wrap: on
line source

#! /usr/bin/python
# -*- coding: utf8 -*-
"""#Peak Motifs - developed by Jocelyn Brayet <jocelyn.brayet@curie.fr>
#Copyright (C) 2015  Institut Curie
#
#This program is free software: you can redistribute it and/or modify
#it under the terms of the GNU General Public License as published by
#the Free Software Foundation, either version 3 of the License, or
#(at your option) any later version.
#
#This program is distributed in the hope that it will be useful,
#but WITHOUT ANY WARRANTY; without even the implied warranty of
#MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#GNU General Public License for more details.
#
#You should have received a copy of the GNU General Public License
#along with this program.  If not, see <http://www.gnu.org/licenses/>.
#
###########################################################'
# 
#Client to download peak-motifs results from RSAT server.
#
#
#usage: peak-motifs_soap.py [-h] -test <TEST_FILE> [-control <CONTROL_FILE>]
#				[-max_seq_length <MAX_SEQ_LENGTH>]
#				[-max_motif_number <MAX_MOTIF_NUMBER>]
#				[-top_peaks <TOP_PEAKS>] [-min_length <MIN_LENGTH>]
#				[-max_length <MAX_LENGTH>] [-markov <MARKOV_MODEL>]
#				[-min_markov <MIN_MARKOV>]
#				[-max_markov <MAX_MARKOV>] [-noov <NOOV_DETECTION>]
#				[-class_int <CLASS_INT>] [-str <STR_SUMMED>]
#				[-graph_title <GRAPH_TITLE>]
#				[-image_format <IMAGE_FORMAT>]
#				[-disco [<DISCO_ALGORITHM> [<DISCO_ALGORITHM> ...]]]
#				[-source <SOURCE_FILE>] [-verb <VERBOSITY>]
#				[-ref_motif <REF_MOTIF>] -server <SERVEUR>
#
#optional arguments:
#  -h, --help		show this help message and exit
#  -test <TEST_FILE>, --test_file <TEST_FILE>
#			Input test peak sequence in fasta format.
#  -control <CONTROL_FILE>, --control_file <CONTROL_FILE>
#			Input control peak sequence in fasta format.
#  -max_seq_length <MAX_SEQ_LENGTH>, --maxSeqLength <MAX_SEQ_LENGTH>
#			Maximal sequence length.
#  -max_motif_number <MAX_MOTIF_NUMBER>, --maxMotifNumber <MAX_MOTIF_NUMBER>
#			Maximal number of motifs (matrices) to return for
#			pattern discovery algorithms.
#  -top_peaks <TOP_PEAKS>, --topPeaks <TOP_PEAKS>
#			Restrict the analysis to the N peaks at the top of the
#			input sequence file.
#  -min_length <MIN_LENGTH>, --minLength <MIN_LENGTH>
#			Minimal oligonucleotide length.
#  -max_length <MAX_LENGTH>, --maxLength <MAX_LENGTH>
#			Maximal oligonucleotide length.
#  -markov <MARKOV_MODEL>, --markovModel <MARKOV_MODEL>
#			Order of the Markov model used to estimatd expected
#			oligonucleotide frequencies for oligo-analysis and
#			local-word-analysis.
#  -min_markov <MIN_MARKOV>, --minMarkov <MIN_MARKOV>
#			Minimal value for markov order. Use in combination
#			with the next option (max_markov).
#  -max_markov <MAX_MARKOV>, --maxMarkov <MAX_MARKOV>
#			Maximal value for markov order. Use in combination
#			with the previous option (min_markov).
#  -noov <NOOV_DETECTION>, --noovDetection <NOOV_DETECTION>
#			No overlapping of oligos allowed if value = 1.
#  -class_int <CLASS_INT>, --classInt <CLASS_INT>
#			Class interval for position-analysis. The width of the
#			position classes, in number of bases (default: 20).
#  -str <STR_SUMMED>, --strSummed <STR_SUMMED>
#			Oligonucleotide occurrences found on both stands are
#			summed (2) or not (1). Default is 2.
#  -graph_title <GRAPH_TITLE>, --graphTitle <GRAPH_TITLE>
#			Title displayed on top of the graphs.
#  -image_format <IMAGE_FORMAT>, --imageFormat <IMAGE_FORMAT>
#			Image format. All the formats supported by XYgraph can
#			be used.
#  -disco [<DISCO_ALGORITHM> [<DISCO_ALGORITHM> ...]], --discoAlgorithm [<DISCO_ALGORITHM> [<DISCO_ALGORITHM> ...]]
#			Specify the software tool(s) that will be used for
#			motif discovery
#			(oligos|dyads|positions|local_words|merged_words).
#			Several algorithms can be specified either by using a
#			comma-separated list of algorithms: -disco
#			oligos,dyads
#  -source <SOURCE_FILE>, --sourceFile <SOURCE_FILE>
#			Enter the source of the fasta sequence file. Supported
#			source: galaxy
#  -verb <VERBOSITY>, --verbosity <VERBOSITY>
#			Verbosity.
#  -ref_motif <REF_MOTIF>, --ref_motif <REF_MOTIF>
#			Motif annotated in some transcription factor database
#			(e.g. RegulonDB, Jaspar, TRANSFAC) for the
#			transcription factor of interest.
#  -server <SERVEUR>, --server <SERVEUR>
#			RSAT server
#  -outGalaxy <OUT_GALAXY>, --outGalaxy <OUT_GALAXY>
#
#Version 2.0 - 30/01/2015 - Adapted from Jocelyn Brayet, France Genomique team
#
###########################################################"""
__author__ =  'Jocelyn Brayet'

###########################################################'
## Import

import argparse
import os
import urllib
import zipfile
import time
import platform
from suds.client import Client

################################ functions ############################################################
## Define a function to make a service perform the desired request using provided arguments
def call_run_service(service, args):
	"""
	Run job in RSAT server.
		service -> RSAT web service
		args -> web service request 	 
	"""
	
	result = rsat_service.peak_motifs(args)
	return result

def testNone(argument):
	"""
	Test if argument is None or not.
		argument -> argument give by user
	"""

	if not argument is None:
		variable = argument[0]
	else:
		variable = ""
	return variable


###########################################################'
## Functions to recup results

def buildZipUrl(algoResults):
	"""
	Recup results give by RSAT server.
		algoResults -> response gave by RSAT server
	"""
	
	recupResult = str(algoResults)
	tabResults=recupResult.split("\n")
	urlZip = tabResults[4].replace("\t","")
				
	return urlZip

def recupRSATResult(urlResult,nameFile):
	
	"""
	Recup results give by RSAT server.
		urlResult -> URL gave by RSAT server
		nameFile -> name of zip file in Galaxy path
	"""

	###########################################################'
	## Download RSAT results
	urllib.urlretrieve(urlResult, nameFile)

	###########################################################'
	## Decompress results
	zfile = zipfile.ZipFile(nameFile, 'r')

	tempflag = 0
	folderName =""

	for i in zfile.namelist():  ## On parcourt l'ensemble des fichiers de l'archive
	
		#logFile.write(i+"\n")
		###############################
			if tempflag ==0:
				folderName = i
		
			tempflag = 1
		###############################
		
			if i.endswith('/'):   ## S'il s'agit d'un repertoire, on se contente de creer le dossier 
				os.makedirs(i)
			else: 
				data = zfile.read(i)	## lecture du fichier compresse 
				fp = open(i, "wb")	  ## creation en local du nouveau fichier 
				fp.write(data)		  ## ajout des donnees du fichier compresse dans le fichier local 
				fp.close() 
	zfile.close()

	return folderName

## Tested with python 2.6.6
peakMotifsVersion = '2.0 - 30/01/2015'

###########################################################'
# server dictionary
serverDict = {

	"fr_ens":"http://rsat01.biologie.ens.fr/rsat/web_services/RSATWS.wsdl",
	"fr_mrs":"http://rsat-tagc.univ-mrs.fr/rsat/web_services/RSATWS.wsdl",
	"fr_ro":"http://rsat.sb-roscoff.fr/web_services/RSATWS.wsdl",
	"fr_mrs_2":"http://pedagogix-tagc.univ-mrs.fr/rsat/web_services/RSATWS.wsdl",
	"es":"http://floresta.eead.csic.es/rsat/web_services/RSATWS.wsdl",
	"mx":"http://embnet.ccg.unam.mx/rsa-tools/web_services/RSATWS.wsdl"

	}

"""
serverDict = {

	"fr_ens":"http://protists.rsat.eu/rsat/web_services/RSATWS.wsdl",
	"fr_mrs":"http://fungi.rsat.eu/rsat/web_services/RSATWS.wsdl",
	"fr_ro":"http://metazoa.rsat.eu/web_services/RSATWS.wsdl",
	"fr_mrs_2":"http://teaching.rsat.eu/rsat/web_services/RSATWS.wsdl",
	"es":"http://plants.rsat.eu/rsat/web_services/RSATWS.wsdl",
	"mx":"http://prokaryotes.rsat.eu/rsa-tools/web_services/RSATWS.wsdl"
	
	}
"""

if __name__ == '__main__':

	########### peak motifs arguments ####################
	parser = argparse.ArgumentParser(description='Client to download peak-motifs results from RSAT server.', epilog='Version '+peakMotifsVersion)
	
	parser.add_argument('-test', '--test_file', metavar='<TEST_FILE>', type=argparse.FileType('r'), nargs=1, help='Input test peak sequence in fasta format.', required=True)
	parser.add_argument('-control', '--control_file', metavar='<CONTROL_FILE>', type=argparse.FileType('r'), nargs=1, help='Input control peak sequence in fasta format.', required=False)
	parser.add_argument('-max_seq_length', '--maxSeqLength', metavar='<MAX_SEQ_LENGTH>', type=int, nargs=1, help='Maximal sequence length.', required=False)
	parser.add_argument('-max_motif_number', '--maxMotifNumber', metavar='<MAX_MOTIF_NUMBER>', type=int, nargs=1, help='Maximal number of motifs (matrices) to return for pattern discovery algorithms.', required=False)
	parser.add_argument('-top_peaks', '--topPeaks', metavar='<TOP_PEAKS>', type=int, nargs=1, help='Restrict the analysis to the N peaks at the top of the input sequence file.', required=False)
	parser.add_argument('-min_length', '--minLength', metavar='<MIN_LENGTH>', type=int, nargs=1, help='Minimal oligonucleotide length.', required=False)
	parser.add_argument('-max_length', '--maxLength', metavar='<MAX_LENGTH>', type=int, nargs=1, help='Maximal oligonucleotide length.', required=False)
	parser.add_argument('-markov', '--markovModel', metavar='<MARKOV_MODEL>', type=int, nargs=1, help='Order of the Markov model used to estimatd expected oligonucleotide frequencies for oligo-analysis and local-word-analysis.', required=False)
	parser.add_argument('-min_markov', '--minMarkov', metavar='<MIN_MARKOV>', type=int, nargs=1, help='Minimal value for markov order. Use in combination with the next option (max_markov).', required=False)
	parser.add_argument('-max_markov', '--maxMarkov', metavar='<MAX_MARKOV>', type=int, nargs=1, help='Maximal value for markov order. Use in combination with the previous option (min_markov).', required=False)
	parser.add_argument('-noov', '--noovDetection', metavar='<NOOV_DETECTION>', type=int, nargs=1, help='No overlapping of oligos allowed if value = 1.', required=False)
	parser.add_argument('-class_int', '--classInt', metavar='<CLASS_INT>', type=int, nargs=1, help='Class interval for position-analysis. The width of the position classes, in number of bases (default: 20).', required=False)
	parser.add_argument('-str', '--strSummed', metavar='<STR_SUMMED>', type=int, nargs=1, help='Oligonucleotide occurrences found on both stands are summed (2) or not (1). Default is 2.', required=False)
	parser.add_argument('-graph_title', '--graphTitle', metavar='<GRAPH_TITLE>', type=str, nargs=1, help='Title displayed on top of the graphs.', required=False)
	parser.add_argument('-image_format', '--imageFormat', metavar='<IMAGE_FORMAT>', type=str, nargs=1, help='Image format. All the formats supported by XYgraph can be used.', required=False)
	parser.add_argument('-disco', '--discoAlgorithm', metavar='<DISCO_ALGORITHM>', type=str, nargs='*', help='Specify the software tool(s) that will be used for motif discovery (oligos|dyads|positions|local_words|merged_words). Several algorithms can be specified either by using a comma-separated list of algorithms: -disco oligos,dyads', required=False)
	parser.add_argument('-source', '--sourceFile', metavar='<SOURCE_FILE>', type=str, nargs=1, help='Enter the source of the fasta sequence file. Supported source: galaxy', required=False)
	parser.add_argument('-verb', '--verbosity', metavar='<VERBOSITY>', type=int, nargs=1, help='Verbosity.', required=False)
	parser.add_argument('-ref_motif', '--ref_motif', metavar='<REF_MOTIF>', type=argparse.FileType('r'), nargs=1, help='Motif annotated in some transcription factor database (e.g. RegulonDB, Jaspar, TRANSFAC) for the transcription factor of interest.', required=False)
	parser.add_argument('-motif_db', '--motif_db', metavar='<MOTIF_DB>', type=str, nargs=1, help='Name of motif database.', required=False)

	################################ galaxy arguments ############################################################
	parser.add_argument('-outGalaxy', '--outGalaxy', metavar='<OUT_GALAXY>', type=str, nargs=1, required=True)
        parser.add_argument('-outGalaxy2', '--outGalaxy2', metavar='<OUT_GALAXY2>', type=str, nargs=1, required=False)
	parser.add_argument('-server', '--server', metavar='<SERVEUR>', type=str, nargs=1, help='RSAT server', required=True)
	###########################################################'

	args = parser.parse_args()

	###########################################################
	## Test arguments

	fasta_test_file = args.test_file[0].read()
	
	if not args.control_file is None :
		fasta_control_file = args.control_file[0].read()
	else :
		fasta_control_file =""
   
	if not args.ref_motif is None :
		refMotifValue = args.ref_motif[0].read()
	else :
		refMotifValue =""

	maxSeqLengthValue = testNone(args.maxSeqLength)
	maxMotifNumberValue = testNone(args.maxMotifNumber)
	topPeaksNumber = testNone(args.topPeaks)
	minLengthNumber = testNone(args.minLength)
	maxLengthNumber = testNone(args.maxLength)
	markovModelValue = testNone(args.markovModel)
	minMarkovValue = testNone(args.minMarkov)
	maxMarkovValue = testNone(args.maxMarkov)
	noovValue = testNone(args.noovDetection)
	classIntValue = testNone(args.classInt)
	strSummedValue = testNone(args.strSummed)
	graphTitleValue = testNone(args.graphTitle)
	imageFormatValue = testNone(args.imageFormat)
	discoAlgorithmValue = testNone(args.discoAlgorithm)
	sourceFileValue = testNone(args.sourceFile)
	verbosityValue = testNone(args.verbosity)
	motifdbValue = testNone(args.motif_db)
	outGalaxyValue = testNone(args.outGalaxy)
        outGalaxyValue2 = testNone(args.outGalaxy2)
	serverValue = testNone(args.server)

	###########################################################'
	## Create the SOAP client to request the RSAT service

	# Define URL for RSAT services 
	url =  serverDict[serverValue]
	print url

	# Create the client
	client = Client(url)

	# Need service interface to perform requests
	rsat_service = client.service

	# Define client header
	userAgent = 'RSAT-Client/v%s (%s; Python %s; %s)' % (
		peakMotifsVersion, 
		os.path.basename( __file__ ),
		platform.python_version(), 
		platform.system()
	)

	httpHeaders = {'User-agent': userAgent}
	client.set_options(headers=httpHeaders)
	client.set_options(timeout=300)


	###########################################################'
	## Create request
	peakMotifsRequest = {
	
		'test' : fasta_test_file,
		'control' : fasta_control_file,
		'max_seq_length' : maxSeqLengthValue,
		'max_motif_number' : maxMotifNumberValue,
		'top_peaks' : topPeaksNumber,
		'min_length' : minLengthNumber,
		'max_length' : maxLengthNumber,
		'markov' : markovModelValue,
		'min_markov' : minMarkovValue,
		'max_markov' : maxMarkovValue,
		'noov' : noovValue,
		'class_int' : classIntValue,
		'str' : strSummedValue,
		'graph_title' : graphTitleValue,
		'image_format' : imageFormatValue,
		'disco' : discoAlgorithmValue,
		'source' : sourceFileValue,
		'ref_motif' : refMotifValue,
		'verbosity' : verbosityValue,
		'motif_db' : motifdbValue
		#'output' : 'blablabla'
	
	}


	###########################################################'
	## Run job in RSAT server
	result = call_run_service(rsat_service, peakMotifsRequest)

	print("###############################################\n")
	print("Command performed on server\n")
	print(result.command)
	print("\n")
	print("###############################################\n")
	print("Result\n")
	print(result.server)

	###########################################################'
	## Build result URL

	"""
	zipFileDict = {
	
		"fr_ens":"http://protists.rsat.eu/rsat/",
		"fr_mrs":"http://fungi.rsat.eu/rsat/",
		"fr_ro":"http://metazoa.rsat.eu/",
		"fr_mrs_2":"http://teaching.rsat.eu/rsat/",
		"es":"http://plants.rsat.eu/rsat/",
		"mx":"http://prokaryotes.rsat.eu/rsa-tools/"
	
		}
	"""

	nameFile = "peak-motifs_archive.zip"
	urlResult=buildZipUrl(result.server)
	print urlResult

	###########################################################'
	## Wait RSAT server
	while urllib.urlopen(urlResult).getcode() != 200:
		time.sleep(5)

	folderName=recupRSATResult(urlResult,nameFile)
	
	while(not(os.path.exists(folderName+"peak-motifs_synthesis.html"))):
		os.popen("rm -rf "+folderName)
		recupRSATResult(urlResult,nameFile)

        os.popen("cp "+folderName+"peak-motifs_synthesis.html "+outGalaxyValue)

        ###########################################################'
        ##Create results folder name

        # Create results folder
	outGalaxyValueDir = outGalaxyValue.replace(".dat","_files")
	os.popen("mkdir "+outGalaxyValueDir)

        # Copy results files in results folder

	os.popen("cp "+nameFile+" "+outGalaxyValueDir+"/"+nameFile)
        os.popen("cp -R "+folderName+"* " + outGalaxyValueDir+"/")
        
        if not outGalaxyValue2 =="":
            os.popen("cp "+folderName+"results/sites/peak-motifs_all_motifs_seqcoord.bed "+outGalaxyValue2)