| 30 | 1 #! /usr/bin/python | 
|  | 2 # -*- coding: utf8 -*- | 
|  | 3 """#Matrix Scan - developed by Jocelyn Brayet <jocelyn.brayet@curie.fr> | 
|  | 4 #Copyright (C) 2015  Institut Curie | 
|  | 5 # | 
|  | 6 #This program is free software: you can redistribute it and/or modify | 
|  | 7 #it under the terms of the GNU General Public License as published by | 
|  | 8 #the Free Software Foundation, either version 3 of the License, or | 
|  | 9 #(at your option) any later version. | 
|  | 10 # | 
|  | 11 #This program is distributed in the hope that it will be useful, | 
|  | 12 #but WITHOUT ANY WARRANTY; without even the implied warranty of | 
|  | 13 #MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | 
|  | 14 #GNU General Public License for more details. | 
|  | 15 # | 
|  | 16 #You should have received a copy of the GNU General Public License | 
|  | 17 #along with this program.  If not, see <http://www.gnu.org/licenses/>. | 
|  | 18 # | 
|  | 19 ###########################################################' | 
|  | 20 # | 
|  | 21 #Client to download matrix scan results from RSAT server. | 
|  | 22 # | 
|  | 23 # | 
|  | 24 #usage: matrixScan_wrapper.py [-h] -sequence <SEQUENCE> -matrix <MATRIX> | 
|  | 25 #                             -n_treatment <N_TREATMENT> -markov <MARKOV> | 
|  | 26 #                             -matrix_format <MATRIX_FORMAT> | 
|  | 27 #                             [-organism <ORGANISM>] [-background <BACKGROUND>] | 
|  | 28 #                             [-origin <ORIGIN>] [-return_param <RETURN_PARAM>] | 
|  | 29 #                             [-uth <UTH>] -server <SERVEUR> -outGalaxy | 
|  | 30 #                             <OUT_GALAXY> | 
|  | 31 # | 
|  | 32 #optional arguments: | 
|  | 33 #  -h, --help            show this help message and exit | 
|  | 34 #  -sequence <SEQUENCE>, --sequence <SEQUENCE> | 
|  | 35 #                        Sequence(s) to scan - all the formats supported in | 
|  | 36 #                        RSAT can be used as input (default: fasta). | 
|  | 37 #  -matrix <MATRIX>, --matrix <MATRIX> | 
|  | 38 #                        Matrix/ces to scan with. The matrix format is | 
|  | 39 #                        specified with the option "matrix_format" (see below) | 
|  | 40 #                        Default format: tab. | 
|  | 41 #  -n_treatment <N_TREATMENT>, --n_treatment <N_TREATMENT> | 
|  | 42 #                        Treatment of N characters. These characters are often | 
|  | 43 #                        used in DNA sequences to represent undefined or masked | 
|  | 44 #                        nucleotides (skip or score). | 
|  | 45 #  -markov <MARKOV>, --markov <MARKOV> | 
|  | 46 #                        Order of the markov chain for the background model. | 
|  | 47 #  -matrix_format <MATRIX_FORMAT>, --matrix_format <MATRIX_FORMAT> | 
|  | 48 #                        Supported fields: tab, cb, transfac, jaspar, | 
|  | 49 #                        consensus, gibbs, meme, assembly. | 
|  | 50 #  -organism <ORGANISM>, --organism <ORGANISM> | 
|  | 51 #                        To use a precalculated background model from RSAT, | 
|  | 52 #                        choose the organism corresponding to the background | 
|  | 53 #                        model. | 
|  | 54 #  -background <BACKGROUND>, --background <BACKGROUND> | 
|  | 55 #                        Type of sequences used as background model for | 
|  | 56 #                        estimating expected oligonucleotide frequencies. | 
|  | 57 #                        Supported: upstream, upstream-noorf | 
|  | 58 #  -origin <ORIGIN>, --origin <ORIGIN> | 
|  | 59 #                        Define the origin for the calculation of positions. | 
|  | 60 #  -return_param <RETURN_PARAM>, --return_param <RETURN_PARAM> | 
|  | 61 #                        List of fields to return. | 
|  | 62 #  -uth <UTH>, --uth <UTH> | 
#                        Upper threshold on some parameter.
|  | 64 #  -server <SERVEUR>, --server <SERVEUR> | 
|  | 65 #  -outGalaxy <OUT_GALAXY>, --outGalaxy <OUT_GALAXY> | 
|  | 66 # | 
|  | 67 #Version 0.1 - 10/03/2015 | 
|  | 68 # | 
|  | 69 ###########################################################""" | 
|  | 70 __author__ =  'Jocelyn Brayet' | 
|  | 71 matrixScanVersion = '0.1 - 10/03/2015' | 
|  | 72 | 
|  | 73 | 
|  | 74 ###########################################################' | 
|  | 75 ## Import | 
|  | 76 | 
|  | 77 import argparse | 
|  | 78 import os | 
|  | 79 import urllib | 
|  | 80 from suds.client import Client | 
|  | 81 import platform | 
|  | 82 | 
|  | 83 ###########################################################' | 
|  | 84 | 
|  | 85 ################################ functions ############################################################ | 
|  | 86 ## Define a function to make a service perform the desired request using provided arguments | 
def call_run_service(service, args):
    """
    Run a matrix-scan job on an RSAT server.

        service -> RSAT web service interface; must expose a
                   ``matrix_scan`` method (e.g. ``Client(url).service``)
        args -> web service request (mapping of request fields)

    Returns whatever ``service.matrix_scan(args)`` returns.
    """

    # Use the service that was passed in rather than a module-level global,
    # so the function also works when imported or called from another scope.
    return service.matrix_scan(args)
|  | 96 | 
|  | 97 def testNone(argument): | 
|  | 98 	""" | 
|  | 99 	Test if argument is None or not. | 
|  | 100 		argument -> argument give by user | 
|  | 101 	""" | 
|  | 102 | 
|  | 103 	if not argument is None: | 
|  | 104 		variable = argument[0] | 
|  | 105 	else: | 
|  | 106 		variable = "" | 
|  | 107 	return variable | 
|  | 108 | 
|  | 109 ###########################################################' | 
|  | 110 | 
|  | 111 ###########################################################' | 
|  | 112 # server dictionary | 
# Maps the short identifier accepted by the -server option to the WSDL URL
# used to build the SOAP client.
# NOTE(review): http://protists.rsat.eu/ was mentioned in a comment here but
# has no entry -- confirm whether it should be added.
serverDict = dict(
    fr_ens="http://rsat01.biologie.ens.fr/rsa-tools/web_services/RSATWS.wsdl",
    fr_mrs="http://rsat-tagc.univ-mrs.fr/rsat/web_services/RSATWS.wsdl",
    fr_ro="http://rsat.sb-roscoff.fr/web_services/RSATWS.wsdl",
    fr_mrs_2="http://pedagogix-tagc.univ-mrs.fr/rsat/web_services/RSATWS.wsdl",
    es="http://floresta.eead.csic.es/rsat/web_services/RSATWS.wsdl",
    mx="http://embnet.ccg.unam.mx/rsa-tools/web_services/RSATWS.wsdl",
)
|  | 124 | 
|  | 125 | 
if __name__ == '__main__':

    # Script entry point: parse the command line, submit a matrix-scan
    # request to the selected RSAT server through SOAP, download the result
    # file and copy it to the path given with -outGalaxy.

    # Imported here because they are only needed by the script itself.
    import shutil
    try:
        from urllib.request import urlretrieve  # Python 3
    except ImportError:
        from urllib import urlretrieve  # Python 2

    parser = argparse.ArgumentParser(
        description='Client to download matrix-scan results from RSAT server.',
        epilog='Version ' + matrixScanVersion)

    ########### matrix-scan arguments #########################

    parser.add_argument(
        '-sequence', '--sequence', metavar='<SEQUENCE>',
        type=argparse.FileType('r'), nargs=1, required=True,
        help='Sequence(s) to scan - all the formats supported in RSAT can be used as input (default: fasta).')
    parser.add_argument(
        '-matrix', '--matrix', metavar='<MATRIX>',
        type=argparse.FileType('r'), nargs=1, required=True,
        help='Matrix/ces to scan with. The matrix format is specified with the option "matrix_format" (see below) Default format: tab.')
    parser.add_argument(
        '-n_treatment', '--n_treatment', metavar='<N_TREATMENT>',
        type=str, nargs=1, required=True,
        help='Treatment of N characters. These characters are often used in DNA sequences to represent undefined or masked nucleotides (skip or score).')
    parser.add_argument(
        '-markov', '--markov', metavar='<MARKOV>',
        type=int, nargs=1, required=True,
        help='Order of the markov chain for the background model.')
    parser.add_argument(
        '-matrix_format', '--matrix_format', metavar='<MATRIX_FORMAT>',
        type=str, nargs=1, required=True,
        help='Supported fields: tab, cb, transfac, jaspar, consensus, gibbs, meme, assembly.')

    parser.add_argument(
        '-organism', '--organism', metavar='<ORGANISM>',
        type=str, nargs=1, required=False,
        help='To use a precalculated background model from RSAT, choose the organism corresponding to the background model.')
    parser.add_argument(
        '-background', '--background', metavar='<BACKGROUND>',
        type=str, nargs=1, required=False,
        help='Type of sequences used as background model for estimating expected oligonucleotide frequencies. Supported: upstream, upstream-noorf')
    parser.add_argument(
        '-origin', '--origin', metavar='<ORIGIN>',
        type=str, nargs=1, required=False,
        help='Define the origin for the calculation of positions.')
    parser.add_argument(
        '-return_param', '--return_param', metavar='<RETURN_PARAM>',
        type=str, nargs=1, required=False,
        help='List of fields to return.')
    # -uth is an *upper* threshold; the value is sent below as "pval <UTH>".
    parser.add_argument(
        '-uth', '--uth', metavar='<UTH>',
        type=str, nargs=1, required=False,
        help='Upper threshold on some parameter.')

    ########### galaxy arguments ##############################

    # Restricting the choices makes argparse report an unknown server with a
    # usage message instead of a KeyError on the serverDict lookup below.
    parser.add_argument(
        '-server', '--server', metavar='<SERVEUR>',
        type=str, nargs=1, required=True, choices=sorted(serverDict))
    parser.add_argument(
        '-outGalaxy', '--outGalaxy', metavar='<OUT_GALAXY>',
        type=str, nargs=1, required=True)

    ###########################################################

    args = parser.parse_args()

    # Read both inputs completely, then close the handles argparse opened.
    with args.sequence[0] as sequence_handle:
        sequence_file = sequence_handle.read()
    with args.matrix[0] as matrix_handle:
        matrix_file = matrix_handle.read()

    # Every option is parsed with nargs=1, i.e. as a one-element list (or
    # None when an optional one is absent); testNone unwraps it to the bare
    # value, or "" when absent.
    serverValue = testNone(args.server)
    matrixFormatValue = testNone(args.matrix_format)
    n_treatmentValue = testNone(args.n_treatment)
    outGalaxyValue = testNone(args.outGalaxy)
    markovValue = testNone(args.markov)
    organismValue = testNone(args.organism)
    backgroundValue = testNone(args.background)
    originValue = testNone(args.origin)
    returnValue = testNone(args.return_param)
    uthValue = testNone(args.uth)

    ###########################################################
    ## Create the SOAP client to request the RSAT service

    # WSDL URL of the selected RSAT server
    url = serverDict[serverValue]
    client = Client(url)

    # Service interface used to perform requests. This stays a module-level
    # name because call_run_service may look it up as a global.
    rsat_service = client.service

    # Client header sent with each request
    userAgent = 'RSAT-Client/v%s (%s; Python %s; %s)' % (
        matrixScanVersion,
        os.path.basename(__file__),
        platform.python_version(),
        platform.system()
    )
    httpHeaders = {'User-agent': userAgent}
    client.set_options(headers=httpHeaders)
    client.set_options(timeout=300)

    ###########################################################
    ## Build and send the request

    # Fields of the MatrixScanRequest type, as printed by
    # client.factory.create('matrix_scan'):
    #   output, sequence, tmp_sequence_infile, matrix, tmp_matrix_infile,
    #   sequence_format, matrix_format, quick, n_treatment, consensus_name,
    #   pseudo, equi_pseudo, top_matrices, background_model,
    #   tmp_background_infile, organism, background, background_input,
    #   background_window, markov, background_pseudo, return_fields,
    #   sort_distrib, lth[], uth[], str, verbosity, origin, decimals,
    #   crer_ids
    # Only the subset below is sent; 'quick' is always set.
    matrixScanRequest = {
        'sequence': sequence_file,
        'matrix': matrix_file,
        'markov': markovValue,
        'n_treatment': n_treatmentValue,
        'matrix_format': matrixFormatValue,
        'background': backgroundValue,
        'organism': organismValue,
        'origin': originValue,
        'return_fields': returnValue,
        'quick': "-quick",
    }

    # The threshold is only sent when the user supplied one, and it is
    # always applied to the "pval" field.
    if uthValue != "":
        matrixScanRequest['uth'] = 'pval ' + uthValue

    result = call_run_service(rsat_service, matrixScanRequest)

    print(url)

    print("###############################################")
    print("Command performed on server")
    print(result.command)
    print("###############################################")
    print("Result")
    print(result.server)

    ###########################################################
    ## Download the result file and hand it over to Galaxy

    nameFile = "matrix-scan_results.txt"

    # result.server is a path under "$RSAT/public_html/"; swap that prefix
    # for the server's base URL (the WSDL URL minus its trailing
    # "web_services/RSATWS.wsdl") to obtain a downloadable URL.
    urlResult = result.server.replace(
        "$RSAT/public_html/",
        url.replace("web_services/RSATWS.wsdl", ""))

    urlretrieve(urlResult, nameFile)

    # Copy without going through a shell: safe for paths containing spaces
    # or shell metacharacters, and failures raise instead of being ignored.
    shutil.copy(nameFile, outGalaxyValue)
|  | 313 | 
|  | 314 |