Mercurial > repos > brian-craft > cancer_browser
changeset 0:78cf37bd389d draft default tip
Uploaded
author | brian-craft |
---|---|
date | Fri, 21 Jun 2013 13:27:39 -0400 |
parents | |
children | |
files | ucsc_cancer_download.py ucsc_cancerbrowser.xml |
diffstat | 2 files changed, 108 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
#!/usr/bin/env python
"""Galaxy data-source download script for the UCSC Cancer Genomics Browser.

File: ucsc_cancer_download.py

Usage:
    ucsc_cancer_download.py <genomic_output> <clinical_output> <max_file_size>

The <genomic_output> file initially holds the request parameters (JSON with
a 'param_dict' entry, or tab-separated key/value lines).  The script reads
'URL' and 'URL_method' from it, downloads the resource over the same file,
treats the download as a tar archive, and moves the archive members whose
'.json' metadata declares a matching 'type' onto the two output paths.

<max_file_size> is the maximum allowed download size in bytes; 0 (or an
unparsable value) disables the check.
"""
import os
import re
import socket
import sys
import tarfile
import urllib

from galaxy import eggs  # eggs needs to be imported so that galaxy.util can find docutils egg...
from galaxy.util.json import from_json_string, to_json_string
from galaxy.util import get_charset_from_http_headers
import galaxy.model  # need to import model before sniff to resolve a circular import dependency
from galaxy.datatypes import sniff

# (output key, regex matched against the 'type' field of a member's .json metadata)
filemap = [
    ('genomic', r'genomic(Segment|Matrix)$'),
    ('clinical', r'clinicalMatrix$'),
]

# Output key -> destination path given on the command line.
files = {
    'genomic': sys.argv[1],
    'clinical': sys.argv[2],
}


def parse_size_limit(raw):
    """Return the size limit in bytes as an int; 0 means "no limit".

    The limit arrives as a command-line string.  Comparing that string with
    an integer never triggers on Python 2, so it must be converted first.
    Values that are not integers (e.g. 'None') are treated as "no limit".
    """
    try:
        return int(raw)
    except (TypeError, ValueError):
        return 0


max_file_size = parse_size_limit(sys.argv[3])


def file_type(file):
    """Return the 'type' entry of the JSON metadata file at path `file`."""
    with open(file) as f:
        return from_json_string(f.read())['type']


def stop_err(msg):
    """Write `msg` to stderr and terminate with a non-zero exit status."""
    sys.stderr.write(msg)
    sys.exit(1)


def load_input_parameters(filename, erase_file=True):
    """Read the data-source parameters stored in `filename`.

    The file is first parsed as JSON, taking the parameters from its
    'param_dict' entry.  If that fails, it is re-read as tab-separated
    key/value lines; lines without a second field are skipped.

    Returns a tuple (json_params, datasource_params) where json_params is
    the parsed JSON object (None when the fallback format was used) and
    datasource_params is a dict of parameters.  When `erase_file` is true
    the file is truncated afterwards so the parameters are removed from it.
    """
    datasource_params = {}
    try:
        with open(filename, 'r') as handle:
            json_params = from_json_string(handle.read())
        # A JSON document without 'param_dict' yields an empty dict, not None.
        datasource_params = json_params.get('param_dict') or {}
    except Exception:
        json_params = None
        with open(filename, 'r') as handle:
            for line in handle:
                fields = line.strip().split('\t')
                if len(fields) > 1:
                    datasource_params[fields[0]] = fields[1]
    if erase_file:
        # open file for writing, then close, removes params from file
        open(filename, 'w').close()
    return json_params, datasource_params


def load_file(files):
    """Download the requested resource into files['genomic'].

    The URL and HTTP method are read from the parameter file that initially
    occupies files['genomic'].  Exits through stop_err() when the URL is
    missing, the method is unsupported, the request fails, the advertised
    Content-Length exceeds `max_file_size`, or the transfer fails.
    """
    filename = files['genomic']
    job_params, params = load_input_parameters(filename, False)
    # using exactly URL indicates that only one dataset is being downloaded
    URL = params.get('URL', None)
    URL_method = params.get('URL_method', None)
    if not URL:
        stop_err('No URL was provided by the remote data source.')
    socket.setdefaulttimeout(600)
    try:
        if not URL_method or URL_method == 'get':
            page = urllib.urlopen(URL)
        elif URL_method == 'post':
            page = urllib.urlopen(URL, urllib.urlencode(params))
        else:
            page = None
    except Exception as e:
        stop_err('The remote data source application may be off line, please try again later. Error: %s' % str(e))
    if page is None:
        # Previously this path fell through with `page` unbound (NameError).
        stop_err('Unsupported URL_method: %s' % URL_method)
    if max_file_size:
        file_size = int(page.info().get('Content-Length', 0))
        if file_size > max_file_size:
            stop_err('The size of the data (%d bytes) you have requested exceeds the maximum allowed (%d bytes) on this server.' % (file_size, max_file_size))
    try:
        # The parameter file was deliberately not erased above, so truncate
        # on open; otherwise stale parameter bytes could trail a download
        # that is shorter than the parameter file.
        fd = os.open(filename, os.O_WRONLY | os.O_CREAT | os.O_TRUNC)
        cur_filename, is_multi_byte = sniff.stream_to_open_named_file(
            page, fd, filename,
            source_encoding=get_charset_from_http_headers(page.headers))
    except Exception as e:
        stop_err('Unable to fetch %s:\n%s' % (URL, e))


def check_member_names(names):
    """Abort if any archive member name would land outside the working dir.

    The archive comes from a remote server, so absolute paths and names
    that climb out of the current directory via '..' are rejected before
    extraction.  This checks names only; it does not inspect link targets.
    """
    for name in names:
        normalized = os.path.normpath(name)
        escapes = (normalized == os.pardir
                   or normalized.startswith(os.pardir + os.sep))
        if os.path.isabs(normalized) or escapes:
            stop_err('Unsafe path in tarball: %s' % name)


def unpack_outputs(files):
    """Extract the downloaded tarball and move its members onto the outputs.

    Each '<member>.json' entry in the archive is metadata for '<member>'.
    For every (key, pattern) in `filemap`, the first member whose metadata
    'type' matches the pattern is renamed to files[key].
    """
    try:
        tar = tarfile.open(files['genomic'])
    except (tarfile.TarError, IOError) as e:
        stop_err('Downloaded file is not a readable tar archive: %s' % e)
    try:
        names = tar.getnames()
        check_member_names(names)
        tar.extractall()
    finally:
        tar.close()

    metafiles = [n for n in names if n.endswith('.json')]
    withtype = [(file_type(meta), meta[0:-len(".json")]) for meta in metafiles]

    renames = []
    for name, pat in filemap:
        matches = [n for (t, n) in withtype if re.search(pat, t)]
        if not matches:
            stop_err('Missing required file type in tarball')
        renames.append((matches[0], name))
    for frm, to in renames:
        os.rename(frm, files[to])


load_file(files)
unpack_outputs(files)
<?xml version="1.0"?>
<!--
    Tool definition file: ucsc_cancerbrowser.xml

    If the value of 'URL_method' is 'get', the request will consist of the value of 'URL' coming back in
    the initial response.  If the value of 'URL_method' is 'post', any additional params coming back in the
    initial response ( in addition to 'URL' ) will be url-encoded and sent as the body of a post to 'URL'.
-->
<tool name="UCSC Cancer Genomics Browser" id="ucsc_cancer_browser1" tool_type="data_source">
    <description>server</description>
    <!-- Positional arguments to the script: the genomic output path, the clinical output path,
         and the configured output size limit. -->
    <command interpreter="python">ucsc_cancer_download.py $genomic $clinical $__app__.config.output_size_limit</command>
    <!-- The remote site is reached with a GET to the action URL; GALAXY_URL and tool_id are the
         parameters declared for that request. NOTE(review): presumably the remote site uses them
         to send the selection back to this tool; confirm against the Galaxy data_source docs. -->
    <inputs action="https://genome-cancer.ucsc.edu/proj/site/hgHeatmap" check_values="false" method="get">
        <display>go to UCSC Cancer Browser $GALAXY_URL</display>
        <param name="GALAXY_URL" type="baseurl" value="/tool_runner" />
        <param name="tool_id" type="hidden" value="ucsc_cancer_browser1" />
    </inputs>
    <!-- Remote parameter names mapped to Galaxy names; 'missing' is the value used when the
         remote response omits the parameter ('get' for URL_method, empty for URL). -->
    <request_param_translation>
        <request_param galaxy_name="URL_method" remote_name="URL_method" missing="get" />
        <request_param galaxy_name="URL" remote_name="URL" missing="" />
    </request_param_translation>
    <uihints minwidth="800"/>
    <!-- Both labels take the last path component of URL, drop its final extension, and append
         ' genomic' or ' clinical'. -->
    <outputs>
        <data name="genomic" format="tabular" label="#echo $URL.rsplit('/',1)[1].rsplit('.',1)[0]+' genomic'" />
        <data name="clinical" format="tabular" label="#echo $URL.rsplit('/',1)[1].rsplit('.',1)[0]+' clinical'"/>
    </outputs>
    <options sanitize="False" refresh="True"/>
</tool>