# HG changeset patch
# User blankenberg
# Date 1422302056 18000
# Node ID 0e5299e77334f4e09f90160f340dcd150c53cb70
# Parent d59e1d23b38862177107dca40eeda1a914c860e4
Uploaded
diff -r d59e1d23b388 -r 0e5299e77334 data_manager/data_manager_fetch_genome_all_fasta_dbkeys.py
--- a/data_manager/data_manager_fetch_genome_all_fasta_dbkeys.py Wed Jul 02 00:35:38 2014 -0400
+++ b/data_manager/data_manager_fetch_genome_all_fasta_dbkeys.py Mon Jan 26 14:54:16 2015 -0500
@@ -14,7 +14,7 @@
import gzip
import bz2
-from galaxy.util.json import from_json_string, to_json_string
+from json import loads, dumps
CHUNK_SIZE = 2**20 #1mb
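
The import swap above is a straight substitution: `loads` and `dumps` from the standard library `json` module behave the same way as Galaxy's `from_json_string` and `to_json_string` wrappers for this script's purposes. A minimal sketch of the round-trip, using a hypothetical `params.json` path for illustration only:

    from json import loads, dumps

    # Read the parameter dict Galaxy wrote for the job, then write results back.
    # 'params.json' is a stand-in path, not a path used by the script.
    params = loads( open( 'params.json' ).read() )       # previously from_json_string(...)
    open( 'params.json', 'w' ).write( dumps( params ) )  # previously to_json_string(...)
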
@@ -55,7 +55,7 @@
def _get_stream_readers_for_tar( file_obj, tmp_dir ):
fasta_tar = tarfile.open( fileobj=file_obj, mode='r:*' )
- return [ fasta_tar.extractfile( member ) for member in fasta_tar.getmembers() ]
+ return filter( lambda x: x is not None, [ fasta_tar.extractfile( member ) for member in fasta_tar.getmembers() ] )
def _get_stream_readers_for_zip( file_obj, tmp_dir ):
fasta_zip = zipfile.ZipFile( file_obj, 'r' )
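
The change to `_get_stream_readers_for_tar` guards against `tarfile.TarFile.extractfile()` returning `None`, which it does for members that are not regular files (directories, for example). An equivalent reading of the new return expression, written as a list comprehension:

    import tarfile

    def _get_stream_readers_for_tar( file_obj, tmp_dir ):
        fasta_tar = tarfile.open( fileobj=file_obj, mode='r:*' )
        # extractfile() yields None for non-file members; drop those so the
        # downstream FASTA streaming code only sees real file-like objects.
        readers = [ fasta_tar.extractfile( member ) for member in fasta_tar.getmembers() ]
        return [ reader for reader in readers if reader is not None ]
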
@@ -177,7 +177,6 @@
def download_from_ucsc( data_manager_dict, params, target_directory, dbkey, dbkey_name, sequence_id, sequence_name ):
UCSC_FTP_SERVER = 'hgdownload.cse.ucsc.edu'
- UCSC_CHROM_FA_FILENAME = 'chromFa'
UCSC_DOWNLOAD_PATH = '/goldenPath/%s/bigZips/'
COMPRESSED_EXTENSIONS = [ ( '.tar.gz', _get_stream_readers_for_tar ), ( '.tar.bz2', _get_stream_readers_for_tar ), ( '.zip', _get_stream_readers_for_zip ), ( '.fa.gz', _get_stream_readers_for_gzip ), ( '.fa.bz2', _get_stream_readers_for_bz2 ) ]
@@ -186,6 +185,8 @@
email = 'anonymous@example.com'
ucsc_dbkey = params['param_dict']['reference_source']['requested_dbkey'] or dbkey
+ UCSC_CHROM_FA_FILENAMES = [ '%s.chromFa' % ucsc_dbkey, 'chromFa' ]
+
ftp = FTP( UCSC_FTP_SERVER )
ftp.login( 'anonymous', email )
@@ -195,9 +196,13 @@
ucsc_file_name = None
get_stream_reader = None
ext = None
- for ext, get_stream_reader in COMPRESSED_EXTENSIONS:
- if "%s%s" % ( UCSC_CHROM_FA_FILENAME, ext ) in path_contents:
- ucsc_file_name = "%s%s%s" % ( ucsc_path, UCSC_CHROM_FA_FILENAME, ext )
+ ucsc_chrom_fa_filename = None
+ for ucsc_chrom_fa_filename in UCSC_CHROM_FA_FILENAMES:
+ for ext, get_stream_reader in COMPRESSED_EXTENSIONS:
+ if "%s%s" % ( ucsc_chrom_fa_filename, ext ) in path_contents:
+ ucsc_file_name = "%s%s%s" % ( ucsc_path, ucsc_chrom_fa_filename, ext )
+ break
+ if ucsc_file_name:
break
if not ucsc_file_name:
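
The rewritten lookup tries the dbkey-prefixed archive name before falling back to the legacy plain `chromFa` name, for each known compressed extension. The nested loop can be read as a small search helper; `find_ucsc_archive` below is a hypothetical name used only to illustrate the search order:

    # Hypothetical helper mirroring the new lookup order: prefer
    # "<dbkey>.chromFa", fall back to "chromFa", across the known extensions.
    def find_ucsc_archive( path_contents, ucsc_path, ucsc_dbkey, compressed_extensions ):
        for base_name in ( '%s.chromFa' % ucsc_dbkey, 'chromFa' ):
            for ext, get_stream_reader in compressed_extensions:
                if '%s%s' % ( base_name, ext ) in path_contents:
                    return '%s%s%s' % ( ucsc_path, base_name, ext ), get_stream_reader, ext, base_name
        return None, None, None, None
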
@@ -205,7 +210,7 @@
tmp_dir = tempfile.mkdtemp( prefix='tmp-data-manager-ucsc-' )
- ucsc_fasta_filename = os.path.join( tmp_dir, "%s%s" % ( UCSC_CHROM_FA_FILENAME, ext ) )
+ ucsc_fasta_filename = os.path.join( tmp_dir, "%s%s" % ( ucsc_chrom_fa_filename, ext ) )
fasta_base_filename = "%s.fa" % sequence_id
fasta_filename = os.path.join( target_directory, fasta_base_filename )
@@ -237,7 +242,7 @@
requested_identifier = params['param_dict']['reference_source']['requested_identifier']
url = NCBI_DOWNLOAD_URL % requested_identifier
- fasta_reader = urllib2.urlopen( url )
+ fasta_readers = urllib2.urlopen( url )
for data_table_name, data_table_entry in _stream_fasta_to_file( fasta_readers, target_directory, dbkey, dbkey_name, sequence_id, sequence_name, params ):
if data_table_entry:
@@ -246,7 +251,7 @@
def download_from_url( data_manager_dict, params, target_directory, dbkey, dbkey_name, sequence_id, sequence_name ):
#TODO: we should automatically do decompression here
urls = filter( bool, map( lambda x: x.strip(), params['param_dict']['reference_source']['user_url'].split( '\n' ) ) )
- fasta_reader = [ urllib2.urlopen( url ) for url in urls ]
+ fasta_readers = [ urllib2.urlopen( url ) for url in urls ]
for data_table_name, data_table_entry in _stream_fasta_to_file( fasta_readers, target_directory, dbkey, dbkey_name, sequence_id, sequence_name, params ):
if data_table_entry:
@@ -256,9 +261,9 @@
#TODO: allow multiple FASTA input files
input_filename = params['param_dict']['reference_source']['input_fasta']
if isinstance( input_filename, list ):
- fasta_reader = [ open( filename, 'rb' ) for filename in input_filename ]
+ fasta_readers = [ open( filename, 'rb' ) for filename in input_filename ]
else:
- fasta_reader = open( input_filename )
+ fasta_readers = open( input_filename )
for data_table_name, data_table_entry in _stream_fasta_to_file( fasta_readers, target_directory, dbkey, dbkey_name, sequence_id, sequence_name, params ):
if data_table_entry:
@@ -271,10 +276,10 @@
data_table_entries = _create_symlink( input_filename, target_directory, dbkey, dbkey_name, sequence_id, sequence_name )
else:
if isinstance( input_filename, list ):
- fasta_reader = [ open( filename, 'rb' ) for filename in input_filename ]
+ fasta_readers = [ open( filename, 'rb' ) for filename in input_filename ]
else:
- fasta_reader = open( input_filename )
- data_table_entries = _stream_fasta_to_file( fasta_reader, target_directory, dbkey, dbkey_name, sequence_id, sequence_name, params )
+ fasta_readers = open( input_filename )
+ data_table_entries = _stream_fasta_to_file( fasta_readers, target_directory, dbkey, dbkey_name, sequence_id, sequence_name, params )
for data_table_name, data_table_entry in data_table_entries:
if data_table_entry:
_add_data_table_entry( data_manager_dict, data_table_entry, data_table_name )
@@ -391,7 +396,7 @@
filename = args[0]
- params = from_json_string( open( filename ).read() )
+ params = loads( open( filename ).read() )
target_directory = params[ 'output_data' ][0]['extra_files_path']
os.mkdir( target_directory )
data_manager_dict = {}
@@ -405,6 +410,6 @@
REFERENCE_SOURCE_TO_DOWNLOAD[ params['param_dict']['reference_source']['reference_source_selector'] ]( data_manager_dict, params, target_directory, dbkey, dbkey_name, sequence_id, sequence_name )
#save info to json file
- open( filename, 'wb' ).write( to_json_string( data_manager_dict ) )
+ open( filename, 'wb' ).write( dumps( data_manager_dict ) )
if __name__ == "__main__": main()
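
Taken together, the `fasta_reader` to `fasta_readers` renames make each assignment match the name the following `_stream_fasta_to_file` call already uses (the NCBI and URL paths previously assigned one name and read another), and the `loads`/`dumps` changes keep the script's outer JSON handling on the standard library. A minimal sketch of that outer flow, with the fetch step elided:

    from json import loads, dumps
    import os
    import sys

    def run( json_filename ):
        # Galaxy passes the job parameters as a JSON file; results are written
        # back to the same file, and sequences land in extra_files_path.
        params = loads( open( json_filename ).read() )
        target_directory = params[ 'output_data' ][0][ 'extra_files_path' ]
        os.mkdir( target_directory )
        data_manager_dict = {}
        # ... fetch the FASTA into target_directory and fill data_manager_dict ...
        open( json_filename, 'wb' ).write( dumps( data_manager_dict ) )

    if __name__ == '__main__':
        run( sys.argv[1] )
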
diff -r d59e1d23b388 -r 0e5299e77334 data_manager/data_manager_fetch_genome_all_fasta_dbkeys.xml
--- a/data_manager/data_manager_fetch_genome_all_fasta_dbkeys.xml Wed Jul 02 00:35:38 2014 -0400
+++ b/data_manager/data_manager_fetch_genome_all_fasta_dbkeys.xml Mon Jan 26 14:54:16 2015 -0500
@@ -2,9 +2,9 @@
<description>fetching</description>
<command interpreter="python">data_manager_fetch_genome_all_fasta_dbkeys.py "${out_file}"
#if str( $dbkey_source.dbkey_source_selector ) == 'existing':
- --dbkey_description ${ dbkey.get_display_text() }
+ --dbkey_description ${ dbkey_source.dbkey.get_display_text() }
#else
- --dbkey_description "${ dbkey_source.dbkey_name or dbkey_source.dbkey }"
+ --dbkey_description "${ dbkey_source.dbkey_name or $dbkey_source.dbkey }"
#end if
@@ -24,7 +24,6 @@
-
@@ -48,7 +47,7 @@
-
+