# HG changeset patch
# User devteam
# Date 1406561410 14400
# Node ID 783448899c63f7b3b16698b504ad41903d46ce6c
# Imported from capsule None
#
# diff -r 000000000000 -r 783448899c63 build_ucsc_custom_track.py
# --- /dev/null Thu Jan 01 00:00:00 1970 +0000
# +++ b/build_ucsc_custom_track.py Mon Jul 28 11:30:10 2014 -0400
# @@ -0,0 +1,99 @@  (hunk size refers to the originally committed script)
#!/usr/bin/env python
"""
Build a UCSC genome browser custom track file

Command line (argument order matches the tool wrapper's command template,
reproduced at the bottom of this file):

    build_ucsc_custom_track.py OUT_FILE
        [IN_FILE FILE_TYPE COLSPEC NAME DESCRIPTION COLOR VISIBILITY]...

* FILE_TYPE values listed in FILE_TYPE_TO_TRACK_TYPE are copied verbatim
  after the track line; any other type is treated as an interval file.
* COLSPEC is "chrom,start,end[,strand]" as 1-based column numbers and is
  only read for interval files (the wrapper passes "NA" otherwise).
* COLOR may use '-' as the separator; it is rewritten to ','.
"""

import os
import sys

assert sys.version_info[:2] >= (2, 4)

# Maps copy-through file types to the value of the track line's "type="
# attribute; None means no "type=" attribute is emitted.
FILE_TYPE_TO_TRACK_TYPE = {
    'bed': None,
    'bedstrict': None,
    'bed6': None,
    'bed12': None,
    'bedgraph': 'bedGraph',
    'wig': 'wiggle_0',
}
CHUNK_SIZE = 2**20  # 1mb
ARGS_PER_TRACK = 7  # in_fname, file_type, colspec, name, description, color, visibility


def stop_err(msg):
    """Write *msg* to stderr and terminate with a non-zero exit status."""
    sys.stderr.write(msg)
    # A bare sys.exit() reports success (status 0) even though we failed.
    sys.exit(1)


def get_track_line_is_interval(file_type, name, description, color, visibility):
    """Return ``(track_line, is_interval)`` for one dataset.

    ``track_line`` is the newline-terminated UCSC "track ..." header.
    ``is_interval`` is True when *file_type* is not a key of
    FILE_TYPE_TO_TRACK_TYPE, i.e. the data must be converted column by
    column instead of being copied through.
    """
    is_interval = file_type not in FILE_TYPE_TO_TRACK_TYPE
    track_type = FILE_TYPE_TO_TRACK_TYPE.get(file_type)
    track_line = 'track '
    if track_type:
        track_line += 'type=%s ' % (track_type)
    track_line += 'name="%s" description="%s" color=%s visibility=%s\n' % (
        name, description, color, visibility)
    return track_line, is_interval


def _parse_columns(colspec):
    """Turn a 1-based "chrom,start,end[,strand]" spec into 0-based indexes.

    Returns ``(chrom, start, end, strand)``; ``strand`` is negative when no
    usable strand column was given. Exits via stop_err() on a bad spec.
    """
    parts = colspec.split(",")
    try:
        c, s, e, st = [int(x) - 1 for x in parts]
    except ValueError:
        # Either not exactly four values or the strand value is not a number:
        # fall back to the three mandatory columns.
        try:
            c, s, e = [int(x) - 1 for x in parts[:3]]
        except ValueError:
            stop_err("Columns in interval file invalid for UCSC custom track.")
        st = -1  # strand column is absent
    if min(c, s, e) < 0:
        # A column number of 0 would become index -1 and silently pick the
        # last field of every row.
        stop_err("Columns in interval file invalid for UCSC custom track.")
    return c, s, e, st


def _copy_raw(in_file, out):
    """Copy *in_file* to *out* unchanged, CHUNK_SIZE characters at a time."""
    while True:
        chunk = in_file.read(CHUNK_SIZE)
        if not chunk:
            break
        out.write(chunk)


def _write_interval(in_file, out, colspec):
    """Convert interval rows to BED rows; return ``(skipped, first_invalid)``.

    Blank lines and lines starting with '#' are ignored. Rows that lack one
    of the requested columns are counted in ``skipped``; ``first_invalid``
    is the 1-based line number of the first such row (0 if none).
    """
    c, s, e, st = _parse_columns(colspec)
    # Index 0 is a valid strand column, so test ">= 0" rather than "> 0".
    has_strand = st >= 0
    skipped = 0
    first_invalid = 0
    for i, line in enumerate(in_file):
        line = line.rstrip('\r\n')
        if not line or line.startswith('#'):
            continue
        fields = line.split("\t")
        try:
            if has_strand:
                # The 0-based line index doubles as the BED name; score is 0.
                row = "%s\t%s\t%s\t%d\t0\t%s\n" % (
                    fields[c], fields[s], fields[e], i, fields[st])
            else:
                row = "%s\t%s\t%s\n" % (fields[c], fields[s], fields[e])
        except IndexError:
            # Only a short row counts as invalid; I/O errors must propagate.
            skipped += 1
            if not first_invalid:
                first_invalid = i + 1
            continue
        out.write(row)
    return skipped, first_invalid


def main(argv=None):
    """Build the custom track file described by *argv* (default: sys.argv[1:])."""
    if argv is None:
        argv = sys.argv[1:]
    args = list(argv)
    if not args or (len(args) - 1) % ARGS_PER_TRACK != 0:
        stop_err("Usage: build_ucsc_custom_track.py OUT_FILE "
                 "[IN_FILE FILE_TYPE COLSPEC NAME DESCRIPTION COLOR VISIBILITY]...\n")

    out_fname = args.pop(0)
    num_tracks = 0
    skipped_lines = 0
    first_invalid_line = 0

    out = open(out_fname, "w")
    try:
        for pos in range(0, len(args), ARGS_PER_TRACK):
            # One dataset worth of arguments
            (in_fname, file_type, colspec, name,
             description, color, visibility) = args[pos:pos + ARGS_PER_TRACK]
            color = color.replace('-', ',')
            track_line, is_interval = get_track_line_is_interval(
                file_type, name, description, color, visibility)
            in_file = open(in_fname)
            try:
                out.write(track_line)
                if is_interval:
                    skipped, first = _write_interval(in_file, out, colspec)
                    skipped_lines += skipped
                    if first and not first_invalid_line:
                        first_invalid_line = first
                else:
                    _copy_raw(in_file, out)
            finally:
                in_file.close()
            out.write("\n")  # separating newline
            num_tracks += 1
    finally:
        out.close()

    # Parenthesised single-argument form prints identically on Python 2 and 3.
    print("Generated a custom track containing %d subtracks." % num_tracks)
    if skipped_lines:
        print("Skipped %d invalid lines starting at #%d" % (skipped_lines, first_invalid_line))


if __name__ == "__main__":
    main()


# diff -r 000000000000 -r 783448899c63 build_ucsc_custom_track.xml
# --- /dev/null Thu Jan 01 00:00:00 1970 +0000
# +++ b/build_ucsc_custom_track.xml Mon Jul 28 11:30:10 2014 -0400
# @@ -0,0 +1,82 @@
# (XML markup is missing from this copy; only the surviving text is kept.)
#
#   for UCSC genome browser
#
#   build_ucsc_custom_track.py
#     "$out_file1"
#     #for $t in $tracks
#       "${t.input.file_name}"
#       "${t.input.ext}"
#       #if $t.input.ext == "interval"
#         ${t.input.metadata.chromCol},${t.input.metadata.startCol},${t.input.metadata.endCol},${t.input.metadata.strandCol}
#       #else
#         "NA"
#       #end if
#       "${t.name}"
#       "${t.description}"
#       "${t.color}"
#       "${t.visibility}"
#     #end for
#
# .. class:: infomark
#
# This tool allows you to build custom tracks using datasets in your history for the UCSC genome browser.
# You can view these custom tracks on the UCSC genome browser by clicking on **display at UCSC main/test** link in the history panel of the output dataset.
#
# -----
#
# .. class:: warningmark
#
# Please note that this tool requires **all input datasets(tracks) to have the same genome build**. The tool throws an error when this requirement is not met. You may then have to choose a valid dataset or remove invalid tracks.
#
# diff -r 000000000000 -r 783448899c63 build_ucsc_custom_track_code.py
# --- /dev/null Thu Jan 01 00:00:00 1970 +0000
# +++ b/build_ucsc_custom_track_code.py Mon Jul 28 11:30:10 2014 -0400
# @@ -0,0 +1,21 @@  (hunk size refers to the originally committed module)

# runs after the job (and after the default post-filter)
# NOTE(review): the line above is the original header comment, but the only
# function here inspects parameter values and fills error_map -- confirm when
# the framework actually invokes it.

# Older py compatibility
try:
    set()
except NameError:
    # Only a missing builtin should trigger the fallback import.
    from sets import Set as set


def validate_input(trans, error_map, param_values, page_param_map):
    """Flag every track input when the selected datasets span several builds.

    Reads ``param_values['tracks']`` (a sequence of mappings with an
    ``'input'`` entry exposing ``.dbkey``) and, if more than one distinct
    dbkey is found among the non-empty inputs, stores an error message under
    ``error_map['tracks'][i]['input']`` for every track. ``error_map`` is
    modified in place; nothing is returned. ``trans`` and ``page_param_map``
    are not used.
    """
    tracks = param_values['tracks']
    dbkeys = set()
    for track in tracks:
        if track['input']:
            dbkeys.add(track['input'].dbkey)
    if len(dbkeys) <= 1:
        return

    # FIXME: Should be able to assume error map structure is created
    if 'tracks' not in error_map:
        error_map['tracks'] = [dict() for t in tracks]
    track_errors = error_map['tracks']
    # Guard against a pre-existing list that is shorter than the track list,
    # which would otherwise raise IndexError below.
    while len(track_errors) < len(tracks):
        track_errors.append(dict())
    for i, _track in enumerate(tracks):
        track_errors[i]['input'] = \
            "All datasets must belong to same genomic build"


# \ No newline at end of file
# diff -r 000000000000 -r 783448899c63 test-data/build_ucsc_custom_track_out1.customtrack
# --- /dev/null Thu Jan 01 00:00:00 1970 +0000
# +++ b/test-data/build_ucsc_custom_track_out1.customtrack Mon Jul 28 11:30:10 2014 -0400
# @@ -0,0 +1,8 @@
# track name="User Track" description="User Supplied Track (from Galaxy)" color=0,0,0 visibility=1
# chr2 31608 36385 NM_001077710 0 - 31608 36385 0 2 19,946, 0,3831,
# chr2 208154 246341 NM_015677 0 - 208810 232800 0 12 847,57,79,169,129,113,74,60,65,114,111,135, 0,6709,11811,12868,14946,16005,24643,25348,29383,31576,34850,38052,
# chr2 254871 262481 NM_001040649 0 + 254964 262258 0 3 136,74,445, 0,6994,7165,
# chr2 254895 268280 NM_004300 0 + 254964 267304 0 6
112,74,114,62,106,1054, 0,6970,7141,10244,12084,12331, +chr2 254895 268280 NM_007099 0 + 254964 267304 0 6 112,74,114,62,106,1054, 0,6970,7296,10244,12084,12331, +chr2 269928 277837 NM_001002919 0 - 270146 277835 0 6 221,65,81,54,22,53, 0,3182,6194,6361,7654,7856, + diff -r 000000000000 -r 783448899c63 test-data/customTrack1.bed --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/customTrack1.bed Mon Jul 28 11:30:10 2014 -0400 @@ -0,0 +1,6 @@ +chr2 31608 36385 NM_001077710 0 - 31608 36385 0 2 19,946, 0,3831, +chr2 208154 246341 NM_015677 0 - 208810 232800 0 12 847,57,79,169,129,113,74,60,65,114,111,135, 0,6709,11811,12868,14946,16005,24643,25348,29383,31576,34850,38052, +chr2 254871 262481 NM_001040649 0 + 254964 262258 0 3 136,74,445, 0,6994,7165, +chr2 254895 268280 NM_004300 0 + 254964 267304 0 6 112,74,114,62,106,1054, 0,6970,7141,10244,12084,12331, +chr2 254895 268280 NM_007099 0 + 254964 267304 0 6 112,74,114,62,106,1054, 0,6970,7296,10244,12084,12331, +chr2 269928 277837 NM_001002919 0 - 270146 277835 0 6 221,65,81,54,22,53, 0,3182,6194,6361,7654,7856,