Mercurial > repos > devteam > ucsc_custom_track
changeset 0:783448899c63 draft
Imported from capsule None
author | devteam |
---|---|
date | Mon, 28 Jul 2014 11:30:10 -0400 |
parents | |
children | 2e057d4433b2 |
files | build_ucsc_custom_track.py build_ucsc_custom_track.xml build_ucsc_custom_track_code.py test-data/build_ucsc_custom_track_out1.customtrack test-data/customTrack1.bed |
diffstat | 5 files changed, 216 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
#!/usr/bin/env python
"""
Build a UCSC genome browser custom track file

File: build_ucsc_custom_track.py

Usage:
    build_ucsc_custom_track.py OUT_FILE [IN_FILE FILE_TYPE COLSPEC NAME DESCRIPTION COLOR VISIBILITY]...

Every group of seven arguments after OUT_FILE describes one track:

  IN_FILE      path of the dataset to add
  FILE_TYPE    dataset extension; types listed in FILE_TYPE_TO_TRACK_TYPE are
               copied through unchanged, anything else is treated as a
               tab-separated interval file
  COLSPEC      comma-separated 1-based column numbers
               "chrom,start,end[,strand]" (only read for interval files)
  NAME         value of the track line's name attribute
  DESCRIPTION  value of the track line's description attribute
  COLOR        dash-separated colour (e.g. 255-0-0); dashes become commas
  VISIBILITY   value of the track line's visibility attribute

Each track is written as a "track ..." line, the data, and a blank
separator line.
"""

import os
import sys

assert sys.version_info[:2] >= (2, 4)

# File types whose content is copied to the output unchanged.  The value is
# the "type=" attribute put on the track line (None: no type attribute).
FILE_TYPE_TO_TRACK_TYPE = {
    'bed': None,
    'bedstrict': None,
    'bed6': None,
    'bed12': None,
    'bedgraph': 'bedGraph',
    'wig': 'wiggle_0',
}
CHUNK_SIZE = 2**20  # 1mb
ARGS_PER_TRACK = 7
INVALID_COLUMNS_MSG = "Columns in interval file invalid for UCSC custom track."
USAGE_MSG = (
    "Usage: build_ucsc_custom_track.py OUT_FILE "
    "[IN_FILE FILE_TYPE COLSPEC NAME DESCRIPTION COLOR VISIBILITY]...\n"
)


def stop_err(msg):
    """Write msg to stderr and terminate with a non-zero exit status."""
    sys.stderr.write(msg)
    # A bare sys.exit() reports success (status 0) even though we failed.
    sys.exit(1)


def get_track_line_is_interval(file_type, name, description, color, visibility):
    """
    Build the "track ..." header line for one dataset.

    Returns a ( track_line, is_interval ) tuple.  is_interval is False when
    file_type is listed in FILE_TYPE_TO_TRACK_TYPE (the data can be copied
    as-is) and True for every other type.
    """
    if file_type in FILE_TYPE_TO_TRACK_TYPE:
        track_type = FILE_TYPE_TO_TRACK_TYPE[file_type]
        is_interval = False
    else:
        track_type = None
        is_interval = True
    track_line = 'track '
    if track_type:
        track_line += 'type=%s ' % (track_type)
    track_line += 'name="%s" description="%s" color=%s visibility=%s\n' % (name, description, color, visibility)
    return track_line, is_interval


def _parse_colspec(colspec):
    """
    Turn "chrom,start,end[,strand]" (1-based) into 0-based indexes.

    Returns ( c, s, e, st ); st is negative when there is no usable strand
    column.  Calls stop_err() when chrom/start/end cannot be determined.
    """
    parts = colspec.split(",")
    try:
        c, s, e, st = [int(x) - 1 for x in parts]
    except ValueError:
        # Not exactly four integers (e.g. strand is "None"): fall back to
        # the first three columns only.
        try:
            c, s, e = [int(x) - 1 for x in parts[:3]]
        except ValueError:
            stop_err(INVALID_COLUMNS_MSG)
        st = -1  # strand column is absent
    if min(c, s, e) < 0:
        # Column number 0 (or less) would silently index from the end of
        # each row and emit the wrong fields.
        stop_err(INVALID_COLUMNS_MSG)
    return c, s, e, st


def _copy_verbatim(in_file, out):
    """Copy in_file to out in CHUNK_SIZE pieces."""
    while True:
        chunk = in_file.read(CHUNK_SIZE)
        if not chunk:
            break
        out.write(chunk)


def _write_intervals(in_file, out, columns):
    """
    Write the interval rows of in_file to out as BED lines.

    Blank lines and lines starting with '#' are ignored.  Rows that lack one
    of the requested columns are skipped.  Returns
    ( skipped_count, first_invalid_line ) where first_invalid_line is the
    1-based line number of the first skipped row, or 0 if none was skipped.
    """
    c, s, e, st = columns
    # Index 0 is a valid strand column (strand in the first column).
    has_strand = st >= 0
    skipped = 0
    first_invalid = 0
    for i, line in enumerate(in_file):
        line = line.rstrip('\r\n')
        if not line or line.startswith('#'):
            continue
        fields = line.split("\t")
        try:
            if has_strand:
                # BED6: the 0-based line index is used as the feature name
                # and the score is fixed at 0.
                record = "%s\t%s\t%s\t%d\t0\t%s\n" % (fields[c], fields[s], fields[e], i, fields[st])
            else:
                record = "%s\t%s\t%s\n" % (fields[c], fields[s], fields[e])
        except IndexError:
            skipped += 1
            if not first_invalid:
                first_invalid = i + 1
            continue
        out.write(record)
    return skipped, first_invalid


def main(argv):
    """
    Build the custom track file described by argv (sys.argv without the
    program name) and print a short summary to stdout.
    """
    args = list(argv)
    if not args or (len(args) - 1) % ARGS_PER_TRACK:
        stop_err(USAGE_MSG)
    out_fname = args.pop(0)

    num_tracks = 0
    skipped_lines = 0
    first_invalid_line = 0

    # try/finally instead of "with": the script declares support for 2.4.
    out = open(out_fname, "w")
    try:
        while args:
            # Suck in one dataset worth of arguments
            in_fname, file_type, colspec, name, description, color, visibility = args[:ARGS_PER_TRACK]
            del args[:ARGS_PER_TRACK]
            color = color.replace('-', ',')
            track_line, is_interval = get_track_line_is_interval(file_type, name, description, color, visibility)
            columns = None
            if is_interval:
                columns = _parse_colspec(colspec)
            # Do the work
            in_file = open(in_fname)
            try:
                out.write(track_line)
                if is_interval:
                    skipped, first_invalid = _write_intervals(in_file, out, columns)
                    skipped_lines += skipped
                    if not first_invalid_line:
                        first_invalid_line = first_invalid
                else:
                    _copy_verbatim(in_file, out)
            finally:
                in_file.close()
            out.write("\n")  # separating newline
            num_tracks += 1
    finally:
        out.close()

    # Single-argument print( ... ) behaves the same on Python 2 and 3.
    print("Generated a custom track containing %d subtracks." % num_tracks)
    if skipped_lines:
        print("Skipped %d invalid lines starting at #%d" % (skipped_lines, first_invalid_line))


if __name__ == "__main__":
    main(sys.argv[1:])
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/build_ucsc_custom_track.xml Mon Jul 28 11:30:10 2014 -0400 @@ -0,0 +1,82 @@ +<tool id="build_ucsc_custom_track_1" name="Build custom track" version="1.0.0"> + <description>for UCSC genome browser</description> + <command interpreter="python"> + build_ucsc_custom_track.py + "$out_file1" + #for $t in $tracks + "${t.input.file_name}" + "${t.input.ext}" + #if $t.input.ext == "interval" + ${t.input.metadata.chromCol},${t.input.metadata.startCol},${t.input.metadata.endCol},${t.input.metadata.strandCol} + #else + "NA" + #end if + "${t.name}" + "${t.description}" + "${t.color}" + "${t.visibility}" + #end for + </command> + <inputs> + <repeat name="tracks" title="Track"> + <param name="input" type="data" format="interval,wig" label="Dataset"/> + <param name="name" type="text" size="15" value="User Track"> + <validator type="length" max="15"/> + </param> + <param name="description" type="text" value="User Supplied Track (from Galaxy)"> + <validator type="length" max="60"/> + </param> + <param label="Color" name="color" type="select"> + <option selected="yes" value="0-0-0">Black</option> + <option value="255-0-0">Red</option> + <option value="0-255-0">Green</option> + <option value="0-0-255">Blue</option> + <option value="255-0-255">Magenta</option> + <option value="0-255-255">Cyan</option> + <option value="255-215-0">Gold</option> + <option value="160-32-240">Purple</option> + <option value="255-140-0">Orange</option> + <option value="255-20-147">Pink</option> + <option value="92-51-23">Dark Chocolate</option> + <option value="85-107-47">Olive green</option> + </param> + <param label="Visibility" name="visibility" type="select"> + <option selected="yes" value="1">Dense</option> + <option value="2">Full</option> + <option value="3">Pack</option> + <option value="4">Squish</option> + <option value="0">Hide</option> + </param> + </repeat> + </inputs> + <outputs> + <data format="customtrack" name="out_file1" /> + </outputs> + 
<tests> + <!--TODO: add a 2nd test here that includes 2 tracks --> + <test> + <param name="input" value="customTrack1.bed" /> + <param name="name" value="User Track" /> + <param name="description" value="User Supplied Track (from Galaxy)" /> + <param name="color" value="0-0-0" /> + <param name="visibility" value="1" /> + <output name="out_file1" file="build_ucsc_custom_track_out1.customtrack" /> + </test> + </tests> +<help> + +.. class:: infomark + +This tool allows you to build custom tracks for the UCSC genome browser using datasets in your history. You can view these custom tracks on the UCSC genome browser by clicking on the **display at UCSC main/test** link in the history panel of the output dataset. + +----- + +.. class:: warningmark + +Please note that this tool requires **all input datasets (tracks) to have the same genome build**. The tool throws an error when this requirement is not met. You may then have to choose a valid dataset or remove invalid tracks. + +</help> + +<code file="build_ucsc_custom_track_code.py" /> + +</tool>
# File: build_ucsc_custom_track_code.py
#
# Code file referenced from build_ucsc_custom_track.xml via
# <code file="build_ucsc_custom_track_code.py" />.
#
# NOTE(review): the original header comment said this "runs after the job
# (and after the default post-filter)", but validate_input() only inspects
# the submitted parameter values and fills in error_map, which looks like
# validation done before the job runs -- confirm against the Galaxy
# code-file hook documentation.

# Older py compatibility
try:
    set()
except NameError:
    from sets import Set as set


def validate_input(trans, error_map, param_values, page_param_map):
    """
    Flag every track input when the selected datasets mix genome builds.

    param_values['tracks'] is a sequence of mappings, each with an 'input'
    entry that is either falsy (nothing selected) or an object exposing a
    dbkey attribute.  When more than one distinct dbkey is found,
    error_map['tracks'][i]['input'] is set to an error message for every
    track i; otherwise error_map is left untouched.

    trans and page_param_map are accepted but not used here.
    """
    tracks = param_values['tracks']
    dbkeys = set()
    for track in tracks:
        if track['input']:
            dbkeys.add(track['input'].dbkey)
    if len(dbkeys) > 1:
        # FIXME: Should be able to assume error map structure is created
        if 'tracks' not in error_map:
            error_map['tracks'] = [dict() for t in tracks]
        for i, _track in enumerate(tracks):
            error_map['tracks'][i]['input'] = \
                "All datasets must belong to same genomic build"
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/build_ucsc_custom_track_out1.customtrack Mon Jul 28 11:30:10 2014 -0400 @@ -0,0 +1,8 @@ +track name="User Track" description="User Supplied Track (from Galaxy)" color=0,0,0 visibility=1 +chr2 31608 36385 NM_001077710 0 - 31608 36385 0 2 19,946, 0,3831, +chr2 208154 246341 NM_015677 0 - 208810 232800 0 12 847,57,79,169,129,113,74,60,65,114,111,135, 0,6709,11811,12868,14946,16005,24643,25348,29383,31576,34850,38052, +chr2 254871 262481 NM_001040649 0 + 254964 262258 0 3 136,74,445, 0,6994,7165, +chr2 254895 268280 NM_004300 0 + 254964 267304 0 6 112,74,114,62,106,1054, 0,6970,7141,10244,12084,12331, +chr2 254895 268280 NM_007099 0 + 254964 267304 0 6 112,74,114,62,106,1054, 0,6970,7296,10244,12084,12331, +chr2 269928 277837 NM_001002919 0 - 270146 277835 0 6 221,65,81,54,22,53, 0,3182,6194,6361,7654,7856, +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/customTrack1.bed Mon Jul 28 11:30:10 2014 -0400 @@ -0,0 +1,6 @@ +chr2 31608 36385 NM_001077710 0 - 31608 36385 0 2 19,946, 0,3831, +chr2 208154 246341 NM_015677 0 - 208810 232800 0 12 847,57,79,169,129,113,74,60,65,114,111,135, 0,6709,11811,12868,14946,16005,24643,25348,29383,31576,34850,38052, +chr2 254871 262481 NM_001040649 0 + 254964 262258 0 3 136,74,445, 0,6994,7165, +chr2 254895 268280 NM_004300 0 + 254964 267304 0 6 112,74,114,62,106,1054, 0,6970,7141,10244,12084,12331, +chr2 254895 268280 NM_007099 0 + 254964 267304 0 6 112,74,114,62,106,1054, 0,6970,7296,10244,12084,12331, +chr2 269928 277837 NM_001002919 0 - 270146 277835 0 6 221,65,81,54,22,53, 0,3182,6194,6361,7654,7856,