changeset 0:783448899c63 draft

Imported from capsule None
author devteam
date Mon, 28 Jul 2014 11:30:10 -0400
parents
children 2e057d4433b2
files build_ucsc_custom_track.py build_ucsc_custom_track.xml build_ucsc_custom_track_code.py test-data/build_ucsc_custom_track_out1.customtrack test-data/customTrack1.bed
diffstat 5 files changed, 216 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/build_ucsc_custom_track.py	Mon Jul 28 11:30:10 2014 -0400
@@ -0,0 +1,99 @@
+#!/usr/bin/env python
+"""
+Build a UCSC genome browser custom track file
+"""
+
+import sys, os
+
+# Refuse to run on interpreters older than Python 2.4.
+assert sys.version_info[:2] >= ( 2, 4 )
+
+def stop_err( msg ):
+    """
+    Write msg to stderr and terminate the script with a failing exit status.
+
+    msg is written verbatim (no newline is appended). Raises SystemExit
+    with status 1 so that anything checking the return code sees the failure.
+    """
+    sys.stderr.write( msg )
+    # A bare sys.exit() would report status 0 (success) despite the error.
+    sys.exit( 1 )
+
+# Maps a dataset extension to the value written as the track line's "type="
+# attribute; None means the track line is written without a type attribute.
+# Extensions that have no entry here are processed as generic interval data.
+FILE_TYPE_TO_TRACK_TYPE = {
+    'bed': None,
+    'bedstrict': None,
+    'bed6': None,
+    'bed12': None,
+    'bedgraph': 'bedGraph',
+    'wig': 'wiggle_0',
+}
+# Number of bytes read per call when copying a dataset through unchanged.
+CHUNK_SIZE = 1024 * 1024 #1mb
+
+def get_track_line_is_interval( file_type, name, description, color, visibility  ):
+    """
+    Build the "track" header line for one dataset.
+
+    Returns a ( track_line, is_interval ) tuple. is_interval is True when
+    file_type has no entry in FILE_TYPE_TO_TRACK_TYPE. A "type=" attribute is
+    emitted only when the table maps file_type to a non-empty track type.
+    """
+    is_interval = file_type not in FILE_TYPE_TO_TRACK_TYPE
+    # .get() yields None for unknown types, which also suppresses "type=".
+    track_type = FILE_TYPE_TO_TRACK_TYPE.get( file_type )
+    pieces = [ 'track ' ]
+    if track_type:
+        pieces.append( 'type=%s ' % ( track_type ) )
+    pieces.append( 'name="%s" description="%s" color=%s visibility=%s\n' % ( name, description, color, visibility ) )
+    return ''.join( pieces ), is_interval
+
+# Command line: <output file> followed by one group of seven arguments per track:
+#   <input file> <file type> <column spec> <name> <description> <color> <visibility>
+args = sys.argv[1:]
+
+out_fname = args.pop(0)
+out = open( out_fname, "w" )
+
+num_tracks = 0
+skipped_lines = 0
+first_invalid_line = 0    # 1-based number of the first skipped line; 0 means none so far
+while args:
+    # Suck in one dataset worth of arguments
+    in_fname = args.pop(0)
+    file_type = args.pop(0)
+    colspec = args.pop(0)
+    name = args.pop(0)
+    description = args.pop(0)
+    color = args.pop(0).replace( '-', ',' )    # e.g. "255-0-0" -> "255,0,0"
+    visibility = args.pop(0)
+    track_line, is_interval = get_track_line_is_interval( file_type, name, description, color, visibility  )
+    # Do the work
+    in_file = open( in_fname )
+    # try/finally (rather than "with") keeps the script runnable on Python 2.4
+    # while making sure every input file is closed again.
+    try:
+        out.write( track_line )
+        if not is_interval:
+            # Known track formats are copied through verbatim in fixed-size chunks
+            while True:
+                chunk = in_file.read( CHUNK_SIZE )
+                if not chunk:
+                    break
+                out.write( chunk )
+        else:
+            # Assume type is interval (don't pass this script anything else!)
+            # colspec lists column numbers (chrom,start,end[,strand]); they are
+            # converted to 0-based field indexes here.
+            try:
+                c, s, e, st = [ int( x ) - 1 for x in colspec.split( "," ) ]
+            except ValueError:
+                # Non-numeric strand entry or wrong number of entries:
+                # fall back to the first three columns only.
+                try:
+                    c, s, e = [ int( x ) - 1 for x in colspec.split( "," )[:3] ]
+                    st = -1    #strand column is absent
+                except ValueError:
+                    stop_err( "Columns in interval file invalid for UCSC custom track." )
+
+            for i, line in enumerate( in_file ):
+                line = line.rstrip( '\r\n' )
+                if not line or line.startswith( '#' ):
+                    continue
+                fields = line.split( "\t" )
+                try:
+                    # Index 0 is a valid strand column; only -1 means "absent".
+                    if st >= 0:
+                        #strand column is present; the 0-based line index serves as the name field
+                        record = "%s\t%s\t%s\t%d\t0\t%s\n" % ( fields[c], fields[s], fields[e], i, fields[st] )
+                    else:
+                        record = "%s\t%s\t%s\n" % ( fields[c], fields[s], fields[e] )
+                except IndexError:
+                    # Line has too few columns: count it and remember the first one.
+                    skipped_lines += 1
+                    if not first_invalid_line:
+                        first_invalid_line = i+1
+                    continue
+                # Written outside the try so that real I/O errors are not
+                # silently miscounted as invalid input lines.
+                out.write( record )
+    finally:
+        in_file.close()
+    out.write( "\n" ) #separating newline
+    num_tracks += 1
+
+out.close()
+
+# Single-argument print( ... ) produces the same output as the print statement.
+print( "Generated a custom track containing %d subtracks." % num_tracks )
+if skipped_lines:
+    print( "Skipped %d invalid lines starting at #%d" % ( skipped_lines, first_invalid_line ) )
+
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/build_ucsc_custom_track.xml	Mon Jul 28 11:30:10 2014 -0400
@@ -0,0 +1,82 @@
+<tool id="build_ucsc_custom_track_1" name="Build custom track" version="1.0.0">
+  <description>for UCSC genome browser</description>    
+  <command interpreter="python">
+    build_ucsc_custom_track.py 
+      "$out_file1"
+      #for $t in $tracks
+        "${t.input.file_name}"
+        "${t.input.ext}"
+        #if $t.input.ext == "interval"
+          ${t.input.metadata.chromCol},${t.input.metadata.startCol},${t.input.metadata.endCol},${t.input.metadata.strandCol}
+        #else
+          "NA"
+        #end if
+        "${t.name}"
+        "${t.description}"
+        "${t.color}"
+        "${t.visibility}"
+      #end for
+  </command>
+  <inputs>
+    <repeat name="tracks" title="Track">
+      <param name="input" type="data" format="interval,wig" label="Dataset"/>
+      <param name="name" type="text" size="15" value="User Track">
+        <validator type="length" max="15"/>
+      </param>
+      <param name="description" type="text" value="User Supplied Track (from Galaxy)">
+        <validator type="length" max="60"/>
+      </param>
+  		<param label="Color" name="color" type="select">
+  			<option selected="yes" value="0-0-0">Black</option>
+  			<option value="255-0-0">Red</option>
+  			<option value="0-255-0">Green</option>
+  			<option value="0-0-255">Blue</option>
+  			<option value="255-0-255">Magenta</option>
+  			<option value="0-255-255">Cyan</option>
+  			<option value="255-215-0">Gold</option>
+  			<option value="160-32-240">Purple</option>
+  			<option value="255-140-0">Orange</option>
+  			<option value="255-20-147">Pink</option>
+  			<option value="92-51-23">Dark Chocolate</option>
+  			<option value="85-107-47">Olive green</option>
+  		</param>
+  		<param label="Visibility" name="visibility" type="select">
+  			<option selected="yes" value="1">Dense</option>
+  			<option value="2">Full</option>
+  			<option value="3">Pack</option>
+  			<option value="4">Squish</option>
+  			<option value="0">Hide</option>
+  		</param>
+    </repeat>
+  </inputs>
+	<outputs>
+  	<data format="customtrack" name="out_file1" />
+	</outputs>
+    <tests>
+        <!--TODO: add a 2nd test here that includes 2 tracks -->
+        <test>
+  		    <param name="input" value="customTrack1.bed" />
+  		    <param name="name" value="User Track" />
+  		    <param name="description" value="User Supplied Track (from Galaxy)" />
+  		    <param name="color" value="0-0-0" />
+  		    <param name="visibility" value="1" />
+  		    <output name="out_file1" file="build_ucsc_custom_track_out1.customtrack" />
+        </test>
+	</tests>
+<help>
+
+.. class:: infomark
+
+This tool allows you to build custom tracks for the UCSC genome browser using datasets in your history. You can view these custom tracks on the UCSC genome browser by clicking on the **display at UCSC main/test** link in the history panel of the output dataset.
+
+-----
+
+.. class:: warningmark
+
+Please note that this tool requires **all input datasets (tracks) to have the same genome build**. The tool reports an error when this requirement is not met. You may then have to choose a valid dataset or remove invalid tracks.
+
+</help>
+
+<code file="build_ucsc_custom_track_code.py" />
+  
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/build_ucsc_custom_track_code.py	Mon Jul 28 11:30:10 2014 -0400
@@ -0,0 +1,21 @@
+# runs after the job (and after the default post-filter)
+
+# Older py compatibility: when calling the built-in set() fails, bind the
+# name "set" to sets.Set so the code below can use set() either way.
+try:
+    set()
+except:
+    from sets import Set as set
+
+def validate_input( trans, error_map, param_values, page_param_map ):
+    """
+    Flag every track when the selected datasets span more than one dbkey.
+
+    Reads param_values['tracks'], collects the dbkey of each track whose
+    'input' is set, and, if more than one distinct dbkey is found and
+    error_map has no 'tracks' entry yet, stores one error dict per track
+    under error_map['tracks']. trans and page_param_map are not used.
+    """
+    tracks = param_values['tracks']
+    dbkeys = set( [ t['input'].dbkey for t in tracks if t['input'] ] )
+    if len( dbkeys ) <= 1:
+        return
+    # FIXME: Should be able to assume error map structure is created
+    if 'tracks' in error_map:
+        # An existing entry is left untouched.
+        return
+    message = "All datasets must belong to same genomic build"
+    error_map['tracks'] = [ { 'input': message } for t in tracks ]
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/build_ucsc_custom_track_out1.customtrack	Mon Jul 28 11:30:10 2014 -0400
@@ -0,0 +1,8 @@
+track name="User Track" description="User Supplied Track (from Galaxy)" color=0,0,0 visibility=1
+chr2	31608	36385	NM_001077710	0	-	31608	36385	0	2	19,946,	0,3831,
+chr2	208154	246341	NM_015677	0	-	208810	232800	0	12	847,57,79,169,129,113,74,60,65,114,111,135,	0,6709,11811,12868,14946,16005,24643,25348,29383,31576,34850,38052,
+chr2	254871	262481	NM_001040649	0	+	254964	262258	0	3	136,74,445,	0,6994,7165,
+chr2	254895	268280	NM_004300	0	+	254964	267304	0	6	112,74,114,62,106,1054,	0,6970,7141,10244,12084,12331,
+chr2	254895	268280	NM_007099	0	+	254964	267304	0	6	112,74,114,62,106,1054,	0,6970,7296,10244,12084,12331,
+chr2	269928	277837	NM_001002919	0	-	270146	277835	0	6	221,65,81,54,22,53,	0,3182,6194,6361,7654,7856,
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/customTrack1.bed	Mon Jul 28 11:30:10 2014 -0400
@@ -0,0 +1,6 @@
+chr2	31608	36385	NM_001077710	0	-	31608	36385	0	2	19,946,	0,3831,
+chr2	208154	246341	NM_015677	0	-	208810	232800	0	12	847,57,79,169,129,113,74,60,65,114,111,135,	0,6709,11811,12868,14946,16005,24643,25348,29383,31576,34850,38052,
+chr2	254871	262481	NM_001040649	0	+	254964	262258	0	3	136,74,445,	0,6994,7165,
+chr2	254895	268280	NM_004300	0	+	254964	267304	0	6	112,74,114,62,106,1054,	0,6970,7141,10244,12084,12331,
+chr2	254895	268280	NM_007099	0	+	254964	267304	0	6	112,74,114,62,106,1054,	0,6970,7296,10244,12084,12331,
+chr2	269928	277837	NM_001002919	0	-	270146	277835	0	6	221,65,81,54,22,53,	0,3182,6194,6361,7654,7856,