Mercurial > repos > devteam > ucsc_custom_track

--- a/build_ucsc_custom_track.py	Tue Oct 13 12:34:12 2015 -0400
+++ b/build_ucsc_custom_track.py	Mon Feb 28 20:05:39 2022 +0000
@@ -3,97 +3,86 @@
 Build a UCSC genome browser custom track file
 """

-import sys, os
+import sys

-assert sys.version_info[:2] >= ( 2, 4 )
+FILE_TYPE_TO_TRACK_TYPE = {'bed': None, 'bedstrict': None, 'bed6': None, 'bed12': None, 'bedgraph': 'bedGraph', 'wig': 'wiggle_0'}
+CHUNK_SIZE = 2**20  # 1,048,576 characters (~1 MiB) per read

-def stop_err( msg ):
-    sys.stderr.write( msg )
-    sys.exit()

-FILE_TYPE_TO_TRACK_TYPE = { 'bed': None, 'bedstrict': None, 'bed6': None, 'bed12': None, 'bedgraph':'bedGraph', 'wig':'wiggle_0' }
-CHUNK_SIZE = 2**20 #1mb
-
-def get_track_line_is_interval( file_type, name, description, color, visibility  ):
+def get_track_line_is_interval(file_type, name, description, color, visibility):
     if file_type in FILE_TYPE_TO_TRACK_TYPE:
-        track_type = FILE_TYPE_TO_TRACK_TYPE[ file_type ]
+        track_type = FILE_TYPE_TO_TRACK_TYPE[file_type]
         is_interval = False
     else:
         track_type = None
         is_interval = True
     track_line = 'track '
     if track_type:
-        track_line += 'type=%s ' % ( track_type )
-    track_line += 'name="%s" description="%s" color=%s visibility=%s\n' % ( name, description, color, visibility )
+        track_line += f"type={track_type} "
+    track_line += f'name="{name}" description="{description}" color={color} visibility={visibility}\n'
     return track_line, is_interval

-args = sys.argv[1:]
-
-out_fname = args.pop(0)
-out = open( out_fname, "w" )

 num_tracks = 0
 skipped_lines = 0
 first_invalid_line = 0
-while args:
-    # Suck in one dataset worth of arguments
-    in_fname = args.pop(0)
-    file_type = args.pop(0)
-    colspec = args.pop(0)
-    name = args.pop(0)
-    description = args.pop(0)
-    color = args.pop(0).replace( '-', ',' )
-    visibility = args.pop(0)
-    track_line, is_interval = get_track_line_is_interval( file_type, name, description, color, visibility  )
-    # Do the work
-    in_file = open( in_fname )
-    out.write( track_line )
-    if not is_interval:
-        while True:
-            chunk = in_file.read( CHUNK_SIZE )
-            if chunk:
-                out.write( chunk )
+args = sys.argv[1:]
+out_fname = args.pop(0)
+with open(out_fname, "w") as out:
+    while args:
+        # Consume one dataset's worth of arguments (seven positional values per track)
+        in_fname = args.pop(0)
+        file_type = args.pop(0)
+        colspec = args.pop(0)
+        name = args.pop(0)
+        description = args.pop(0)
+        color = args.pop(0).replace('-', ',')
+        visibility = args.pop(0)
+        track_line, is_interval = get_track_line_is_interval(file_type, name, description, color, visibility)
+        # Do the work
+        out.write(track_line)
+        with open(in_fname) as in_file:
+            if not is_interval:
+                while True:
+                    chunk = in_file.read(CHUNK_SIZE)
+                    if chunk:
+                        out.write(chunk)
+                    else:
+                        break
             else:
-                break
-    else:
-        # Assume type is interval (don't pass this script anything else!)
-        try:
-            c, s, e, st = [ int( x ) - 1 for x in colspec.split( "," ) ]
-        except:
-            try:
-                c, s, e = [ int( x ) - 1 for x in colspec.split( "," )[:3] ]
-                st = -1    #strand column is absent
-            except:
-                stop_err( "Columns in interval file invalid for UCSC custom track." )
-
-        i = 0
-        for i, line in enumerate( in_file ):
-            line = line.rstrip( '\r\n' )
-            if line and not line.startswith( '#' ):
-                fields = line.split( "\t" )
-                if st > 0:
-                    #strand column is present
+                # Any type not listed in FILE_TYPE_TO_TRACK_TYPE is assumed to be interval (don't pass this script anything else!)
+                try:
+                    c, s, e, st = (int(x) - 1 for x in colspec.split(","))
+                except ValueError:
                     try:
-                        out.write( "%s\t%s\t%s\t%d\t0\t%s\n" % ( fields[c], fields[s], fields[e], i, fields[st] ) )
-                    except:
-                        skipped_lines += 1
-                        if not first_invalid_line:
-                            first_invalid_line = i+1
-                else:
-                    try:
-                       out.write( "%s\t%s\t%s\n" % ( fields[c], fields[s], fields[e] ) )
-                    except:
-                        skipped_lines += 1
-                        if not first_invalid_line:
-                            first_invalid_line = i+1
-    out.write( "\n" ) #separating newline
-    num_tracks += 1
-
-out.close()
+                        c, s, e = (int(x) - 1 for x in colspec.split(",")[:3])
+                        st = -1  # strand column is absent
+                    except Exception:
+                        sys.exit("Columns in interval file invalid for UCSC custom track.")

-print "Generated a custom track containing %d subtracks." % num_tracks
+                i = 0
+                for i, line in enumerate(in_file):
+                    line = line.rstrip('\r\n')
+                    if line and not line.startswith('#'):
+                        fields = line.split("\t")
+                        if st > 0:
+                            # strand column is present
+                            try:
+                                out.write(f"{fields[c]}\t{fields[s]}\t{fields[e]}\t{i}\t0\t{fields[st]}\n")
+                            except Exception:
+                                skipped_lines += 1
+                                if not first_invalid_line:
+                                    first_invalid_line = i + 1
+                        else:
+                            try:
+                                out.write(f"{fields[c]}\t{fields[s]}\t{fields[e]}\n")
+                            except Exception:
+                                skipped_lines += 1
+                                if not first_invalid_line:
+                                    first_invalid_line = i + 1
+        out.write("\n")  # separating newline
+        num_tracks += 1
+
+print(f"Generated a custom track containing {num_tracks} subtracks.")
 if skipped_lines:
-    print "Skipped %d invalid lines starting at #%d" % ( skipped_lines, first_invalid_line )
-
-
-
+    print(f"Skipped {skipped_lines} invalid lines starting at #{first_invalid_line}")
--- a/build_ucsc_custom_track.xml	Tue Oct 13 12:34:12 2015 -0400
+++ b/build_ucsc_custom_track.xml	Mon Feb 28 20:05:39 2022 +0000
@@ -1,70 +1,73 @@
-<tool id="build_ucsc_custom_track_1" name="Build custom track" version="1.0.0">
-  <description>for UCSC genome browser</description>
-  <command interpreter="python">
-    build_ucsc_custom_track.py
-      "$out_file1"
-      #for $t in $tracks
-        "${t.input.file_name}"
-        "${t.input.ext}"
-        #if $t.input.ext == "interval"
-          ${t.input.metadata.chromCol},${t.input.metadata.startCol},${t.input.metadata.endCol},${t.input.metadata.strandCol}
-        #else
-          "NA"
-        #end if
-        "${t.name}"
-        "${t.description}"
-        "${t.color}"
-        "${t.visibility}"
-      #end for
-  </command>
-  <inputs>
-    <repeat name="tracks" title="Track">
-      <param name="input" type="data" format="interval,wig" label="Dataset"/>
-      <param name="name" type="text" value="User Track">
-        <validator type="length" max="15"/>
-      </param>
-      <param name="description" type="text" value="User Supplied Track (from Galaxy)">
-        <validator type="length" max="60"/>
-      </param>
-  		<param label="Color" name="color" type="select">
-  			<option selected="yes" value="0-0-0">Black</option>
-  			<option value="255-0-0">Red</option>
-  			<option value="0-255-0">Green</option>
-  			<option value="0-0-255">Blue</option>
-  			<option value="255-0-255">Magenta</option>
-  			<option value="0-255-255">Cyan</option>
-  			<option value="255-215-0">Gold</option>
-  			<option value="160-32-240">Purple</option>
-  			<option value="255-140-0">Orange</option>
-  			<option value="255-20-147">Pink</option>
-  			<option value="92-51-23">Dark Chocolate</option>
-  			<option value="85-107-47">Olive green</option>
-  		</param>
-  		<param label="Visibility" name="visibility" type="select">
-  			<option selected="yes" value="1">Dense</option>
-  			<option value="2">Full</option>
-  			<option value="3">Pack</option>
-  			<option value="4">Squish</option>
-  			<option value="0">Hide</option>
-  		</param>
-    </repeat>
-  </inputs>
-	<outputs>
-  	<data format="customtrack" name="out_file1" />
-	</outputs>
+<tool id="build_ucsc_custom_track_1" name="Build custom track" version="1.0.1" profile="21.01">
+    <description>for UCSC genome browser</description>
+    <requirements>
+        <requirement type="package" version="3.10">python</requirement>
+    </requirements>
+    <code file="build_ucsc_custom_track_code.py" />
+    <command detect_errors="exit_code"><![CDATA[
+python '$__tool_directory__/build_ucsc_custom_track.py'
+'$out_file1'
+#for $t in $tracks
+    '${t.input.file_name}'
+    '${t.input.ext}'
+    #if $t.input.ext == "interval"
+        '${t.input.metadata.chromCol},${t.input.metadata.startCol},${t.input.metadata.endCol},${t.input.metadata.strandCol}'
+    #else
+        "NA"
+    #end if
+    '${t.name}'
+    '${t.description}'
+    "${t.color}"
+    ${t.visibility}
+#end for
+    ]]></command>
+    <inputs>
+        <repeat name="tracks" title="Track">
+            <param name="input" type="data" format="interval,wig" label="Dataset"/>
+            <param name="name" type="text" value="User Track">
+                <validator type="length" max="15"/>
+            </param>
+            <param name="description" type="text" value="User Supplied Track (from Galaxy)">
+                <validator type="length" max="60"/>
+            </param>
+            <param name="color" type="select" label="Color">
+                <option selected="yes" value="0-0-0">Black</option>
+                <option value="255-0-0">Red</option>
+                <option value="0-255-0">Green</option>
+                <option value="0-0-255">Blue</option>
+                <option value="255-0-255">Magenta</option>
+                <option value="0-255-255">Cyan</option>
+                <option value="255-215-0">Gold</option>
+                <option value="160-32-240">Purple</option>
+                <option value="255-140-0">Orange</option>
+                <option value="255-20-147">Pink</option>
+                <option value="92-51-23">Dark Chocolate</option>
+                <option value="85-107-47">Olive green</option>
+            </param>
+            <param name="visibility" type="select" label="Visibility">
+                <option selected="yes" value="1">Dense</option>
+                <option value="2">Full</option>
+                <option value="3">Pack</option>
+                <option value="4">Squish</option>
+                <option value="0">Hide</option>
+            </param>
+        </repeat>
+    </inputs>
+    <outputs>
+        <data name="out_file1" format="customtrack" />
+    </outputs>
     <tests>
         <!--TODO: add a 2nd test here that includes 2 tracks -->
         <test>
-  		    <param name="input" value="customTrack1.bed" />
-  		    <param name="name" value="User Track" />
-  		    <param name="description" value="User Supplied Track (from Galaxy)" />
-  		    <param name="color" value="0-0-0" />
-  		    <param name="visibility" value="1" />
-  		    <output name="out_file1" file="build_ucsc_custom_track_out1.customtrack" />
+            <param name="input" value="customTrack1.bed" />
+            <param name="name" value="User Track" />
+            <param name="description" value="User Supplied Track (from Galaxy)" />
+            <param name="color" value="0-0-0" />
+            <param name="visibility" value="1" />
+            <output name="out_file1" file="build_ucsc_custom_track_out1.customtrack" />
         </test>
-	</tests>
-<help>
-
+    </tests>
+    <help><![CDATA[
 .. class:: infomark

 This tool allows you to build custom tracks using datasets in your history for the UCSC genome browser. You can view these custom tracks on the UCSC genome browser by clicking on **display at UCSC main/test** link in the history panel of the output dataset.
@@ -74,9 +77,5 @@
 .. class:: warningmark

 Please note that this tool requires **all input datasets(tracks) to have the same genome build**. The tool throws an error when this requirement is not met. You may then have to choose a valid dataset or remove invalid tracks.
-
-</help>
-
-<code file="build_ucsc_custom_track_code.py" />
-
+    ]]></help>
 </tool>
--- a/build_ucsc_custom_track_code.py	Tue Oct 13 12:34:12 2015 -0400
+++ b/build_ucsc_custom_track_code.py	Mon Feb 28 20:05:39 2022 +0000
@@ -1,21 +1,15 @@
 # runs after the job (and after the default post-filter)

-# Older py compatibility
-try:
-    set()
-except:
-    from sets import Set as set
-
-def validate_input( trans, error_map, param_values, page_param_map ):
+def validate_input(trans, error_map, param_values, page_param_map):
     dbkeys = set()
     tracks = param_values['tracks']
     for track in tracks:
         if track['input']:
-            dbkeys.add( track['input'].dbkey )
-    if len( dbkeys ) > 1:
+            dbkeys.add(track['input'].dbkey)
+    if len(dbkeys) > 1:
         # FIXME: Should be able to assume error map structure is created
         if 'tracks' not in error_map:
-            error_map['tracks'] = [ dict() for t in tracks ]
-            for i in range( len( tracks ) ):
+            error_map['tracks'] = [dict() for t in tracks]
+            for i in range(len(tracks)):
                 error_map['tracks'][i]['input'] = \
-                    "All datasets must belong to same genomic build"
\ No newline at end of file
+                    "All datasets must belong to same genomic build"