diff rsem.py @ 0:64d45f959303

Uploaded
author jjohnson
date Mon, 11 Nov 2013 13:54:43 -0500
parents
children 1ff2fc8da328
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/rsem.py	Mon Nov 11 13:54:43 2013 -0500
@@ -0,0 +1,83 @@
+"""
+RSEM datatypes
+"""
+import os,os.path,re,sys
+import galaxy.datatypes.data
+from galaxy.datatypes.images import Html
+from galaxy.datatypes.metadata import MetadataElement
+
+class RsemReference( Html ):
+    """Class describing an RSEM reference"""
+    MetadataElement( name='reference_name', default='galaxy_generated_bowtie_index', desc='RSEM Reference Name', readonly=True, visible=True, no_value=None )
+
+    file_ext = 'rsem_ref'
+    is_binary = True
+    composite_type = 'auto_primary_file'
+    allow_datatype_change = False
+
+    def generate_primary_file( self, dataset = None ):
+        """
+        This is called only at upload to write the html file
+        cannot rename the datasets here - they come with the default unfortunately
+        """
+        return '<html><head></head><body>AutoGenerated Primary File for RSEM Reference Composite Dataset</body></html>'
+
+    def regenerate_primary_file(self,dataset):
+        """
+        cannot do this until we are setting metadata
+        """
+        refname = dataset.metadata.reference_name
+        flist = os.listdir(dataset.extra_files_path)
+        rval = ['<html><head><title>RSEM Reference %s</title></head><p/>Comprises the following files:<p/><ul>' % (refname)]
+        for i,fname in enumerate(flist):
+            sfname = os.path.split(fname)[-1]
+            rval.append( '<li><a href="%s">%s</a>' % ( sfname, sfname ) )
+        rval.append( '</ul></html>' )
+        f = file(dataset.file_name,'w')
+        f.write("\n".join( rval ))
+        f.write('\n')
+        f.close()
+
+    def set_peek( self, dataset, is_multi_byte=False ):
+        if not dataset.dataset.purged:
+            dataset.peek  = "RSEM Reference (%s)" % ( dataset.metadata.reference_name )
+            dataset.blurb =  "RSEM Reference (%s)" % ( dataset.metadata.reference_name )
+        else:
+            dataset.peek = 'RSEM Reference (%s) does not exist' % ( dataset.metadata.reference_name )
+            dataset.blurb = 'RSEM Reference (%s) purged from disk' % ( dataset.metadata.reference_name )
+
+    def display_peek( self, dataset ):
+        try:
+            return dataset.peek
+        except:
+            return "RSEM Reference"
+
+    def set_meta( self, dataset, overwrite = True, **kwd ):
+        """
+        Expecting files:
+        extra_files_path/<reference_name>.grp
+        extra_files_path/<reference_name>.ti
+        extra_files_path/<reference_name>.seq
+        extra_files_path/<reference_name>.transcripts.fa
+        Optionally includes files:
+        extra_files_path/<reference_name>.chrlist
+        extra_files_path/<reference_name>.idx.fa
+        extra_files_path/<reference_name>.4.ebwt
+        extra_files_path/<reference_name>.3.ebwt
+        extra_files_path/<reference_name>.2.ebwt
+        extra_files_path/<reference_name>.1.ebwt
+        extra_files_path/<reference_name>.rev.2.ebwt
+        extra_files_path/<reference_name>.rev.1.ebwt
+        """
+        log.info( "RSEM reference set_meta %s %s" % (dataset,dataset.extra_files_path))
+        pat = '^(.*)\.grp$'
+        efp = dataset.extra_files_path
+        flist = os.listdir(efp)
+        for i,fname in enumerate(flist):
+          m = re.match(pat,fname)
+          if m: 
+            dataset.metadata.reference_name = m.groups()[0]
+            break
+        self.regenerate_primary_file(dataset)
+
+