annotate rsem.py @ 0:64d45f959303

Uploaded
author jjohnson
date Mon, 11 Nov 2013 13:54:43 -0500
parents
children 1ff2fc8da328
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
64d45f959303 Uploaded
jjohnson
parents:
diff changeset
1 """
64d45f959303 Uploaded
jjohnson
parents:
diff changeset
2 RSEM datatypes
64d45f959303 Uploaded
jjohnson
parents:
diff changeset
3 """
64d45f959303 Uploaded
jjohnson
parents:
diff changeset
4 import os,os.path,re,sys
64d45f959303 Uploaded
jjohnson
parents:
diff changeset
5 import galaxy.datatypes.data
64d45f959303 Uploaded
jjohnson
parents:
diff changeset
6 from galaxy.datatypes.images import Html
64d45f959303 Uploaded
jjohnson
parents:
diff changeset
7 from galaxy.datatypes.metadata import MetadataElement
64d45f959303 Uploaded
jjohnson
parents:
diff changeset
8
64d45f959303 Uploaded
jjohnson
parents:
diff changeset
class RsemReference( Html ):
    """Class describing an RSEM reference.

    The dataset is a composite: the primary file is a small HTML page and
    the reference files themselves live in ``dataset.extra_files_path``.
    The ``reference_name`` metadata element holds the common prefix of
    those files (the ``<reference_name>`` in ``<reference_name>.grp``).
    """
    MetadataElement( name='reference_name', default='galaxy_generated_bowtie_index', desc='RSEM Reference Name', readonly=True, visible=True, no_value=None )

    file_ext = 'rsem_ref'
    is_binary = True
    composite_type = 'auto_primary_file'
    allow_datatype_change = False

    def generate_primary_file( self, dataset=None ):
        """Return the placeholder HTML used as the primary file.

        This is called only at upload to write the html file; the datasets
        cannot be renamed here - they come with the default unfortunately.
        ``dataset`` is accepted for interface compatibility and is not used.
        """
        return '<html><head></head><body>AutoGenerated Primary File for RSEM Reference Composite Dataset</body></html>'

    def regenerate_primary_file( self, dataset ):
        """Rewrite the primary HTML file as an index of the extra files.

        Cannot do this until we are setting metadata, because the page title
        uses ``dataset.metadata.reference_name``.

        Writes one ``<li>`` link per entry of ``dataset.extra_files_path``
        to ``dataset.file_name``. Raises ``OSError`` if the extra files
        directory cannot be listed.
        """
        refname = dataset.metadata.reference_name
        rval = ['<html><head><title>RSEM Reference %s</title></head><p/>Comprises the following files:<p/><ul>' % (refname,)]
        # os.listdir returns bare entry names in arbitrary order; sort them
        # so the generated page is stable from one run to the next.
        for sfname in sorted(os.listdir(dataset.extra_files_path)):
            rval.append( '<li><a href="%s">%s</a>' % ( sfname, sfname ) )
        rval.append( '</ul></html>' )
        # open() instead of the Python 2-only file() builtin; the context
        # manager closes the handle even when a write fails.
        with open(dataset.file_name, 'w') as f:
            f.write("\n".join( rval ))
            f.write('\n')

    def set_peek( self, dataset, is_multi_byte=False ):
        """Set ``dataset.peek`` and ``dataset.blurb`` from the reference name.

        A purged dataset gets "does not exist" / "purged from disk" texts
        instead of the plain description. ``is_multi_byte`` is not used.
        """
        refname = dataset.metadata.reference_name
        if not dataset.dataset.purged:
            dataset.peek = "RSEM Reference (%s)" % ( refname, )
            dataset.blurb = "RSEM Reference (%s)" % ( refname, )
        else:
            dataset.peek = 'RSEM Reference (%s) does not exist' % ( refname, )
            dataset.blurb = 'RSEM Reference (%s) purged from disk' % ( refname, )

    def display_peek( self, dataset ):
        """Return ``dataset.peek``, or a generic label if reading it fails."""
        try:
            return dataset.peek
        except Exception:
            # Deliberate best-effort fallback, but no longer swallows
            # SystemExit / KeyboardInterrupt as the bare except did.
            return "RSEM Reference"

    def set_meta( self, dataset, overwrite=True, **kwd ):
        """Derive ``reference_name`` from the extra files and rebuild the index.

        Expecting files:
        extra_files_path/<reference_name>.grp
        extra_files_path/<reference_name>.ti
        extra_files_path/<reference_name>.seq
        extra_files_path/<reference_name>.transcripts.fa
        Optionally includes files:
        extra_files_path/<reference_name>.chrlist
        extra_files_path/<reference_name>.idx.fa
        extra_files_path/<reference_name>.4.ebwt
        extra_files_path/<reference_name>.3.ebwt
        extra_files_path/<reference_name>.2.ebwt
        extra_files_path/<reference_name>.1.ebwt
        extra_files_path/<reference_name>.rev.2.ebwt
        extra_files_path/<reference_name>.rev.1.ebwt

        The reference name is taken from the first ``*.grp`` entry (in
        sorted order); when there is none the metadata value is left
        untouched. ``overwrite`` and ``**kwd`` are accepted but not used.
        Raises ``OSError`` if the extra files directory cannot be listed.
        """
        # The module never defined ``log``; obtain a logger locally so this
        # method no longer fails with a NameError.
        import logging
        log = logging.getLogger( __name__ )

        efp = dataset.extra_files_path
        log.info( "RSEM reference set_meta %s %s", dataset, efp )
        pat = r'^(.*)\.grp$'
        # Sorted so the choice is deterministic if several .grp files exist.
        for fname in sorted(os.listdir(efp)):
            m = re.match(pat, fname)
            if m:
                dataset.metadata.reference_name = m.group(1)
                break
        self.regenerate_primary_file(dataset)
64d45f959303 Uploaded
jjohnson
parents:
diff changeset
82
64d45f959303 Uploaded
jjohnson
parents:
diff changeset
83