view cistrome_phasdb.py @ 0:b23d89433ee4 draft default tip

Uploaded
author jjohnson
date Wed, 17 Dec 2014 16:07:17 -0500
parents
children
line wrap: on
line source

"""
Cistrome PhasDB datatype
"""
import os,os.path,re
import traceback
import galaxy.datatypes.data
from galaxy.datatypes.data import Text
from galaxy.datatypes.metadata import MetadataElement

class PhasDB( Text ):
    """Class describing a Cistrome Phascons DB.

    The extra-files path should contain BigWig files with phascons scores
    for each chromosome "chr", named:  <chr>.bw
    """
    file_ext = "cistrome_phasdb"
    MetadataElement( name="genome_version", default=None, desc="Genome Version", readonly=False, visible=True, no_value=None )
    MetadataElement( name="chromosomes", default=[], desc="Chromosome Names", readonly=True, visible=True, no_value=[], optional=True)
    composite_type = 'auto_primary_file'

    # Matches "<name>.bw" exactly: the dot is escaped and the end is anchored,
    # so names such as "chr1.bw.tmp" or "xabw" are not mistaken for BigWig files.
    _bigwig_pattern = re.compile(r'(.+)\.bw\Z')

    def __init__( self, **kwd ):
        Text.__init__( self, **kwd )

    def set_meta( self, dataset, **kwd ):
        """Populate metadata from the BigWig files in the dataset's extra-files directory.

        Walks ``dataset.extra_files_path`` and, for every file named
        ``<name>.bw``:

        * appends ``<name>`` to ``dataset.metadata.chromosomes``;
        * registers the file (path relative to the extra-files directory)
          through ``self.add_composite_file``;
        * if the file sits in a sub-directory, uses that sub-directory's
          relative path as ``dataset.metadata.genome_version`` (the last
          sub-directory visited wins).

        Nothing is changed when the extra-files path is unset or is not a
        directory.  This is best-effort: any exception raised while scanning
        is printed with its traceback and not propagated.
        """
        Text.set_meta(self, dataset, **kwd )
        try:
            data_dir = dataset.extra_files_path
            if not data_dir or not os.path.isdir(data_dir):
                return
            genome_version = None
            chromosomes = []
            for root, dirs, files in os.walk(data_dir):
                # relpath is robust to a trailing separator on data_dir and to
                # data_dir's text re-appearing further down in root.
                rel_path = os.path.relpath(root, data_dir)
                if rel_path == os.curdir:
                    # Files directly in data_dir carry no sub-directory prefix.
                    rel_path = ''
                for fname in files:
                    m = self._bigwig_pattern.match(fname)
                    if not m:
                        continue
                    name = m.group(1)
                    if rel_path:
                        genome_version = rel_path
                    chromosomes.append(name)
                    self.add_composite_file( os.path.join(rel_path,fname), description = name, mimetype = 'binary')
            dataset.metadata.chromosomes = chromosomes
            if genome_version:
                dataset.metadata.genome_version = genome_version
        except Exception:
            # print_exc reports the exception that was actually raised;
            # print_stack would only show the current call stack.
            traceback.print_exc()