# HG changeset patch # User bgruening # Date 1356430585 18000 # Node ID af7b6c6ee439f239e1e62d1343c9cc2b43a5d72a initial commit diff -r 000000000000 -r af7b6c6ee439 datatypes/datatypes_conf.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/datatypes/datatypes_conf.xml Tue Dec 25 05:16:25 2012 -0500 @@ -0,0 +1,45 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r 000000000000 -r af7b6c6ee439 datatypes/inchi_to_mol2_converter.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/datatypes/inchi_to_mol2_converter.xml Tue Dec 25 05:16:25 2012 -0500 @@ -0,0 +1,15 @@ + + + obabel -iinchi $input -omol2 -O $output -e 2>&1 + + + + + + + + obabel + + + + diff -r 000000000000 -r af7b6c6ee439 datatypes/inchi_to_mol_converter.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/datatypes/inchi_to_mol_converter.xml Tue Dec 25 05:16:25 2012 -0500 @@ -0,0 +1,15 @@ + + + obabel -iinchi $input -omol -O $output -e 2>&1 + + + + + + + + obabel + + + + diff -r 000000000000 -r af7b6c6ee439 datatypes/inchi_to_sdf_converter.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/datatypes/inchi_to_sdf_converter.xml Tue Dec 25 05:16:25 2012 -0500 @@ -0,0 +1,15 @@ + + + obabel -iinchi $input -osdf -O $output -e 2>&1 + + + + + + + + obabel + + + + diff -r 000000000000 -r af7b6c6ee439 datatypes/inchi_to_smi_converter.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/datatypes/inchi_to_smi_converter.xml Tue Dec 25 05:16:25 2012 -0500 @@ -0,0 +1,15 @@ + + + obabel -iinchi $input -osmi -O $output -e 2>&1 + + + + + + + + obabel + + + + diff -r 000000000000 -r af7b6c6ee439 datatypes/mol2_to_inchi_converter.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/datatypes/mol2_to_inchi_converter.xml Tue Dec 25 05:16:25 2012 -0500 @@ -0,0 +1,15 @@ + + + obabel -imol2 $input -oinchi -O $output -e 2>&1 + + + + + + + + obabel + + + + diff -r 000000000000 -r af7b6c6ee439 datatypes/mol2_to_mol_converter.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/datatypes/mol2_to_mol_converter.xml Tue Dec 25 05:16:25 2012 -0500 @@ -0,0 +1,15 @@ + + + obabel -imol2 $input -omol -O $output -e 2>&1 + + + + + + + + obabel + + + + diff -r 000000000000 -r af7b6c6ee439 datatypes/mol2_to_sdf_converter.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/datatypes/mol2_to_sdf_converter.xml Tue Dec 25 05:16:25 2012 -0500 @@ -0,0 +1,15 @@ + + + obabel -imol2 $input -osdf $output -e 2>&1 + + + + + + + + obabel + + + + diff -r 000000000000 -r af7b6c6ee439 datatypes/mol2_to_smi_converter.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/datatypes/mol2_to_smi_converter.xml Tue Dec 25 05:16:25 2012 -0500 @@ -0,0 +1,15 @@ + + + obabel -imol2 $input -omol $output -e 2>&1 + + + + + + + + obabel + + + + diff -r 000000000000 -r af7b6c6ee439 datatypes/molFiles.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/datatypes/molFiles.py Tue Dec 25 05:16:25 2012 -0500 @@ -0,0 +1,242 @@ +# -*- coding: utf-8 -*- + +from galaxy.datatypes import data +import logging +from galaxy.datatypes.sniff import * +import commands +import pybel +import openbabel +openbabel.obErrorLog.StopLogging() + +from galaxy.datatypes.metadata import MetadataElement +from galaxy.datatypes import metadata + +log = logging.getLogger(__name__) + +class GenericMolFile( data.Text ): + + MetadataElement( name="molecules", default=0, desc="Number of molecules", readonly=True, visible=False, optional=True, no_value=0 ) + + file_ext = "mol2/sdf/drf" + def check_filetype( self,filename ): + self.no_mols = commands.getstatusoutput("grep -c \\$\\$\\$\\$ "+filename) + if (self.no_mols[0] == 0) & (self.no_mols[1] > 0): + self.file_ext="sdf" + return True + self.no_mols = commands.getstatusoutput("grep -c @\MOLECULE "+filename) + if (self.no_mols[0] == 0) & (self.no_mols[1] > 0): + self.file_ext="mol2" + return True + self.no_mols = commands.getstatusoutput("grep -c \"ligand id\" "+filename) + if (self.no_mols[0] == 0) & (self.no_mols[1] > 0): + self.file_ext="drf" + return True + self.no_mols = commands.getstatusoutput("grep -c HEADER "+filename) + if (self.no_mols[0] == 0) & (self.no_mols[1] > 0): + self.file_ext="pdb" + return True + return False + + def set_peek( self, dataset, is_multi_byte=False ): + if not dataset.dataset.purged: + if(self.check_filetype(dataset.file_name)) : + if (self.no_mols[1] == '1'): + dataset.blurb = "1 molecule" + else: + dataset.blurb = "%s molecules" % self.no_mols[1] + dataset.peek = data.get_file_peek( dataset.file_name, is_multi_byte=is_multi_byte ) + else: + dataset.peek = 'file does not exist' + dataset.blurb = 'file purged from disk' + + def get_mime(self): + return 'text/plain' + + +class GenericMultiMolFile( GenericMolFile ): + def set_peek( self, dataset, is_multi_byte=False ): + if not dataset.dataset.purged: + self.sniff(dataset.file_name) + if (self.no_mols[1] == '1'): + dataset.blurb = "1 molecule" + else: + dataset.blurb = "%s molecules" % self.no_mols[1] + dataset.peek = data.get_file_peek( dataset.file_name, is_multi_byte=is_multi_byte ) + else: + dataset.peek = 'file does not exist' + dataset.blurb = 'file purged from disk' + +class SDF( GenericMultiMolFile ): + file_ext = "sdf" + def sniff( self, filename ): + self.no_mols = commands.getstatusoutput("grep -c \\$\\$\\$\\$ "+filename) + if (self.no_mols[0] == 0) & (self.no_mols[1] > 0): + return True + else: + return False + +class MOL2( GenericMultiMolFile ): + file_ext = "mol2" + def sniff( self, filename ): + self.no_mols = commands.getstatusoutput("grep -c @\MOLECULE "+filename) + if (self.no_mols[0] == 0) & (self.no_mols[1] > 0): + return True + else: + return False + +class FPS( GenericMultiMolFile ): + file_ext = "fps" + def sniff( self, filename ): + self.no_mols = commands.getstatusoutput("grep -c -v '^#' "+filename) + with open(filename) as in_handle: + for line_counter, line in enumerate(in_handle): + line = line.strip() + if line.startswith('#FPS1'): + return True + if line_counter > 10: + return False + +class DRF( GenericMultiMolFile ): + file_ext = "drf" + def sniff( self, filename ): + self.no_mols = commands.getstatusoutput("grep -c \"ligand id\" "+filename) + if (self.no_mols[0] == 0) & (self.no_mols[1] > 0): + return True + else: + return False + + +class PHAR( GenericMultiMolFile ): + MetadataElement( name="base_name", desc="base name", default='Phar', + readonly=True, set_in_upload=True) + file_ext = "phar" + def sniff( self, filename ): + self.no_mols = commands.getstatusoutput("grep -c -v '^#' "+filename) + return False + +class PDB( GenericMolFile ): + file_ext = "pdb" + def sniff( self, filename ): + self.no_mols = commands.getstatusoutput("grep -c HEADER "+filename) + if (self.no_mols[0] == 0) & (self.no_mols[1] > 0): + return True + else: + return False + def set_peek( self, dataset, is_multi_byte=False ): + #def set_peek( self, dataset, line_count=None, is_multi_byte=False ): + if not dataset.dataset.purged: + res = commands.getstatusoutput("lib/galaxy/datatypes/countResidues.sh "+dataset.file_name) + dataset.peek = res[1] + self.sniff(dataset.file_name) + if (self.no_mols[1] == '1'): + dataset.blurb = "1 protein structure" + else: + dataset.blurb = "%s protein structures"%self.no_mols[1] + else: + dataset.peek = 'file does not exist' + dataset.blurb = 'file purged from disk' + +class grd ( data.Text ) : + file_ext = "grd" + def set_peek( self, dataset, is_multi_byte=False ): + if not dataset.dataset.purged: + #dataset.peek = "" + dataset.blurb = "score-grids for docking" + else: + dataset.peek = 'file does not exist' + dataset.blurb = 'file purged from disk' + +class grdtgz ( data.Text ) : + file_ext = "grd.tgz" + def set_peek( self, dataset, is_multi_byte=False ): + if not dataset.dataset.purged: + #dataset.peek = "" + dataset.blurb = "compressed score-grids for docking" + else: + dataset.peek = 'file does not exist' + dataset.blurb = 'file purged from disk' + + +class InChI( GenericMultiMolFile ): + file_ext = "inchi" + def sniff( self, filename ): + self.no_mols = commands.getstatusoutput("grep -c '^InChI=' "+filename) + word_count = commands.getoutput("wc -w "+filename).split()[0] + + if self.no_mols[1] != word_count: + return False + + if (self.no_mols[0] == 0) & (self.no_mols[1] > 0): + return True + else: + return False + + def set_meta( self, dataset, **kwd ): + """ + Set the number of sequences and the number of data lines + in dataset. + """ + if self.max_optional_metadata_filesize >= 0 and dataset.get_size() > self.max_optional_metadata_filesize: + dataset.metadata.data_lines = None + dataset.metadata.sequences = None + return + #word_count = commands.getoutput("wc -w "+filename).split()[0] + # word_count are the lines of the file, if word_count and molecule count + # are the same, that must hold to be an InChI File, then that should be + # the same number as all non-empty lines + #dataset.metadata.data_lines = word_count + #int(commands.getoutput("grep -cve '^\s*$' "+filename)) + #dataset.metadata.molecules = word_count + + +class SMILES( GenericMultiMolFile ): + file_ext = "smi" + def sniff( self, filename ): + """ + Its hard or impossible to sniff a SMILES File. All what i know is the + word_count must be the same as the non-empty line count. And that i can + try to import the first SMILES and check if it is a molecule. + """ + + # that corresponds to non-empty line count + self.no_mols = commands.getstatusoutput("grep -cve '^\s*$' "+filename) + word_count = int(commands.getoutput("wc -w "+filename).split()[0]) + + if int(self.no_mols[1]) != word_count: + return False + + if (self.no_mols[0] == 0) & (int(self.no_mols[1]) > 0): + for line in open(filename): + line = line.strip() + if line: + # if we have atoms, we have a molecule + try: + if len(pybel.readstring('smi', line).atoms) > 0: + return True + else: + return False + except: + # if convert fails its not a smiles string + return False + return True + else: + return False + + def set_meta( self, dataset, **kwd ): + """ + Set the number of sequences and the number of data lines + in dataset. + """ + if self.max_optional_metadata_filesize >= 0 and dataset.get_size() > self.max_optional_metadata_filesize: + dataset.metadata.data_lines = None + dataset.metadata.sequences = None + return + + #word_count = int(commands.getoutput("wc -w "+filename).split()[0]) + # word_count are the lines of the file, if word_count and molecule count + # are the same, that must hold to be an InChI File, then that should be + # the same number as all non-empty lines + #dataset.metadata.data_lines = word_count + #dataset.metadata.molecules = word_count + + diff -r 000000000000 -r af7b6c6ee439 datatypes/mol_to_inchi_converter.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/datatypes/mol_to_inchi_converter.xml Tue Dec 25 05:16:25 2012 -0500 @@ -0,0 +1,15 @@ + + + obabel -imol $input -omol2 -O $output -e 2>&1 + + + + + + + + obabel + + + + diff -r 000000000000 -r af7b6c6ee439 datatypes/mol_to_mol2_converter.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/datatypes/mol_to_mol2_converter.xml Tue Dec 25 05:16:25 2012 -0500 @@ -0,0 +1,15 @@ + + + obabel -imol $input -omol2 -O $output -e 2>&1 + + + + + + + + obabel + + + + diff -r 000000000000 -r af7b6c6ee439 datatypes/mol_to_smi_converter.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/datatypes/mol_to_smi_converter.xml Tue Dec 25 05:16:25 2012 -0500 @@ -0,0 +1,15 @@ + + + obabel -imol $input -osmi -O $output -e 2>&1 + + + + + + + + obabel + + + + diff -r 000000000000 -r af7b6c6ee439 datatypes/sdf_to_inchi_converter.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/datatypes/sdf_to_inchi_converter.xml Tue Dec 25 05:16:25 2012 -0500 @@ -0,0 +1,15 @@ + + + obabel -isdf $input -oinchi -O $output -e 2>&1 + + + + + + + + obabel + + + + diff -r 000000000000 -r af7b6c6ee439 datatypes/sdf_to_mol2_converter.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/datatypes/sdf_to_mol2_converter.xml Tue Dec 25 05:16:25 2012 -0500 @@ -0,0 +1,15 @@ + + + obabel -isdf $input -omol2 -O $output -e 2>&1 + + + + + + + + obabel + + + + diff -r 000000000000 -r af7b6c6ee439 datatypes/sdf_to_smi_converter.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/datatypes/sdf_to_smi_converter.xml Tue Dec 25 05:16:25 2012 -0500 @@ -0,0 +1,15 @@ + + + obabel -isdf $input -osmi -O $output -e 2>&1 + + + + + + + + obabel + + + + diff -r 000000000000 -r af7b6c6ee439 datatypes/smi_to_inchi_converter.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/datatypes/smi_to_inchi_converter.xml Tue Dec 25 05:16:25 2012 -0500 @@ -0,0 +1,15 @@ + + + obabel -ismi $input -oinchi -O $output -e 2>&1 + + + + + + + + obabel + + + + diff -r 000000000000 -r af7b6c6ee439 datatypes/smi_to_mol2_converter.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/datatypes/smi_to_mol2_converter.xml Tue Dec 25 05:16:25 2012 -0500 @@ -0,0 +1,15 @@ + + + obabel -ismi $input -omol2 -O $output -e 2>&1 + + + + + + + + obabel + + + + diff -r 000000000000 -r af7b6c6ee439 datatypes/smi_to_mol_converter.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/datatypes/smi_to_mol_converter.xml Tue Dec 25 05:16:25 2012 -0500 @@ -0,0 +1,15 @@ + + + obabel -ismi $input -omol -O $output -e 2>&1 + + + + + + + + obabel + + + + diff -r 000000000000 -r af7b6c6ee439 datatypes/smi_to_sdf_converter.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/datatypes/smi_to_sdf_converter.xml Tue Dec 25 05:16:25 2012 -0500 @@ -0,0 +1,15 @@ + + + obabel -ismi $input -osdf -O $output -e 2>&1 + + + + + + + + obabel + + + +