Mercurial > repos > bgruening > chemical_datatypes
changeset 0:af7b6c6ee439 draft
initial commit
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/datatypes/datatypes_conf.xml Tue Dec 25 05:16:25 2012 -0500
@@ -0,0 +1,49 @@
+<?xml version="1.0"?>
+<datatypes>
+    <!-- Python module that provides the datatype classes referenced below. -->
+    <datatype_files>
+        <datatype_file name="molFiles.py"/>
+    </datatype_files>
+    <registration>
+        <!-- Cheminformatics datatypes, each listing the converters it offers. -->
+        <!-- NOTE(review): "mol" is registered with the SDF class; confirm this is intended. -->
+        <datatype extension="mol" type="galaxy.datatypes.molFiles:SDF" display_in_upload="True">
+            <converter file="mol_to_inchi_converter.xml" target_datatype="inchi"/>
+            <converter file="mol_to_mol2_converter.xml" target_datatype="mol2"/>
+            <converter file="mol_to_smi_converter.xml" target_datatype="smi"/>
+        </datatype>
+        <datatype extension="mol2" type="galaxy.datatypes.molFiles:MOL2" display_in_upload="True">
+            <converter file="mol2_to_inchi_converter.xml" target_datatype="inchi"/>
+            <converter file="mol2_to_mol_converter.xml" target_datatype="mol"/>
+            <converter file="mol2_to_smi_converter.xml" target_datatype="smi"/>
+            <converter file="mol2_to_sdf_converter.xml" target_datatype="sdf"/>
+        </datatype>
+        <datatype extension="inchi" type="galaxy.datatypes.molFiles:InChI" display_in_upload="True">
+            <converter file="inchi_to_mol_converter.xml" target_datatype="mol"/>
+            <converter file="inchi_to_mol2_converter.xml" target_datatype="mol2"/>
+            <converter file="inchi_to_smi_converter.xml" target_datatype="smi"/>
+            <converter file="inchi_to_sdf_converter.xml" target_datatype="sdf"/>
+        </datatype>
+        <datatype extension="smi" type="galaxy.datatypes.molFiles:SMILES" display_in_upload="True">
+            <converter file="smi_to_mol_converter.xml" target_datatype="mol"/>
+            <converter file="smi_to_mol2_converter.xml" target_datatype="mol2"/>
+            <converter file="smi_to_inchi_converter.xml" target_datatype="inchi"/>
+            <converter file="smi_to_sdf_converter.xml" target_datatype="sdf"/>
+        </datatype>
+        <datatype extension="sdf" type="galaxy.datatypes.molFiles:SDF" display_in_upload="True">
+            <converter file="sdf_to_inchi_converter.xml" target_datatype="inchi"/>
+            <converter file="sdf_to_smi_converter.xml" target_datatype="smi"/>
+            <converter file="sdf_to_mol2_converter.xml" target_datatype="mol2"/>
+        </datatype>
+        <!-- Datatypes registered without converters. -->
+        <datatype extension="fps" type="galaxy.datatypes.molFiles:FPS" display_in_upload="True" />
+        <datatype extension="phar" type="galaxy.datatypes.molFiles:PHAR" display_in_upload="True" />
+    </registration>
+    <!-- Datatype classes registered as sniffers. -->
+    <sniffers>
+        <sniffer type="galaxy.datatypes.molFiles:SDF"/>
+        <sniffer type="galaxy.datatypes.molFiles:MOL2"/>
+        <sniffer type="galaxy.datatypes.molFiles:InChI"/>
+        <sniffer type="galaxy.datatypes.molFiles:SMILES"/>
+    </sniffers>
+</datatypes>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/datatypes/inchi_to_mol2_converter.xml Tue Dec 25 05:16:25 2012 -0500
@@ -0,0 +1,17 @@
+<tool id="CONVERTER_inchi_to_mol2" name="InChI to MOL2" version="1.0.0">
+    <!-- Datatype converter from InChI to MOL2, implemented as a call to obabel;
+         the command merges stderr into stdout. -->
+    <description></description>
+    <command>obabel -iinchi $input -omol2 -O $output -e 2&gt;&amp;1</command>
+    <inputs>
+        <param name="input" type="data" format="inchi" label="Molecules in InChI format"/>
+    </inputs>
+    <outputs>
+        <data name="output" format="mol2"/>
+    </outputs>
+    <requirements>
+        <requirement type="binary">obabel</requirement>
+    </requirements>
+    <help>
+    </help>
+</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/datatypes/inchi_to_mol_converter.xml Tue Dec 25 05:16:25 2012 -0500
@@ -0,0 +1,17 @@
+<tool id="CONVERTER_inchi_to_mol" name="InChI to MOL" version="1.0.0">
+    <!-- Datatype converter from InChI to MOL, implemented as a call to obabel;
+         the command merges stderr into stdout. -->
+    <description></description>
+    <command>obabel -iinchi $input -omol -O $output -e 2&gt;&amp;1</command>
+    <inputs>
+        <param name="input" type="data" format="inchi" label="Molecules in InChI-format"/>
+    </inputs>
+    <outputs>
+        <data name="output" format="mol"/>
+    </outputs>
+    <requirements>
+        <requirement type="binary">obabel</requirement>
+    </requirements>
+    <help>
+    </help>
+</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/datatypes/inchi_to_sdf_converter.xml Tue Dec 25 05:16:25 2012 -0500
@@ -0,0 +1,17 @@
+<tool id="CONVERTER_inchi_to_sdf" name="InChI to SDF" version="1.0.0">
+    <!-- Datatype converter from InChI to SDF, implemented as a call to obabel;
+         the command merges stderr into stdout. -->
+    <description></description>
+    <command>obabel -iinchi $input -osdf -O $output -e 2&gt;&amp;1</command>
+    <inputs>
+        <param name="input" type="data" format="inchi" label="Molecules in InChI format"/>
+    </inputs>
+    <outputs>
+        <data name="output" format="sdf"/>
+    </outputs>
+    <requirements>
+        <requirement type="binary">obabel</requirement>
+    </requirements>
+    <help>
+    </help>
+</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/datatypes/inchi_to_smi_converter.xml Tue Dec 25 05:16:25 2012 -0500
@@ -0,0 +1,17 @@
+<tool id="CONVERTER_inchi_to_smi" name="InChI to SMILES" version="1.0.0">
+    <!-- Datatype converter from InChI to SMILES, implemented as a call to obabel;
+         the command merges stderr into stdout. -->
+    <description></description>
+    <command>obabel -iinchi $input -osmi -O $output -e 2&gt;&amp;1</command>
+    <inputs>
+        <param name="input" type="data" format="inchi" label="Molecules in InChI format"/>
+    </inputs>
+    <outputs>
+        <data name="output" format="smi"/>
+    </outputs>
+    <requirements>
+        <requirement type="binary">obabel</requirement>
+    </requirements>
+    <help>
+    </help>
+</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/datatypes/mol2_to_inchi_converter.xml Tue Dec 25 05:16:25 2012 -0500
@@ -0,0 +1,17 @@
+<tool id="CONVERTER_mol2_to_inchi" name="MOL2 to InChI" version="1.0.0">
+    <!-- Datatype converter from MOL2 to InChI, implemented as a call to obabel;
+         the command merges stderr into stdout. -->
+    <description></description>
+    <command>obabel -imol2 $input -oinchi -O $output -e 2&gt;&amp;1</command>
+    <inputs>
+        <param name="input" type="data" format="mol2" label="Molecules in MOL2-format"/>
+    </inputs>
+    <outputs>
+        <data name="output" format="inchi"/>
+    </outputs>
+    <requirements>
+        <requirement type="binary">obabel</requirement>
+    </requirements>
+    <help>
+    </help>
+</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/datatypes/mol2_to_mol_converter.xml Tue Dec 25 05:16:25 2012 -0500
@@ -0,0 +1,17 @@
+<tool id="CONVERTER_mol2_to_mol" name="MOL2 to MOL" version="1.0.0">
+    <!-- Datatype converter from MOL2 to MOL, implemented as a call to obabel;
+         the command merges stderr into stdout. -->
+    <description></description>
+    <command>obabel -imol2 $input -omol -O $output -e 2&gt;&amp;1</command>
+    <inputs>
+        <param name="input" type="data" format="mol2" label="Molecules in MOL2-format"/>
+    </inputs>
+    <outputs>
+        <data name="output" format="mol"/>
+    </outputs>
+    <requirements>
+        <requirement type="binary">obabel</requirement>
+    </requirements>
+    <help>
+    </help>
+</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/datatypes/mol2_to_sdf_converter.xml Tue Dec 25 05:16:25 2012 -0500
@@ -0,0 +1,17 @@
+<tool id="CONVERTER_mol2_to_sdf" name="MOL2 to SDF" version="1.0.0">
+    <!-- Datatype converter from MOL2 to SDF, implemented as a call to obabel;
+         the command merges stderr into stdout. -->
+    <description></description>
+    <command>obabel -imol2 $input -osdf -O $output -e 2&gt;&amp;1</command>
+    <inputs>
+        <param name="input" type="data" format="mol2" label="Molecules in MOL2-format"/>
+    </inputs>
+    <outputs>
+        <data name="output" format="sdf"/>
+    </outputs>
+    <requirements>
+        <requirement type="binary">obabel</requirement>
+    </requirements>
+    <help>
+    </help>
+</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/datatypes/mol2_to_smi_converter.xml Tue Dec 25 05:16:25 2012 -0500
@@ -0,0 +1,17 @@
+<tool id="CONVERTER_mol2_to_smi" name="MOL2 to SMILES" version="1.0.0">
+    <!-- Datatype converter from MOL2 to SMILES, implemented as a call to obabel;
+         the command merges stderr into stdout. -->
+    <description></description>
+    <command>obabel -imol2 $input -osmi -O $output -e 2&gt;&amp;1</command>
+    <inputs>
+        <param name="input" type="data" format="mol2" label="Molecules in MOL2-format"/>
+    </inputs>
+    <outputs>
+        <data name="output" format="smi"/>
+    </outputs>
+    <requirements>
+        <requirement type="binary">obabel</requirement>
+    </requirements>
+    <help>
+    </help>
+</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/datatypes/molFiles.py Tue Dec 25 05:16:25 2012 -0500 @@ -0,0 +1,242 @@ +# -*- coding: utf-8 -*- + +from galaxy.datatypes import data +import logging +from galaxy.datatypes.sniff import * +import commands +import pybel +import openbabel +openbabel.obErrorLog.StopLogging() + +from galaxy.datatypes.metadata import MetadataElement +from galaxy.datatypes import metadata + +log = logging.getLogger(__name__) + +class GenericMolFile( data.Text ): + + MetadataElement( name="molecules", default=0, desc="Number of molecules", readonly=True, visible=False, optional=True, no_value=0 ) + + file_ext = "mol2/sdf/drf" + def check_filetype( self,filename ): + self.no_mols = commands.getstatusoutput("grep -c \\$\\$\\$\\$ "+filename) + if (self.no_mols[0] == 0) & (self.no_mols[1] > 0): + self.file_ext="sdf" + return True + self.no_mols = commands.getstatusoutput("grep -c @\<TRIPOS\>MOLECULE "+filename) + if (self.no_mols[0] == 0) & (self.no_mols[1] > 0): + self.file_ext="mol2" + return True + self.no_mols = commands.getstatusoutput("grep -c \"ligand id\" "+filename) + if (self.no_mols[0] == 0) & (self.no_mols[1] > 0): + self.file_ext="drf" + return True + self.no_mols = commands.getstatusoutput("grep -c HEADER "+filename) + if (self.no_mols[0] == 0) & (self.no_mols[1] > 0): + self.file_ext="pdb" + return True + return False + + def set_peek( self, dataset, is_multi_byte=False ): + if not dataset.dataset.purged: + if(self.check_filetype(dataset.file_name)) : + if (self.no_mols[1] == '1'): + dataset.blurb = "1 molecule" + else: + dataset.blurb = "%s molecules" % self.no_mols[1] + dataset.peek = data.get_file_peek( dataset.file_name, is_multi_byte=is_multi_byte ) + else: + dataset.peek = 'file does not exist' + dataset.blurb = 'file purged from disk' + + def get_mime(self): + return 'text/plain' + + +class GenericMultiMolFile( GenericMolFile ): + def set_peek( self, dataset, is_multi_byte=False ): + if not dataset.dataset.purged: + 
self.sniff(dataset.file_name) + if (self.no_mols[1] == '1'): + dataset.blurb = "1 molecule" + else: + dataset.blurb = "%s molecules" % self.no_mols[1] + dataset.peek = data.get_file_peek( dataset.file_name, is_multi_byte=is_multi_byte ) + else: + dataset.peek = 'file does not exist' + dataset.blurb = 'file purged from disk' + +class SDF( GenericMultiMolFile ): + file_ext = "sdf" + def sniff( self, filename ): + self.no_mols = commands.getstatusoutput("grep -c \\$\\$\\$\\$ "+filename) + if (self.no_mols[0] == 0) & (self.no_mols[1] > 0): + return True + else: + return False + +class MOL2( GenericMultiMolFile ): + file_ext = "mol2" + def sniff( self, filename ): + self.no_mols = commands.getstatusoutput("grep -c @\<TRIPOS\>MOLECULE "+filename) + if (self.no_mols[0] == 0) & (self.no_mols[1] > 0): + return True + else: + return False + +class FPS( GenericMultiMolFile ): + file_ext = "fps" + def sniff( self, filename ): + self.no_mols = commands.getstatusoutput("grep -c -v '^#' "+filename) + with open(filename) as in_handle: + for line_counter, line in enumerate(in_handle): + line = line.strip() + if line.startswith('#FPS1'): + return True + if line_counter > 10: + return False + +class DRF( GenericMultiMolFile ): + file_ext = "drf" + def sniff( self, filename ): + self.no_mols = commands.getstatusoutput("grep -c \"ligand id\" "+filename) + if (self.no_mols[0] == 0) & (self.no_mols[1] > 0): + return True + else: + return False + + +class PHAR( GenericMultiMolFile ): + MetadataElement( name="base_name", desc="base name", default='Phar', + readonly=True, set_in_upload=True) + file_ext = "phar" + def sniff( self, filename ): + self.no_mols = commands.getstatusoutput("grep -c -v '^#' "+filename) + return False + +class PDB( GenericMolFile ): + file_ext = "pdb" + def sniff( self, filename ): + self.no_mols = commands.getstatusoutput("grep -c HEADER "+filename) + if (self.no_mols[0] == 0) & (self.no_mols[1] > 0): + return True + else: + return False + def set_peek( self, 
dataset, is_multi_byte=False ): + #def set_peek( self, dataset, line_count=None, is_multi_byte=False ): + if not dataset.dataset.purged: + res = commands.getstatusoutput("lib/galaxy/datatypes/countResidues.sh "+dataset.file_name) + dataset.peek = res[1] + self.sniff(dataset.file_name) + if (self.no_mols[1] == '1'): + dataset.blurb = "1 protein structure" + else: + dataset.blurb = "%s protein structures"%self.no_mols[1] + else: + dataset.peek = 'file does not exist' + dataset.blurb = 'file purged from disk' + +class grd ( data.Text ) : + file_ext = "grd" + def set_peek( self, dataset, is_multi_byte=False ): + if not dataset.dataset.purged: + #dataset.peek = "" + dataset.blurb = "score-grids for docking" + else: + dataset.peek = 'file does not exist' + dataset.blurb = 'file purged from disk' + +class grdtgz ( data.Text ) : + file_ext = "grd.tgz" + def set_peek( self, dataset, is_multi_byte=False ): + if not dataset.dataset.purged: + #dataset.peek = "" + dataset.blurb = "compressed score-grids for docking" + else: + dataset.peek = 'file does not exist' + dataset.blurb = 'file purged from disk' + + +class InChI( GenericMultiMolFile ): + file_ext = "inchi" + def sniff( self, filename ): + self.no_mols = commands.getstatusoutput("grep -c '^InChI=' "+filename) + word_count = commands.getoutput("wc -w "+filename).split()[0] + + if self.no_mols[1] != word_count: + return False + + if (self.no_mols[0] == 0) & (self.no_mols[1] > 0): + return True + else: + return False + + def set_meta( self, dataset, **kwd ): + """ + Set the number of sequences and the number of data lines + in dataset. 
+ """ + if self.max_optional_metadata_filesize >= 0 and dataset.get_size() > self.max_optional_metadata_filesize: + dataset.metadata.data_lines = None + dataset.metadata.sequences = None + return + #word_count = commands.getoutput("wc -w "+filename).split()[0] + # word_count are the lines of the file, if word_count and molecule count + # are the same, that must hold to be an InChI File, then that should be + # the same number as all non-empty lines + #dataset.metadata.data_lines = word_count + #int(commands.getoutput("grep -cve '^\s*$' "+filename)) + #dataset.metadata.molecules = word_count + + +class SMILES( GenericMultiMolFile ): + file_ext = "smi" + def sniff( self, filename ): + """ + Its hard or impossible to sniff a SMILES File. All what i know is the + word_count must be the same as the non-empty line count. And that i can + try to import the first SMILES and check if it is a molecule. + """ + + # that corresponds to non-empty line count + self.no_mols = commands.getstatusoutput("grep -cve '^\s*$' "+filename) + word_count = int(commands.getoutput("wc -w "+filename).split()[0]) + + if int(self.no_mols[1]) != word_count: + return False + + if (self.no_mols[0] == 0) & (int(self.no_mols[1]) > 0): + for line in open(filename): + line = line.strip() + if line: + # if we have atoms, we have a molecule + try: + if len(pybel.readstring('smi', line).atoms) > 0: + return True + else: + return False + except: + # if convert fails its not a smiles string + return False + return True + else: + return False + + def set_meta( self, dataset, **kwd ): + """ + Set the number of sequences and the number of data lines + in dataset. 
+ """ + if self.max_optional_metadata_filesize >= 0 and dataset.get_size() > self.max_optional_metadata_filesize: + dataset.metadata.data_lines = None + dataset.metadata.sequences = None + return + + #word_count = int(commands.getoutput("wc -w "+filename).split()[0]) + # word_count are the lines of the file, if word_count and molecule count + # are the same, that must hold to be an InChI File, then that should be + # the same number as all non-empty lines + #dataset.metadata.data_lines = word_count + #dataset.metadata.molecules = word_count + +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/datatypes/mol_to_inchi_converter.xml Tue Dec 25 05:16:25 2012 -0500
@@ -0,0 +1,17 @@
+<tool id="CONVERTER_mol_to_inchi" name="MOL to InChI" version="1.0.0">
+    <!-- Datatype converter from MOL to InChI, implemented as a call to obabel;
+         the command merges stderr into stdout. -->
+    <description></description>
+    <command>obabel -imol $input -oinchi -O $output -e 2&gt;&amp;1</command>
+    <inputs>
+        <param name="input" type="data" format="mol" label="Molecules in MOL-format"/>
+    </inputs>
+    <outputs>
+        <data name="output" format="inchi"/>
+    </outputs>
+    <requirements>
+        <requirement type="binary">obabel</requirement>
+    </requirements>
+    <help>
+    </help>
+</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/datatypes/mol_to_mol2_converter.xml Tue Dec 25 05:16:25 2012 -0500
@@ -0,0 +1,17 @@
+<tool id="CONVERTER_mol_to_mol2" name="MOL to MOL2" version="1.0.0">
+    <!-- Datatype converter from MOL to MOL2, implemented as a call to obabel;
+         the command merges stderr into stdout. -->
+    <description></description>
+    <command>obabel -imol $input -omol2 -O $output -e 2&gt;&amp;1</command>
+    <inputs>
+        <param name="input" type="data" format="mol" label="Molecules in MOL-format"/>
+    </inputs>
+    <outputs>
+        <data name="output" format="mol2"/>
+    </outputs>
+    <requirements>
+        <requirement type="binary">obabel</requirement>
+    </requirements>
+    <help>
+    </help>
+</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/datatypes/mol_to_smi_converter.xml Tue Dec 25 05:16:25 2012 -0500
@@ -0,0 +1,17 @@
+<tool id="CONVERTER_mol_to_smi" name="MOL to SMILES" version="1.0.0">
+    <!-- Datatype converter from MOL to SMILES, implemented as a call to obabel;
+         the command merges stderr into stdout. -->
+    <description></description>
+    <command>obabel -imol $input -osmi -O $output -e 2&gt;&amp;1</command>
+    <inputs>
+        <param name="input" type="data" format="mol" label="Molecules in MOL-format"/>
+    </inputs>
+    <outputs>
+        <data name="output" format="smi"/>
+    </outputs>
+    <requirements>
+        <requirement type="binary">obabel</requirement>
+    </requirements>
+    <help>
+    </help>
+</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/datatypes/sdf_to_inchi_converter.xml Tue Dec 25 05:16:25 2012 -0500
@@ -0,0 +1,17 @@
+<tool id="CONVERTER_sdf_to_inchi" name="SDF to InChI" version="1.0.0">
+    <!-- Datatype converter from SDF to InChI, implemented as a call to obabel;
+         the command merges stderr into stdout. -->
+    <description></description>
+    <command>obabel -isdf $input -oinchi -O $output -e 2&gt;&amp;1</command>
+    <inputs>
+        <param name="input" type="data" format="sdf" label="Molecules in SD-format"/>
+    </inputs>
+    <outputs>
+        <data name="output" format="inchi"/>
+    </outputs>
+    <requirements>
+        <requirement type="binary">obabel</requirement>
+    </requirements>
+    <help>
+    </help>
+</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/datatypes/sdf_to_mol2_converter.xml Tue Dec 25 05:16:25 2012 -0500
@@ -0,0 +1,17 @@
+<tool id="CONVERTER_sdf_to_mol2" name="SDF to mol2" version="1.0.0">
+    <!-- Datatype converter from SDF to MOL2, implemented as a call to obabel;
+         the command merges stderr into stdout. -->
+    <description></description>
+    <command>obabel -isdf $input -omol2 -O $output -e 2&gt;&amp;1</command>
+    <inputs>
+        <param name="input" type="data" format="sdf" label="Molecules in SD-format"/>
+    </inputs>
+    <outputs>
+        <data name="output" format="mol2"/>
+    </outputs>
+    <requirements>
+        <requirement type="binary">obabel</requirement>
+    </requirements>
+    <help>
+    </help>
+</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/datatypes/sdf_to_smi_converter.xml Tue Dec 25 05:16:25 2012 -0500
@@ -0,0 +1,17 @@
+<tool id="CONVERTER_sdf_to_smiles" name="SDF to SMILES" version="1.0.0">
+    <!-- Datatype converter from SDF to SMILES, implemented as a call to obabel;
+         the command merges stderr into stdout. -->
+    <description></description>
+    <command>obabel -isdf $input -osmi -O $output -e 2&gt;&amp;1</command>
+    <inputs>
+        <param name="input" type="data" format="sdf" label="Molecules in SD-format"/>
+    </inputs>
+    <outputs>
+        <data name="output" format="smi"/>
+    </outputs>
+    <requirements>
+        <requirement type="binary">obabel</requirement>
+    </requirements>
+    <help>
+    </help>
+</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/datatypes/smi_to_inchi_converter.xml Tue Dec 25 05:16:25 2012 -0500
@@ -0,0 +1,17 @@
+<tool id="CONVERTER_SMILES_to_inchi" name="SMILES to InChI" version="1.0.0">
+    <!-- Datatype converter from SMILES to InChI, implemented as a call to obabel;
+         the command merges stderr into stdout. -->
+    <description></description>
+    <command>obabel -ismi $input -oinchi -O $output -e 2&gt;&amp;1</command>
+    <inputs>
+        <param name="input" type="data" format="smi" label="Molecules in SMILES format"/>
+    </inputs>
+    <outputs>
+        <data name="output" format="inchi"/>
+    </outputs>
+    <requirements>
+        <requirement type="binary">obabel</requirement>
+    </requirements>
+    <help>
+    </help>
+</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/datatypes/smi_to_mol2_converter.xml Tue Dec 25 05:16:25 2012 -0500
@@ -0,0 +1,17 @@
+<tool id="CONVERTER_SMILES_to_MOL2" name="SMILES to MOL2" version="1.0.0">
+    <!-- Datatype converter from SMILES to MOL2, implemented as a call to obabel;
+         the command merges stderr into stdout. -->
+    <description></description>
+    <command>obabel -ismi $input -omol2 -O $output -e 2&gt;&amp;1</command>
+    <inputs>
+        <param name="input" type="data" format="smi" label="Molecules in SMILES format"/>
+    </inputs>
+    <outputs>
+        <data name="output" format="mol2"/>
+    </outputs>
+    <requirements>
+        <requirement type="binary">obabel</requirement>
+    </requirements>
+    <help>
+    </help>
+</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/datatypes/smi_to_mol_converter.xml Tue Dec 25 05:16:25 2012 -0500
@@ -0,0 +1,17 @@
+<tool id="CONVERTER_SMILES_to_MOL" name="SMILES to MOL" version="1.0.0">
+    <!-- Datatype converter from SMILES to MOL, implemented as a call to obabel;
+         the command merges stderr into stdout. -->
+    <description></description>
+    <command>obabel -ismi $input -omol -O $output -e 2&gt;&amp;1</command>
+    <inputs>
+        <param name="input" type="data" format="smi" label="Molecules in SMILES format"/>
+    </inputs>
+    <outputs>
+        <data name="output" format="mol"/>
+    </outputs>
+    <requirements>
+        <requirement type="binary">obabel</requirement>
+    </requirements>
+    <help>
+    </help>
+</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/datatypes/smi_to_sdf_converter.xml Tue Dec 25 05:16:25 2012 -0500
@@ -0,0 +1,17 @@
+<tool id="CONVERTER_SMILES_to_sdf" name="SMILES to SDF" version="1.0.0">
+    <!-- Datatype converter from SMILES to SDF, implemented as a call to obabel;
+         the command merges stderr into stdout. -->
+    <description></description>
+    <command>obabel -ismi $input -osdf -O $output -e 2&gt;&amp;1</command>
+    <inputs>
+        <param name="input" type="data" format="smi" label="Molecules in SMILES format"/>
+    </inputs>
+    <outputs>
+        <data name="output" format="sdf"/>
+    </outputs>
+    <requirements>
+        <requirement type="binary">obabel</requirement>
+    </requirements>
+    <help>
+    </help>
+</tool>