# HG changeset patch
# User bgruening
# Date 1356430585 18000
# Node ID af7b6c6ee439f239e1e62d1343c9cc2b43a5d72a
initial commit
diff -r 000000000000 -r af7b6c6ee439 datatypes/datatypes_conf.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/datatypes/datatypes_conf.xml Tue Dec 25 05:16:25 2012 -0500
@@ -0,0 +1,45 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff -r 000000000000 -r af7b6c6ee439 datatypes/inchi_to_mol2_converter.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/datatypes/inchi_to_mol2_converter.xml Tue Dec 25 05:16:25 2012 -0500
@@ -0,0 +1,15 @@
+
+
+ obabel -iinchi $input -omol2 -O $output -e 2>&1
+
+
+
+
+
+
+
+ obabel
+
+
+
+
diff -r 000000000000 -r af7b6c6ee439 datatypes/inchi_to_mol_converter.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/datatypes/inchi_to_mol_converter.xml Tue Dec 25 05:16:25 2012 -0500
@@ -0,0 +1,15 @@
+
+
+ obabel -iinchi $input -omol -O $output -e 2>&1
+
+
+
+
+
+
+
+ obabel
+
+
+
+
diff -r 000000000000 -r af7b6c6ee439 datatypes/inchi_to_sdf_converter.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/datatypes/inchi_to_sdf_converter.xml Tue Dec 25 05:16:25 2012 -0500
@@ -0,0 +1,15 @@
+
+
+ obabel -iinchi $input -osdf -O $output -e 2>&1
+
+
+
+
+
+
+
+ obabel
+
+
+
+
diff -r 000000000000 -r af7b6c6ee439 datatypes/inchi_to_smi_converter.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/datatypes/inchi_to_smi_converter.xml Tue Dec 25 05:16:25 2012 -0500
@@ -0,0 +1,15 @@
+
+
+ obabel -iinchi $input -osmi -O $output -e 2>&1
+
+
+
+
+
+
+
+ obabel
+
+
+
+
diff -r 000000000000 -r af7b6c6ee439 datatypes/mol2_to_inchi_converter.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/datatypes/mol2_to_inchi_converter.xml Tue Dec 25 05:16:25 2012 -0500
@@ -0,0 +1,15 @@
+
+
+ obabel -imol2 $input -oinchi -O $output -e 2>&1
+
+
+
+
+
+
+
+ obabel
+
+
+
+
diff -r 000000000000 -r af7b6c6ee439 datatypes/mol2_to_mol_converter.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/datatypes/mol2_to_mol_converter.xml Tue Dec 25 05:16:25 2012 -0500
@@ -0,0 +1,15 @@
+
+
+ obabel -imol2 $input -omol -O $output -e 2>&1
+
+
+
+
+
+
+
+ obabel
+
+
+
+
diff -r 000000000000 -r af7b6c6ee439 datatypes/mol2_to_sdf_converter.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/datatypes/mol2_to_sdf_converter.xml Tue Dec 25 05:16:25 2012 -0500
@@ -0,0 +1,15 @@
+
+
+ obabel -imol2 $input -osdf -O $output -e 2>&1
+
+
+
+
+
+
+
+ obabel
+
+
+
+
diff -r 000000000000 -r af7b6c6ee439 datatypes/mol2_to_smi_converter.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/datatypes/mol2_to_smi_converter.xml Tue Dec 25 05:16:25 2012 -0500
@@ -0,0 +1,15 @@
+
+
+ obabel -imol2 $input -osmi -O $output -e 2>&1
+
+
+
+
+
+
+
+ obabel
+
+
+
+
diff -r 000000000000 -r af7b6c6ee439 datatypes/molFiles.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/datatypes/molFiles.py Tue Dec 25 05:16:25 2012 -0500
@@ -0,0 +1,242 @@
+# -*- coding: utf-8 -*-
+
+from galaxy.datatypes import data
+import logging
+from galaxy.datatypes.sniff import *
+import commands
+import pybel
+import openbabel
+openbabel.obErrorLog.StopLogging()
+
+from galaxy.datatypes.metadata import MetadataElement
+from galaxy.datatypes import metadata
+
+log = logging.getLogger(__name__)
+
+class GenericMolFile( data.Text ):
+
+ MetadataElement( name="molecules", default=0, desc="Number of molecules", readonly=True, visible=False, optional=True, no_value=0 )
+
+ file_ext = "mol2/sdf/drf"
+ def check_filetype( self,filename ):
+ self.no_mols = commands.getstatusoutput("grep -c \\$\\$\\$\\$ "+filename)
+ if (self.no_mols[0] == 0) & (self.no_mols[1] > 0):
+ self.file_ext="sdf"
+ return True
+ self.no_mols = commands.getstatusoutput("grep -c @\MOLECULE "+filename)
+ if (self.no_mols[0] == 0) & (self.no_mols[1] > 0):
+ self.file_ext="mol2"
+ return True
+ self.no_mols = commands.getstatusoutput("grep -c \"ligand id\" "+filename)
+ if (self.no_mols[0] == 0) & (self.no_mols[1] > 0):
+ self.file_ext="drf"
+ return True
+ self.no_mols = commands.getstatusoutput("grep -c HEADER "+filename)
+ if (self.no_mols[0] == 0) & (self.no_mols[1] > 0):
+ self.file_ext="pdb"
+ return True
+ return False
+
+ def set_peek( self, dataset, is_multi_byte=False ):
+ if not dataset.dataset.purged:
+ if(self.check_filetype(dataset.file_name)) :
+ if (self.no_mols[1] == '1'):
+ dataset.blurb = "1 molecule"
+ else:
+ dataset.blurb = "%s molecules" % self.no_mols[1]
+ dataset.peek = data.get_file_peek( dataset.file_name, is_multi_byte=is_multi_byte )
+ else:
+ dataset.peek = 'file does not exist'
+ dataset.blurb = 'file purged from disk'
+
+ def get_mime(self):
+ return 'text/plain'
+
+
+class GenericMultiMolFile( GenericMolFile ):
+ def set_peek( self, dataset, is_multi_byte=False ):
+ if not dataset.dataset.purged:
+ self.sniff(dataset.file_name)
+ if (self.no_mols[1] == '1'):
+ dataset.blurb = "1 molecule"
+ else:
+ dataset.blurb = "%s molecules" % self.no_mols[1]
+ dataset.peek = data.get_file_peek( dataset.file_name, is_multi_byte=is_multi_byte )
+ else:
+ dataset.peek = 'file does not exist'
+ dataset.blurb = 'file purged from disk'
+
+class SDF( GenericMultiMolFile ):
+ file_ext = "sdf"
+ def sniff( self, filename ):
+ self.no_mols = commands.getstatusoutput("grep -c \\$\\$\\$\\$ "+filename)
+ if (self.no_mols[0] == 0) & (self.no_mols[1] > 0):
+ return True
+ else:
+ return False
+
+class MOL2( GenericMultiMolFile ):
+ file_ext = "mol2"
+ def sniff( self, filename ):
+ self.no_mols = commands.getstatusoutput("grep -c @\MOLECULE "+filename)
+ if (self.no_mols[0] == 0) & (self.no_mols[1] > 0):
+ return True
+ else:
+ return False
+
+class FPS( GenericMultiMolFile ):
+ file_ext = "fps"
+ def sniff( self, filename ):
+ self.no_mols = commands.getstatusoutput("grep -c -v '^#' "+filename)
+ with open(filename) as in_handle:
+ for line_counter, line in enumerate(in_handle):
+ line = line.strip()
+ if line.startswith('#FPS1'):
+ return True
+ if line_counter > 10:
+ return False
+
+class DRF( GenericMultiMolFile ):
+ file_ext = "drf"
+ def sniff( self, filename ):
+ self.no_mols = commands.getstatusoutput("grep -c \"ligand id\" "+filename)
+ if (self.no_mols[0] == 0) & (self.no_mols[1] > 0):
+ return True
+ else:
+ return False
+
+
+class PHAR( GenericMultiMolFile ):
+ MetadataElement( name="base_name", desc="base name", default='Phar',
+ readonly=True, set_in_upload=True)
+ file_ext = "phar"
+ def sniff( self, filename ):
+ self.no_mols = commands.getstatusoutput("grep -c -v '^#' "+filename)
+ return False
+
+class PDB( GenericMolFile ):
+ file_ext = "pdb"
+ def sniff( self, filename ):
+ self.no_mols = commands.getstatusoutput("grep -c HEADER "+filename)
+ if (self.no_mols[0] == 0) & (self.no_mols[1] > 0):
+ return True
+ else:
+ return False
+ def set_peek( self, dataset, is_multi_byte=False ):
+ #def set_peek( self, dataset, line_count=None, is_multi_byte=False ):
+ if not dataset.dataset.purged:
+ res = commands.getstatusoutput("lib/galaxy/datatypes/countResidues.sh "+dataset.file_name)
+ dataset.peek = res[1]
+ self.sniff(dataset.file_name)
+ if (self.no_mols[1] == '1'):
+ dataset.blurb = "1 protein structure"
+ else:
+ dataset.blurb = "%s protein structures"%self.no_mols[1]
+ else:
+ dataset.peek = 'file does not exist'
+ dataset.blurb = 'file purged from disk'
+
+class grd ( data.Text ) :
+ file_ext = "grd"
+ def set_peek( self, dataset, is_multi_byte=False ):
+ if not dataset.dataset.purged:
+ #dataset.peek = ""
+ dataset.blurb = "score-grids for docking"
+ else:
+ dataset.peek = 'file does not exist'
+ dataset.blurb = 'file purged from disk'
+
+class grdtgz ( data.Text ) :
+ file_ext = "grd.tgz"
+ def set_peek( self, dataset, is_multi_byte=False ):
+ if not dataset.dataset.purged:
+ #dataset.peek = ""
+ dataset.blurb = "compressed score-grids for docking"
+ else:
+ dataset.peek = 'file does not exist'
+ dataset.blurb = 'file purged from disk'
+
+
+class InChI( GenericMultiMolFile ):
+ file_ext = "inchi"
+ def sniff( self, filename ):
+ self.no_mols = commands.getstatusoutput("grep -c '^InChI=' "+filename)
+ word_count = commands.getoutput("wc -w "+filename).split()[0]
+
+ if self.no_mols[1] != word_count:
+ return False
+
+ if (self.no_mols[0] == 0) & (self.no_mols[1] > 0):
+ return True
+ else:
+ return False
+
+ def set_meta( self, dataset, **kwd ):
+ """
+ Set the number of sequences and the number of data lines
+ in dataset.
+ """
+ if self.max_optional_metadata_filesize >= 0 and dataset.get_size() > self.max_optional_metadata_filesize:
+ dataset.metadata.data_lines = None
+ dataset.metadata.sequences = None
+ return
+ #word_count = commands.getoutput("wc -w "+filename).split()[0]
+ # word_count are the lines of the file, if word_count and molecule count
+ # are the same, that must hold to be an InChI File, then that should be
+ # the same number as all non-empty lines
+ #dataset.metadata.data_lines = word_count
+ #int(commands.getoutput("grep -cve '^\s*$' "+filename))
+ #dataset.metadata.molecules = word_count
+
+
+class SMILES( GenericMultiMolFile ):
+ file_ext = "smi"
+ def sniff( self, filename ):
+ """
+ Its hard or impossible to sniff a SMILES File. All what i know is the
+ word_count must be the same as the non-empty line count. And that i can
+ try to import the first SMILES and check if it is a molecule.
+ """
+
+ # that corresponds to non-empty line count
+ self.no_mols = commands.getstatusoutput("grep -cve '^\s*$' "+filename)
+ word_count = int(commands.getoutput("wc -w "+filename).split()[0])
+
+ if int(self.no_mols[1]) != word_count:
+ return False
+
+ if (self.no_mols[0] == 0) & (int(self.no_mols[1]) > 0):
+ for line in open(filename):
+ line = line.strip()
+ if line:
+ # if we have atoms, we have a molecule
+ try:
+ if len(pybel.readstring('smi', line).atoms) > 0:
+ return True
+ else:
+ return False
+ except:
+ # if convert fails its not a smiles string
+ return False
+ return True
+ else:
+ return False
+
+ def set_meta( self, dataset, **kwd ):
+ """
+ Set the number of sequences and the number of data lines
+ in dataset.
+ """
+ if self.max_optional_metadata_filesize >= 0 and dataset.get_size() > self.max_optional_metadata_filesize:
+ dataset.metadata.data_lines = None
+ dataset.metadata.sequences = None
+ return
+
+ #word_count = int(commands.getoutput("wc -w "+filename).split()[0])
+ # word_count are the lines of the file, if word_count and molecule count
+ # are the same, that must hold to be an InChI File, then that should be
+ # the same number as all non-empty lines
+ #dataset.metadata.data_lines = word_count
+ #dataset.metadata.molecules = word_count
+
+
diff -r 000000000000 -r af7b6c6ee439 datatypes/mol_to_inchi_converter.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/datatypes/mol_to_inchi_converter.xml Tue Dec 25 05:16:25 2012 -0500
@@ -0,0 +1,15 @@
+
+
+ obabel -imol $input -oinchi -O $output -e 2>&1
+
+
+
+
+
+
+
+ obabel
+
+
+
+
diff -r 000000000000 -r af7b6c6ee439 datatypes/mol_to_mol2_converter.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/datatypes/mol_to_mol2_converter.xml Tue Dec 25 05:16:25 2012 -0500
@@ -0,0 +1,15 @@
+
+
+ obabel -imol $input -omol2 -O $output -e 2>&1
+
+
+
+
+
+
+
+ obabel
+
+
+
+
diff -r 000000000000 -r af7b6c6ee439 datatypes/mol_to_smi_converter.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/datatypes/mol_to_smi_converter.xml Tue Dec 25 05:16:25 2012 -0500
@@ -0,0 +1,15 @@
+
+
+ obabel -imol $input -osmi -O $output -e 2>&1
+
+
+
+
+
+
+
+ obabel
+
+
+
+
diff -r 000000000000 -r af7b6c6ee439 datatypes/sdf_to_inchi_converter.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/datatypes/sdf_to_inchi_converter.xml Tue Dec 25 05:16:25 2012 -0500
@@ -0,0 +1,15 @@
+
+
+ obabel -isdf $input -oinchi -O $output -e 2>&1
+
+
+
+
+
+
+
+ obabel
+
+
+
+
diff -r 000000000000 -r af7b6c6ee439 datatypes/sdf_to_mol2_converter.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/datatypes/sdf_to_mol2_converter.xml Tue Dec 25 05:16:25 2012 -0500
@@ -0,0 +1,15 @@
+
+
+ obabel -isdf $input -omol2 -O $output -e 2>&1
+
+
+
+
+
+
+
+ obabel
+
+
+
+
diff -r 000000000000 -r af7b6c6ee439 datatypes/sdf_to_smi_converter.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/datatypes/sdf_to_smi_converter.xml Tue Dec 25 05:16:25 2012 -0500
@@ -0,0 +1,15 @@
+
+
+ obabel -isdf $input -osmi -O $output -e 2>&1
+
+
+
+
+
+
+
+ obabel
+
+
+
+
diff -r 000000000000 -r af7b6c6ee439 datatypes/smi_to_inchi_converter.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/datatypes/smi_to_inchi_converter.xml Tue Dec 25 05:16:25 2012 -0500
@@ -0,0 +1,15 @@
+
+
+ obabel -ismi $input -oinchi -O $output -e 2>&1
+
+
+
+
+
+
+
+ obabel
+
+
+
+
diff -r 000000000000 -r af7b6c6ee439 datatypes/smi_to_mol2_converter.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/datatypes/smi_to_mol2_converter.xml Tue Dec 25 05:16:25 2012 -0500
@@ -0,0 +1,15 @@
+
+
+ obabel -ismi $input -omol2 -O $output -e 2>&1
+
+
+
+
+
+
+
+ obabel
+
+
+
+
diff -r 000000000000 -r af7b6c6ee439 datatypes/smi_to_mol_converter.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/datatypes/smi_to_mol_converter.xml Tue Dec 25 05:16:25 2012 -0500
@@ -0,0 +1,15 @@
+
+
+ obabel -ismi $input -omol -O $output -e 2>&1
+
+
+
+
+
+
+
+ obabel
+
+
+
+
diff -r 000000000000 -r af7b6c6ee439 datatypes/smi_to_sdf_converter.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/datatypes/smi_to_sdf_converter.xml Tue Dec 25 05:16:25 2012 -0500
@@ -0,0 +1,15 @@
+
+
+ obabel -ismi $input -osdf -O $output -e 2>&1
+
+
+
+
+
+
+
+ obabel
+
+
+
+