changeset 9:6e6b05e75a3f

several updates to the silics-it repository
author Björn Grüning <bjoern.gruening@gmail.com>
date Sun, 26 May 2013 16:50:00 +0200
parents 147814e45209
children c5a538ec7b43
files .hgignore align-it/align-it.xml align-it/align-it_create_db.xml qed/qed.py qed/silicos_qed.xml repository_dependencies.xml shape-it/shape-it.xml tool_dependencies.xml
diffstat 8 files changed, 294 insertions(+), 56 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/.hgignore	Sun May 26 16:50:00 2013 +0200
@@ -0,0 +1,1 @@
+.tar*
\ No newline at end of file
--- a/align-it/align-it.xml	Sat May 25 12:56:15 2013 -0400
+++ b/align-it/align-it.xml	Sun May 26 16:50:00 2013 +0200
@@ -5,41 +5,131 @@
         <requirement type="package" version="2.3.2">openbabel</requirement>
     </requirements>
     <command>
-        align-it --format ${database.ext} --dbase $database --pharmacophore $alignedPharmacophores --dbType 'MOL' --scores $result --epsilon $0epsilon $merge $noHybrid $withExclusion 2>&#38;1
+        align-it 
+            #if str($database.ext).strip() == 'phar':
+                --dbType PHAR
+            #else:
+                --dbType MOL
+            #end if
+            --dbase $database
+
+            --reference $reference
+            #if str($reference.ext).strip() == 'phar':
+                --refType PHAR
+            #else:
+                --refType MOL
+            #end if
+            #######################################
+            #### output options
+            #######################################
+            --pharmacophore $aligned_pharmacophores
+            
+            
+            ##--out $aligned_structures
+            ##--outType $oformat
+            
+            #if float( str($cutoff) ) > 0:
+                --cutOff $cutoff
+            #end if
+            #if int( str($best) ) > 0:
+                --best $best
+            #end if
+            --rankBy $rankBy
+            --scores $score_result_file
+
+            #######################################
+            #### Options
+            #######################################
+
+            #set $fgroups_combined = str( $fgroups ).strip()
+            --funcGroup $fgroups_combined
+
+            --epsilon $epsilon 
+            $merge 
+            $noNormal
+            $noHybrid 
+            $scoreOnly
+            $withExclusion 
+
+            2>&#38;1
     </command>
     <inputs>
-        <param name="database" type="data" format='mol,mol2,sdf,smi' label="database"/>
-        <param name="epsilon" type="float" label='change the tolerance for points to be matched' value="0.5" />
-        <param name='merge' type='boolean' truevalue='--merge' falsvalue='' label='merge pharmacophore points' />
-        <param name='noHybrid' type='boolean' truevalue='--noHybrid' falsvalue='' label='disable the use of hybrid pharmacophore points' />
-        <param name='withExclusion' type='boolean' truevalue='--withExclusion' falsvalue='' label='add exclusion spheres into the optimization process instead of processing them afterwards' />
+        <param name="database" type="data" format='mol,mol2,sdf,smi,phar' label="Defines the database of molecules that will be used to screen"/>
+        <param name="reference" type="data" format='mol,mol2,sdf,smi,phar' label="Reference Molecule"/>
+
+        <param name="fgroups" type="select" multiple="True" display="checkboxes" label="Specify a subset of the available functional groups that are used in the alignment">
+            <option value='AROM' selected="true">aromatic rings</option>
+            <option value='HDON' selected="true">hydrogen bond donors</option>
+            <option value='HACC' selected="true">hydrogen bond acceptors</option>
+            <option value='LIPO' selected="true">lipophilic spots</option>
+            <option value='CHARGE' selected="true">charge centers</option>
+        </param>
+
+
+        <param name="epsilon" type="float" value="0.5" label='Change the tolerance for points to be matched in the alignment phase' help="The lower this value, the more strict the matching between two pharmacophores will have to be before they can be aligned.">
+            <validator type="in_range" min="0" max="1" />
+        </param>
+        <param name='merge' type='boolean' truevalue='--merge' falsevalue='' label='Merge pharmacophore points' />
+        <param name='noNormal' type='boolean' truevalue='--noNormal' falsevalue='' label='No normal information is included during the alignment' help="Using this flag makes the pharmacophore models less specific but also less conformation-dependent."/>
+        <param name='noHybrid' type='boolean' truevalue='--noHybrid' falsevalue='' label='Disable the use of hybrid pharmacophore points' help="Using this flag will increase the number of pharmacophore points."/>
+        <param name='withExclusion' type='boolean' truevalue='--withExclusion' falsevalue='' label='Add exclusion spheres into the optimization process instead of processing them afterwards' help="When this flag is set, the exclusion spheres have also an impact on the optimization procedure." />
+        <param name='scoreOnly' type='boolean' truevalue='--scoreOnly' falsevalue='' label='No translational or rotational optimization will be performed' help=""/>
+
+        <!--
+        <param name='oformat' type='select' format='text' label="The aligned database structures are written to an output file of the following format">
+            <option value='smi'>SMILES</option>
+            <option value='inchi'>InChI</option>
+            <option value='sdf'>SD file</option>
+        </param>
+        -->
+
+        <param name="cutoff" type="float" value="0" label="Only structures with a score larger than this cutoff will be written to the files" help="This value should be between 0 and 1.">
+            <validator type="in_range" min="0" max="1" />
+        </param>
+        <param name="best" type="integer" value="0" label="With this option only a limited number of best scoring structures are written to the files" help="0 means this option is deactivated">
+            <validator type="in_range" min="0"/>
+        </param>
+
+        <param name='rankBy' type='select' format='text' label="This option defines the used scoring scheme">
+            <option value='TANIMOTO'>Tanimoto</option>
+            <option value='TVERSKY_REF'>TVERSKY_REF</option>
+            <option value='TVERSKY_DB'>TVERSKY_DB</option>
+        </param>
+
     </inputs>
     <outputs>
-        <data name="alignedPharmacophores" format="phar" />
-        <data name="result" format="tabular" />
+        <data name="aligned_pharmacophores" format="phar" label="${tool.name} on ${on_string} (aligned pharmacophores)"/>
+        <!--<data name="aligned_structures" format="smi" label="${tool.name} on ${on_string} (aligned structures)">
+            <change_format>
+                <when input="oformat" value="inchi" format="inchi"/>
+                <when input="oformat" value="sdf" format="sdf"/>
+            </change_format>
+        </data>-->
+        <data name="score_result_file" format="tabular" label="${tool.name} on ${on_string} (scores)"/>
     </outputs>
     <tests>
         <test>
-        <param name="database" ftype='sdf' value='CID_2244.sdf'/>
-        <param name="epsilon" value="0.5" />
-        <param name='merge' value='' />
-        <param name='noHybrid' value='' />
-        <param name='withExclusion' value='' />
-        <output name="alignedPharmacophores" ftype="phar" file="alignit_on_CID2244.phar" />
-        <output name="result" file="aliginit_scores.tabular" />
         </test>
     </tests>
     <help>
 
 **What it does**
 
-Align-it is a tool to align molecules according their pharmacophores. A
-pharmacophore is an abstract concept based on the specific interactions
-that have been observed in drug-receptor interactions: hydrogen bonding,
-charge transfer, electrostatic and hydrophobic interactions. Molecular
-modeling and/or screening using pharmacophores have proven to be an
-important and useful method in drug discovery.
+Align-it_ is a tool to align molecules according to their pharmacophores.
+A pharmacophore is an abstract concept based on the specific interactions 
+that have been observed in drug-receptor interactions: hydrogen bonding, 
+charge transfer, electrostatic and hydrophobic interactions. 
+Molecular modeling and/or screening based on pharmacophore similarities 
+have proven to be an important and useful method in drug discovery.
 
+The functionality of Align-it_ consists mainly of two parts. 
+The first functionality consists of the **generation of pharmacophores from molecules** 
+(use the tool *Pharmacophore generation* if you want to store these for later use).
+Second, pairs of pharmacophores can be aligned and the resulting 
+score is calculated from the volume overlap resulting from the alignments.
+
+
+.. _Align-it: http://www.silicos-it.com/software/align-it/1.0.3/align-it.html
 
 -----
 
@@ -96,6 +186,25 @@
         $$$$    
 
 
+The format of this scores output file is as follows:
+
+======    =====================================================================
+Column    Content
+======    =====================================================================
+     1    Id of the reference structure
+     2    Maximum volume of the reference structure
+     3    Id of the database structure
+     4    Maximum volume of the database structure
+     5    Maximum volume overlap of the two structures
+     6    Overlap between pharmacophore and exclusion spheres in the reference
+     7    Corrected volume overlap between database pharmacophore and reference
+     8    Number of pharmacophore points in the processed pharmacophore
+     9    TANIMOTO score
+    10    TVERSKY_REF score
+    11    TVERSKY_DB score
+======    =====================================================================
+
+
 
     </help>
 </tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/align-it/align-it_create_db.xml	Sun May 26 16:50:00 2013 +0200
@@ -0,0 +1,111 @@
+<tool id="ctb_alignit_create_db" name="Pharmacophore">
+    <description>generation (Align-it)</description>
+    <requirements>
+        <requirement type="package" version="1.0.0">silicos_it</requirement>
+        <requirement type="package" version="2.3.2">openbabel</requirement>
+    </requirements>
+    <command>
+        align-it 
+            #if str($database.ext).strip() == 'phar':
+                --dbType PHAR
+            #else:
+                --dbType MOL
+            #end if
+            --dbase $database
+            --pharmacophore $pharmacophores
+            $merge 
+            $noHybrid
+
+            2>&#38;1
+    </command>
+    <inputs>
+        <param name="database" type="data" format='mol,mol2,sdf,smi,phar' label="Defines the database of molecules that will be converted to pharmacophores" />
+        <param name='merge' type='boolean' truevalue='--merge' falsevalue='' label='Merge pharmacophore points' />
+        <param name='noHybrid' type='boolean' truevalue='--noHybrid' falsevalue='' label='Disable the use of hybrid pharmacophore points' help="Using this flag will increase the number of pharmacophore points."/>
+    </inputs>
+    <outputs>
+        <data name="pharmacophores" format="phar" label="${tool.name} on ${on_string} (pharmacophores)"/>
+    </outputs>
+    <tests>
+        <test>
+        </test>
+    </tests>
+    <help>
+
+**What it does**
+
+Align-it_ is a tool to align molecules according to their pharmacophores.
+A pharmacophore is an abstract concept based on the specific interactions 
+that have been observed in drug-receptor interactions: hydrogen bonding, 
+charge transfer, electrostatic and hydrophobic interactions. 
+Molecular modeling and/or screening based on pharmacophore similarities 
+have proven to be an important and useful method in drug discovery.
+
+The functionality of Align-it_ consists mainly of two parts. 
+The first functionality consists of the **generation of pharmacophores from molecules** 
+(that is the function of this tool). Second, pairs of pharmacophores 
+can be aligned (use the tool *Pharmacophore Alignment*) and the resulting 
+score is calculated from the volume overlap resulting from the alignments.
+
+
+.. _Align-it: http://www.silicos-it.com/software/align-it/1.0.3/align-it.html
+
+
+-----
+
+**Example**
+
+* input::
+
+    - database
+
+     30 31  0     0  0  0  0  0  0999 V2000
+        1.9541    1.1500   -2.5078 Cl  0  0  0  0  0  0  0  0  0  0  0  0
+        1.1377   -1.6392    2.1136 Cl  0  0  0  0  0  0  0  0  0  0  0  0
+       -3.2620   -2.9284   -1.0647 O   0  0  0  0  0  0  0  0  0  0  0  0
+       -2.7906   -1.9108    0.9092 O   0  0  0  0  0  0  0  0  0  0  0  0
+        0.2679   -0.2051   -0.3990 N   0  0  0  0  0  0  0  0  0  0  0  0
+       -2.0640    0.5139   -0.3769 C   0  0  0  0  0  0  0  0  0  0  0  0
+       -0.7313    0.7178   -0.0192 C   0  0  0  0  0  0  0  0  0  0  0  0
+       -2.4761   -0.6830   -1.1703 C   0  0  0  0  0  0  0  0  0  0  0  0
+        1.6571   -0.2482   -0.1795 C   0  0  0  0  0  0  0  0  0  0  0  0
+       -3.0382    1.4350    0.0081 C   0  0  0  0  0  0  0  0  0  0  0  0
+       -0.3728    1.8429    0.7234 C   0  0  0  0  0  0  0  0  0  0  0  0
+       -2.6797    2.5600    0.7506 C   0  0  0  0  0  0  0  0  0  0  0  0
+       -1.3470    2.7640    1.1083 C   0  0  0  0  0  0  0  0  0  0  0  0
+        2.5353    0.3477   -1.0918 C   0  0  0  0  0  0  0  0  0  0  0  0
+        2.1740   -0.8865    0.9534 C   0  0  0  0  0  0  0  0  0  0  0  0
+       -2.8480   -1.8749   -0.3123 C   0  0  0  0  0  0  0  0  0  0  0  0
+        3.9124    0.3058   -0.8739 C   0  0  0  0  0  0  0  0  0  0  0  0
+        3.5511   -0.9285    1.1713 C   0  0  0  0  0  0  0  0  0  0  0  0
+        4.4203   -0.3324    0.2576 C   0  0  0  0  0  0  0  0  0  0  0  0
+       -1.7086   -0.9792   -1.8930 H   0  0  0  0  0  0  0  0  0  0  0  0
+       -3.3614   -0.4266   -1.7676 H   0  0  0  0  0  0  0  0  0  0  0  0
+       -0.0861   -1.1146   -0.6780 H   0  0  0  0  0  0  0  0  0  0  0  0
+       -4.0812    1.2885   -0.2604 H   0  0  0  0  0  0  0  0  0  0  0  0
+        0.6569    2.0278    1.0167 H   0  0  0  0  0  0  0  0  0  0  0  0
+       -3.4382    3.2769    1.0511 H   0  0  0  0  0  0  0  0  0  0  0  0
+       -1.0683    3.6399    1.6868 H   0  0  0  0  0  0  0  0  0  0  0  0
+        4.6037    0.7654   -1.5758 H   0  0  0  0  0  0  0  0  0  0  0  0
+        3.9635   -1.4215    2.0480 H   0  0  0  0  0  0  0  0  0  0  0  0
+        5.4925   -0.3651    0.4274 H   0  0  0  0  0  0  0  0  0  0  0  0
+       -3.5025   -3.7011   -0.5102 H   0  0  0  0  0  0  0  0  0  0  0  0
+
+    - cutoff : 0.0
+
+* output::
+    
+    - aligned Pharmacophores 
+
+        3033
+        HYBL    -1.98494    1.9958    0.532089    0.7    0    0    0    0
+        HYBL    3.52122    -0.309347    0.122783    0.7    0    0    0    0
+        HYBH    -3.262    -2.9284    -1.0647    1    1    -3.5666    -3.7035    -1.61827
+        HDON    0.2679    -0.2051    -0.399    1    1    -0.076102    -0.981133    -0.927616
+        HACC    -2.7906    -1.9108    0.9092    1    1    -2.74368    -1.94015    1.90767
+        $$$$    
+
+
+
+    </help>
+</tool>
--- a/qed/qed.py	Sat May 25 12:56:15 2013 -0400
+++ b/qed/qed.py	Sun May 26 16:50:00 2013 +0200
@@ -16,17 +16,22 @@
 
 def check_filetype(filepath):
     mol = False
-    for line in open(filepath):
+    possible_inchi = True
+    for line_counter, line in enumerate(open(filepath)):
+        if line_counter > 10000:
+            break
         if line.find('$$$$') != -1:
             return 'sdf'
         elif line.find('@<TRIPOS>MOLECULE') != -1:
             return 'mol2'
         elif line.find('ligand id') != -1:
             return 'drf'
-        elif re.findall('^InChI=', line):
+        elif possible_inchi and re.findall('^InChI=', line):
             return 'inchi'
         elif re.findall('^M\s+END', line):
             mol = True
+        # first line is not an InChI, so it can't be an InChI file
+        possible_inchi = False
 
     if mol:
         # END can occures before $$$$, so and SDF file will 
@@ -288,17 +293,22 @@
 
 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
-    parser.add_argument('-i', '--input', required=True, help='path to the input file name')
-    parser.add_argument("-m", "--method", dest="method", choices=['max', 'mean', 'unweighted'],
-                      default="mean",
-                      help="Specify the method you want to use.")
-
+    parser.add_argument('-i', '--input', 
+                required=True, 
+                help='path to the input file name')
+    parser.add_argument("-m", "--method", 
+                dest="method",
+                choices=['max', 'mean', 'unweighted'],
+                default="mean",
+                help="Specify the method you want to use.")
+    parser.add_argument("--iformat",
+                help="Input format. It must be supported by openbabel.")
     parser.add_argument('-o', '--outfile', type=argparse.FileType('w+'), 
-        default=sys.stdout, help="path to the result file, default it sdtout")
-
+                default=sys.stdout, 
+                help="path to the result file, default it sdtout")
     parser.add_argument("--header", dest="header", action="store_true",
-                    default=False,
-                    help="Write header line.")
+                default=False,
+                help="Write header line.")
 
 
     args = parser.parse_args()
@@ -315,15 +325,18 @@
         print "Error: ", ifile, " is not readable."
         sys.exit(1)
 
-    filetype = check_filetype(ifile)
+    if not args.iformat:
+        # try to guess the filetype
+        filetype = check_filetype( ifile )
+    else:
+        filetype = args.iformat # sdf or smi
 
 
     """
         We want to store the original SMILES in the output. So in case of a SMILES file iterate over the file and convert each line separate.
     """
-
     if filetype == 'sdf':
-        supplier = Chem.SDMolSupplier(ifile)
+        supplier = Chem.SDMolSupplier( ifile )
         # Process file
         if args.header:
             args.outfile.write("MW\tALOGP\tHBA\tHBD\tPSA\tROTB\tAROM\tALERTS\tLRo5\tQED\tNAME\n")
@@ -356,7 +369,7 @@
                 mol.GetProp("_Name"),
                 ))
     elif filetype == 'smi':
-        supplier = Chem.SmilesMolSupplier(ifile, " \t", 0, 1, False, True)
+        supplier = Chem.SmilesMolSupplier( ifile, " \t", 0, 1, False, True )
 
         # Process file
         if args.header:
@@ -397,9 +410,5 @@
                 title,
                 smiles
                 ))
-
     else:
-        print "Error: unknown file extension: ", extension
-        sys.exit(1)
-
-    sys.exit(0)
+        sys.exit("Error: unknown file-type: %s" % filetype)
--- a/qed/silicos_qed.xml	Sat May 25 12:56:15 2013 -0400
+++ b/qed/silicos_qed.xml	Sun May 26 16:50:00 2013 +0200
@@ -6,7 +6,12 @@
     <requirement type="package" version="2012_12_1">rdkit</requirement>
     <requirement type="package" version="1.7.1">numpy</requirement>
   </requirements>
-  <command interpreter="python">qed.py -i "${infile}" --method "${method}" -o "${outfile}" $header 2>&#38;1</command>
+  <command interpreter="python">
+    qed.py -i "${infile}" 
+        --method "${method}" 
+        --iformat ${infile.ext} 
+        -o "${outfile}" $header 2>&#38;1
+  </command>
   <inputs>
     <param format="smi,sdf" name="infile" type="data" label="Molecule data in SD- or SMILES-format" help="Dataset missing? See TIP below"/>
     <param name="method" type="select" label="Method">
--- a/repository_dependencies.xml	Sat May 25 12:56:15 2013 -0400
+++ b/repository_dependencies.xml	Sun May 26 16:50:00 2013 +0200
@@ -1,4 +1,4 @@
 <?xml version="1.0"?>
 <repositories description="This requires the Molecule datatype definitions (e.g. SMILES, InChI, SD-format).">
-    <repository changeset_revision="c5383bd4d006" name="molecule_datatypes" owner="bgruening" toolshed="http://testtoolshed.g2.bx.psu.edu" />
-</repositories>
+    <repository name="molecule_datatypes" owner="bgruening" />
+</repositories> 
--- a/shape-it/shape-it.xml	Sat May 25 12:56:15 2013 -0400
+++ b/shape-it/shape-it.xml	Sun May 26 16:50:00 2013 +0200
@@ -31,10 +31,10 @@
 
 **What it does**
 
-Shape-it is a program for the alignment of a reference molecule against a
-database of molecules using the shape of the molecules. It is based on
-the use of Gaussian volumes as descriptor for molecular shape as it was
-introduced by Grant and Pickup1 .
+Shape-it_ is a tool that aligns a reference molecule against a set of database
+molecules using the shape of the molecules as the alignment criterion. 
+It is based on the use of Gaussian volumes as descriptor for molecular 
+shape as it was introduced by `Grant and Pickup`_.
 
 The program expects one reference molecule with
 its three-dimensional coordinates and one database files containing one
@@ -42,6 +42,10 @@
 of all database molecules and their respective scores or the N best
 scoring molecules from the complete database.
 
+
+.. _Shape-it: http://silicos-it.com/software/shape-it/1.0.1/shape-it.html
+.. _`Grant and Pickup`: http://onlinelibrary.wiley.com/doi/10.1002/(SICI)1096-987X(19961115)17:14%3C1653::AID-JCC7%3E3.0.CO;2-K/abstract
+
 -----
 
 **Example**
--- a/tool_dependencies.xml	Sat May 25 12:56:15 2013 -0400
+++ b/tool_dependencies.xml	Sun May 26 16:50:00 2013 +0200
@@ -1,13 +1,12 @@
-<?xml version="1.0"?>
 <tool_dependency>
     <package name="numpy" version="1.7.1">
-        <repository changeset_revision="7283651b62fe" name="package_numpy_1_7" owner="bgruening" prior_installation_required="True" toolshed="http://testtoolshed.g2.bx.psu.edu" />
+        <repository name="package_numpy_1_7" owner="bgruening" prior_installation_required="True" />
     </package>
     <package name="openbabel" version="2.3.2">
-        <repository changeset_revision="ccda1b8ebc72" name="package_openbabel_2_3" owner="bgruening" prior_installation_required="True" toolshed="http://testtoolshed.g2.bx.psu.edu" />
+        <repository name="package_openbabel_2_3" owner="bgruening" prior_installation_required="True" />
     </package>
     <package name="rdkit" version="2012_12_1">
-        <repository changeset_revision="87b2d2831a31" name="package_rdkit_2012_12" owner="bgruening" prior_installation_required="True" toolshed="http://testtoolshed.g2.bx.psu.edu" />
+        <repository name="package_rdkit_2012_12" owner="bgruening" prior_installation_required="True" />
     </package>
     <package name="silicos_it" version="1.0.0">
         <install version="1.0">
@@ -19,7 +18,7 @@
                     $OPENBABEL_INCLUDE_DIR and $OPENBABEL_LIB_DIR
                 -->
                 <action type="set_environment_for_install">
-                    <repository changeset_revision="ccda1b8ebc72" name="package_openbabel_2_3" owner="bgruening" toolshed="http://testtoolshed.g2.bx.psu.edu">
+                    <repository name="package_openbabel_2_3" owner="bgruening">
                         <package name="openbabel" version="2.3.2" />
                     </repository>
                 </action>
@@ -31,7 +30,7 @@
                     make  &amp;&amp;
                     make install</action>
                 <action type="set_environment">
-                    <environment_variable action="prepend_to" name="PATH">$INSTALL_DIR/strip-it/bin</environment_variable>
+                    <environment_variable name="PATH" action="prepend_to">$INSTALL_DIR/strip-it/bin</environment_variable>
                 </action>
 
                 <!-- compiling align-it -->
@@ -42,7 +41,7 @@
                     make &amp;&amp; 
                     make install</action>
                 <action type="set_environment">
-                    <environment_variable action="prepend_to" name="PATH">$INSTALL_DIR/align-it/bin</environment_variable>
+                    <environment_variable name="PATH" action="prepend_to">$INSTALL_DIR/align-it/bin</environment_variable>
                 </action>
 
                 <!-- compiling shape-it -->
@@ -53,7 +52,7 @@
                     make &amp;&amp; 
                     make install</action>
                 <action type="set_environment">
-                    <environment_variable action="prepend_to" name="PATH">$INSTALL_DIR/shape-it/bin</environment_variable>
+                    <environment_variable name="PATH" action="prepend_to">$INSTALL_DIR/shape-it/bin</environment_variable>
                 </action>
 
             </actions>