Mercurial > repos > bgruening > silicos_it
changeset 0:80efb29755f3
Uploaded
author | bgruening |
---|---|
date | Sun, 31 Mar 2013 13:16:06 -0400 |
parents | |
children | 54edf22c93c0 |
files | qed/errors.py qed/errors.pyc qed/qed.py qed/silicos_qed.xml qed/tool-data/qed_test.smi qed/tool-data/qed_test_max.tab qed/tool-data/qed_test_mean.tab qed/tool-data/qed_test_unweighted.tab repository_dependencies.xml shape-it/shape-it.xml shape-it/test_data/CID_3033.sdf shape-it/test_data/CID_3037.sdf shape-it/test_data/shapeit_on_CID30333_and_CID3037.sdf strip-it/strip-it.xml strip-it/test-data/CID_3037.sdf strip-it/test-data/Strip-it_on_CID3037.tabular tool_dependencies.xml |
diffstat | 17 files changed, 2177 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
# qed/errors.py
"""Exception hierarchy shared by the Silicos-it tools."""

__all__ = ['SilicosItError', 'WrongArgument']


class SilicosItError(Exception):
    """Base class for exceptions in Silicos-it code."""


class WrongArgument(SilicosItError):
    """
    Exception raised when argument to function is not of correct type.

    Attributes:
        function -- function in which error occurred
        msg -- explanation of the error
    """

    def __init__(self, function, msg):
        # Hand both values to the base class so that ``args`` is populated.
        # Without this the exception carries no message in tracebacks and
        # cannot be re-created from ``args`` (e.g. when pickled).
        SilicosItError.__init__(self, function, msg)
        self.function = function
        self.msg = msg

    def __str__(self):
        """Return ``"<function>: <msg>"`` for tracebacks and log output."""
        return "%s: %s" % (self.function, self.msg)
# qed/qed.py
"""
Quantitative Estimate of Drug-likeness (QED).

Importable API: ``weights_max``, ``weights_mean``, ``weights_none`` and
``default``.  When run as a script the module reads an SD or SMILES file and
writes one tab-separated row of molecular properties plus the QED score per
molecule.
"""

__all__ = ['weights_max', 'weights_mean', 'weights_none', 'default']

# Silicos-it
from errors import WrongArgument

# RDKit
from rdkit.Chem import Descriptors
from rdkit import Chem

# General
from copy import deepcopy
from math import exp, log
import sys
import os
import re
import argparse

# "M  END" terminates a molblock (MOL files and every record of an SD file).
_MOL_END = re.compile(r'M\s+END')


def check_filetype(filepath):
    """
    Guess the chemical file format of *filepath* from its content.

    The file is scanned line by line and the first recognised marker wins.
    Returns one of 'sdf', 'mol2', 'drf', 'inchi', 'mol' or 'smi'
    ('smi' is the fallback when no marker is found).
    """
    mol = False
    with open(filepath) as handle:
        for line in handle:
            if '$$$$' in line:
                return 'sdf'
            elif '@<TRIPOS>MOLECULE' in line:
                return 'mol2'
            elif 'ligand id' in line:
                return 'drf'
            elif line.startswith('InChI='):
                return 'inchi'
            elif _MOL_END.match(line):
                # Do not return yet: in an SD file "M  END" appears before
                # the "$$$$" record separator, so returning here would
                # misclassify every SD file as a MOL file.
                mol = True

    if mol:
        return 'mol'
    return 'smi'


AliphaticRings = Chem.MolFromSmarts('[$([A;R][!a])]')

AcceptorSmarts = [
    '[oH0;X2]',
    '[OH1;X2;v2]',
    '[OH0;X2;v2]',
    '[OH0;X1;v2]',
    '[O-;X1]',
    '[SH0;X2;v2]',
    '[SH0;X1;v2]',
    '[S-;X1]',
    '[nH0;X2]',
    '[NH0;X1;v3]',
    '[$([N;+0;X3;v3]);!$(N[C,S]=O)]'
    ]
Acceptors = [Chem.MolFromSmarts(hba) for hba in AcceptorSmarts]

StructuralAlertSmarts = [
    '*1[O,S,N]*1',
    '[S,C](=[O,S])[F,Br,Cl,I]',
    '[CX4][Cl,Br,I]',
    '[C,c]S(=O)(=O)O[C,c]',
    '[$([CH]),$(CC)]#CC(=O)[C,c]',
    '[$([CH]),$(CC)]#CC(=O)O[C,c]',
    'n[OH]',
    '[$([CH]),$(CC)]#CS(=O)(=O)[C,c]',
    'C=C(C=O)C=O',
    'n1c([F,Cl,Br,I])cccc1',
    '[CH1](=O)',
    '[O,o][O,o]',
    '[C;!R]=[N;!R]',
    '[N!R]=[N!R]',
    '[#6](=O)[#6](=O)',
    '[S,s][S,s]',
    '[N,n][NH2]',
    'C(=O)N[NH2]',
    '[C,c]=S',
    '[$([CH2]),$([CH][CX4]),$(C([CX4])[CX4])]=[$([CH2]),$([CH][CX4]),$(C([CX4])[CX4])]',
    'C1(=[O,N])C=CC(=[O,N])C=C1',
    'C1(=[O,N])C(=[O,N])C=CC=C1',
    'a21aa3a(aa1aaaa2)aaaa3',
    'a31a(a2a(aa1)aaaa2)aaaa3',
    'a1aa2a3a(a1)A=AA=A3=AA=A2',
    'c1cc([NH2])ccc1',
    '[Hg,Fe,As,Sb,Zn,Se,se,Te,B,Si,Na,Ca,Ge,Ag,Mg,K,Ba,Sr,Be,Ti,Mo,Mn,Ru,Pd,Ni,Cu,Au,Cd,Al,Ga,Sn,Rh,Tl,Bi,Nb,Li,Pb,Hf,Ho]',
    'I',
    'OS(=O)(=O)[O-]',
    '[N+](=O)[O-]',
    'C(=O)N[OH]',
    'C1NC(=O)NC(=O)1',
    '[SH]',
    '[S-]',
    'c1ccc([Cl,Br,I,F])c([Cl,Br,I,F])c1[Cl,Br,I,F]',
    'c1cc([Cl,Br,I,F])cc([Cl,Br,I,F])c1[Cl,Br,I,F]',
    '[CR1]1[CR1][CR1][CR1][CR1][CR1][CR1]1',
    '[CR1]1[CR1][CR1]cc[CR1][CR1]1',
    '[CR2]1[CR2][CR2][CR2][CR2][CR2][CR2][CR2]1',
    '[CR2]1[CR2][CR2]cc[CR2][CR2][CR2]1',
    '[CH2R2]1N[CH2R2][CH2R2][CH2R2][CH2R2][CH2R2]1',
    '[CH2R2]1N[CH2R2][CH2R2][CH2R2][CH2R2][CH2R2][CH2R2]1',
    'C#C',
    '[OR2,NR2]@[CR2]@[CR2]@[OR2,NR2]@[CR2]@[CR2]@[OR2,NR2]',
    '[$([N+R]),$([n+R]),$([N+]=C)][O-]',
    '[C,c]=N[OH]',
    '[C,c]=NOC=O',
    '[C,c](=O)[CX4,CR0X3,O][C,c](=O)',
    'c1ccc2c(c1)ccc(=O)o2',
    '[O+,o+,S+,s+]',
    'N=C=O',
    '[NX3,NX4][F,Cl,Br,I]',
    'c1ccccc1OC(=O)[#6]',
    '[CR0]=[CR0][CR0]=[CR0]',
    '[C+,c+,C-,c-]',
    'N=[N+]=[N-]',
    'C12C(NC(N1)=O)CSC2',
    'c1c([OH])c([OH,NH2,NH])ccc1',
    'P',
    '[N,O,S]C#N',
    'C=C=O',
    '[Si][F,Cl,Br,I]',
    '[SX2]O',
    '[SiR0,CR0](c1ccccc1)(c2ccccc2)(c3ccccc3)',
    'O1CCCCC1OC2CCC3CCCCC3C2',
    'N=[CR0][N,n,O,S]',
    '[cR2]1[cR2][cR2]([Nv3X3,Nv4X4])[cR2][cR2][cR2]1[cR2]2[cR2][cR2][cR2]([Nv3X3,Nv4X4])[cR2][cR2]2',
    'C=[C!r]C#N',
    '[cR2]1[cR2]c([N+0X3R0,nX3R0])c([N+0X3R0,nX3R0])[cR2][cR2]1',
    '[cR2]1[cR2]c([N+0X3R0,nX3R0])[cR2]c([N+0X3R0,nX3R0])[cR2]1',
    '[cR2]1[cR2]c([N+0X3R0,nX3R0])[cR2][cR2]c1([N+0X3R0,nX3R0])',
    '[OH]c1ccc([OH,NH2,NH])cc1',
    'c1ccccc1OC(=O)O',
    '[SX2H0][N]',
    'c12ccccc1(SC(S)=N2)',
    'c12ccccc1(SC(=S)N2)',
    'c1nnnn1C=O',
    's1c(S)nnc1NC=O',
    'S1C=CSC1=S',
    'C(=O)Onnn',
    'OS(=O)(=O)C(F)(F)F',
    'N#CC[OH]',
    'N#CC(=O)',
    'S(=O)(=O)C#N',
    'N[CH2]C#N',
    'C1(=O)NCC1',
    'S(=O)(=O)[O-,OH]',
    'NC[F,Cl,Br,I]',
    'C=[C!r]O',
    '[NX2+0]=[O+0]',
    '[OR0,NR0][OR0,NR0]',
    'C(=O)O[C,H1].C(=O)O[C,H1].C(=O)O[C,H1]',
    '[CX2R0][NX3R0]',
    'c1ccccc1[C;!R]=[C;!R]c2ccccc2',
    '[NX3R0,NX4R0,OR0,SX2R0][CX4][NX3R0,NX4R0,OR0,SX2R0]',
    '[s,S,c,C,n,N,o,O]~[n+,N+](~[s,S,c,C,n,N,o,O])(~[s,S,c,C,n,N,o,O])~[s,S,c,C,n,N,o,O]',
    '[s,S,c,C,n,N,o,O]~[nX3+,NX3+](~[s,S,c,C,n,N])~[s,S,c,C,n,N]',
    '[*]=[N+]=[*]',
    '[SX3](=O)[O-,OH]',
    'N#N',
    'F.F.F.F',
    '[R0;D2][R0;D2][R0;D2][R0;D2]',
    '[cR,CR]~C(=O)NC(=O)~[cR,CR]',
    'C=!@CC=[O,S]',
    '[#6,#8,#16][C,c](=O)O[C,c]',
    'c[C;R0](=[O,S])[C,c]',
    'c[SX2][C;!R]',
    'C=C=C',
    'c1nc([F,Cl,Br,I,S])ncc1',
    'c1ncnc([F,Cl,Br,I,S])c1',
    'c1nc(c2c(n1)nc(n2)[F,Cl,Br,I])',
    '[C,c]S(=O)(=O)c1ccc(cc1)F',
    '[15N]',
    '[13C]',
    '[18O]',
    '[34S]'
    ]

StructuralAlerts = [Chem.MolFromSmarts(smarts) for smarts in StructuralAlertSmarts]


# ADS parameters for the 8 molecular properties: [row][column]
#     rows[8]:     MW, ALOGP, HBA, HBD, PSA, ROTB, AROM, ALERTS
#     columns[7]:  A, B, C, D, E, F, DMAX
# ALOGP parameters from Gregory Gerebtzoff (2012, Roche)
pads1 = [
    [2.817065973, 392.5754953, 290.7489764, 2.419764353, 49.22325677, 65.37051707, 104.9805561],
    [0.486849448, 186.2293718, 2.066177165, 3.902720615, 1.027025453, 0.913012565, 145.4314800],
    [2.948620388, 160.4605972, 3.615294657, 4.435986202, 0.290141953, 1.300669958, 148.7763046],
    [1.618662227, 1010.051101, 0.985094388, 0.000000001, 0.713820843, 0.920922555, 258.1632616],
    [1.876861559, 125.2232657, 62.90773554, 87.83366614, 12.01999824, 28.51324732, 104.5686167],
    [0.010000000, 272.4121427, 2.558379970, 1.565547684, 1.271567166, 2.758063707, 105.4420403],
    [3.217788970, 957.7374108, 2.274627939, 0.000000001, 1.317690384, 0.375760881, 312.3372610],
    [0.010000000, 1199.094025, -0.09002883, 0.000000001, 0.185904477, 0.875193782, 417.7253140],
]
# ALOGP parameters from the original publication
pads2 = [
    [2.817065973, 392.5754953, 290.7489764, 2.419764353, 49.22325677, 65.37051707, 104.9805561],
    [3.172690585, 137.8624751, 2.534937431, 4.581497897, 0.822739154, 0.576295591, 131.3186604],
    [2.948620388, 160.4605972, 3.615294657, 4.435986202, 0.290141953, 1.300669958, 148.7763046],
    [1.618662227, 1010.051101, 0.985094388, 0.000000001, 0.713820843, 0.920922555, 258.1632616],
    [1.876861559, 125.2232657, 62.90773554, 87.83366614, 12.01999824, 28.51324732, 104.5686167],
    [0.010000000, 272.4121427, 2.558379970, 1.565547684, 1.271567166, 2.758063707, 105.4420403],
    [3.217788970, 957.7374108, 2.274627939, 0.000000001, 1.317690384, 0.375760881, 312.3372610],
    [0.010000000, 1199.094025, -0.09002883, 0.000000001, 0.185904477, 0.875193782, 417.7253140],
]


def ads(x, a, b, c, d, e, f, dmax):
    """
    Evaluate the ADS desirability function for property value *x*.

    *a* .. *f* are the shape parameters and *dmax* the normalisation
    constant, i.e. one row of ``pads1`` / ``pads2``.
    """
    return ((a+(b/(1+exp(-1*(x-c+d/2)/e))*(1-1/(1+exp(-1*(x-c-d/2)/f))))) / dmax)


def properties(mol):
    """
    Calculates the properties that are required to calculate the QED descriptor.

    Returns a 9-element list:
    [MW, ALOGP, HBA, HBD, PSA, ROTB, AROM, ALERTS, number of failed Ro5 rules].
    Raises WrongArgument when *mol* is None.
    """
    if mol is None:
        raise WrongArgument("properties(mol)", "mol argument is \'None\'")
    x = [0] * 9
    x[0] = Descriptors.MolWt(mol)                                   # MW
    x[1] = Descriptors.MolLogP(mol)                                 # ALOGP
    for hba in Acceptors:                                           # HBA
        # An acceptor pattern without hits yields an empty result, so no
        # separate HasSubstructMatch() pre-check is needed.
        x[2] += len(mol.GetSubstructMatches(hba))
    x[3] = Descriptors.NumHDonors(mol)                              # HBD
    x[4] = Descriptors.TPSA(mol)                                    # PSA
    x[5] = Descriptors.NumRotatableBonds(mol)                       # ROTB
    # AROM: rings that remain once all aliphatic ring atoms are deleted.
    rings = Chem.GetSSSR(Chem.DeleteSubstructs(deepcopy(mol), AliphaticRings))
    # Older RDKit releases return the ring count, newer ones return the
    # rings themselves; normalise to a count so "%d" formatting keeps working.
    x[6] = rings if isinstance(rings, int) else len(rings)
    for alert in StructuralAlerts:                                  # ALERTS
        if mol.HasSubstructMatch(alert):
            x[7] += 1
    ro5_failed = 0
    if x[3] > 5:        # HBD
        ro5_failed += 1
    if x[2] > 10:       # HBA
        ro5_failed += 1
    if x[0] >= 500:     # MW
        ro5_failed += 1
    if x[1] > 5:        # ALOGP
        ro5_failed += 1
    x[8] = ro5_failed
    return x


def qed(w, p, gerebtzoff):
    """
    Combine the eight desirabilities into the QED score.

    w          -- the eight descriptor weights
    p          -- property list as returned by properties(); only p[0:8] is used
    gerebtzoff -- use the Gerebtzoff ALOGP parameters (pads1) when true,
                  otherwise those of the original publication (pads2)

    Returns exp(sum(w[i] * log(d[i])) / sum(w)), the weighted geometric
    mean of the desirabilities d[i].
    """
    pads = pads1 if gerebtzoff else pads2
    d = [ads(p[i], *pads[i]) for i in range(8)]
    t = 0.0
    for i in range(8):
        t += w[i] * log(d[i])
    return exp(t / sum(w))


def weights_max(mol, gerebtzoff = True, props = False):
    """
    Calculates the QED descriptor using maximal descriptor weights.
    If props is specified we skip the calculation step and use the props-list of properties.
    """
    if not props:
        props = properties(mol)
    return qed([0.50, 0.25, 0.00, 0.50, 0.00, 0.50, 0.25, 1.00], props, gerebtzoff)


def weights_mean(mol, gerebtzoff = True, props = False):
    """
    Calculates the QED descriptor using average descriptor weights.
    If props is specified we skip the calculation step and use the props-list of properties.
    """
    if not props:
        props = properties(mol)
    return qed([0.66, 0.46, 0.05, 0.61, 0.06, 0.65, 0.48, 0.95], props, gerebtzoff)


def weights_none(mol, gerebtzoff = True, props = False):
    """
    Calculates the QED descriptor using unit weights.
    If props is specified we skip the calculation step and use the props-list of properties.
    """
    if not props:
        props = properties(mol)
    return qed([1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00], props, gerebtzoff)


def default(mol, gerebtzoff = True):
    """
    Calculates the QED descriptor using average descriptor weights and Gregory Gerebtzoff parameters.
    """
    return weights_mean(mol, gerebtzoff)


# --- command line interface -------------------------------------------------

_HEADER = "MW\tALOGP\tHBA\tHBD\tPSA\tROTB\tAROM\tALERTS\tLRo5\tQED\tNAME"
_ROW_FORMAT = "%.2f\t%.3f\t%d\t%d\t%.2f\t%d\t%d\t%d\t%s\t%.3f\t%-s"

# Value of --method -> scoring function.
_METHODS = {
    'max': weights_max,
    'mean': weights_mean,
    'unweighted': weights_none,
}


def _die(message):
    """Report *message* on stderr and terminate with exit status 1."""
    sys.stderr.write("Error: %s\n" % message)
    sys.exit(1)


def _warn_skipped(count):
    """Report on stderr that record number *count* could not be parsed."""
    # stderr, not stdout: stdout is the default destination of the result
    # table and must not be polluted with diagnostics.
    sys.stderr.write("Warning: skipping molecule %d and continuing with next.\n" % count)


def _format_row(props, calc_qed, name):
    """Format the nine properties, the QED score and the name as one row (no newline)."""
    return _ROW_FORMAT % (tuple(props[:9]) + (calc_qed, name))


def _process_sdf(ifile, outfile, score, header):
    """Score every molecule of the SD file *ifile* and write the rows to *outfile*."""
    if header:
        outfile.write(_HEADER + "\n")
    for count, mol in enumerate(Chem.SDMolSupplier(ifile), 1):
        if mol is None:
            _warn_skipped(count)
            continue
        props = properties(mol)
        calc_qed = score(mol, True, props)
        outfile.write(_format_row(props, calc_qed, mol.GetProp("_Name")) + "\n")


def _process_smiles(ifile, outfile, score, header):
    """
    Score every line of the SMILES file *ifile* and write the rows to *outfile*.

    The file is parsed line by line (instead of through an RDKit supplier)
    because the original SMILES string is echoed in the last output column.
    Lines are "SMILES<TAB>title"; the title and any further columns are optional.
    """
    if header:
        outfile.write(_HEADER + "\tSMILES\n")
    with open(ifile) as handle:
        for count, line in enumerate(handle, 1):
            tokens = line.strip().split('\t')
            smiles = tokens[0]
            if not smiles:
                # Blank line: nothing to score.
                continue
            # Only the first two columns are used, so extra columns no
            # longer break the unpacking.
            title = tokens[1] if len(tokens) > 1 else ''
            mol = Chem.MolFromSmiles(smiles)
            if mol is None:
                _warn_skipped(count)
                continue
            props = properties(mol)
            calc_qed = score(mol, True, props)
            row = _format_row(props, calc_qed, title)
            outfile.write("%s\t%s\n" % (row, smiles))


def main():
    """Parse the command line, detect the input format and write the QED table."""
    parser = argparse.ArgumentParser()
    parser.add_argument('-i', '--input', required=True, help='path to the input file name')
    parser.add_argument("-m", "--method", dest="method", choices=['max', 'mean', 'unweighted'],
                        default="mean",
                        help="Specify the method you want to use.")

    parser.add_argument('-o', '--outfile', type=argparse.FileType('w+'),
                        default=sys.stdout, help="path to the result file, default it sdtout")

    parser.add_argument("--header", dest="header", action="store_true",
                        default=False,
                        help="Write header line.")

    args = parser.parse_args()

    # Validate the input path.  Existence is tested first so that each
    # message can actually be reached.
    ifile = os.path.abspath(args.input)
    if not os.path.exists(ifile):
        _die("%s does not exist or cannot be found." % ifile)
    if not os.path.isfile(ifile):
        _die("%s is not a file or cannot be found." % ifile)
    if not os.access(ifile, os.R_OK):
        _die("%s is not readable." % ifile)

    # Elucidate filetype and process the file accordingly.
    filetype = check_filetype(ifile)
    score = _METHODS[args.method]

    if filetype == 'sdf':
        _process_sdf(ifile, args.outfile, score, args.header)
    elif filetype == 'smi':
        _process_smiles(ifile, args.outfile, score, args.header)
    else:
        _die("unsupported file type: %s" % filetype)

    sys.exit(0)


if __name__ == "__main__":
    main()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/qed/silicos_qed.xml Sun Mar 31 13:16:06 2013 -0400 @@ -0,0 +1,98 @@ +<tool id="silicos_qed" name="Drug-likeness"> + <description>quantitative estimation (QED)</description> + <parallelism method="multi" split_inputs="infile" split_mode="to_size" split_size="10000" shared_inputs="" merge_outputs="outfile"></parallelism> + <requirements> + <requirement type="package" version="1.0.0">silicos_it</requirement> + </requirements> + <command interpreter="python">qed.py -i "${infile}" --method "${method}" -o "${outfile}" 2>&1</command> + <inputs> + <param format="smi,sdf" name="infile" type="data" label="Molecule data in SD- or SMILES-format" help="Dataset missing? See TIP below"/> + <param name="method" type="select" label="Method"> + <option value="max">Max weight (QEDw,max)</option> + <option value="mean">Mean weight (QEDw,mo)</option> + <option value="unweighted">unweighted (QEDw,u)</option> + </param> + </inputs> + <outputs> + <data format="tabular" name="outfile" /> + </outputs> + <tests> + <!-- + Test a tabular input with the first line being a comment without a # character to start + --> + <test> + <param name="infile" value="qed_test.smi"/> + <param name="method" value="max"/> + <output name="outfile" file="qed_test_max.tab"/> + </test> + <test> + <param name="infile" value="qed_test.smi"/> + <param name="method" value="mean"/> + <output name="outfile" file="qed_test_mean.tab"/> + </test> + <test> + <param name="infile" value="qed_test.smi"/> + <param name="method" value="unweighted"/> + <output name="outfile" file="qed_test_unweighted.tab"/> + </test> + </tests> + <help> + +.. class:: infomark + +**TIP:** If your data is not in SD- or SMILES format, use the pencil icon on your dataset to convert your data + + +----- + +**Syntax** + +The QED function comes in three flavors, each differing in the relative weight that is imposed on the underlying molecular descriptors (see [1]). 
+These three flavors correspond to the three different QED measures that were described in the original publication: + + +- All invalid, blank and comment lines are skipped when performing computations. The number of skipped lines is displayed in the resulting history item. + +- **QEDw,max** using the set of weights that give maximal information content. + +- **QEDw,mo** using the mean weights of the optimal 1,000 weight combinations that give the highest information content. + +- **QEDw,u** with all weights as unity, hence unweighted. + + +----- + +**Example** + +- Input file:: + + - SD-Format (http://en.wikipedia.org/wiki/Chemical_table_file) + - SMILES Format (http://en.wikipedia.org/wiki/Simplified_molecular_input_line_entry_specification) + +- Result:: + + ====== ===== === === ====== ==== ==== ====== ===== ============== + MW ALOGP HBA HBD PSA ROTB AROM ALERTS QED NAME + ------ ----- --- --- ------ ---- ---- ------ ----- -------------- + 286.34 1.092 6 3 101.88 4 2 1 0.737 Abacavir + 181.21 0.481 4 2 83.47 5 0 2 0.487 Acamprosate + 336.43 2.365 5 3 87.66 11 1 1 0.540 Acebutolol + 151.16 1.351 2 2 49.33 2 1 1 0.633 Acetaminophen + 222.25 0.225 5 2 115.04 3 1 1 0.727 Acetazolamide + 324.40 3.291 4 2 92.34 6 1 1 0.772 Acetohexamide + 411.57 3.492 6 1 47.02 7 2 1 0.688 Acetophenazine + 329.37 3.327 4 1 39.72 4 2 0 0.917 Paroxetine + 270.21 3.146 3 1 55.13 4 2 0 0.915 Leflunomide + ====== ===== === === ====== ==== ==== ====== ===== ============== + + +----- + + +**Cite** + +[1] Bickerton, G.R.; Paolini, G.V.; Besnard, J.; Muresan, S.; Hopkins, A.L. (2012) ‘Quantifying the chemical beauty of drugs’, Nature Chemistry, 4, 90-98 + +http://dx.doi.org/10.1038/nchem.1243 + </help> +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/qed/tool-data/qed_test.smi Sun Mar 31 13:16:06 2013 -0400 @@ -0,0 +1,25 @@ +Nc1nc(NC2CC2)c2ncn([C@@H]3C[C@H](CO)C=C3)c2n1 Abacavir +CC(=O)NCCCS(O)(=O)=O Acamprosate +CCCC(=O)Nc1ccc(OCC(O)CNC(C)C)c(c1)C(C)=O Acebutolol +CC(=O)Nc1ccc(O)cc1 Acetaminophen +CC(=O)Nc1nnc(s1)S(N)(=O)=O Acetazolamide +CC(=O)c1ccc(cc1)S(=O)(=O)NC(=O)NC1CCCCC1 Acetohexamide +CC(=O)c1ccc2Sc3ccccc3N(CCCN3CCN(CCO)CC3)c2c1 Acetophenazine +Fc4ccc(C1CCNCC1COc3ccc2OCOc2c3)cc4 Paroxetine +Cc1oncc1C(=O)Nc2ccc(C(F)(F)F)cc2 Leflunomide +CN1C4CCCC1CC(NC(=O)c2nn(C)c3ccccc23)C4 Granisetron +CCCN2CC(CSC)CC1c3cccc4[nH]cc(CC12)c34 Pergolide +CCc3c(C)[nH]c2CCC(CN1CCOCC1)C(=O)c23 Molindone +CCCCCCCCCCCCCCCC(=O)OCC(NC(=O)C(Cl)Cl)C(O)c1ccc([N+]([O-])=O)cc1 ChloramphenicalPalmitate +CCCCCCCCCCCCCCCOC(=O)C2C(O)C(O)C(C(NC(=O)C1CC(CCC)CN1C)C(C)Cl)OC2SC ClindamycinPalmitate +CCOc3nc2cccc(C(=O)OC(C)OC(=O)OC1CCCCC1)c2n3Cc6ccc(c4ccccc4c5nn[nH]n5)cc6 CandesartanCilexetil +CN(C)CCC=c2c1ccccc1sc3ccc(Cl)cc23 Chlorprothixene +O=c3c(O)c(C2CCC(c1ccc(Cl)cc1)CC2)c(=O)c4ccccc34 Atovaquone +CN(C)CCCN3c1ccccc1CCc2ccc(Cl)cc23 Clomipramine +CN4CCCC(CC3c1ccccc1Sc2ccccc23)C4 Methixene +CCN(CC)C(C)Cn3c1ccccc1sc2ccccc23 Ethopropazine +N=C(CCSCc1csc(N=C(N)N)n1)NS(N)(=O)=O Famotidine +CNC(=NCCSCc1nc[nH]c1C)NC#N Cimetidine +CCCCCNC(=N)NN=Cc1c[nH]c2ccc(CO)cc12 Tegaserod +C=CC3=C(C(=O)O)N2C(=O)C(NC(=O)C(=NO)c1csc(N)n1)C2SC3 Cefdinir +CC5(C)SC4C(NC(=O)C(C(=O)Oc2ccc1CCCc1c2)c3ccccc3)C(=O)N4C5C(=O)O CarbenicillinIndanyl
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/qed/tool-data/qed_test_max.tab Sun Mar 31 13:16:06 2013 -0400 @@ -0,0 +1,26 @@ +MW ALOGP HBA HBD PSA ROTB AROM ALERTS QED NAME +286.34 1.092 6 3 101.88 4 2 1 0.715 Abacavir +181.21 0.481 4 2 83.47 5 0 2 0.446 Acamprosate +336.43 2.365 5 3 87.66 11 1 1 0.520 Acebutolol +151.16 1.351 2 2 49.33 2 1 1 0.615 Acetaminophen +222.25 0.225 5 2 115.04 3 1 1 0.722 Acetazolamide +324.40 3.291 4 2 92.34 6 1 1 0.753 Acetohexamide +411.57 3.492 6 1 47.02 7 2 1 0.674 Acetophenazine +329.37 3.327 4 1 39.72 4 2 0 0.913 Paroxetine +270.21 3.146 3 1 55.13 4 2 0 0.908 Leflunomide +312.42 2.318 3 1 50.16 3 2 0 0.935 Granisetron +314.50 3.789 2 1 19.03 4 2 0 0.905 Pergolide +276.38 1.481 3 1 45.33 3 1 0 0.913 Molindone +561.55 6.476 6 2 118.77 23 1 5 0.038 ChloramphenicalPalmitate +663.41 6.279 8 3 108.33 24 0 3 0.057 ClindamycinPalmitate +610.67 5.837 10 1 143.34 13 5 2 0.148 CandesartanCilexetil +315.87 5.079 2 0 3.24 3 3 0 0.698 Chlorprothixene +366.84 5.505 3 1 54.37 2 2 0 0.771 Atovaquone +314.86 4.528 2 0 6.48 4 2 0 0.802 Clomipramine +309.48 5.015 2 0 3.24 2 2 0 0.765 Methixene +312.48 5.020 3 0 6.48 5 2 0 0.761 Ethopropazine +337.46 0.371 6 5 173.33 8 1 3 0.217 Famotidine +252.35 -0.036 5 3 88.89 7 1 5 0.142 Cimetidine +301.39 1.664 3 5 96.29 9 2 4 0.149 Tegaserod +395.42 -0.172 8 4 158.21 6 1 4 0.175 Cefdinir +494.57 2.496 7 2 113.01 8 2 4 0.185 CarbenicillinIndanyl
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/qed/tool-data/qed_test_mean.tab Sun Mar 31 13:16:06 2013 -0400 @@ -0,0 +1,26 @@ +MW ALOGP HBA HBD PSA ROTB AROM ALERTS QED NAME +286.34 1.092 6 3 101.88 4 2 1 0.737 Abacavir +181.21 0.481 4 2 83.47 5 0 2 0.487 Acamprosate +336.43 2.365 5 3 87.66 11 1 1 0.540 Acebutolol +151.16 1.351 2 2 49.33 2 1 1 0.633 Acetaminophen +222.25 0.225 5 2 115.04 3 1 1 0.727 Acetazolamide +324.40 3.291 4 2 92.34 6 1 1 0.772 Acetohexamide +411.57 3.492 6 1 47.02 7 2 1 0.688 Acetophenazine +329.37 3.327 4 1 39.72 4 2 0 0.917 Paroxetine +270.21 3.146 3 1 55.13 4 2 0 0.915 Leflunomide +312.42 2.318 3 1 50.16 3 2 0 0.947 Granisetron +314.50 3.789 2 1 19.03 4 2 0 0.898 Pergolide +276.38 1.481 3 1 45.33 3 1 0 0.918 Molindone +561.55 6.476 6 2 118.77 23 1 5 0.052 ChloramphenicalPalmitate +663.41 6.279 8 3 108.33 24 0 3 0.063 ClindamycinPalmitate +610.67 5.837 10 1 143.34 13 5 2 0.126 CandesartanCilexetil +315.87 5.079 2 0 3.24 3 3 0 0.636 Chlorprothixene +366.84 5.505 3 1 54.37 2 2 0 0.741 Atovaquone +314.86 4.528 2 0 6.48 4 2 0 0.782 Clomipramine +309.48 5.015 2 0 3.24 2 2 0 0.735 Methixene +312.48 5.020 3 0 6.48 5 2 0 0.734 Ethopropazine +337.46 0.371 6 5 173.33 8 1 3 0.266 Famotidine +252.35 -0.036 5 3 88.89 7 1 5 0.214 Cimetidine +301.39 1.664 3 5 96.29 9 2 4 0.213 Tegaserod +395.42 -0.172 8 4 158.21 6 1 4 0.231 Cefdinir +494.57 2.496 7 2 113.01 8 2 4 0.251 CarbenicillinIndanyl
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/qed/tool-data/qed_test_unweighted.tab Sun Mar 31 13:16:06 2013 -0400 @@ -0,0 +1,26 @@ +MW ALOGP HBA HBD PSA ROTB AROM ALERTS QED NAME +286.34 1.092 6 3 101.88 4 2 1 0.713 Abacavir +181.21 0.481 4 2 83.47 5 0 2 0.589 Acamprosate +336.43 2.365 5 3 87.66 11 1 1 0.599 Acebutolol +151.16 1.351 2 2 49.33 2 1 1 0.716 Acetaminophen +222.25 0.225 5 2 115.04 3 1 1 0.708 Acetazolamide +324.40 3.291 4 2 92.34 6 1 1 0.796 Acetohexamide +411.57 3.492 6 1 47.02 7 2 1 0.711 Acetophenazine +329.37 3.327 4 1 39.72 4 2 0 0.919 Paroxetine +270.21 3.146 3 1 55.13 4 2 0 0.937 Leflunomide +312.42 2.318 3 1 50.16 3 2 0 0.965 Granisetron +314.50 3.789 2 1 19.03 4 2 0 0.857 Pergolide +276.38 1.481 3 1 45.33 3 1 0 0.936 Molindone +561.55 6.476 6 2 118.77 23 1 5 0.111 ChloramphenicalPalmitate +663.41 6.279 8 3 108.33 24 0 3 0.101 ClindamycinPalmitate +610.67 5.837 10 1 143.34 13 5 2 0.116 CandesartanCilexetil +315.87 5.079 2 0 3.24 3 3 0 0.559 Chlorprothixene +366.84 5.505 3 1 54.37 2 2 0 0.759 Atovaquone +314.86 4.528 2 0 6.48 4 2 0 0.707 Clomipramine +309.48 5.015 2 0 3.24 2 2 0 0.651 Methixene +312.48 5.020 3 0 6.48 5 2 0 0.669 Ethopropazine +337.46 0.371 6 5 173.33 8 1 3 0.299 Famotidine +252.35 -0.036 5 3 88.89 7 1 5 0.375 Cimetidine +301.39 1.664 3 5 96.29 9 2 4 0.360 Tegaserod +395.42 -0.172 8 4 158.21 6 1 4 0.267 Cefdinir +494.57 2.496 7 2 113.01 8 2 4 0.357 CarbenicillinIndanyl
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/repository_dependencies.xml Sun Mar 31 13:16:06 2013 -0400 @@ -0,0 +1,4 @@ +<?xml version="1.0"?> +<repositories description="This requires the Molecule datatype definitions (e.g. SMILES, InChI, SD-format)."> + <repository toolshed="http://testtoolshed.g2.bx.psu.edu/" name="molecule_datatypes" owner="bgruening" changeset_revision="25698453d7d7" /> +</repositories>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/shape-it/shape-it.xml Sun Mar 31 13:16:06 2013 -0400 @@ -0,0 +1,180 @@ +<tool id="shapeit" name="Shape-it"> + <description> alignment of a reference molecule against a database of molecules using the shape of the molecules</description> + <requirements> + <requirement type="package" version="1.0.0">silicos_it</requirement> + </requirements> + <command > + shape-it --Format ${database.ext} --dbase $database --reference $reference --addIterations $addIterations --best $best --out $output 2>&1 + </command> + <inputs> + <param name="database" type="data" format='sdf,mol,mol2,smi' label="database" /> + <param name="reference" type="data" format='sdf,mol,mol2,smi' label="refrence" /> + <param name="cutoff" type="float" value="0.0" /> + <param name='addIterations' type='integer' value='0' label='Perform N additional optimization steps with the simulated annealing procedure' /> + <param name='best' type='integer' value='0' label='the N best scoring molecules are reported' /> + </inputs> + <outputs> + <data name="output" format_source='database' /> + </outputs> + <tests> + <test> + <param name="database" ftype='sdf' value="CID_3033.sdf" /> + <param name="reference" type="sdf" value='CID_3037.sdf' /> + <param name="cutoff" value="0.0" /> + <param name='addIterations' value='0' /> + <param name='best' value='0' /> + <output name="output" ftype='sdf' file="shapeit_on_CID3033_and_CID3037.sdf" /> + </test> + </tests> + <help> + +**What it does** + +Shape-it is a program for the alignment of a reference molecule against a +database of molecules using the shape of the molecules. It is based on +the use of Gaussian volumes as descriptor for molecular shape as it was +introduced by Grant and Pickup1 . + +The program expects one reference molecule with +its three-dimensional coordinates and one database files containing one +or more molecules in three dimensions. 
The results are either the alignment +of all database molecules and their respective scores or the N best +scoring molecules from the complete database. + +----- + +**Example** + +* input:: + + - database + 27 28 0 0 0 0 0 0 0999 V2000 + -4.8550 1.3401 0.2120 Cl 0 0 0 0 0 0 0 0 0 0 0 0 + 4.8529 -1.3406 0.2121 Cl 0 0 0 0 0 0 0 0 0 0 0 0 + -0.1809 -2.1668 -0.3789 O 0 0 0 0 0 0 0 0 0 0 0 0 + 0.1788 2.1664 -0.3787 O 0 0 0 0 0 0 0 0 0 0 0 0 + -0.0011 -0.0002 1.4744 C 0 0 0 0 0 0 0 0 0 0 0 0 + -1.2222 -0.2738 0.6597 C 0 0 0 0 0 0 0 0 0 0 0 0 + 1.2377 0.2772 0.6480 C 0 0 0 0 0 0 0 0 0 0 0 0 + -1.2586 -1.3462 -0.2316 C 0 0 0 0 0 0 0 0 0 0 0 0 + 1.2565 1.3457 -0.2314 C 0 0 0 0 0 0 0 0 0 0 0 0 + -2.3343 0.5568 0.7972 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2.3322 -0.5574 0.7972 C 0 0 0 0 0 0 0 0 0 0 0 0 + -2.4069 -1.5879 -0.9855 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2.4048 1.5875 -0.9852 C 0 0 0 0 0 0 0 0 0 0 0 0 + -3.4827 0.3152 0.0433 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.4807 -0.3156 0.0435 C 0 0 0 0 0 0 0 0 0 0 0 0 + -3.5190 -0.7571 -0.8481 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.5170 0.7568 -0.8478 C 0 0 0 0 0 0 0 0 0 0 0 0 + -0.1548 0.8649 2.1342 H 0 0 0 0 0 0 0 0 0 0 0 0 + 0.1601 -0.8435 2.1593 H 0 0 0 0 0 0 0 0 0 0 0 0 + -2.3089 1.3938 1.4913 H 0 0 0 0 0 0 0 0 0 0 0 0 + 2.3053 -1.3909 1.4943 H 0 0 0 0 0 0 0 0 0 0 0 0 + -2.4415 -2.4213 -1.6818 H 0 0 0 0 0 0 0 0 0 0 0 0 + 2.4469 2.4191 -1.6835 H 0 0 0 0 0 0 0 0 0 0 0 0 + -4.4070 -0.9574 -1.4422 H 0 0 0 0 0 0 0 0 0 0 0 0 + 4.4050 0.9570 -1.4418 H 0 0 0 0 0 0 0 0 0 0 0 0 + 0.2961 -2.2262 0.4641 H 0 0 0 0 0 0 0 0 0 0 0 0 + 0.3872 2.8487 -1.0397 H 0 0 0 0 0 0 0 0 0 0 0 0 + 1 14 1 0 0 0 0 + 2 15 1 0 0 0 0 + 3 8 1 0 0 0 0 + 3 26 1 0 0 0 0 + 4 9 1 0 0 0 0 + 4 27 1 0 0 0 0 + 5 6 1 0 0 0 0 + 5 7 1 0 0 0 0 + ..... 
+ + - reference + + 30 31 0 0 0 0 0 0 0999 V2000 + 1.9541 1.1500 -2.5078 Cl 0 0 0 0 0 0 0 0 0 0 0 0 + 1.1377 -1.6392 2.1136 Cl 0 0 0 0 0 0 0 0 0 0 0 0 + -3.2620 -2.9284 -1.0647 O 0 0 0 0 0 0 0 0 0 0 0 0 + -2.7906 -1.9108 0.9092 O 0 0 0 0 0 0 0 0 0 0 0 0 + 0.2679 -0.2051 -0.3990 N 0 0 0 0 0 0 0 0 0 0 0 0 + -2.0640 0.5139 -0.3769 C 0 0 0 0 0 0 0 0 0 0 0 0 + -0.7313 0.7178 -0.0192 C 0 0 0 0 0 0 0 0 0 0 0 0 + -2.4761 -0.6830 -1.1703 C 0 0 0 0 0 0 0 0 0 0 0 0 + 1.6571 -0.2482 -0.1795 C 0 0 0 0 0 0 0 0 0 0 0 0 + -3.0382 1.4350 0.0081 C 0 0 0 0 0 0 0 0 0 0 0 0 + -0.3728 1.8429 0.7234 C 0 0 0 0 0 0 0 0 0 0 0 0 + -2.6797 2.5600 0.7506 C 0 0 0 0 0 0 0 0 0 0 0 0 + -1.3470 2.7640 1.1083 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2.5353 0.3477 -1.0918 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2.1740 -0.8865 0.9534 C 0 0 0 0 0 0 0 0 0 0 0 0 + -2.8480 -1.8749 -0.3123 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.9124 0.3058 -0.8739 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.5511 -0.9285 1.1713 C 0 0 0 0 0 0 0 0 0 0 0 0 + 4.4203 -0.3324 0.2576 C 0 0 0 0 0 0 0 0 0 0 0 0 + -1.7086 -0.9792 -1.8930 H 0 0 0 0 0 0 0 0 0 0 0 0 + -3.3614 -0.4266 -1.7676 H 0 0 0 0 0 0 0 0 0 0 0 0 + -0.0861 -1.1146 -0.6780 H 0 0 0 0 0 0 0 0 0 0 0 0 + -4.0812 1.2885 -0.2604 H 0 0 0 0 0 0 0 0 0 0 0 0 + 0.6569 2.0278 1.0167 H 0 0 0 0 0 0 0 0 0 0 0 0 + -3.4382 3.2769 1.0511 H 0 0 0 0 0 0 0 0 0 0 0 0 + -1.0683 3.6399 1.6868 H 0 0 0 0 0 0 0 0 0 0 0 0 + 4.6037 0.7654 -1.5758 H 0 0 0 0 0 0 0 0 0 0 0 0 + 3.9635 -1.4215 2.0480 H 0 0 0 0 0 0 0 0 0 0 0 0 + 5.4925 -0.3651 0.4274 H 0 0 0 0 0 0 0 0 0 0 0 0 + -3.5025 -3.7011 -0.5102 H 0 0 0 0 0 0 0 0 0 0 0 0 + 1 14 1 0 0 0 0 + 2 15 1 0 0 0 0 + 3 16 1 0 0 0 0 + 3 30 1 0 0 0 0 + 4 16 2 0 0 0 0 + 5 7 1 0 0 0 0 + 5 9 1 0 0 0 0 + 5 22 1 0 0 0 0 + 6 7 1 0 0 0 0 + 6 8 1 0 0 0 0 + ...... 
+ + - cutoff : 0.0 + +* output:: + + 27 28 0 0 0 0 0 0 0 0999 V2000 + -4.8550 1.3401 0.2120 Cl 0 0 0 0 0 0 0 0 0 0 0 0 + 4.8529 -1.3406 0.2121 Cl 0 0 0 0 0 0 0 0 0 0 0 0 + -0.1809 -2.1668 -0.3789 O 0 0 0 0 0 0 0 0 0 0 0 0 + 0.1788 2.1664 -0.3787 O 0 0 0 0 0 0 0 0 0 0 0 0 + -0.0011 -0.0002 1.4744 C 0 0 0 0 0 0 0 0 0 0 0 0 + -1.2222 -0.2738 0.6597 C 0 0 0 0 0 0 0 0 0 0 0 0 + 1.2377 0.2772 0.6480 C 0 0 0 0 0 0 0 0 0 0 0 0 + -1.2586 -1.3462 -0.2316 C 0 0 0 0 0 0 0 0 0 0 0 0 + 1.2565 1.3457 -0.2314 C 0 0 0 0 0 0 0 0 0 0 0 0 + -2.3343 0.5568 0.7972 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2.3322 -0.5574 0.7972 C 0 0 0 0 0 0 0 0 0 0 0 0 + -2.4069 -1.5879 -0.9855 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2.4048 1.5875 -0.9852 C 0 0 0 0 0 0 0 0 0 0 0 0 + -3.4827 0.3152 0.0433 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.4807 -0.3156 0.0435 C 0 0 0 0 0 0 0 0 0 0 0 0 + -3.5190 -0.7571 -0.8481 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.5170 0.7568 -0.8478 C 0 0 0 0 0 0 0 0 0 0 0 0 + -0.1548 0.8649 2.1342 H 0 0 0 0 0 0 0 0 0 0 0 0 + 0.1601 -0.8435 2.1593 H 0 0 0 0 0 0 0 0 0 0 0 0 + -2.3089 1.3938 1.4913 H 0 0 0 0 0 0 0 0 0 0 0 0 + 2.3053 -1.3909 1.4943 H 0 0 0 0 0 0 0 0 0 0 0 0 + -2.4415 -2.4213 -1.6818 H 0 0 0 0 0 0 0 0 0 0 0 0 + 2.4469 2.4191 -1.6835 H 0 0 0 0 0 0 0 0 0 0 0 0 + -4.4070 -0.9574 -1.4422 H 0 0 0 0 0 0 0 0 0 0 0 0 + 4.4050 0.9570 -1.4418 H 0 0 0 0 0 0 0 0 0 0 0 0 + 0.2961 -2.2262 0.4641 H 0 0 0 0 0 0 0 0 0 0 0 0 + 0.3872 2.8487 -1.0397 H 0 0 0 0 0 0 0 0 0 0 0 0 + 1 14 1 0 0 0 0 + 2 15 1 0 0 0 0 + 3 8 1 0 0 0 0 + 3 26 1 0 0 0 0 + 4 9 1 0 0 0 0 + 4 27 1 0 0 0 0 + 5 6 1 0 0 0 0 + 5 7 1 0 0 0 0 + 5 18 1 0 0 0 0 + 5 19 1 0 0 0 0 + 6 8 2 0 0 0 0 + ...... + + </help> +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/shape-it/test_data/CID_3033.sdf Sun Mar 31 13:16:06 2013 -0400 @@ -0,0 +1,271 @@ +3033 + -OEChem-08231107463D + + 30 31 0 0 0 0 0 0 0999 V2000 + 1.9541 1.1500 -2.5078 Cl 0 0 0 0 0 0 0 0 0 0 0 0 + 1.1377 -1.6392 2.1136 Cl 0 0 0 0 0 0 0 0 0 0 0 0 + -3.2620 -2.9284 -1.0647 O 0 0 0 0 0 0 0 0 0 0 0 0 + -2.7906 -1.9108 0.9092 O 0 0 0 0 0 0 0 0 0 0 0 0 + 0.2679 -0.2051 -0.3990 N 0 0 0 0 0 0 0 0 0 0 0 0 + -2.0640 0.5139 -0.3769 C 0 0 0 0 0 0 0 0 0 0 0 0 + -0.7313 0.7178 -0.0192 C 0 0 0 0 0 0 0 0 0 0 0 0 + -2.4761 -0.6830 -1.1703 C 0 0 0 0 0 0 0 0 0 0 0 0 + 1.6571 -0.2482 -0.1795 C 0 0 0 0 0 0 0 0 0 0 0 0 + -3.0382 1.4350 0.0081 C 0 0 0 0 0 0 0 0 0 0 0 0 + -0.3728 1.8429 0.7234 C 0 0 0 0 0 0 0 0 0 0 0 0 + -2.6797 2.5600 0.7506 C 0 0 0 0 0 0 0 0 0 0 0 0 + -1.3470 2.7640 1.1083 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2.5353 0.3477 -1.0918 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2.1740 -0.8865 0.9534 C 0 0 0 0 0 0 0 0 0 0 0 0 + -2.8480 -1.8749 -0.3123 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.9124 0.3058 -0.8739 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.5511 -0.9285 1.1713 C 0 0 0 0 0 0 0 0 0 0 0 0 + 4.4203 -0.3324 0.2576 C 0 0 0 0 0 0 0 0 0 0 0 0 + -1.7086 -0.9792 -1.8930 H 0 0 0 0 0 0 0 0 0 0 0 0 + -3.3614 -0.4266 -1.7676 H 0 0 0 0 0 0 0 0 0 0 0 0 + -0.0861 -1.1146 -0.6780 H 0 0 0 0 0 0 0 0 0 0 0 0 + -4.0812 1.2885 -0.2604 H 0 0 0 0 0 0 0 0 0 0 0 0 + 0.6569 2.0278 1.0167 H 0 0 0 0 0 0 0 0 0 0 0 0 + -3.4382 3.2769 1.0511 H 0 0 0 0 0 0 0 0 0 0 0 0 + -1.0683 3.6399 1.6868 H 0 0 0 0 0 0 0 0 0 0 0 0 + 4.6037 0.7654 -1.5758 H 0 0 0 0 0 0 0 0 0 0 0 0 + 3.9635 -1.4215 2.0480 H 0 0 0 0 0 0 0 0 0 0 0 0 + 5.4925 -0.3651 0.4274 H 0 0 0 0 0 0 0 0 0 0 0 0 + -3.5025 -3.7011 -0.5102 H 0 0 0 0 0 0 0 0 0 0 0 0 + 1 14 1 0 0 0 0 + 2 15 1 0 0 0 0 + 3 16 1 0 0 0 0 + 3 30 1 0 0 0 0 + 4 16 2 0 0 0 0 + 5 7 1 0 0 0 0 + 5 9 1 0 0 0 0 + 5 22 1 0 0 0 0 + 6 7 1 0 0 0 0 + 6 8 1 0 0 0 0 + 6 10 2 0 0 0 0 + 7 11 2 0 0 0 0 + 8 16 1 0 0 0 0 + 8 20 1 0 0 0 0 + 8 21 1 0 0 0 0 + 9 14 2 0 0 0 0 + 9 15 1 0 0 0 0 
+ 10 12 1 0 0 0 0 + 10 23 1 0 0 0 0 + 11 13 1 0 0 0 0 + 11 24 1 0 0 0 0 + 12 13 2 0 0 0 0 + 12 25 1 0 0 0 0 + 13 26 1 0 0 0 0 + 14 17 1 0 0 0 0 + 15 18 2 0 0 0 0 + 17 19 2 0 0 0 0 + 17 27 1 0 0 0 0 + 18 19 1 0 0 0 0 + 18 28 1 0 0 0 0 + 19 29 1 0 0 0 0 +M END +> <PUBCHEM_COMPOUND_CID> +3033 + +> <PUBCHEM_CONFORMER_RMSD> +0.6 + +> <PUBCHEM_CONFORMER_DIVERSEORDER> +1 +20 +18 +39 +29 +42 +38 +35 +30 +25 +33 +28 +32 +36 +26 +24 +40 +11 +27 +37 +7 +41 +10 +19 +43 +8 +6 +16 +44 +23 +34 +14 +15 +31 +9 +13 +17 +21 +22 +5 +12 +2 +3 +4 + +> <PUBCHEM_MMFF94_PARTIAL_CHARGES> +28 +1 -0.18 +10 -0.15 +11 -0.15 +12 -0.15 +13 -0.15 +14 0.18 +15 0.18 +16 0.66 +17 -0.15 +18 -0.15 +19 -0.15 +2 -0.18 +22 0.4 +23 0.15 +24 0.15 +25 0.15 +26 0.15 +27 0.15 +28 0.15 +29 0.15 +3 -0.65 +30 0.5 +4 -0.57 +5 -0.6 +6 -0.14 +7 0.1 +8 0.2 +9 0.1 + +> <PUBCHEM_EFFECTIVE_ROTOR_COUNT> +4 + +> <PUBCHEM_PHARMACOPHORE_FEATURES> +7 +1 3 acceptor +1 4 acceptor +1 5 cation +1 5 donor +3 3 4 16 anion +6 6 7 10 11 12 13 rings +6 9 14 15 17 18 19 rings + +> <PUBCHEM_HEAVY_ATOM_COUNT> +19 + +> <PUBCHEM_ATOM_DEF_STEREO_COUNT> +0 + +> <PUBCHEM_ATOM_UDEF_STEREO_COUNT> +0 + +> <PUBCHEM_BOND_DEF_STEREO_COUNT> +0 + +> <PUBCHEM_BOND_UDEF_STEREO_COUNT> +0 + +> <PUBCHEM_ISOTOPIC_ATOM_COUNT> +0 + +> <PUBCHEM_COMPONENT_COUNT> +1 + +> <PUBCHEM_CACTVS_TAUTO_COUNT> +1 + +> <PUBCHEM_CONFORMER_ID> +00000BD900000001 + +> <PUBCHEM_MMFF94_ENERGY> +65.6362 + +> <PUBCHEM_FEATURE_SELFOVERLAP> +35.578 + +> <PUBCHEM_SHAPE_FINGERPRINT> +10366900 7 17386020514759110480 +114674 6 16903282898360328323 +11578080 2 17913245089295617604 +11582403 64 14544541357940910356 +11640471 11 18127963303313961600 +12236239 1 18272088352834916308 +12363563 72 18042978579496277287 +12553582 1 18190740839094073615 +12596599 1 18201439237582433270 +12788726 201 18410285909464206003 +13032168 30 18201440238019390274 +13140716 1 18187086113919468457 +13538477 17 18339642338307470464 +13583140 156 17241914119188522922 +13764800 53 17895191172601517065 
+13965767 371 17259888045752176376 +14115302 16 18342181093776810149 +14787075 74 17907866106787333628 +15279307 12 18198622322777022915 +15375462 189 18270674264943931347 +15669948 3 18336550511731321249 +16752209 62 18336841852664817743 +16945 1 18188484791351783177 +19433438 48 18059583550169763352 +200 152 18130792217719576158 +20645476 183 18270115859187436189 +20905425 154 17970632883131290416 +21452121 199 18046637711133085653 +21639500 275 16988270998321974524 +22112679 90 18342446063036096292 +23419403 2 17835564502519425292 +23493267 7 18115023138028600728 +23526113 38 16660924516543134566 +23557571 272 17821721762863303772 +23559900 14 17896315990920094510 +23598288 3 18411412925846384519 +23598291 2 18059009613384180254 +238 59 16343141308025475526 +4340502 62 17273677940604857177 +6049 1 17240202131864233360 +6992083 37 18058168521433072460 +7615 1 18201433675414973908 +77492 1 18272651289913926852 +81228 2 17968373550240022809 +9709674 26 17896035610527288590 + +> <PUBCHEM_SHAPE_MULTIPOLES> +378.03 +7.01 +2.75 +1.77 +0.78 +1.58 +0.3 +0.41 +1.94 +-1.08 +1.9 +-8.69 +11.04 +2.58 + +> <PUBCHEM_SHAPE_SELFOVERLAP> +790.335 + +> <PUBCHEM_SHAPE_VOLUME> +214.7 + +> <PUBCHEM_COORDINATE_TYPE> +2 +5 +255 + +$$$$ +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/shape-it/test_data/CID_3037.sdf Sun Mar 31 13:16:06 2013 -0400 @@ -0,0 +1,220 @@ +3037 + -OEChem-08231108593D + + 27 28 0 0 0 0 0 0 0999 V2000 + -4.8550 1.3401 0.2120 Cl 0 0 0 0 0 0 0 0 0 0 0 0 + 4.8529 -1.3406 0.2121 Cl 0 0 0 0 0 0 0 0 0 0 0 0 + -0.1809 -2.1668 -0.3789 O 0 0 0 0 0 0 0 0 0 0 0 0 + 0.1788 2.1664 -0.3787 O 0 0 0 0 0 0 0 0 0 0 0 0 + -0.0011 -0.0002 1.4744 C 0 0 0 0 0 0 0 0 0 0 0 0 + -1.2222 -0.2738 0.6597 C 0 0 0 0 0 0 0 0 0 0 0 0 + 1.2377 0.2772 0.6480 C 0 0 0 0 0 0 0 0 0 0 0 0 + -1.2586 -1.3462 -0.2316 C 0 0 0 0 0 0 0 0 0 0 0 0 + 1.2565 1.3457 -0.2314 C 0 0 0 0 0 0 0 0 0 0 0 0 + -2.3343 0.5568 0.7972 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2.3322 -0.5574 0.7972 C 0 0 0 0 0 0 0 0 0 0 0 0 + -2.4069 -1.5879 -0.9855 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2.4048 1.5875 -0.9852 C 0 0 0 0 0 0 0 0 0 0 0 0 + -3.4827 0.3152 0.0433 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.4807 -0.3156 0.0435 C 0 0 0 0 0 0 0 0 0 0 0 0 + -3.5190 -0.7571 -0.8481 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.5170 0.7568 -0.8478 C 0 0 0 0 0 0 0 0 0 0 0 0 + -0.1548 0.8649 2.1342 H 0 0 0 0 0 0 0 0 0 0 0 0 + 0.1601 -0.8435 2.1593 H 0 0 0 0 0 0 0 0 0 0 0 0 + -2.3089 1.3938 1.4913 H 0 0 0 0 0 0 0 0 0 0 0 0 + 2.3053 -1.3909 1.4943 H 0 0 0 0 0 0 0 0 0 0 0 0 + -2.4415 -2.4213 -1.6818 H 0 0 0 0 0 0 0 0 0 0 0 0 + 2.4469 2.4191 -1.6835 H 0 0 0 0 0 0 0 0 0 0 0 0 + -4.4070 -0.9574 -1.4422 H 0 0 0 0 0 0 0 0 0 0 0 0 + 4.4050 0.9570 -1.4418 H 0 0 0 0 0 0 0 0 0 0 0 0 + 0.2961 -2.2262 0.4641 H 0 0 0 0 0 0 0 0 0 0 0 0 + 0.3872 2.8487 -1.0397 H 0 0 0 0 0 0 0 0 0 0 0 0 + 1 14 1 0 0 0 0 + 2 15 1 0 0 0 0 + 3 8 1 0 0 0 0 + 3 26 1 0 0 0 0 + 4 9 1 0 0 0 0 + 4 27 1 0 0 0 0 + 5 6 1 0 0 0 0 + 5 7 1 0 0 0 0 + 5 18 1 0 0 0 0 + 5 19 1 0 0 0 0 + 6 8 2 0 0 0 0 + 6 10 1 0 0 0 0 + 7 9 2 0 0 0 0 + 7 11 1 0 0 0 0 + 8 12 1 0 0 0 0 + 9 13 1 0 0 0 0 + 10 14 2 0 0 0 0 + 10 20 1 0 0 0 0 + 11 15 2 0 0 0 0 + 11 21 1 0 0 0 0 + 12 16 2 0 0 0 0 + 12 22 1 0 0 0 0 + 13 17 2 0 0 0 0 + 13 23 1 0 0 0 0 + 14 16 1 0 0 0 0 + 15 17 1 0 
0 0 0 + 16 24 1 0 0 0 0 + 17 25 1 0 0 0 0 +M END +> <PUBCHEM_COMPOUND_CID> +3037 + +> <PUBCHEM_CONFORMER_RMSD> +0.6 + +> <PUBCHEM_CONFORMER_DIVERSEORDER> +8 +10 +12 +1 +7 +5 +11 +3 +6 +9 +4 +2 + +> <PUBCHEM_MMFF94_PARTIAL_CHARGES> +25 +1 -0.18 +10 -0.15 +11 -0.15 +12 -0.15 +13 -0.15 +14 0.18 +15 0.18 +16 -0.15 +17 -0.15 +2 -0.18 +20 0.15 +21 0.15 +22 0.15 +23 0.15 +24 0.15 +25 0.15 +26 0.45 +27 0.45 +3 -0.53 +4 -0.53 +5 0.29 +6 -0.14 +7 -0.14 +8 0.08 +9 0.08 + +> <PUBCHEM_EFFECTIVE_ROTOR_COUNT> +2 + +> <PUBCHEM_PHARMACOPHORE_FEATURES> +4 +1 3 donor +1 4 donor +6 6 8 10 12 14 16 rings +6 7 9 11 13 15 17 rings + +> <PUBCHEM_HEAVY_ATOM_COUNT> +17 + +> <PUBCHEM_ATOM_DEF_STEREO_COUNT> +0 + +> <PUBCHEM_ATOM_UDEF_STEREO_COUNT> +0 + +> <PUBCHEM_BOND_DEF_STEREO_COUNT> +0 + +> <PUBCHEM_BOND_UDEF_STEREO_COUNT> +0 + +> <PUBCHEM_ISOTOPIC_ATOM_COUNT> +0 + +> <PUBCHEM_COMPONENT_COUNT> +1 + +> <PUBCHEM_CACTVS_TAUTO_COUNT> +5 + +> <PUBCHEM_CONFORMER_ID> +00000BDD00000008 + +> <PUBCHEM_MMFF94_ENERGY> +44.6858 + +> <PUBCHEM_FEATURE_SELFOVERLAP> +20.297 + +> <PUBCHEM_SHAPE_FINGERPRINT> +10062212 137 18261117369936506423 +104564 63 17986963035811110412 +11458722 120 18339359768245870841 +11471102 22 5472872458301843344 +11578080 2 18190204380446433792 +116883 192 18265608969609498196 +12236239 1 18410856576819659107 +12592029 89 18338223951597366363 +13549 16 18410575084668353682 +13693222 15 6555421915516066822 +13764800 53 14189033175566991199 +14115302 16 18186237320680093898 +14341114 328 10087642619424135543 +14787075 74 9511159855286719151 +14993402 34 18410855451538227223 +15099037 51 18340768233908588503 +15207287 21 15719111361650760302 +15375358 24 15647053767618106914 +15775835 57 18272650117329930317 +16945 1 17906452130063974618 +17834072 14 15936410035134206066 +18186145 218 17132117918276567720 +19422 9 18271525295227750719 +20279233 1 15719389529571237654 +20645476 183 18339080393619327415 +23402539 116 18186809105365620101 +23402655 69 18342736308283284156 +23559900 14 
17603590712323212176 +25 1 17561083592297532664 +26918003 58 6266902359448424189 +296302 2 15213020427345972082 +3082319 5 18338798905472319583 +34934 24 18341891845236497020 +633830 44 17703790310130762689 +74978 22 18266740181857992718 +7832392 63 18340206284835898173 +81228 2 15720767252053392762 +9981440 41 17403743242177431832 + +> <PUBCHEM_SHAPE_MULTIPOLES> +341.85 +8.38 +1.9 +1.1 +0.02 +0 +-1.15 +1.94 +-0.01 +0 +-0.39 +-4.15 +0.01 +0 + +> <PUBCHEM_SHAPE_SELFOVERLAP> +722.787 + +> <PUBCHEM_SHAPE_VOLUME> +193 + +> <PUBCHEM_COORDINATE_TYPE> +2 +5 +255 + +$$$$ +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/shape-it/test_data/shapeit_on_CID30333_and_CID3037.sdf Sun Mar 31 13:16:06 2013 -0400 @@ -0,0 +1,498 @@ +3033 + OpenBabel06221213273D + + 30 31 0 0 0 0 0 0 0 0999 V2000 + 1.9541 1.1500 -2.5078 Cl 0 0 0 0 0 0 0 0 0 0 0 0 + 1.1377 -1.6392 2.1136 Cl 0 0 0 0 0 0 0 0 0 0 0 0 + -3.2620 -2.9284 -1.0647 O 0 0 0 0 0 0 0 0 0 0 0 0 + -2.7906 -1.9108 0.9092 O 0 0 0 0 0 0 0 0 0 0 0 0 + 0.2679 -0.2051 -0.3990 N 0 0 0 0 0 0 0 0 0 0 0 0 + -2.0640 0.5139 -0.3769 C 0 0 0 0 0 0 0 0 0 0 0 0 + -0.7313 0.7178 -0.0192 C 0 0 0 0 0 0 0 0 0 0 0 0 + -2.4761 -0.6830 -1.1703 C 0 0 0 0 0 0 0 0 0 0 0 0 + 1.6571 -0.2482 -0.1795 C 0 0 0 0 0 0 0 0 0 0 0 0 + -3.0382 1.4350 0.0081 C 0 0 0 0 0 0 0 0 0 0 0 0 + -0.3728 1.8429 0.7234 C 0 0 0 0 0 0 0 0 0 0 0 0 + -2.6797 2.5600 0.7506 C 0 0 0 0 0 0 0 0 0 0 0 0 + -1.3470 2.7640 1.1083 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2.5353 0.3477 -1.0918 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2.1740 -0.8865 0.9534 C 0 0 0 0 0 0 0 0 0 0 0 0 + -2.8480 -1.8749 -0.3123 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.9124 0.3058 -0.8739 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.5511 -0.9285 1.1713 C 0 0 0 0 0 0 0 0 0 0 0 0 + 4.4203 -0.3324 0.2576 C 0 0 0 0 0 0 0 0 0 0 0 0 + -1.7086 -0.9792 -1.8930 H 0 0 0 0 0 0 0 0 0 0 0 0 + -3.3614 -0.4266 -1.7676 H 0 0 0 0 0 0 0 0 0 0 0 0 + -0.0861 -1.1146 -0.6780 H 0 0 0 0 0 0 0 0 0 0 0 0 + -4.0812 1.2885 -0.2604 H 0 0 0 0 0 0 0 0 0 0 0 0 + 0.6569 2.0278 1.0167 H 0 0 0 0 0 0 0 0 0 0 0 0 + -3.4382 3.2769 1.0511 H 0 0 0 0 0 0 0 0 0 0 0 0 + -1.0683 3.6399 1.6868 H 0 0 0 0 0 0 0 0 0 0 0 0 + 4.6037 0.7654 -1.5758 H 0 0 0 0 0 0 0 0 0 0 0 0 + 3.9635 -1.4215 2.0480 H 0 0 0 0 0 0 0 0 0 0 0 0 + 5.4925 -0.3651 0.4274 H 0 0 0 0 0 0 0 0 0 0 0 0 + -3.5025 -3.7011 -0.5102 H 0 0 0 0 0 0 0 0 0 0 0 0 + 1 14 1 0 0 0 0 + 2 15 1 0 0 0 0 + 3 16 1 0 0 0 0 + 3 30 1 0 0 0 0 + 4 16 2 0 0 0 0 + 5 7 1 0 0 0 0 + 5 9 1 0 0 0 0 + 5 22 1 0 0 0 0 + 6 7 1 0 0 0 0 + 6 8 1 0 0 0 0 + 6 10 2 0 0 0 0 + 7 11 2 0 0 0 0 + 8 16 1 0 0 0 0 + 8 20 1 0 0 0 0 + 8 21 1 0 0 0 0 + 9 14 
2 0 0 0 0 + 9 15 1 0 0 0 0 + 10 12 1 0 0 0 0 + 10 23 1 0 0 0 0 + 11 13 1 0 0 0 0 + 11 24 1 0 0 0 0 + 12 13 2 0 0 0 0 + 12 25 1 0 0 0 0 + 13 26 1 0 0 0 0 + 14 17 1 0 0 0 0 + 15 18 2 0 0 0 0 + 17 19 2 0 0 0 0 + 17 27 1 0 0 0 0 + 18 19 1 0 0 0 0 + 18 28 1 0 0 0 0 + 19 29 1 0 0 0 0 +M END +> <PUBCHEM_COMPOUND_CID> +3033 + +> <PUBCHEM_CONFORMER_RMSD> +0.6 + +> <PUBCHEM_CONFORMER_DIVERSEORDER> +1 +20 +18 +39 +29 +42 +38 +35 +30 +25 +33 +28 +32 +36 +26 +24 +40 +11 +27 +37 +7 +41 +10 +19 +43 +8 +6 +16 +44 +23 +34 +14 +15 +31 +9 +13 +17 +21 +22 +5 +12 +2 +3 +4 + +> <PUBCHEM_MMFF94_PARTIAL_CHARGES> +28 +1 -0.18 +10 -0.15 +11 -0.15 +12 -0.15 +13 -0.15 +14 0.18 +15 0.18 +16 0.66 +17 -0.15 +18 -0.15 +19 -0.15 +2 -0.18 +22 0.4 +23 0.15 +24 0.15 +25 0.15 +26 0.15 +27 0.15 +28 0.15 +29 0.15 +3 -0.65 +30 0.5 +4 -0.57 +5 -0.6 +6 -0.14 +7 0.1 +8 0.2 +9 0.1 + +> <PUBCHEM_EFFECTIVE_ROTOR_COUNT> +4 + +> <PUBCHEM_PHARMACOPHORE_FEATURES> +7 +1 3 acceptor +1 4 acceptor +1 5 cation +1 5 donor +3 3 4 16 anion +6 6 7 10 11 12 13 rings +6 9 14 15 17 18 19 rings + +> <PUBCHEM_HEAVY_ATOM_COUNT> +19 + +> <PUBCHEM_ATOM_DEF_STEREO_COUNT> +0 + +> <PUBCHEM_ATOM_UDEF_STEREO_COUNT> +0 + +> <PUBCHEM_BOND_DEF_STEREO_COUNT> +0 + +> <PUBCHEM_BOND_UDEF_STEREO_COUNT> +0 + +> <PUBCHEM_ISOTOPIC_ATOM_COUNT> +0 + +> <PUBCHEM_COMPONENT_COUNT> +1 + +> <PUBCHEM_CACTVS_TAUTO_COUNT> +1 + +> <PUBCHEM_CONFORMER_ID> +00000BD900000001 + +> <PUBCHEM_MMFF94_ENERGY> +65.6362 + +> <PUBCHEM_FEATURE_SELFOVERLAP> +35.578 + +> <PUBCHEM_SHAPE_FINGERPRINT> +10366900 7 17386020514759110480 +114674 6 16903282898360328323 +11578080 2 17913245089295617604 +11582403 64 14544541357940910356 +11640471 11 18127963303313961600 +12236239 1 18272088352834916308 +12363563 72 18042978579496277287 +12553582 1 18190740839094073615 +12596599 1 18201439237582433270 +12788726 201 18410285909464206003 +13032168 30 18201440238019390274 +13140716 1 18187086113919468457 +13538477 17 18339642338307470464 +13583140 156 17241914119188522922 +13764800 53 
17895191172601517065 +13965767 371 17259888045752176376 +14115302 16 18342181093776810149 +14787075 74 17907866106787333628 +15279307 12 18198622322777022915 +15375462 189 18270674264943931347 +15669948 3 18336550511731321249 +16752209 62 18336841852664817743 +16945 1 18188484791351783177 +19433438 48 18059583550169763352 +200 152 18130792217719576158 +20645476 183 18270115859187436189 +20905425 154 17970632883131290416 +21452121 199 18046637711133085653 +21639500 275 16988270998321974524 +22112679 90 18342446063036096292 +23419403 2 17835564502519425292 +23493267 7 18115023138028600728 +23526113 38 16660924516543134566 +23557571 272 17821721762863303772 +23559900 14 17896315990920094510 +23598288 3 18411412925846384519 +23598291 2 18059009613384180254 +238 59 16343141308025475526 +4340502 62 17273677940604857177 +6049 1 17240202131864233360 +6992083 37 18058168521433072460 +7615 1 18201433675414973908 +77492 1 18272651289913926852 +81228 2 17968373550240022809 +9709674 26 17896035610527288590 + +> <PUBCHEM_SHAPE_MULTIPOLES> +378.03 +7.01 +2.75 +1.77 +0.78 +1.58 +0.3 +0.41 +1.94 +-1.08 +1.9 +-8.69 +11.04 +2.58 + +> <PUBCHEM_SHAPE_SELFOVERLAP> +790.335 + +> <PUBCHEM_SHAPE_VOLUME> +214.7 + +> <PUBCHEM_COORDINATE_TYPE> +2 +5 +255 + +$$$$ +3037 + OpenBabel06221213273D + + 27 28 0 0 0 0 0 0 0 0999 V2000 + -4.6270 1.4735 -0.9536 Cl 0 0 0 0 0 0 0 0 0 0 0 0 + 4.8388 -1.1619 1.2563 Cl 0 0 0 0 0 0 0 0 0 0 0 0 + -0.2498 -1.8412 1.1315 O 0 0 0 0 0 0 0 0 0 0 0 0 + 0.4506 1.3706 -1.7145 O 0 0 0 0 0 0 0 0 0 0 0 0 + 0.1177 0.9916 1.0973 C 0 0 0 0 0 0 0 0 0 0 0 0 + -1.1288 0.3277 0.6123 C 0 0 0 0 0 0 0 0 0 0 0 0 + 1.3667 0.5707 0.3508 C 0 0 0 0 0 0 0 0 0 0 0 0 + -1.2580 -1.0607 0.6511 C 0 0 0 0 0 0 0 0 0 0 0 0 + 1.4615 0.7850 -1.0132 C 0 0 0 0 0 0 0 0 0 0 0 0 + -2.1705 1.1115 0.1163 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2.3933 -0.0252 1.0634 C 0 0 0 0 0 0 0 0 0 0 0 0 + -2.4288 -1.6652 0.1938 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2.6182 0.3915 -1.6859 C 0 0 0 0 0 0 0 0 0 0 0 0 + -3.3414 0.5070 -0.3410 
C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.5502 -0.4186 0.3908 C 0 0 0 0 0 0 0 0 0 0 0 0 + -3.4706 -0.8814 -0.3023 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.6626 -0.2104 -0.9839 C 0 0 0 0 0 0 0 0 0 0 0 0 + 0.0389 2.0843 1.0134 H 0 0 0 0 0 0 0 0 0 0 0 0 + 0.2183 0.8049 2.1749 H 0 0 0 0 0 0 0 0 0 0 0 0 + -2.0727 2.1943 0.0848 H 0 0 0 0 0 0 0 0 0 0 0 0 + 2.3073 -0.1842 2.1352 H 0 0 0 0 0 0 0 0 0 0 0 0 + -2.5356 -2.7462 0.2209 H 0 0 0 0 0 0 0 0 0 0 0 0 + 2.7192 0.5473 -2.7566 H 0 0 0 0 0 0 0 0 0 0 0 0 + -4.3771 -1.3662 -0.6554 H 0 0 0 0 0 0 0 0 0 0 0 0 + 4.5580 -0.5122 -1.5211 H 0 0 0 0 0 0 0 0 0 0 0 0 + 0.2289 -1.3586 1.8240 H 0 0 0 0 0 0 0 0 0 0 0 0 + 0.7059 1.4288 -2.6511 H 0 0 0 0 0 0 0 0 0 0 0 0 + 1 14 1 0 0 0 0 + 2 15 1 0 0 0 0 + 3 8 1 0 0 0 0 + 3 26 1 0 0 0 0 + 4 9 1 0 0 0 0 + 4 27 1 0 0 0 0 + 5 6 1 0 0 0 0 + 5 7 1 0 0 0 0 + 5 18 1 0 0 0 0 + 5 19 1 0 0 0 0 + 6 8 2 0 0 0 0 + 6 10 1 0 0 0 0 + 7 9 2 0 0 0 0 + 7 11 1 0 0 0 0 + 8 12 1 0 0 0 0 + 9 13 1 0 0 0 0 + 10 14 2 0 0 0 0 + 10 20 1 0 0 0 0 + 11 15 2 0 0 0 0 + 11 21 1 0 0 0 0 + 12 16 2 0 0 0 0 + 12 22 1 0 0 0 0 + 13 17 2 0 0 0 0 + 13 23 1 0 0 0 0 + 14 16 1 0 0 0 0 + 15 17 1 0 0 0 0 + 16 24 1 0 0 0 0 + 17 25 1 0 0 0 0 +M END +> <PUBCHEM_COMPOUND_CID> +3037 + +> <PUBCHEM_CONFORMER_RMSD> +0.6 + +> <PUBCHEM_CONFORMER_DIVERSEORDER> +8 +10 +12 +1 +7 +5 +11 +3 +6 +9 +4 +2 + +> <PUBCHEM_MMFF94_PARTIAL_CHARGES> +25 +1 -0.18 +10 -0.15 +11 -0.15 +12 -0.15 +13 -0.15 +14 0.18 +15 0.18 +16 -0.15 +17 -0.15 +2 -0.18 +20 0.15 +21 0.15 +22 0.15 +23 0.15 +24 0.15 +25 0.15 +26 0.45 +27 0.45 +3 -0.53 +4 -0.53 +5 0.29 +6 -0.14 +7 -0.14 +8 0.08 +9 0.08 + +> <PUBCHEM_EFFECTIVE_ROTOR_COUNT> +2 + +> <PUBCHEM_PHARMACOPHORE_FEATURES> +4 +1 3 donor +1 4 donor +6 6 8 10 12 14 16 rings +6 7 9 11 13 15 17 rings + +> <PUBCHEM_HEAVY_ATOM_COUNT> +17 + +> <PUBCHEM_ATOM_DEF_STEREO_COUNT> +0 + +> <PUBCHEM_ATOM_UDEF_STEREO_COUNT> +0 + +> <PUBCHEM_BOND_DEF_STEREO_COUNT> +0 + +> <PUBCHEM_BOND_UDEF_STEREO_COUNT> +0 + +> <PUBCHEM_ISOTOPIC_ATOM_COUNT> +0 + +> 
<PUBCHEM_COMPONENT_COUNT> +1 + +> <PUBCHEM_CACTVS_TAUTO_COUNT> +5 + +> <PUBCHEM_CONFORMER_ID> +00000BDD00000008 + +> <PUBCHEM_MMFF94_ENERGY> +44.6858 + +> <PUBCHEM_FEATURE_SELFOVERLAP> +20.297 + +> <PUBCHEM_SHAPE_FINGERPRINT> +10062212 137 18261117369936506423 +104564 63 17986963035811110412 +11458722 120 18339359768245870841 +11471102 22 5472872458301843344 +11578080 2 18190204380446433792 +116883 192 18265608969609498196 +12236239 1 18410856576819659107 +12592029 89 18338223951597366363 +13549 16 18410575084668353682 +13693222 15 6555421915516066822 +13764800 53 14189033175566991199 +14115302 16 18186237320680093898 +14341114 328 10087642619424135543 +14787075 74 9511159855286719151 +14993402 34 18410855451538227223 +15099037 51 18340768233908588503 +15207287 21 15719111361650760302 +15375358 24 15647053767618106914 +15775835 57 18272650117329930317 +16945 1 17906452130063974618 +17834072 14 15936410035134206066 +18186145 218 17132117918276567720 +19422 9 18271525295227750719 +20279233 1 15719389529571237654 +20645476 183 18339080393619327415 +23402539 116 18186809105365620101 +23402655 69 18342736308283284156 +23559900 14 17603590712323212176 +25 1 17561083592297532664 +26918003 58 6266902359448424189 +296302 2 15213020427345972082 +3082319 5 18338798905472319583 +34934 24 18341891845236497020 +633830 44 17703790310130762689 +74978 22 18266740181857992718 +7832392 63 18340206284835898173 +81228 2 15720767252053392762 +9981440 41 17403743242177431832 + +> <PUBCHEM_SHAPE_MULTIPOLES> +341.85 +8.38 +1.9 +1.1 +0.02 +0 +-1.15 +1.94 +-0.01 +0 +-0.39 +-4.15 +0.01 +0 + +> <PUBCHEM_SHAPE_SELFOVERLAP> +722.787 + +> <PUBCHEM_SHAPE_VOLUME> +193 + +> <PUBCHEM_COORDINATE_TYPE> +2 +5 +255 + +> <Shape-it::Tanimoto> +0.606434 + +> <Shape-it::Tversky_Ref> +0.721775 + +> <Shape-it::Tversky_Db> +0.791445 + +$$$$
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/strip-it/strip-it.xml Sun Mar 31 13:16:06 2013 -0400 @@ -0,0 +1,88 @@ +<tool id="stripit" name="Strip-it"> <!-- Galaxy wrapper around the strip-it command-line program --> + <description> extracts predefined scaffolds from molecules</description> + <requirements> + <requirement type="package" version="1.0.0">silicos_it</requirement> + </requirements> + <command > + strip-it --inputFormat ${infile.ext} --input $infile --output $outfile 2>&1 + </command> + <inputs> + <param name="infile" type="data" format='sdf,mol,mol2,smi' label="Molecule file" /> + </inputs> + <outputs> + <data name="outfile" format="tabular" /> + </outputs> + <tests> <!-- functional test: the expected output is matched by name against the 'outfile' data element declared in outputs --> + <test> + <param name="infile" ftype='sdf' value="CID_3037.sdf"/> + <output name="outfile" ftype="tabular" file="Strip-it_on_CID3037.tabular" /> + </test> + </tests> + <help> + +**What it does** + +Strip-it is a program that extracts predefined scaffolds from organic +molecules + +The program comes with a number of predefined molecular scaffolds for +extraction. These scaffolds include, amongst others + + - Molecular frameworks as originally described by Bemis and + Murcko;1 + - Molecular frameworks and the reduced molecular frameworks as + described by Ansgar Schuffenhauer and coworkers;2 + - Scaffold topologies as described by Sara Pollock and coworkers.3 + +----- + +**Example** + +* input :: + + - input + 27 28 0 0 0 0 0 0 0999 V2000 + -4.8550 1.3401 0.2120 Cl 0 0 0 0 0 0 0 0 0 0 0 0 + 4.8529 -1.3406 0.2121 Cl 0 0 0 0 0 0 0 0 0 0 0 0 + -0.1809 -2.1668 -0.3789 O 0 0 0 0 0 0 0 0 0 0 0 0 + 0.1788 2.1664 -0.3787 O 0 0 0 0 0 0 0 0 0 0 0 0 + -0.0011 -0.0002 1.4744 C 0 0 0 0 0 0 0 0 0 0 0 0 + -1.2222 -0.2738 0.6597 C 0 0 0 0 0 0 0 0 0 0 0 0 + 1.2377 0.2772 0.6480 C 0 0 0 0 0 0 0 0 0 0 0 0 + -1.2586 -1.3462 -0.2316 C 0 0 0 0 0 0 0 0 0 0 0 0 + 1.2565 1.3457 -0.2314 C 0 0 0 0 0 0 0 0 0 0 0 0 + -2.3343 0.5568 0.7972 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2.3322 -0.5574 0.7972 C 0 0 0 0 0 0 0 0 0 0 0 0 + -2.4069 -1.5879 -0.9855 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2.4048 1.5875 
-0.9852 C 0 0 0 0 0 0 0 0 0 0 0 0 + -3.4827 0.3152 0.0433 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.4807 -0.3156 0.0435 C 0 0 0 0 0 0 0 0 0 0 0 0 + -3.5190 -0.7571 -0.8481 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.5170 0.7568 -0.8478 C 0 0 0 0 0 0 0 0 0 0 0 0 + -0.1548 0.8649 2.1342 H 0 0 0 0 0 0 0 0 0 0 0 0 + 0.1601 -0.8435 2.1593 H 0 0 0 0 0 0 0 0 0 0 0 0 + -2.3089 1.3938 1.4913 H 0 0 0 0 0 0 0 0 0 0 0 0 + 2.3053 -1.3909 1.4943 H 0 0 0 0 0 0 0 0 0 0 0 0 + -2.4415 -2.4213 -1.6818 H 0 0 0 0 0 0 0 0 0 0 0 0 + 2.4469 2.4191 -1.6835 H 0 0 0 0 0 0 0 0 0 0 0 0 + -4.4070 -0.9574 -1.4422 H 0 0 0 0 0 0 0 0 0 0 0 0 + 4.4050 0.9570 -1.4418 H 0 0 0 0 0 0 0 0 0 0 0 0 + 0.2961 -2.2262 0.4641 H 0 0 0 0 0 0 0 0 0 0 0 0 + 0.3872 2.8487 -1.0397 H 0 0 0 0 0 0 0 0 0 0 0 0 + 1 14 1 0 0 0 0 + 2 15 1 0 0 0 0 + 3 8 1 0 0 0 0 + 3 26 1 0 0 0 0 + 4 9 1 0 0 0 0 + 4 27 1 0 0 0 0 + 5 6 1 0 0 0 0 + + + +* output :: + + NAME MOLECULE RINGS_WITH_LINKERS_1 RINGS_WITH_LINKERS_2 MURCKO_1 MURCKO_2 OPREA_1 OPREA_2 OPREA_3 SCHUFFENHAUER_1 SCHUFFENHAUER_2 SCHUFFENHAUER_3 SCHUFFENHAUER_4 SCHUFFENHAUER_5 + 3037 Oc1ccc(cc1Cc1cc(Cl)ccc1O)Cl c1ccc(cc1)Cc1ccccc1 c1ccc(cc1)Cc1ccccc1 C1CCC(CC1)CC1CCCCC1 C1CCC(CC1)C1CCCCC1 C1CC1C1CC1 C1CC1C1CC1 C1CC1C1CC1 c1ccccc1 + + </help> +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/strip-it/test-data/CID_3037.sdf Sun Mar 31 13:16:06 2013 -0400 @@ -0,0 +1,220 @@ +3037 + -OEChem-08231108593D + + 27 28 0 0 0 0 0 0 0999 V2000 + -4.8550 1.3401 0.2120 Cl 0 0 0 0 0 0 0 0 0 0 0 0 + 4.8529 -1.3406 0.2121 Cl 0 0 0 0 0 0 0 0 0 0 0 0 + -0.1809 -2.1668 -0.3789 O 0 0 0 0 0 0 0 0 0 0 0 0 + 0.1788 2.1664 -0.3787 O 0 0 0 0 0 0 0 0 0 0 0 0 + -0.0011 -0.0002 1.4744 C 0 0 0 0 0 0 0 0 0 0 0 0 + -1.2222 -0.2738 0.6597 C 0 0 0 0 0 0 0 0 0 0 0 0 + 1.2377 0.2772 0.6480 C 0 0 0 0 0 0 0 0 0 0 0 0 + -1.2586 -1.3462 -0.2316 C 0 0 0 0 0 0 0 0 0 0 0 0 + 1.2565 1.3457 -0.2314 C 0 0 0 0 0 0 0 0 0 0 0 0 + -2.3343 0.5568 0.7972 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2.3322 -0.5574 0.7972 C 0 0 0 0 0 0 0 0 0 0 0 0 + -2.4069 -1.5879 -0.9855 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2.4048 1.5875 -0.9852 C 0 0 0 0 0 0 0 0 0 0 0 0 + -3.4827 0.3152 0.0433 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.4807 -0.3156 0.0435 C 0 0 0 0 0 0 0 0 0 0 0 0 + -3.5190 -0.7571 -0.8481 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.5170 0.7568 -0.8478 C 0 0 0 0 0 0 0 0 0 0 0 0 + -0.1548 0.8649 2.1342 H 0 0 0 0 0 0 0 0 0 0 0 0 + 0.1601 -0.8435 2.1593 H 0 0 0 0 0 0 0 0 0 0 0 0 + -2.3089 1.3938 1.4913 H 0 0 0 0 0 0 0 0 0 0 0 0 + 2.3053 -1.3909 1.4943 H 0 0 0 0 0 0 0 0 0 0 0 0 + -2.4415 -2.4213 -1.6818 H 0 0 0 0 0 0 0 0 0 0 0 0 + 2.4469 2.4191 -1.6835 H 0 0 0 0 0 0 0 0 0 0 0 0 + -4.4070 -0.9574 -1.4422 H 0 0 0 0 0 0 0 0 0 0 0 0 + 4.4050 0.9570 -1.4418 H 0 0 0 0 0 0 0 0 0 0 0 0 + 0.2961 -2.2262 0.4641 H 0 0 0 0 0 0 0 0 0 0 0 0 + 0.3872 2.8487 -1.0397 H 0 0 0 0 0 0 0 0 0 0 0 0 + 1 14 1 0 0 0 0 + 2 15 1 0 0 0 0 + 3 8 1 0 0 0 0 + 3 26 1 0 0 0 0 + 4 9 1 0 0 0 0 + 4 27 1 0 0 0 0 + 5 6 1 0 0 0 0 + 5 7 1 0 0 0 0 + 5 18 1 0 0 0 0 + 5 19 1 0 0 0 0 + 6 8 2 0 0 0 0 + 6 10 1 0 0 0 0 + 7 9 2 0 0 0 0 + 7 11 1 0 0 0 0 + 8 12 1 0 0 0 0 + 9 13 1 0 0 0 0 + 10 14 2 0 0 0 0 + 10 20 1 0 0 0 0 + 11 15 2 0 0 0 0 + 11 21 1 0 0 0 0 + 12 16 2 0 0 0 0 + 12 22 1 0 0 0 0 + 13 17 2 0 0 0 0 + 13 23 1 0 0 0 0 + 14 16 1 0 0 0 0 + 15 17 1 0 
0 0 0 + 16 24 1 0 0 0 0 + 17 25 1 0 0 0 0 +M END +> <PUBCHEM_COMPOUND_CID> +3037 + +> <PUBCHEM_CONFORMER_RMSD> +0.6 + +> <PUBCHEM_CONFORMER_DIVERSEORDER> +8 +10 +12 +1 +7 +5 +11 +3 +6 +9 +4 +2 + +> <PUBCHEM_MMFF94_PARTIAL_CHARGES> +25 +1 -0.18 +10 -0.15 +11 -0.15 +12 -0.15 +13 -0.15 +14 0.18 +15 0.18 +16 -0.15 +17 -0.15 +2 -0.18 +20 0.15 +21 0.15 +22 0.15 +23 0.15 +24 0.15 +25 0.15 +26 0.45 +27 0.45 +3 -0.53 +4 -0.53 +5 0.29 +6 -0.14 +7 -0.14 +8 0.08 +9 0.08 + +> <PUBCHEM_EFFECTIVE_ROTOR_COUNT> +2 + +> <PUBCHEM_PHARMACOPHORE_FEATURES> +4 +1 3 donor +1 4 donor +6 6 8 10 12 14 16 rings +6 7 9 11 13 15 17 rings + +> <PUBCHEM_HEAVY_ATOM_COUNT> +17 + +> <PUBCHEM_ATOM_DEF_STEREO_COUNT> +0 + +> <PUBCHEM_ATOM_UDEF_STEREO_COUNT> +0 + +> <PUBCHEM_BOND_DEF_STEREO_COUNT> +0 + +> <PUBCHEM_BOND_UDEF_STEREO_COUNT> +0 + +> <PUBCHEM_ISOTOPIC_ATOM_COUNT> +0 + +> <PUBCHEM_COMPONENT_COUNT> +1 + +> <PUBCHEM_CACTVS_TAUTO_COUNT> +5 + +> <PUBCHEM_CONFORMER_ID> +00000BDD00000008 + +> <PUBCHEM_MMFF94_ENERGY> +44.6858 + +> <PUBCHEM_FEATURE_SELFOVERLAP> +20.297 + +> <PUBCHEM_SHAPE_FINGERPRINT> +10062212 137 18261117369936506423 +104564 63 17986963035811110412 +11458722 120 18339359768245870841 +11471102 22 5472872458301843344 +11578080 2 18190204380446433792 +116883 192 18265608969609498196 +12236239 1 18410856576819659107 +12592029 89 18338223951597366363 +13549 16 18410575084668353682 +13693222 15 6555421915516066822 +13764800 53 14189033175566991199 +14115302 16 18186237320680093898 +14341114 328 10087642619424135543 +14787075 74 9511159855286719151 +14993402 34 18410855451538227223 +15099037 51 18340768233908588503 +15207287 21 15719111361650760302 +15375358 24 15647053767618106914 +15775835 57 18272650117329930317 +16945 1 17906452130063974618 +17834072 14 15936410035134206066 +18186145 218 17132117918276567720 +19422 9 18271525295227750719 +20279233 1 15719389529571237654 +20645476 183 18339080393619327415 +23402539 116 18186809105365620101 +23402655 69 18342736308283284156 +23559900 14 
17603590712323212176 +25 1 17561083592297532664 +26918003 58 6266902359448424189 +296302 2 15213020427345972082 +3082319 5 18338798905472319583 +34934 24 18341891845236497020 +633830 44 17703790310130762689 +74978 22 18266740181857992718 +7832392 63 18340206284835898173 +81228 2 15720767252053392762 +9981440 41 17403743242177431832 + +> <PUBCHEM_SHAPE_MULTIPOLES> +341.85 +8.38 +1.9 +1.1 +0.02 +0 +-1.15 +1.94 +-0.01 +0 +-0.39 +-4.15 +0.01 +0 + +> <PUBCHEM_SHAPE_SELFOVERLAP> +722.787 + +> <PUBCHEM_SHAPE_VOLUME> +193 + +> <PUBCHEM_COORDINATE_TYPE> +2 +5 +255 + +$$$$ +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/strip-it/test-data/Strip-it_on_CID3037.tabular Sun Mar 31 13:16:06 2013 -0400 @@ -0,0 +1,2 @@ +NAME MOLECULE RINGS_WITH_LINKERS_1 RINGS_WITH_LINKERS_2 MURCKO_1 MURCKO_2 OPREA_1 OPREA_2 OPREA_3 SCHUFFENHAUER_1 SCHUFFENHAUER_2 SCHUFFENHAUER_3 SCHUFFENHAUER_4 SCHUFFENHAUER_5 +3037 Oc1ccc(cc1Cc1cc(Cl)ccc1O)Cl c1ccc(cc1)Cc1ccccc1 c1ccc(cc1)Cc1ccccc1 C1CCC(CC1)CC1CCCCC1 C1CCC(CC1)C1CCCCC1 C1CC1C1CC1 C1CC1C1CC1 C1CC1C1CC1 c1ccccc1 c1ccc(cc1)Cc1ccccc1 c1ccc(cc1)Cc1ccccc1 c1ccc(cc1)Cc1ccccc1 c1ccc(cc1)Cc1ccccc1
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_dependencies.xml Sun Mar 31 13:16:06 2013 -0400 @@ -0,0 +1,72 @@ +<tool_dependency> + <package name="silicos_it" version="1.0.0"> + <install version="1.0"> + <actions> + <!--compiling openbabel 2.3.2 --> + <action type="download_by_url">http://downloads.sourceforge.net/project/openbabel/openbabel/2.3.2/openbabel-2.3.2.tar.gz</action> + <action type="shell_command">cmake . -DCMAKE_INSTALL_PREFIX=$INSTALL_DIR/openbabel/build && make && make install</action> + + <!--compiling strip-it --> + <action type="shell_command">wget http://www.silicos-it.com/_php/download.php?file=strip-it-1.0.1.tar.gz</action> + <action type="shell_command">tar xfvz download.php?file=strip-it-1.0.1.tar.gz && + cd strip-it-1.0.1 && + cmake . -DOPENBABEL2_INCLUDE_DIRS=$INSTALL_DIR/openbabel/build/include/openbabel-2.0/ -DCMAKE_INSTALL_PREFIX=$INSTALL_DIR/strip-it/ -DOPENBABEL2_LIBRARIES=$INSTALL_DIR/openbabel/build/lib/libopenbabel.so && + make && + make install</action> + <action type="set_environment"> + <environment_variable name="PATH" action="prepend_to">$INSTALL_DIR/strip-it/bin</environment_variable> + </action> + + <!-- compiling align-it --> + <action type="shell_command">wget http://www.silicos-it.com/_php/download.php?file=align-it-1.0.3.tar.gz</action> + <action type="shell_command">tar xfvz download.php?file=align-it-1.0.3.tar.gz && + cd align-it-1.0.3 && + cmake . 
-DOPENBABEL2_INCLUDE_DIRS=$INSTALL_DIR/openbabel/build/include/openbabel-2.0/ -DCMAKE_INSTALL_PREFIX=$INSTALL_DIR/align-it/ -DOPENBABEL2_LIBRARIES=$INSTALL_DIR/openbabel/build/lib/libopenbabel.so && + make && + make install</action> + <action type="set_environment"> + <environment_variable name="PATH" action="prepend_to">$INSTALL_DIR/align-it/bin</environment_variable> + </action> + + <!-- compiling shape-it --> + <action type="shell_command">wget http://www.silicos-it.com/_php/download.php?file=shape-it-1.0.1.tar.gz</action> + <action type="shell_command">tar xfvz download.php?file=shape-it-1.0.1.tar.gz && + cd shape-it-1.0.1 && + cmake . -DOPENBABEL2_INCLUDE_DIRS=$INSTALL_DIR/openbabel/build/include/openbabel-2.0/ -DCMAKE_INSTALL_PREFIX=$INSTALL_DIR/shape-it/ -DOPENBABEL2_LIBRARIES=$INSTALL_DIR/openbabel/build/lib/libopenbabel.so && + make && + make install</action> + <action type="set_environment"> + <environment_variable name="PATH" action="prepend_to">$INSTALL_DIR/shape-it/bin</environment_variable> + </action> + + <!-- RDKit with boost, required for QED --> + <!-- select the boost build flags from uname -m: 64-bit (x86_64) hosts get address-model=64 and -fPIC, other hosts a plain build; the POSIX '=' test is used so the check also works when /bin/sh is not bash --> + <action type="shell_command">wget http://downloads.sourceforge.net/project/boost/boost/1.53.0/boost_1_53_0.tar.bz2</action> + <action type="shell_command">tar xfvj boost_1_53_0.tar.bz2 && + cd boost_1_53_0 && + ./bootstrap.sh --with-libraries=python,regex && + if [ "$(uname -m)" = 'x86_64' ]; then ./bjam address-model=64 cflags=-fPIC cxxflags=-fPIC install --prefix=$INSTALL_DIR/boost/; else ./bjam install --prefix=$INSTALL_DIR/boost/; fi;</action> + <!-- QED --> + <action type="shell_command">wget http://downloads.sourceforge.net/project/rdkit/rdkit/Q4_2012/RDKit_2012_12_1.tgz</action> + <action type="shell_command">tar xfvz RDKit_2012_12_1.tgz</action> + + <action type="shell_command">export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:`pwd`/RDKit_2012_12_1/lib/ && + export PYTHONPATH=$PYTHONPATH:`pwd`/ && + cd RDKit_2012_12_1 && + mkdir build && + cd build 
&& + cmake .. -DBOOST_ROOT=$INSTALL_DIR/boost/ -DCMAKE_INSTALL_PREFIX=$INSTALL_DIR/rdkit/ -DRDK_INSTALL_INTREE=OFF</action> + <action type="shell_command">cd RDKit_2012_12_1/build && make </action> + <action type="shell_command">cd RDKit_2012_12_1/build && make install</action> + + <action type="set_environment"> + <environment_variable name="PATH" action="prepend_to">$INSTALL_DIR/rdkit/bin</environment_variable> + <environment_variable name="PYTHONPATH" action="prepend_to">$INSTALL_DIR/rdkit/lib/</environment_variable> + <environment_variable name="LD_LIBRARY_PATH" action="prepend_to">$INSTALL_DIR/boost/lib/</environment_variable> + </action> + + </actions> + </install> + <readme>Compiling OpenBabel requires g++ and CMake 2.4+. Eigen version 2 or newer and the cairo development libraries are optional, but required for a few features.</readme> + </package> +</tool_dependency>