# HG changeset patch # User bgruening # Date 1368306391 14400 # Node ID b4ad35a9c82ade39c081b516702440acfcad0113 # Parent 77b168565931a28a883108d95c12b6b88fbd2291 Deleted selected files diff -r 77b168565931 -r b4ad35a9c82a rdkit_descriptors.py --- a/rdkit_descriptors.py Sat May 11 17:05:26 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,77 +0,0 @@ -#!/usr/bin/env python - -from rdkit.Chem import Descriptors -from rdkit import Chem -import sys, os, re -import argparse -import inspect - -def get_supplier( infile, format = 'smiles' ): - """ - Returns a generator over a SMILES or InChI file. Every element is of RDKit - molecule and has its original string as _Name property. - """ - with open(infile) as handle: - for line in handle: - line = line.strip() - if format == 'smiles': - mol = Chem.MolFromSmiles( line, sanitize=True ) - elif format == 'inchi': - mol = Chem.inchi.MolFromInchi( line, sanitize=True, removeHs=True, logLevel=None, treatWarningAsError=False ) - if mol is None: - yield False - else: - mol.SetProp( '_Name', line.split('\t')[0] ) - yield mol - - -def get_rdkit_descriptor_functions(): - """ - Returns all descriptor functions under the Chem.Descriptors Module as tuple of (name, function) - """ - ret = [ (name, f) for name, f in inspect.getmembers( Descriptors ) if inspect.isfunction( f ) and not name.startswith( '_' ) ] - ret.sort() - return ret - - -def descriptors( mol, functions ): - """ - Calculates the descriptors of a given molecule. - """ - for name, function in functions: - yield (name, function( mol )) - - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument('-i', '--infile', required=True, help='Path to the input file.') - parser.add_argument("--iformat", help="Specify the input file format.") - - parser.add_argument('-o', '--outfile', type=argparse.FileType('w+'), - default=sys.stdout, help="path to the result file, default it sdtout") - - parser.add_argument("--header", dest="header", action="store_true", - default=False, - help="Write header line.") - - args = parser.parse_args() - - if args.iformat == 'sdf': - supplier = Chem.SDMolSupplier( args.infile ) - elif args.iformat =='smi': - supplier = get_supplier( args.infile, format = 'smiles' ) - elif args.iformat == 'inchi': - supplier = get_supplier( args.infile, format = 'inchi' ) - - functions = get_rdkit_descriptor_functions() - - if args.header: - args.outfile.write( '%s\n' % '\t'.join( [name for name, f in functions] ) ) - - for mol in supplier: - if not mol: - continue - descs = descriptors( mol, functions ) - molecule_id = mol.GetProp("_Name") - args.outfile.write( "%s\n" % '\t'.join( [molecule_id]+ [str(res) for name, res in descs] ) ) - diff -r 77b168565931 -r b4ad35a9c82a rdkit_descriptors.xml --- a/rdkit_descriptors.xml Sat May 11 17:05:26 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,49 +0,0 @@ - - calculated with RDKit - - - rdkit - - rdkit_descriptors.py -i "${infile}" --iformat "${infile.ext}" -o "${outfile}" $header 2>&1 - - - - - - - - - - - -.. class:: infomark - -**TIP:** If your data is not in SD- or SMILES format, use the pencil icon on your dataset to convert your data - ------ - -**Example** - -- Input file:: - - - SD-Format (http://en.wikipedia.org/wiki/Chemical_table_file) - - SMILES Format (http://en.wikipedia.org/wiki/Simplified_molecular_input_line_entry_specification) - -- Result:: - - -http://code.google.com/p/rdkit/wiki/DescriptorsInTheRDKit -http://rdkit.org/Python_Docs/rdkit.Chem.Descriptors-module.html - - ------ - - -**Cite** - -Greg Landrum -[1] RDKit: Open-source cheminformatics; http://www.rdkit.org - - - - diff -r 77b168565931 -r b4ad35a9c82a repository_dependencies.xml --- a/repository_dependencies.xml Sat May 11 17:05:26 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,4 +0,0 @@ - - - - diff -r 77b168565931 -r b4ad35a9c82a tool_dependencies.xml --- a/tool_dependencies.xml Sat May 11 17:05:26 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,46 +0,0 @@ - - - - - - - - - - - - - http://rdkit.googlecode.com/files/RDKit_2013_03_2.tgz - - - - - - - - - - - - - - - export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:`pwd`/lib/ && - export PYTHONPATH=$PYTHONPATH:`pwd`/ && - mkdir build && - cd build && - cmake .. -DBOOST_ROOT=$BOOST_ROOT_DIR -DCMAKE_INSTALL_PREFIX=$INSTALL_DIR/rdkit/ -DRDK_INSTALL_INTREE=OFF -DPYTHON_NUMPY_INCLUDE_PATH=$PYTHONPATH_NUMPY/numpy/core/include/ - cd ./build && make - cd ./build && make install - - - $INSTALL_DIR/rdkit - $INSTALL_DIR/rdkit - $INSTALL_DIR/rdkit/bin - $INSTALL_DIR/rdkit/lib - - - - Compiling rdkit requires cmake, python headers, sqlite3, flex and bison. - -