Mercurial > repos > bgruening > package_rdkit_2013_03
changeset 4:b4ad35a9c82a
Deleted selected files
author | bgruening |
---|---|
date | Sat, 11 May 2013 17:06:31 -0400 (2013-05-11) |
parents | 77b168565931 |
children | 72792a51a161 |
files | rdkit_descriptors.py rdkit_descriptors.xml repository_dependencies.xml tool_dependencies.xml |
diffstat | 4 files changed, 0 insertions(+), 176 deletions(-) [+] |
line wrap: on
line diff
--- a/rdkit_descriptors.py Sat May 11 17:05:26 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,77 +0,0 @@ -#!/usr/bin/env python - -from rdkit.Chem import Descriptors -from rdkit import Chem -import sys, os, re -import argparse -import inspect - -def get_supplier( infile, format = 'smiles' ): - """ - Returns a generator over a SMILES or InChI file. Every element is of RDKit - molecule and has its original string as _Name property. - """ - with open(infile) as handle: - for line in handle: - line = line.strip() - if format == 'smiles': - mol = Chem.MolFromSmiles( line, sanitize=True ) - elif format == 'inchi': - mol = Chem.inchi.MolFromInchi( line, sanitize=True, removeHs=True, logLevel=None, treatWarningAsError=False ) - if mol is None: - yield False - else: - mol.SetProp( '_Name', line.split('\t')[0] ) - yield mol - - -def get_rdkit_descriptor_functions(): - """ - Returns all descriptor functions under the Chem.Descriptors Module as tuple of (name, function) - """ - ret = [ (name, f) for name, f in inspect.getmembers( Descriptors ) if inspect.isfunction( f ) and not name.startswith( '_' ) ] - ret.sort() - return ret - - -def descriptors( mol, functions ): - """ - Calculates the descriptors of a given molecule. - """ - for name, function in functions: - yield (name, function( mol )) - - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument('-i', '--infile', required=True, help='Path to the input file.') - parser.add_argument("--iformat", help="Specify the input file format.") - - parser.add_argument('-o', '--outfile', type=argparse.FileType('w+'), - default=sys.stdout, help="path to the result file, default it sdtout") - - parser.add_argument("--header", dest="header", action="store_true", - default=False, - help="Write header line.") - - args = parser.parse_args() - - if args.iformat == 'sdf': - supplier = Chem.SDMolSupplier( args.infile ) - elif args.iformat =='smi': - supplier = get_supplier( args.infile, format = 'smiles' ) - elif args.iformat == 'inchi': - supplier = get_supplier( args.infile, format = 'inchi' ) - - functions = get_rdkit_descriptor_functions() - - if args.header: - args.outfile.write( '%s\n' % '\t'.join( [name for name, f in functions] ) ) - - for mol in supplier: - if not mol: - continue - descs = descriptors( mol, functions ) - molecule_id = mol.GetProp("_Name") - args.outfile.write( "%s\n" % '\t'.join( [molecule_id]+ [str(res) for name, res in descs] ) ) -
--- a/rdkit_descriptors.xml Sat May 11 17:05:26 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,49 +0,0 @@ -<tool id="ctb_rdkit_describtors" name="Descriptors" version="0.1"> - <description>calculated with RDKit</description> - <parallelism method="multi" split_inputs="infile" split_mode="to_size" split_size="10000" shared_inputs="" merge_outputs="outfile"></parallelism> - <requirements> - <requirement type="package" version="2012_12_1">rdkit</requirement> - </requirements> - <command interpreter="python">rdkit_descriptors.py -i "${infile}" --iformat "${infile.ext}" -o "${outfile}" $header 2>&1</command> - <inputs> - <param format="smi,sdf" name="infile" type="data" label="Molecule data in SD- or SMILES-format" help="Dataset missing? See TIP below"/> - <param name="header" type="boolean" label="Include the descriptor name as header" truevalue="--header" falsevalue="" checked="false" /> - </inputs> - <outputs> - <data format="tabular" name="outfile" /> - </outputs> - <tests> - </tests> - <help> - -.. class:: infomark - -**TIP:** If your data is not in SD- or SMILES format, use the pencil icon on your dataset to convert your data - ------ - -**Example** - -- Input file:: - - - SD-Format (http://en.wikipedia.org/wiki/Chemical_table_file) - - SMILES Format (http://en.wikipedia.org/wiki/Simplified_molecular_input_line_entry_specification) - -- Result:: - - -http://code.google.com/p/rdkit/wiki/DescriptorsInTheRDKit -http://rdkit.org/Python_Docs/rdkit.Chem.Descriptors-module.html - - ------ - - -**Cite** - -Greg Landrum -[1] RDKit: Open-source cheminformatics; http://www.rdkit.org - - - </help> -</tool>
--- a/repository_dependencies.xml Sat May 11 17:05:26 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,4 +0,0 @@ -<?xml version="1.0"?> -<repositories description="This requires the Molecule datatype definitions (e.g. SMILES, InChI, SD-format)."> - <repository toolshed="http://testtoolshed.g2.bx.psu.edu/" name="molecule_datatypes" owner="bgruening" changeset_revision="bc6a7eeb7c32" /> -</repositories>
--- a/tool_dependencies.xml Sat May 11 17:05:26 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,46 +0,0 @@ -<tool_dependency> - <package name="numpy" version="1.7.1"> - <repository toolshed="http://testtoolshed.g2.bx.psu.edu/" name="package_numpy_1_7" owner="bgruening" changeset_revision="ec80bba4bccb" prior_installation_required="True" /> - </package> - <package name="boost" version="1.53.0"> - <repository toolshed="http://testtoolshed.g2.bx.psu.edu/" name="package_boost_1_53" owner="bgruening" changeset_revision="f4209b50c801" prior_installation_required="True" /> - </package> - - <package name="rdkit" version="2013_03_2"> - <install version="1.0"> - <actions> - <!-- first action is always downloading --> - <action type="download_by_url">http://rdkit.googlecode.com/files/RDKit_2013_03_2.tgz</action> - - <!-- populate the environment variables from the dependend repos --> - <action type="set_environment_for_install"> - <repository toolshed="http://testtoolshed.g2.bx.psu.edu/" name="package_numpy_1_7" owner="bgruening" changeset_revision="ec80bba4bccb"> - <package name="numpy" version="1.7.1" /> - </repository> - </action> - <action type="set_environment_for_install"> - <repository toolshed="http://testtoolshed.g2.bx.psu.edu/" name="package_boost_1_53" owner="bgruening" changeset_revision="f4209b50c801"> - <package name="boost" version="1.53.0" /> - </repository> - </action> - - <!-- PYTHONPATH_NUMPY is set in the numpy package --> - <action type="shell_command">export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:`pwd`/lib/ && - export PYTHONPATH=$PYTHONPATH:`pwd`/ && - mkdir build && - cd build && - cmake .. -DBOOST_ROOT=$BOOST_ROOT_DIR -DCMAKE_INSTALL_PREFIX=$INSTALL_DIR/rdkit/ -DRDK_INSTALL_INTREE=OFF -DPYTHON_NUMPY_INCLUDE_PATH=$PYTHONPATH_NUMPY/numpy/core/include/</action> - <action type="shell_command">cd ./build && make </action> - <action type="shell_command">cd ./build && make install</action> - - <action type="set_environment"> - <environment_variable name="RDBASE" action="set_to">$INSTALL_DIR/rdkit</environment_variable> - <environment_variable name="LD_LIBRARY_PATH" action="append_to">$INSTALL_DIR/rdkit</environment_variable> - <environment_variable name="PATH" action="prepend_to">$INSTALL_DIR/rdkit/bin</environment_variable> - <environment_variable name="PYTHONPATH" action="prepend_to">$INSTALL_DIR/rdkit/lib</environment_variable> - </action> - </actions> - </install> - <readme>Compiling rdkit requires cmake, python headers, sqlite3, flex and bison.</readme> - </package> -</tool_dependency>