# HG changeset patch # User bernhardlutz # Date 1399023628 14400 # Node ID 3c1e862e8cd68fe471633da361a704dcbf58e99f # Parent 35b41070c20dd517791f6c2abee2eafd62fc738f Uploaded diff -r 35b41070c20d -r 3c1e862e8cd6 readfile.py --- a/readfile.py Thu Apr 03 06:51:54 2014 -0400 +++ b/readfile.py Fri May 02 05:40:28 2014 -0400 @@ -17,3 +17,7 @@ else: return connection.read().rstrip() +def store_result(url, outfile): + data=getresult(url) + outfile.write(data) + outfile.close() diff -r 35b41070c20d -r 3c1e862e8cd6 readfile.pyc Binary file readfile.pyc has changed diff -r 35b41070c20d -r 3c1e862e8cd6 rest_tool.py --- a/rest_tool.py Thu Apr 03 06:51:54 2014 -0400 +++ b/rest_tool.py Fri May 02 05:40:28 2014 -0400 @@ -6,7 +6,7 @@ import readfile txt_output=["cids", "aids", "sids", "synonyms" ] -csv_output=["assaysummary"] +csv_output=["assaysummary", "property"] check_for_id_type=["cids", "aids", "sids"] def main(args): @@ -23,6 +23,8 @@ idlist=readfile.getListFromFile(args.id_file) idstring=",".join(idlist) url+=idstring+"/"+args.operation+"/" + if args.operation == "property": + url+=args.property_value+"/" if args.operation in csv_output: url+="csv" elif args.operation in txt_output: diff -r 35b41070c20d -r 3c1e862e8cd6 rest_tool.xml --- a/rest_tool.xml Thu Apr 03 06:51:54 2014 -0400 +++ b/rest_tool.xml Fri May 02 05:40:28 2014 -0400 @@ -6,17 +6,15 @@ rest_tool_macros.xml + rest_tool_macro_advanced.xml - #if $choose_action.action == 'search': - rest_tool_search.py --type $choose_action.input_type_search --name $choose_action.search_name --outfile $output - - #elif $choose_action.action == 'specific_data': + #if $choose_action.action == 'specific_data': rest_tool.py - #if $choose_action.field_or_file1.field_or_file1 == 'field': - --id $choose_action.field_or_file1.id1 + #if $choose_action.field_or_file.field_or_file_select == 'field': + --id $choose_action.field_or_file.id_value #else: - --id-file $choose_action.field_or_file1.file_ids_1 + --id-file $choose_action.field_or_file.id_value #end if --type $choose_action.choose_acs.input_type @@ -24,43 +22,31 @@ --operation $choose_action.choose_acs.operation_assay.operation_assay #if $choose_action.choose_acs.operation_assay.operation_assay == 'property': --property-value $choose_action.choose_acs.operation_assay.property_assay - #elif $choose_action.choose_acs.operation_assay.operation_assay == 'aids': - --id-type $choose_action.choose_acs.operation_assay.aids_type_assay - #elif $choose_action.choose_acs.operation_assay.operation_assay == 'cids': - --id-type $choose_action.choose_acs.operation_assay.cids_type_assay - #elif $choose_action.choose_acs.operation_assay.operation_assay == 'sids': - --id-type $choose_action.choose_acs.operation_assay.sids_type_assay + #elif $choose_action.choose_acs.operation_assay.operation_assay == 'aids' or $choose_action.choose_acs.operation_assay.operation_assay == 'cids' or $choose_action.choose_acs.operation_assay.operation_assay == 'sids': + --id-type $choose_action.choose_acs.operation_assay.ids_type_assay #end if #elif $choose_action.choose_acs.input_type == 'compound': --operation $choose_action.choose_acs.operation_compound.operation_compound #if $choose_action.choose_acs.operation_compound.operation_compound == 'property': --property-value $choose_action.choose_acs.operation_compound.property_compound - #elif $choose_action.choose_acs.operation_compound.operation_compound == 'aids': - --id-type $choose_action.choose_acs.operation_compound.aids_type_compound - #elif $choose_action.choose_acs.operation_compound.operation_compound == 'cids': - --id-type $choose_action.choose_acs.operation_compound.cids_type_compound - #elif $choose_action.choose_acs.operation_compound.operation_compound == 'sids': - --id-type $choose_action.choose_acs.operation_compound.sids_type_compound + #elif $choose_action.choose_acs.operation_compound.operation_compound == 'aids' or $choose_action.choose_acs.operation_compound.operation_compound == 'cids' or $choose_action.choose_acs.operation_compound.operation_compound == 'sids': + --id-type $choose_action.choose_acs.operation_compound.ids_type_compound #end if #else: --operation $choose_action.choose_acs.operation_substance.operation_substance #if $choose_action.choose_acs.operation_substance.operation_substance == 'property': --property-value $choose_action.choose_acs.operation_substance.property_substance - #elif $choose_action.choose_acs.operation_substance.operation_substance == 'aids': - --id-type $choose_action.choose_acs.operation_substance.aids_type_substance - #elif $choose_action.choose_acs.operation_substance.operation_substance == 'cids': - --id-type $choose_action.choose_acs.operation_substance.cids_type_substance - #elif $choose_action.choose_acs.operation_substance.operation_substance == 'sids': - --id-type $choose_action.choose_acs.operation_substance.sids_type_substance + #elif $choose_action.choose_acs.operation_substance.operation_substance == 'aids' or $choose_action.choose_acs.operation_substance.operation_substance == 'cids' or $choose_action.choose_acs.operation_substance.operation_substance == 'sids': + --id-type $choose_action.choose_acs.operation_substance.ids_type_substance #end if #end if --outfile $output #elif $choose_action.action == 'compounds_for_assay': rest_tool_comp_for_assay.py - #if $choose_action.field_or_file2.field_or_file2 == 'field': - --aid $choose_action.field_or_file2.id2 + #if $choose_action.field_or_file.field_or_file_select == 'field': + --aid $choose_action.field_or_file.id_value #else: - --aid-file $choose_action.field_or_file2.file_ids_2 + --aid-file $choose_action.field_or_file.id_value #end if --outfile $output #elif $choose_action.action == 'assays_by_activity_or_target': @@ -72,23 +58,81 @@ --target-type $choose_action.activity_or_target.target_identifier_type #end if --outfile $output - + #elif $choose_action.action == 'advanced': + rest_tool_advanced.py + --outfile $output + --type $choose_action.input_type.input_type_select + --id-type $choose_action.input_type.namespace_id.namespace_id_select + #if $choose_action.input_type.input_type_select == 'assay': + #if $choose_action.input_type.namespace_id.namespace_id_select == 'aid': + --id-type-ff $choose_action.input_type.namespace_id.field_or_file.field_or_file_select + --id-value $choose_action.input_type.namespace_id.field_or_file.id_value + #elif $choose_action.input_type.namespace_id.namespace_id_select == 'target': + --id-value $choose_action.input_type.namespace_id.target_select + #elif $choose_action.input_type.namespace_id.namespace_id_select == 'type': + --id-value $choose_action.input_type.namespace_id.type_select + #elif $choose_action.input_type.namespace_id.namespace_id_select == 'activity': + --id-value $choose_action.input_type.namespace_id.activity_select + #end if + + --operation $choose_action.input_type.operation_assay.operation_assay_select + + #if $choose_action.input_type.operation_assay.operation_assay_select == 'property': + --operation-value $choose_action.input_type.operation_assay.operation_assay_select.property_assay + #elif $choose_action.input_type.operation_assay.operation_assay_select == 'target': + --operation-value $choose_action.input_type.operation_assay.operation_assay_select.target_assay + #elif $choose_action.input_type.operation_assay.operation_assay_select == 'aids' or $choose_action.input_type.operation_assay.operation_assay_select == 'cids' or $choose_action.input_type.operation_assay.operation_assay_select == 'sids': + --ids-operation-type $choose_action.input_type.operation_assay.ids_type_assay + #end if + #elif $choose_action.input_type.input_type_select == 'compound': + #if $choose_action.input_type.namespace_id.namespace_id_select == 'cid' or $choose_action.input_type.namespace_id.namespace_id_select == 'name' or $choose_action.input_type.namespace_id.namespace_id_select == 'smiles' or $choose_action.input_type.namespace_id.namespace_id_select == 'inchi' or $choose_action.input_type.namespace_id.namespace_id_select == 'inchikey' or $choose_action.input_type.namespace_id.namespace_id_select == 'sdf': + --id-type-ff $choose_action.input_type.namespace_id.field_or_file.field_or_file_select + --id-value $choose_action.input_type.namespace_id.field_or_file.id_value + #elif $choose_action.input_type.namespace_id.namespace_id_select == 'xref': + --xref $choose_action.input_type.namespace_id.xref_select + --xref-value $choose_action.input_type.namespace_id.xref_value + #end if + + --operation $choose_action.input_type.operation_compound.operation_compound_select + #if $choose_action.input_type.operation_compound.operation_compound_select == 'property': + --operation-value $choose_action.input_type.operation_compound.property_compound + #elif $choose_action.input_type.operation_compound.operation_compound_select == 'xrefs': + --operation-value $choose_action.input_type.operation_compound.xref_compound + #elif $choose_action.input_type.operation_compound.operation_compound_select == 'aids' or $choose_action.input_type.operation_compound.operation_compound_select == 'cids' or $choose_action.input_type.operation_compound.operation_compound_select == 'sids': + --ids-operation-type $choose_action.input_type.operation_compound.ids_type_compound + #end if + + #elif $choose_action.input_type.input_type_select == 'substance': + #if $choose_action.input_type.namespace_id.namespace_id_select == 'sid' or $choose_action.input_type.namespace_id.namespace_id_select == 'name': + --id-type-ff $choose_action.input_type.namespace_id.field_or_file.field_or_file_select + --id-value $choose_action.input_type.namespace_id.field_or_file.id_value + #elif $choose_action.input_type.namespace_id.namespace_id_select == 'xrefs': + --xref $choose_action.input_type.namespace_id.xref_select + --xref-value $choose_action.input_type.namespace_id.xref_value + #end if + --operation $choose_action.input_type.operation_substance.operation_substance_select + #if $choose_action.input_type.operation_substance.operation_substance_select == 'property': + --operation-value $choose_action.input_type.operation_substance.operation_substance_select.property_substance + #elif $choose_action.input_type.operation_substance.operation_substance_select == 'xref': + --operation-value $choose_action.input_type.operation_substance.xref_substance + #elif $choose_action.input_type.operation_substance.operation_substance_select == 'aids' or $choose_action.input_type.operation_substance.operation_substance_select == 'cids' or $choose_action.input_type.operation_substance.operation_substance_select == 'sids': + --ids-operation-type $choose_action.input_type.operation_substance.ids_type_substance + #end if + #end if #end if - + - - - + @@ -96,128 +140,26 @@ - - - - - - - - - - - - + - - - + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + - - - - - - - - - - - - - - - - - - - - - - - - - - + - - - - - - - - - - - - - - - - - - - - - - - - - + - - - - - - - - - - - - + @@ -240,7 +182,9 @@ - + + + @@ -251,6 +195,6 @@ **What it does** -This tool fetches data from pubchem +This tool fetches data from pubchem. The outputformat will be if possible csv > txt > xml diff -r 35b41070c20d -r 3c1e862e8cd6 rest_tool_advanced.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/rest_tool_advanced.py Fri May 02 05:40:28 2014 -0400 @@ -0,0 +1,80 @@ +#!/usr/bin/env python + +import sys, os +import argparse + +import readfile + +#dicitionary for the output format + +dict_output={"cids" :"txt", "aids" : "txt", "sids" : "txt", "description": "xml", "summary" : "xml", "record" : "csv", "classification": "xml", "targets" : "txt", "xrefs" : "txt", "synonyms" : "txt", "property": "csv" } + +#alles andere ist xml +check_for_id_type=["cids", "aids", "sids"] + +id_dict={"compound": "cid", "assay": "aid", "substance" : "sid" } + +def getListString(args): + if args.id_type_ff == "file": + #build comma list + list_string=",".join(getListFromFile(open(args.id_value,"r"))) + else: + list_string=args.id_value + return list_string + + +def main(args): + + url="http://pubchem.ncbi.nlm.nih.gov/rest/pug/"+args.type+"/" + url+=args.id_type+"/" + if args.id_type ==id_dict[args.type]: + url+=getListString(args)+"/" + else: + url+=args.id_value+"/" + url+=args.operation+"/" + if args.operation == "target" or args.operation == "property" or args.operation == "xrefs": + url+=args.operation_value+"/" + + url+=dict_output[args.operation] + if args.operation in check_for_id_type: + url+="?"+args.operation+"_type="+args.ids_operation_type + print(url) + readfile.store_result(url, args.outfile) + + + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument('--type', type=str, required=True, + help="That you want BioAssay Compund ...") + parser.add_argument('--id-type', type=str, + help="Specify the ID type") + parser.add_argument('--operation', type=str, required=True, + help="Specify the operation") + parser.add_argument('--operation-value', dest="operation_value", type=str, required=False, + help="Specify the additional operation value") + parser.add_argument('--xref-operation-value', dest="xref_operation_value", type=str, required=False, + help="Specify the xref operation ") + parser.add_argument('--ids-operation-type', dest="ids_operation_type", type=str, required=False, + help="all inactive ...") + parser.add_argument('--xref', dest="xref", type=str, + help="use xref to identify the searched thing") + parser.add_argument('--xref-value', dest="xref_value", type=str, + help="Specify the xref") + parser.add_argument('--property-value', dest="property_value", type=str, + help="Specify the property value") + parser.add_argument('--id-type-ff', dest="id_type_ff", type=str, + help="file or field") + parser.add_argument('--id-value', dest="id_value", type=str, required=True, + help="Specify the id") + parser.add_argument('--outfile', type=argparse.FileType('w'), required=True, + help="Specify the output file") + + + if len(sys.argv) < 8: + print "Too few arguments..." + parser.print_help() + exit(1) + args = parser.parse_args() + main( args ) diff -r 35b41070c20d -r 3c1e862e8cd6 rest_tool_assay_by_activity_or_target.py --- a/rest_tool_assay_by_activity_or_target.py Thu Apr 03 06:51:54 2014 -0400 +++ b/rest_tool_assay_by_activity_or_target.py Fri May 02 05:40:28 2014 -0400 @@ -9,7 +9,7 @@ url="http://pubchem.ncbi.nlm.nih.gov/rest/pug/assay/" if args.activity is None: #target - url+="target/"+args.targettype+"/"+args.targetid + url+= "target/%s/%s" % ( args.targettype, args.targetid ) else: url+="activity/"+args.activity url+="/aids/txt" diff -r 35b41070c20d -r 3c1e862e8cd6 rest_tool_assays_with_cids_given_target.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/rest_tool_assays_with_cids_given_target.py Fri May 02 05:40:28 2014 -0400 @@ -0,0 +1,93 @@ +#!/usr/bin/env python + + +import sys, os +import argparse +import readfile + +#get every aid as a list +#returns a dictionary with aid as key and as value the list of cids +def getAllAssayIDs(): + url="http://pubchem.ncbi.nlm.nih.gov/rest/pug/assay/type/all/aids/TXT" + data=readfile.getresult(url) + aidlist=readfile.getListFromString(data) + return aidlist + + +def getIDofLine(line): + arr=line.split(">") + if len(arr) > 1: + aid=arr[1].split("<")[0] + return aid + else: + return "-1" + +#get xml of all aids with cids for an activity +def getAllCidsForAssayActivity(activity): + url="http://pubchem.ncbi.nlm.nih.gov/rest/pug/assay/activity/"+activity+"/aids/txt?list_return=listkey" + listkey=readfile.getresult(url) +# url="http://pubchem.ncbi.nlm.nih.gov/rest/pug/assay/listkey/"+listkey+"/cids/xml" + url="http://pubchem.ncbi.nlm.nih.gov/rest/pug/assay/aid/25425,12345/cids/xml" + print("url: "+url) + xml=readfile.getresult(url) + + #init parser + handler = DictHandler() + parser = sax.make_parser() + parser.setContentHandler(handler) + + tempfile=open("tempfile","w") + #handle the last line, there is sometimes some random output + lastline_arr=xml.split("\n") + #print(lastline_arr) + + print("l: ") + print(len(lastline_arr)) + lastline=lastline_arr[len(lastline_arr)-1] + print("lastline: "+lastline) + print("lastline-2: "+lastline_arr[len(lastline_arr)-2]) + cidlastline=getIDofLine(lastline) + aidkey="-1" + if cidlastline != "-1": + i=len(lastline_arr)-2 + #search for nex aid entry + while i >= 0 and "AID" not in lastline_arr[i]: + i-=1 + if i >= 0: + aid=getIDofLine(lastline_arr[i]) + if aid != "-1": + aidkey=aid + #remove the last line and put the array back together + + lastline_arr_list=list(lastline_arr) + #lastline_arr_list.remove(lastline) + xml2="\n".join(lastline_arr_list) + tempfile.write(xml2) + #add the last tags + #tempfile.write("") + tempfile.close() + parser.parse(open("tempfile","r")) + dic=handler.ergebnis + + #add the last line + #if cidlastline != "-1": + # dic[aidkey].append(cidlastline) + return dic + + +def main(args): + aid_cid_dict=getAllCidsForAssayActivity(args.target) + write_to_csv(aid_cid_dict, args.outfile) + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument('--outfile', type=argparse.FileType('w'), + help="Specify output file") + parser.add_argument('--target', type=str, + help="Specify output file") + if len(sys.argv) < 2: + print "Too few arguments..." + parser.print_help() + exit(1) + args = parser.parse_args() + main( args ) diff -r 35b41070c20d -r 3c1e862e8cd6 rest_tool_macro_advanced.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/rest_tool_macro_advanced.xml Fri May 02 05:40:28 2014 -0400 @@ -0,0 +1,109 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r 35b41070c20d -r 3c1e862e8cd6 rest_tool_macros.xml --- a/rest_tool_macros.xml Thu Apr 03 06:51:54 2014 -0400 +++ b/rest_tool_macros.xml Fri May 02 05:40:28 2014 -0400 @@ -4,7 +4,11 @@ - + + + + + @@ -57,10 +61,191 @@ - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +