# HG changeset patch # User bernhardlutz # Date 1396522257 14400 # Node ID 9ee84d9fd3a7efcf6e183f757081d2aaded21ab2 # Parent 1ad35668671798d92e0c2210e2b4fc0e609d31fb Uploaded diff -r 1ad356686717 -r 9ee84d9fd3a7 readfile.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/readfile.py Thu Apr 03 06:50:57 2014 -0400 @@ -0,0 +1,19 @@ +#!/usr/bin/env python + +import io +import urllib2, urllib, httplib +def getListFromFile(file): + idlist=[] + for line in file: + if int(line): + idlist.append(line.strip()) + return idlist + +def getresult(url): + try: + connection = urllib2.urlopen(url) + except urllib2.HTTPError, e: + return "" + else: + return connection.read().rstrip() + diff -r 1ad356686717 -r 9ee84d9fd3a7 readfile.pyc Binary file readfile.pyc has changed diff -r 1ad356686717 -r 9ee84d9fd3a7 rest_tool.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/rest_tool.py Thu Apr 03 06:50:57 2014 -0400 @@ -0,0 +1,61 @@ +#!/usr/bin/env python + +import sys, os +import argparse + +import readfile + +txt_output=["cids", "aids", "sids", "synonyms" ] +csv_output=["assaysummary"] +check_for_id_type=["cids", "aids", "sids"] + +def main(args): + url="http://pubchem.ncbi.nlm.nih.gov/rest/pug/"+args.type+"/" + if args.type == "assay": + url+="aid/" + elif args.type == "compound": + url+="cid/" + elif args.type == "substance": + url+="sid/" + if args.id_file is None: + idstring=str(args.id) + else: + idlist=readfile.getListFromFile(args.id_file) + idstring=",".join(idlist) + url+=idstring+"/"+args.operation+"/" + if args.operation in csv_output: + url+="csv" + elif args.operation in txt_output: + url+="txt" + else: + url+="xml" + if args.operation in check_for_id_type and not args.id_type is None: + url+="?"+args.operation+"_type="+args.id_type + print(url) + data=readfile.getresult(url) + outfile=args.outfile + outfile.write(data) + outfile.close() + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument('--type', type=str, required=True, + help="That you want BioAssay Compund ...") + parser.add_argument('--id', type=str, + help="Specify the ID") + parser.add_argument('--operation', type=str, required=True, + help="Specify the operation") + parser.add_argument('--property-value', dest="property_value", type=str, + help="Specify the property") + parser.add_argument('--id-type', dest="id_type", type=str, + help="Specify the property") + parser.add_argument('--outfile', type=argparse.FileType('w'), required=True, + help="Specify one output file") + parser.add_argument('--id-file', dest="id_file", type=argparse.FileType('r'), + help="Specify a file with a list of ids, one per line") + if len(sys.argv) < 8: + print "Too few arguments..." + parser.print_help() + exit(1) + args = parser.parse_args() + main( args ) diff -r 1ad356686717 -r 9ee84d9fd3a7 rest_tool.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/rest_tool.xml Thu Apr 03 06:50:57 2014 -0400 @@ -0,0 +1,256 @@ + + Fetch pubchem data + echo "0.1.0" + + REST_TOOL_SCRIPT_PATH + + + rest_tool_macros.xml + + + #if $choose_action.action == 'search': + rest_tool_search.py --type $choose_action.input_type_search --name $choose_action.search_name --outfile $output + + #elif $choose_action.action == 'specific_data': + rest_tool.py + #if $choose_action.field_or_file1.field_or_file1 == 'field': + --id $choose_action.field_or_file1.id1 + #else: + --id-file $choose_action.field_or_file1.file_ids_1 + #end if + --type $choose_action.choose_acs.input_type + + #if $choose_action.choose_acs.input_type == 'assay': + --operation $choose_action.choose_acs.operation_assay.operation_assay + #if $choose_action.choose_acs.operation_assay.operation_assay == 'property': + --property-value $choose_action.choose_acs.operation_assay.property_assay + #elif $choose_action.choose_acs.operation_assay.operation_assay == 'aids': + --id-type $choose_action.choose_acs.operation_assay.aids_type_assay + #elif $choose_action.choose_acs.operation_assay.operation_assay == 'cids': + --id-type $choose_action.choose_acs.operation_assay.cids_type_assay + #elif $choose_action.choose_acs.operation_assay.operation_assay == 'sids': + --id-type $choose_action.choose_acs.operation_assay.sids_type_assay + #end if + #elif $choose_action.choose_acs.input_type == 'compound': + --operation $choose_action.choose_acs.operation_compound.operation_compound + #if $choose_action.choose_acs.operation_compound.operation_compound == 'property': + --property-value $choose_action.choose_acs.operation_compound.property_compound + #elif $choose_action.choose_acs.operation_compound.operation_compound == 'aids': + --id-type $choose_action.choose_acs.operation_compound.aids_type_compound + #elif $choose_action.choose_acs.operation_compound.operation_compound == 'cids': + --id-type $choose_action.choose_acs.operation_compound.cids_type_compound + #elif $choose_action.choose_acs.operation_compound.operation_compound == 'sids': + --id-type $choose_action.choose_acs.operation_compound.sids_type_compound + #end if + #else: + --operation $choose_action.choose_acs.operation_substance.operation_substance + #if $choose_action.choose_acs.operation_substance.operation_substance == 'property': + --property-value $choose_action.choose_acs.operation_substance.property_substance + #elif $choose_action.choose_acs.operation_substance.operation_substance == 'aids': + --id-type $choose_action.choose_acs.operation_substance.aids_type_substance + #elif $choose_action.choose_acs.operation_substance.operation_substance == 'cids': + --id-type $choose_action.choose_acs.operation_substance.cids_type_substance + #elif $choose_action.choose_acs.operation_substance.operation_substance == 'sids': + --id-type $choose_action.choose_acs.operation_substance.sids_type_substance + #end if + #end if + --outfile $output + #elif $choose_action.action == 'compounds_for_assay': + rest_tool_comp_for_assay.py + #if $choose_action.field_or_file2.field_or_file2 == 'field': + --aid $choose_action.field_or_file2.id2 + #else: + --aid-file $choose_action.field_or_file2.file_ids_2 + #end if + --outfile $output + #elif $choose_action.action == 'assays_by_activity_or_target': + rest_tool_assay_by_activity_or_target.py + #if $choose_action.activity_or_target.activity_or_target == 'activity': + --activity $choose_action.activity_or_target.activity + #else: + --target $choose_action.activity_or_target.target_id + --target-type $choose_action.activity_or_target.target_identifier_type + #end if + --outfile $output + + #end if + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +**What it does** + +This tool fetches data from pubchem + + diff -r 1ad356686717 -r 9ee84d9fd3a7 rest_tool_alt.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/rest_tool_alt.xml Thu Apr 03 06:50:57 2014 -0400 @@ -0,0 +1,156 @@ + + Fetch pubchem data + echo "0.1.0" + + REST_TOOL_SCRIPT_PATH + + + #if $choose_action.action == 'specific_data': + rest_tool.py + #if $choose_action.field_or_file1.field_or_file1 == 'field': + --id $choose_action.field_or_file1.id1 + #else: + --idfile $choose_action.field_or_file1.file_ids_1 + #end if + --type $choose_action.input_type + + --operation $choose_action.operation_property.operation + #if $choose_action.operation_property.operation == 'property': + --property-value $choose_action.operation_property.property + #end if + + --outfile $output + #elif $choose_action.action == 'compounds_for_assay': + rest_tool_comp_for_assay.py + #if $choose_action.field_or_file2.field_or_file2 == 'field': + --aid $choose_action.field_or_file2.id2 + #else: + --aidfile $choose_action.field_or_file2.file_ids_2 + #end if + --outfile $output + #elif $choose_action.action == 'assays_by_activity': + rest_tool_assay_by_activity_or_target.py --activity $choose_action.activity --outfile $output + #elif $choose_action.action == 'assays_by_targets': + rest_tool_assay_by_activity_or_target.py --targettype $choose_action.target_identifier_type --targetid $choose_action.target_id --outfile $output + #end if + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +**What it does** + +This tool fetches data from pubchem + + diff -r 1ad356686717 -r 9ee84d9fd3a7 rest_tool_assay_by_activity_or_target.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/rest_tool_assay_by_activity_or_target.py Thu Apr 03 06:50:57 2014 -0400 @@ -0,0 +1,36 @@ +#!/usr/bin/env python + +import sys, os +import argparse +import readfile + +def main(args): + #search for acitivity or target + url="http://pubchem.ncbi.nlm.nih.gov/rest/pug/assay/" + if args.activity is None: + #target + url+="target/"+args.targettype+"/"+args.targetid + else: + url+="activity/"+args.activity + url+="/aids/txt" + data=readfile.getresult(url) + args.outfile.write(data) + args.outfile.close() + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument('--activity', type=str, + help="Activities you are looking for") + parser.add_argument('--target-type', dest="target_type", type=str, + help="The target identifier type") + parser.add_argument('--target-id', dest="target_id", type=str, + help="The specific target") + parser.add_argument('--outfile', type=argparse.FileType('w'), required=True, + help="Specify output file") + if len(sys.argv) < 2: + print "Too few arguments..." + parser.print_help() + exit(1) + args = parser.parse_args() + main( args ) diff -r 1ad356686717 -r 9ee84d9fd3a7 rest_tool_assays_with_cids_given_target.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/rest_tool_assays_with_cids_given_target.py Thu Apr 03 06:50:57 2014 -0400 @@ -0,0 +1,93 @@ +#!/usr/bin/env python + + +import sys, os +import argparse +import readfile + +#get every aid as a list +#returns a dictionary with aid as key and as value the list of cids +def getAllAssayIDs(): + url="http://pubchem.ncbi.nlm.nih.gov/rest/pug/assay/type/all/aids/TXT" + data=readfile.getresult(url) + aidlist=readfile.getListFromString(data) + return aidlist + + +def getIDofLine(line): + arr=line.split(">") + if len(arr) > 1: + aid=arr[1].split("<")[0] + return aid + else: + return "-1" + +#get xml of all aids with cids for an activity +def getAllCidsForAssayActivity(activity): + url="http://pubchem.ncbi.nlm.nih.gov/rest/pug/assay/activity/"+activity+"/aids/txt?list_return=listkey" + listkey=readfile.getresult(url) +# url="http://pubchem.ncbi.nlm.nih.gov/rest/pug/assay/listkey/"+listkey+"/cids/xml" + url="http://pubchem.ncbi.nlm.nih.gov/rest/pug/assay/aid/25425,12345/cids/xml" + print("url: "+url) + xml=readfile.getresult(url) + + #init parser + handler = DictHandler() + parser = sax.make_parser() + parser.setContentHandler(handler) + + tempfile=open("tempfile","w") + #handle the last line, there is sometimes some random output + lastline_arr=xml.split("\n") + #print(lastline_arr) + + print("l: ") + print(len(lastline_arr)) + lastline=lastline_arr[len(lastline_arr)-1] + print("lastline: "+lastline) + print("lastline-2: "+lastline_arr[len(lastline_arr)-2]) + cidlastline=getIDofLine(lastline) + aidkey="-1" + if cidlastline != "-1": + i=len(lastline_arr)-2 + #search for nex aid entry + while i >= 0 and "AID" not in lastline_arr[i]: + i-=1 + if i >= 0: + aid=getIDofLine(lastline_arr[i]) + if aid != "-1": + aidkey=aid + #remove the last line and put the array back together + + lastline_arr_list=list(lastline_arr) + #lastline_arr_list.remove(lastline) + xml2="\n".join(lastline_arr_list) + tempfile.write(xml2) + #add the last tags + #tempfile.write("") + tempfile.close() + parser.parse(open("tempfile","r")) + dic=handler.ergebnis + + #add the last line + #if cidlastline != "-1": + # dic[aidkey].append(cidlastline) + return dic + + +def main(args): + aid_cid_dict=getAllCidsForAssayActivity(args.target) + write_to_csv(aid_cid_dict, args.outfile) + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument('--outfile', type=argparse.FileType('w'), + help="Specify output file") + parser.add_argument('--target', type=str, + help="Specify output file") + if len(sys.argv) < 2: + print "Too few arguments..." + parser.print_help() + exit(1) + args = parser.parse_args() + main( args ) diff -r 1ad356686717 -r 9ee84d9fd3a7 rest_tool_comp_for_assay.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/rest_tool_comp_for_assay.py Thu Apr 03 06:50:57 2014 -0400 @@ -0,0 +1,44 @@ +#!/usr/bin/env python + +import sys, os +import argparse +import tempfile +import readfile +import rest_tool_functions + + +#get the cids for bioassay aid +def get_aid_cid_dict_for_list(aidlist): + aidliststring=",".join(aidlist) + url="http://pubchem.ncbi.nlm.nih.gov/rest/pug/assay/aid/"+aidliststring+"/cids/xml" + xml=readfile.getresult(url) + tmp = tempfile.TemporaryFile() + tmp.write(xml) + tmp.seek(0) + dic=rest_tool_functions.give_aid_cid_dict_from_xml(tmp) + tmp.close() + return dic + +def main(args): + if args.aid_file is None: + aidlist=args.aid.split(",") + else: + aidlist=readfile.getListFromFile(args.aid_file) + dic=get_aid_cid_dict_for_list(aidlist) + rest_tool_functions.write_to_csv(dic, args.outfile) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument('--aid', type=str, + help="AIDs of the BioAssay") + parser.add_argument('--aid-file', dest="aid_file", type=argparse.FileType('r'), + help="Specify a file with a list of aids, one per line") + parser.add_argument('--outfile', type=argparse.FileType('w'), + help="Specify output file") + if len(sys.argv) < 2: + print "Too few arguments..." + parser.print_help() + exit(1) + args = parser.parse_args() + main( args ) diff -r 1ad356686717 -r 9ee84d9fd3a7 rest_tool_functions.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/rest_tool_functions.py Thu Apr 03 06:50:57 2014 -0400 @@ -0,0 +1,74 @@ +#!/usr/bin/env python + + +import sys, os +import argparse +import urllib2, urllib, httplib +import readfile +import xml.sax as sax + +class DictHandler(sax.handler.ContentHandler): + + def __init__(self): + self.ergebnis = {} + self.schluessel = "" + self.wert = "" + self.aktiv = None + + def startElement(self, name, attrs): + if name == "Information": + self.schluessel = "" + self.wert = "" + elif name == "AID" or name=="CID": + self.aktiv = name + + def endElement(self, name): + if name == "AID": + self.schluessel=self.schluessel.strip() + self.ergebnis[self.schluessel]=[] + #print("huhn") + self.aktiv=None + elif name == "CID": + self.aktiv = None + self.ergebnis[self.schluessel].append(self.wert) + self.wert="" + def characters(self, content): + if self.aktiv == "AID": + self.schluessel += content + elif self.aktiv == "CID": + self.wert += content + + +def give_aid_cid_dict_from_xml(xmlfile): + handler = DictHandler() + parser = sax.make_parser() + parser.setContentHandler(handler) + parser.parse(xmlfile) + dic=handler.ergebnis + return dic + +#get every aid as a list +#returns a dictionary with aid as key and as value the list of cids +def getAllAssayIDs(): + url="http://pubchem.ncbi.nlm.nih.gov/rest/pug/assay/type/all/aids/TXT" + data=readfile.getresult(url) + aidlist=readfile.getListFromString(data) + return aidlist + + +def getIDofLine(line): + arr=line.split(">") + if len(arr) > 1: + aid=arr[1].split("<")[0] + return aid + else: + return "-1" + +def write_to_csv(aid_cid_dict, outfile): + for key in aid_cid_dict: + for cid in aid_cid_dict[key]: + outfile.write(key) + outfile.write(",") + outfile.write(cid) + outfile.write("\n") + diff -r 1ad356686717 -r 9ee84d9fd3a7 rest_tool_macros.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/rest_tool_macros.xml Thu Apr 03 06:50:57 2014 -0400 @@ -0,0 +1,66 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r 1ad356686717 -r 9ee84d9fd3a7 rest_tool_search.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/rest_tool_search.py Thu Apr 03 06:50:57 2014 -0400 @@ -0,0 +1,36 @@ +#!/usr/bin/env python + +import sys, os +import argparse +import readfile + +def main(args): + #search for acitivity or target + url="http://pubchem.ncbi.nlm.nih.gov/rest/pug/"+args.type+"/name/"+args.name + if args.type == "assay": + url+="/aids" + elif args.type == "compound": + url+="/cids" + else: + url+="/sids" + url+="/txt" + #print("url: "+url) + data=readfile.getresult(url) + args.outfile.write(data) + args.outfile.close() + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument('--name', type=str, required=True, + help="Enter the name") + parser.add_argument('--type', type=str, required=True, + help="What you want to search for") + parser.add_argument('--outfile', type=argparse.FileType('w'), required=True, + help="Specify output file") + if len(sys.argv) < 2: + print "Too few arguments..." + parser.print_help() + exit(1) + args = parser.parse_args() + main( args ) diff -r 1ad356686717 -r 9ee84d9fd3a7 tool_dependencies.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_dependencies.xml Thu Apr 03 06:50:57 2014 -0400 @@ -0,0 +1,5 @@ + + + $REPOSITORY_INSTALL_DIR + +