comparison rest_tool_assays_with_cids_given_target.py @ 6:9ee84d9fd3a7 draft

Uploaded
author bernhardlutz
date Thu, 03 Apr 2014 06:50:57 -0400
parents
children
comparison
equal deleted inserted replaced
5:1ad356686717 6:9ee84d9fd3a7
1 #!/usr/bin/env python
2
3
4 import sys, os
5 import argparse
6 import readfile
7
def getAllAssayIDs():
    """Return the IDs (AIDs) of all PubChem assays.

    Requests the plain-text AID listing for assay type "all" from the
    PubChem REST service and passes the response text to
    readfile.getListFromString; that helper's result (presumably a list
    of AID strings) is returned unchanged.
    """
    response = readfile.getresult(
        "http://pubchem.ncbi.nlm.nih.gov/rest/pug/assay/type/all/aids/TXT"
    )
    return readfile.getListFromString(response)
15
16
def getIDofLine(line):
    """Extract the text content of the first tag found on *line*.

    The value returned is whatever sits between the first ">" and the
    next "<" (e.g. "<AID>1234</AID>" yields "1234").  When the line has
    no ">" at all, the sentinel string "-1" is returned instead.
    """
    segments = line.split(">")
    if len(segments) < 2:
        # No closing bracket of an opening tag on this line.
        return "-1"
    # segments[1] is the text after the first ">"; cut it at the next "<".
    return segments[1].partition("<")[0]
24
def getAllCidsForAssayActivity(activity):
    """Return the compounds of every assay that matches *activity*.

    Two PubChem PUG REST requests are made.  The first asks for the
    assays matching the activity name and, because of
    ``list_return=listkey``, gets back a server-side list key.  The
    second resolves that key to the compound IDs (CIDs) of those assays
    as XML, which is then run through a SAX parser driven by
    ``DictHandler``.

    activity -- activity name, inserted verbatim into the request URL.

    Returns ``handler.ergebnis``; presumably a dictionary mapping each
    AID to the list of its CIDs (verify against DictHandler).
    """
    # Imported locally because the module header does not bring "sax"
    # into scope.
    from xml import sax

    base = "http://pubchem.ncbi.nlm.nih.gov/rest/pug/assay/"
    url = base + "activity/" + activity + "/aids/txt?list_return=listkey"
    # Strip the key: surrounding whitespace would corrupt the next URL.
    listkey = readfile.getresult(url).strip()

    # Resolve the list key obtained above.  The draft queried the fixed
    # AIDs 25425 and 12345 at this point, so the result never depended on
    # *activity*.
    url = base + "listkey/" + listkey + "/cids/xml"
    print("url: " + url)
    response_xml = readfile.getresult(url)

    # NOTE(review): DictHandler is not defined in the visible part of this
    # file -- confirm it is provided elsewhere before relying on this.
    handler = DictHandler()

    # NOTE(review): the draft noted that the last line of the response
    # sometimes carries stray output and inspected it, but the result of
    # that inspection was never used, so it has been dropped.  If parsing
    # fails on trailing junk, sanitise response_xml here.
    #
    # Parsing straight from memory replaces the round trip through a file
    # named "tempfile" in the working directory, whose read handle was
    # never closed.
    sax.parseString(response_xml, handler)
    return handler.ergebnis
76
77
def main(args):
    """Fetch the assay/compound mapping for ``args.target`` and write it out.

    args -- parsed command-line namespace; ``args.target`` is forwarded to
            getAllCidsForAssayActivity and ``args.outfile`` to write_to_csv.
    """
    # NOTE(review): write_to_csv is not defined in the visible part of this
    # file -- confirm it is provided elsewhere.
    cids_by_aid = getAllCidsForAssayActivity(args.target)
    write_to_csv(cids_by_aid, args.outfile)
81
if __name__ == "__main__":
    # Command-line entry point: parse --outfile / --target and hand the
    # resulting namespace to main().
    parser = argparse.ArgumentParser()
    parser.add_argument('--outfile', type=argparse.FileType('w'),
                        help="Specify output file")
    # The help text used to repeat the --outfile description.
    parser.add_argument('--target', type=str,
                        help="Specify the target to query assays for")
    if len(sys.argv) < 2:
        # Call form of print, as used elsewhere in this file; the statement
        # form is a syntax error on Python 3.
        print("Too few arguments...")
        parser.print_help()
        # sys.exit rather than the interactive-only exit() helper from site.
        sys.exit(1)
    args = parser.parse_args()
    main(args)