6
|
1 #!/usr/bin/env python
|
|
2
|
|
3
|
|
4 import sys, os
|
|
5 import argparse
|
|
6 import readfile
|
|
7
|
|
8 #get every aid as a list
|
|
9 #returns a dictionary with aid as key and as value the list of cids
|
|
def getAllAssayIDs():
    """Download the list of all assay IDs (AIDs) from PubChem PUG REST.

    The plain-text response is fetched with ``readfile.getresult`` and
    converted with ``readfile.getListFromString``; the converted value is
    returned unchanged.
    """
    response = readfile.getresult(
        "http://pubchem.ncbi.nlm.nih.gov/rest/pug/assay/type/all/aids/TXT"
    )
    return readfile.getListFromString(response)
|
|
15
|
|
16
|
|
def getIDofLine(line):
    """Extract the text between the first ``>`` and the following ``<``.

    For a line such as ``<AID>25425</AID>`` this yields ``"25425"``.
    The string ``"-1"`` is returned when the line contains no ``>`` at all.
    """
    pieces = line.split(">")
    # Without a ">" there is no opening tag, hence nothing to extract.
    if len(pieces) <= 1:
        return "-1"
    # Only the segment right after the first ">" matters; cut it at the
    # next "<" (if there is none, the whole segment is returned).
    return pieces[1].split("<")[0]
|
|
24
|
|
25 #get xml of all aids with cids for an activity
|
|
def getAllCidsForAssayActivity(activity):
    """Fetch assay/compound XML from PubChem and parse it with a SAX handler.

    A list key is requested for all assays matching *activity*, but the CID
    lookup below currently uses a hard-coded pair of AIDs instead of that
    list key, so the parsed result does not depend on *activity* yet.

    The XML response is written to a file named ``tempfile`` in the current
    working directory and parsed from there.

    activity -- activity name that is inserted into the PUG REST URL

    Returns the ``ergebnis`` attribute of the ``DictHandler`` after parsing
    (presumably a dictionary mapping each AID to its list of CIDs -- confirm
    against the DictHandler implementation).
    """
    # Imported locally: none of the module-level imports visible in this
    # file bring ``sax`` into scope.
    from xml import sax

    url = "http://pubchem.ncbi.nlm.nih.gov/rest/pug/assay/activity/"+activity+"/aids/txt?list_return=listkey"
    listkey = readfile.getresult(url)
    # TODO(review): the list key is fetched but not used. The intended
    # request appears to be the commented-out URL below; the hard-coded
    # AIDs look like a debugging leftover -- confirm before switching.
    # url="http://pubchem.ncbi.nlm.nih.gov/rest/pug/assay/listkey/"+listkey+"/cids/xml"
    url = "http://pubchem.ncbi.nlm.nih.gov/rest/pug/assay/aid/25425,12345/cids/xml"
    print("url: "+url)
    xml = readfile.getresult(url)

    # init parser
    # NOTE(review): DictHandler is not defined in the visible part of this
    # file; it is assumed to be provided elsewhere.
    handler = DictHandler()
    parser = sax.make_parser()
    parser.setContentHandler(handler)

    # Diagnostic output about the tail of the response, which sometimes
    # contains unexpected trailing output.
    xml_lines = xml.split("\n")
    print("l: ")
    print(len(xml_lines))
    print("lastline: "+xml_lines[len(xml_lines)-1])
    # len-2 (rather than a literal -2) wraps around to the only line for a
    # single-line response instead of raising IndexError.
    print("lastline-2: "+xml_lines[len(xml_lines)-2])

    # Round-trip the response through the temporary file; the context
    # managers make sure both handles are closed even if parsing fails.
    with open("tempfile", "w") as xml_out:
        xml_out.write(xml)
    with open("tempfile", "r") as xml_in:
        parser.parse(xml_in)

    return handler.ergebnis
|
|
76
|
|
77
|
|
def main(args):
    """Fetch the data for ``args.target`` and write it to ``args.outfile``.

    args -- parsed command-line namespace providing ``target`` and
            ``outfile``

    NOTE(review): write_to_csv is not defined in the visible part of this
    file; it is assumed to be provided elsewhere.
    """
    # Fetch first, then write: the lookup must complete before output starts.
    cids_by_assay = getAllCidsForAssayActivity(args.target)
    write_to_csv(cids_by_assay, args.outfile)
|
|
81
|
|
if __name__ == "__main__":
    # Command-line entry point: parse the options and hand them to main().
    parser = argparse.ArgumentParser()
    parser.add_argument('--outfile', type=argparse.FileType('w'),
                        help="Specify output file")
    # --target is concatenated into the request URL, so a missing value
    # would fail with a TypeError; let argparse report it instead.
    parser.add_argument('--target', type=str, required=True,
                        help="Specify the assay activity to query")
    if len(sys.argv) < 2:
        # Parenthesised form works as a statement on Python 2 and as a
        # function call on Python 3.
        print("Too few arguments...")
        parser.print_help()
        # sys.exit is always available; the bare exit() helper is only
        # installed by the site module.
        sys.exit(1)
    args = parser.parse_args()
    main(args)
|