annotate rest_tool_assays_with_cids_given_target.py @ 10:a76d64d2ed44 draft default tip

Uploaded
author bernhardlutz
date Sun, 04 May 2014 14:21:30 -0400
parents 9ee84d9fd3a7
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
6
9ee84d9fd3a7 Uploaded
bernhardlutz
parents:
diff changeset
1 #!/usr/bin/env python
9ee84d9fd3a7 Uploaded
bernhardlutz
parents:
diff changeset
2
9ee84d9fd3a7 Uploaded
bernhardlutz
parents:
diff changeset
3
9ee84d9fd3a7 Uploaded
bernhardlutz
parents:
diff changeset
import argparse
import os
import sys
from xml import sax

import readfile
9ee84d9fd3a7 Uploaded
bernhardlutz
parents:
diff changeset
7
9ee84d9fd3a7 Uploaded
bernhardlutz
parents:
diff changeset
def getAllAssayIDs():
    """Fetch the IDs (AIDs) of all PubChem assays.

    Requests the plain-text AID listing from the PubChem PUG REST service
    and returns whatever ``readfile.getListFromString`` builds from the
    response body (a list of AIDs, not a dictionary).
    """
    url = "http://pubchem.ncbi.nlm.nih.gov/rest/pug/assay/type/all/aids/TXT"
    return readfile.getListFromString(readfile.getresult(url))
9ee84d9fd3a7 Uploaded
bernhardlutz
parents:
diff changeset
15
9ee84d9fd3a7 Uploaded
bernhardlutz
parents:
diff changeset
16
9ee84d9fd3a7 Uploaded
bernhardlutz
parents:
diff changeset
def getIDofLine(line):
    """Return the text content of the first XML tag on ``line``.

    For a line such as ``<CID>123</CID>`` this yields ``"123"``: the text
    between the first ``>`` and the next ``<`` (or the end of the line when
    the closing tag is missing).

    Returns the string ``"-1"`` when the line contains no ``>`` at all.
    """
    pieces = line.split(">")
    if len(pieces) < 2:
        # no tag on this line
        return "-1"
    # everything after the opening tag, cut at the start of the next tag
    return pieces[1].split("<", 1)[0]
9ee84d9fd3a7 Uploaded
bernhardlutz
parents:
diff changeset
24
9ee84d9fd3a7 Uploaded
bernhardlutz
parents:
diff changeset
class _AidCidHandler(sax.ContentHandler):
    """SAX handler that groups CID values under the AID that precedes them.

    The collected mapping is exposed as ``ergebnis`` (German for "result"):
    a dict whose keys are AID strings and whose values are lists of CID
    strings found after that AID inside the same ``<Information>`` element.
    """

    _ID_TAGS = ("AID", "CID")

    def __init__(self):
        sax.ContentHandler.__init__(self)
        self.ergebnis = {}
        self._current_aid = None
        # text fragments of the AID/CID element currently being read
        self._chunks = None

    def startElement(self, name, attrs):
        if name == "Information":
            # new record: never attach its CIDs to the previous record's AID
            self._current_aid = None
        elif name in self._ID_TAGS:
            self._chunks = []

    def characters(self, content):
        # SAX may deliver a single text node in several pieces
        if self._chunks is not None:
            self._chunks.append(content)

    def endElement(self, name):
        if name not in self._ID_TAGS or self._chunks is None:
            return
        value = "".join(self._chunks).strip()
        self._chunks = None
        if name == "AID":
            self._current_aid = value
            self.ergebnis.setdefault(value, [])
        elif self._current_aid is not None:
            self.ergebnis[self._current_aid].append(value)


def getAllCidsForAssayActivity(activity):
    """Return a dict mapping each AID to its list of CIDs for ``activity``.

    Two PubChem PUG REST requests are made through ``readfile.getresult``:
    the first asks for the assays matching ``activity`` and gets a list key
    back, the second fetches the CIDs of those assays as XML.  The XML is
    then parsed with SAX.

    Parameters:
        activity: string inserted verbatim into the ``assay/activity/...``
            request path.

    Returns:
        dict with AID strings as keys and lists of CID strings as values.

    Raises:
        xml.sax.SAXParseException: if the response is not well-formed XML.
    """
    base = "http://pubchem.ncbi.nlm.nih.gov/rest/pug/assay/"

    # NOTE(review): presumably the response body is the bare list key
    # (possibly followed by a newline) -- confirm against readfile.getresult.
    listkey = readfile.getresult(
        base + "activity/" + activity + "/aids/txt?list_return=listkey"
    ).strip()

    # Use the list key obtained above.  The previous code queried a fixed
    # pair of AIDs here, so the result never depended on ``activity``.
    xml = readfile.getresult(base + "listkey/" + listkey + "/cids/xml")

    # The former workaround for a garbled last line was already disabled
    # (the text was split and re-joined unchanged), so the response is parsed
    # as received; parsing in memory also avoids the scratch file in the
    # working directory and its unclosed read handle.
    handler = _AidCidHandler()
    sax.parseString(xml, handler)
    return handler.ergebnis
9ee84d9fd3a7 Uploaded
bernhardlutz
parents:
diff changeset
76
9ee84d9fd3a7 Uploaded
bernhardlutz
parents:
diff changeset
77
9ee84d9fd3a7 Uploaded
bernhardlutz
parents:
diff changeset
def main(args):
    """Fetch the AID -> CIDs mapping for ``args.target`` and write it out.

    ``args`` must provide ``target`` (handed to
    ``getAllCidsForAssayActivity``) and ``outfile`` (handed to
    ``write_to_csv`` together with the mapping).
    """
    # NOTE(review): write_to_csv is neither defined nor imported in the
    # visible part of this file -- confirm where it is supposed to come from.
    aid_cid_dict = getAllCidsForAssayActivity(args.target)
    write_to_csv(aid_cid_dict, args.outfile)
9ee84d9fd3a7 Uploaded
bernhardlutz
parents:
diff changeset
81
9ee84d9fd3a7 Uploaded
bernhardlutz
parents:
diff changeset
if __name__ == "__main__":
    # Command-line entry point: parse --outfile / --target and run main().
    parser = argparse.ArgumentParser()
    parser.add_argument('--outfile', type=argparse.FileType('w'),
                        help="Specify output file")
    parser.add_argument('--target', type=str,
                        help="Specify the target to query PubChem assays for")
    if len(sys.argv) < 2:
        # print() with a single argument behaves the same on Python 2 and 3
        print("Too few arguments...")
        parser.print_help()
        # sys.exit is always available; the bare exit() helper comes from
        # the site module and is meant for interactive use
        sys.exit(1)
    args = parser.parse_args()
    main(args)