6
|
1 #!/usr/bin/env python
|
|
2
|
|
3
|
|
4 import sys, os
|
|
5 import argparse
|
|
6 import urllib2, urllib, httplib
|
|
7 import readfile
|
|
8 import xml.sax as sax
|
|
9
|
|
class DictHandler(sax.handler.ContentHandler):
    """SAX content handler that groups CID texts under the preceding AID.

    After parsing, ``ergebnis`` maps each AID text (surrounding whitespace
    removed) to the list of CID texts (also stripped) whose elements were
    closed while that AID was the current key.  Closing an AID element always
    (re)starts its list as empty.
    """

    def __init__(self):
        # Called explicitly (not via super()) so it also works where
        # ContentHandler is an old-style class, as on Python 2.
        sax.handler.ContentHandler.__init__(self)
        self.ergebnis = {}      # result: AID text -> list of CID texts
        self.schluessel = ""    # text of the current AID (the dict key)
        self.wert = ""          # text collected for the CID being read
        self.aktiv = None       # "AID" / "CID" while inside that element

    def startElement(self, name, attrs):
        """Reset the text buffers and remember which element is being read."""
        if name == "Information":
            self.schluessel = ""
            self.wert = ""
        elif name == "AID":
            # Start from an empty key; otherwise a second AID that is not
            # preceded by a new <Information> would be appended to the old
            # key text ("1" followed by "2" would become "12").
            self.schluessel = ""
            self.aktiv = name
        elif name == "CID":
            self.aktiv = name

    def endElement(self, name):
        """Store the finished AID key or append the finished CID value.

        Raises:
            KeyError: if a CID element closes while the current key has no
                entry in ``ergebnis`` (e.g. a CID that precedes every AID).
        """
        if name == "AID":
            self.schluessel = self.schluessel.strip()
            self.ergebnis[self.schluessel] = []
            self.aktiv = None
        elif name == "CID":
            self.aktiv = None
            # Strip like the key so that pretty-printed XML does not leak
            # newlines or indentation into the stored value.
            self.ergebnis[self.schluessel].append(self.wert.strip())
            self.wert = ""

    def characters(self, content):
        """Accumulate text; the parser may deliver it in several chunks."""
        if self.aktiv == "AID":
            self.schluessel += content
        elif self.aktiv == "CID":
            self.wert += content
|
|
40
|
|
41
|
|
def give_aid_cid_dict_from_xml(xmlfile):
    """Parse *xmlfile* with a ``DictHandler`` and return its result mapping.

    *xmlfile* is handed unchanged to the SAX parser.  The returned object is
    the handler's ``ergebnis`` dictionary (AID text -> list of CID texts).
    """
    collector = DictHandler()
    # sax.parse builds a default parser, attaches the handler and runs it.
    sax.parse(xmlfile, collector)
    return collector.ergebnis
|
|
49
|
|
def getAllAssayIDs():
    """Fetch the identifiers of all PubChem assays.

    Requests the plain-text AID listing from the PubChem PUG REST service
    through ``readfile.getresult`` and passes the response to
    ``readfile.getListFromString``; that call's return value is handed back
    unchanged (presumably a list of AID strings -- confirm against
    ``readfile``).
    """
    all_aids_url = "http://pubchem.ncbi.nlm.nih.gov/rest/pug/assay/type/all/aids/TXT"
    return readfile.getListFromString(readfile.getresult(all_aids_url))
|
|
57
|
|
58
|
|
def getIDofLine(line):
    """Return the text between the first ``>`` in *line* and the next ``<``.

    The text considered ends at the second ``>`` if there is one.  When
    *line* contains no ``>`` at all, the string ``"-1"`` is returned.
    """
    pieces = line.split(">", 2)
    if len(pieces) < 2:
        # No closing bracket of an opening tag on this line.
        return "-1"
    return pieces[1].split("<", 1)[0]
|
|
66
|
|
def write_to_csv(aid_cid_dict, outfile):
    """Write one ``aid,cid`` line to *outfile* for every CID of every AID.

    *aid_cid_dict* maps each AID string to an iterable of CID strings and
    *outfile* is any object with a ``write`` method.  AIDs without CIDs
    produce no output.
    """
    emit = outfile.write
    for aid, cids in aid_cid_dict.items():
        for cid in cids:
            for piece in (aid, ",", cid, "\n"):
                emit(piece)
|
|
74
|