comparison rest_tool_functions.py @ 6:9ee84d9fd3a7 draft

Uploaded
author bernhardlutz
date Thu, 03 Apr 2014 06:50:57 -0400
parents
children
comparison
equal deleted inserted replaced
5:1ad356686717 6:9ee84d9fd3a7
1 #!/usr/bin/env python
2
3
4 import sys, os
5 import argparse
6 import urllib2, urllib, httplib
7 import readfile
8 import xml.sax as sax
9
class DictHandler(sax.handler.ContentHandler):
    """SAX content handler that groups CID values under the preceding AID.

    While a document is parsed, the text of every ``AID`` element becomes a
    key of ``ergebnis`` and the text of every ``CID`` element that follows is
    appended to that key's list.  An ``Information`` element resets the
    current key and value buffers.

    After parsing, ``ergebnis`` maps each AID string (whitespace-stripped) to
    the list of CID strings (whitespace-stripped) seen for it.  An AID that
    occurs more than once keeps the CIDs already collected for it.

    A ``CID`` that closes while the current key has not been registered by an
    ``AID`` raises ``KeyError``.
    """

    def __init__(self):
        # Explicit base-class call (works for old- and new-style classes) so
        # the handler state set up by ContentHandler is initialised as well.
        sax.handler.ContentHandler.__init__(self)
        self.ergebnis = {}     # result: AID text -> list of CID texts
        self.schluessel = ""   # key: text of the AID currently in effect
        self.wert = ""         # value: text of the CID currently being read
        self.aktiv = None      # name of the open AID/CID element, else None

    def startElement(self, name, attrs):
        """Reset the buffers on ``Information``; start capturing AID/CID text."""
        if name == "Information":
            self.schluessel = ""
            self.wert = ""
        elif name == "AID":
            # Every AID starts a fresh key; otherwise two AIDs that are not
            # separated by an Information element would be concatenated.
            self.schluessel = ""
            self.aktiv = name
        elif name == "CID":
            self.aktiv = name

    def endElement(self, name):
        """Register a finished AID as key or append a finished CID to it."""
        if name == "AID":
            self.schluessel = self.schluessel.strip()
            # setdefault keeps CIDs collected earlier for the same AID
            # instead of replacing them with an empty list.
            self.ergebnis.setdefault(self.schluessel, [])
            self.aktiv = None
        elif name == "CID":
            self.aktiv = None
            # Strip the value the same way the key is stripped so stray
            # whitespace/newlines never end up in the stored CIDs.
            self.ergebnis[self.schluessel].append(self.wert.strip())
            self.wert = ""

    def characters(self, content):
        """Accumulate text; the parser may deliver one text node in chunks."""
        if self.aktiv == "AID":
            self.schluessel += content
        elif self.aktiv == "CID":
            self.wert += content
40
41
def give_aid_cid_dict_from_xml(xmlfile):
    """Parse *xmlfile* with a DictHandler and return the collected mapping.

    *xmlfile* is handed unchanged to the SAX parser's ``parse`` method.
    The return value is the handler's ``ergebnis`` dictionary
    (AID -> list of CIDs).
    """
    collector = DictHandler()
    reader = sax.make_parser()
    reader.setContentHandler(collector)
    reader.parse(xmlfile)
    return collector.ergebnis
49
def getAllAssayIDs():
    """Download the list of all assay IDs (AIDs) from the PubChem REST API.

    The TXT listing is fetched with ``readfile.getresult`` and converted
    with ``readfile.getListFromString``; the result of that conversion is
    returned as-is (presumably a list of AID strings -- confirm against the
    ``readfile`` module).
    """
    listing_url = "http://pubchem.ncbi.nlm.nih.gov/rest/pug/assay/type/all/aids/TXT"
    raw_listing = readfile.getresult(listing_url)
    return readfile.getListFromString(raw_listing)
57
58
def getIDofLine(line):
    """Return the text between the first ``>`` and the next ``<`` or ``>``.

    For a line such as ``<AID>1000</AID>`` this yields ``"1000"``.
    The string ``"-1"`` is returned when the line contains no ``>``.
    """
    # Only the segment right after the first ">" matters, so the splits
    # can stop early without changing the result.
    pieces = line.split(">", 2)
    if len(pieces) < 2:
        return "-1"
    return pieces[1].split("<", 1)[0]
66
def write_to_csv(aid_cid_dict, outfile):
    """Write one ``aid,cid`` line to *outfile* for every CID of every AID.

    *aid_cid_dict* maps an AID string to a list of CID strings; keys with an
    empty list produce no output.  *outfile* only needs a ``write`` method.
    """
    for aid, cids in aid_cid_dict.items():
        for cid in cids:
            # Emit the row piece by piece: key, separator, value, newline.
            for piece in (aid, ",", cid, "\n"):
                outfile.write(piece)
74