annotate rest_tool_functions.py @ 7:35b41070c20d draft

Deleted selected files
author bernhardlutz
date Thu, 03 Apr 2014 06:51:54 -0400
parents 9ee84d9fd3a7
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
6
9ee84d9fd3a7 Uploaded
bernhardlutz
parents:
diff changeset
1 #!/usr/bin/env python
9ee84d9fd3a7 Uploaded
bernhardlutz
parents:
diff changeset
2
9ee84d9fd3a7 Uploaded
bernhardlutz
parents:
diff changeset
3
9ee84d9fd3a7 Uploaded
bernhardlutz
parents:
diff changeset
4 import sys, os
9ee84d9fd3a7 Uploaded
bernhardlutz
parents:
diff changeset
5 import argparse
9ee84d9fd3a7 Uploaded
bernhardlutz
parents:
diff changeset
6 import urllib2, urllib, httplib
9ee84d9fd3a7 Uploaded
bernhardlutz
parents:
diff changeset
7 import readfile
9ee84d9fd3a7 Uploaded
bernhardlutz
parents:
diff changeset
8 import xml.sax as sax
9ee84d9fd3a7 Uploaded
bernhardlutz
parents:
diff changeset
9
9ee84d9fd3a7 Uploaded
bernhardlutz
parents:
diff changeset
class DictHandler(sax.handler.ContentHandler):
    """SAX content handler that groups CID values under their AID.

    While a document is parsed, the text of every ``AID`` element becomes a
    key of ``ergebnis`` and the text of every following ``CID`` element is
    appended to the list stored under the most recently closed ``AID``.

    Attributes are kept under their original (German) names because code
    outside this class reads ``ergebnis`` directly.
    """

    def __init__(self):
        # Explicit base call (instead of super()) works on Python 2 and 3.
        sax.handler.ContentHandler.__init__(self)
        self.ergebnis = {}      # result: AID text -> list of CID texts
        self.schluessel = ""    # key: text of the current AID
        self.wert = ""          # value: text of the CID being read
        self.aktiv = None       # name of the open AID/CID element, else None

    def startElement(self, name, attrs):
        """Reset the buffers on ``Information``; start capturing on AID/CID."""
        if name == "Information":
            self.schluessel = ""
            self.wert = ""
        elif name == "AID":
            # Start every AID from an empty buffer so consecutive AIDs that
            # are not wrapped in their own Information element do not get
            # concatenated into one key.
            self.schluessel = ""
            self.aktiv = name
        elif name == "CID":
            self.aktiv = name

    def endElement(self, name):
        """Store the captured text when an AID or CID element closes.

        Raises:
            KeyError: if a ``CID`` closes while no list exists for the
                current key (i.e. no ``AID`` was closed for it).
        """
        if name == "AID":
            self.schluessel = self.schluessel.strip()
            # setdefault keeps CIDs already collected for an AID that shows
            # up again later in the document instead of discarding them.
            self.ergebnis.setdefault(self.schluessel, [])
            self.aktiv = None
        elif name == "CID":
            self.aktiv = None
            # Strip the value the same way the key is stripped.
            self.ergebnis[self.schluessel].append(self.wert.strip())
            self.wert = ""

    def characters(self, content):
        """Append character data to the buffer of the open AID/CID element.

        The parser may deliver the text of one element in several chunks,
        hence the accumulation with ``+=``.
        """
        if self.aktiv == "AID":
            self.schluessel += content
        elif self.aktiv == "CID":
            self.wert += content
9ee84d9fd3a7 Uploaded
bernhardlutz
parents:
diff changeset
40
9ee84d9fd3a7 Uploaded
bernhardlutz
parents:
diff changeset
41
9ee84d9fd3a7 Uploaded
bernhardlutz
parents:
diff changeset
def give_aid_cid_dict_from_xml(xmlfile):
    """Parse *xmlfile* and return the AID -> CID-list mapping found in it.

    A fresh ``DictHandler`` is attached to a parser created by
    ``sax.make_parser()``; *xmlfile* is handed unchanged to the parser's
    ``parse`` method. The handler's ``ergebnis`` dictionary is returned.
    """
    collector = DictHandler()
    reader = sax.make_parser()
    reader.setContentHandler(collector)
    reader.parse(xmlfile)
    return collector.ergebnis
9ee84d9fd3a7 Uploaded
bernhardlutz
parents:
diff changeset
49
9ee84d9fd3a7 Uploaded
bernhardlutz
parents:
diff changeset
def getAllAssayIDs():
    """Fetch the listing of all assay IDs (AIDs) from PubChem PUG REST.

    The plain-text listing is downloaded with ``readfile.getresult`` and
    converted by ``readfile.getListFromString``; whatever that helper
    returns is passed back to the caller (presumably a list of AID
    strings -- confirm against the ``readfile`` module).
    """
    listing = readfile.getresult(
        "http://pubchem.ncbi.nlm.nih.gov/rest/pug/assay/type/all/aids/TXT")
    return readfile.getListFromString(listing)
9ee84d9fd3a7 Uploaded
bernhardlutz
parents:
diff changeset
57
9ee84d9fd3a7 Uploaded
bernhardlutz
parents:
diff changeset
58
9ee84d9fd3a7 Uploaded
bernhardlutz
parents:
diff changeset
def getIDofLine(line):
    """Return the text between the first ``>`` in *line* and the next ``<``.

    For a line such as ``<AID>123</AID>`` this yields ``"123"``. The text
    is cut at the second ``>`` as well, should one appear before any ``<``.
    When *line* contains no ``>`` at all, the string ``"-1"`` is returned.
    """
    pieces = line.split(">", 2)
    if len(pieces) < 2:
        return "-1"
    # pieces[1] is the segment between the first and second '>'.
    return pieces[1].partition("<")[0]
9ee84d9fd3a7 Uploaded
bernhardlutz
parents:
diff changeset
66
9ee84d9fd3a7 Uploaded
bernhardlutz
parents:
diff changeset
def write_to_csv(aid_cid_dict, outfile):
    """Write one ``key,cid`` line to *outfile* per CID in *aid_cid_dict*.

    *aid_cid_dict* maps each key to an iterable of CIDs; keys whose
    iterable is empty produce no output. *outfile* only needs a ``write``
    method. Every line ends with a newline character.
    """
    emit = outfile.write
    for aid, cids in aid_cid_dict.items():
        for cid in cids:
            for piece in (aid, ",", cid, "\n"):
                emit(piece)
9ee84d9fd3a7 Uploaded
bernhardlutz
parents:
diff changeset
74