diff rest_tool_assays_with_cids_given_target.py @ 6:9ee84d9fd3a7 draft

Uploaded
author bernhardlutz
date Thu, 03 Apr 2014 06:50:57 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/rest_tool_assays_with_cids_given_target.py	Thu Apr 03 06:50:57 2014 -0400
@@ -0,0 +1,93 @@
+#!/usr/bin/env python
+
+
+import sys, os
+import argparse
+import readfile
+
+#get every aid as a list
+#returns a dictionary with aid as key and as value the list of cids
+def getAllAssayIDs():
+    url="http://pubchem.ncbi.nlm.nih.gov/rest/pug/assay/type/all/aids/TXT"
+    data=readfile.getresult(url)
+    aidlist=readfile.getListFromString(data)
+    return aidlist
+
+
+def getIDofLine(line):
+    arr=line.split(">")
+    if len(arr) > 1:
+        aid=arr[1].split("<")[0]
+        return aid
+    else:
+        return "-1"
+        
+#get xml of all aids with cids for an activity
+def getAllCidsForAssayActivity(activity):
+    url="http://pubchem.ncbi.nlm.nih.gov/rest/pug/assay/activity/"+activity+"/aids/txt?list_return=listkey"
+    listkey=readfile.getresult(url)
+#    url="http://pubchem.ncbi.nlm.nih.gov/rest/pug/assay/listkey/"+listkey+"/cids/xml"
+    url="http://pubchem.ncbi.nlm.nih.gov/rest/pug/assay/aid/25425,12345/cids/xml"
+    print("url: "+url)
+    xml=readfile.getresult(url)
+    
+    #init parser
+    handler = DictHandler() 
+    parser = sax.make_parser() 
+    parser.setContentHandler(handler) 
+
+    tempfile=open("tempfile","w")
+    #handle the last line, there is sometimes some random output
+    lastline_arr=xml.split("\n")
+    #print(lastline_arr)
+    
+    print("l: ")
+    print(len(lastline_arr))
+    lastline=lastline_arr[len(lastline_arr)-1]
+    print("lastline: "+lastline)
+    print("lastline-2: "+lastline_arr[len(lastline_arr)-2])
+    cidlastline=getIDofLine(lastline)
+    aidkey="-1"
+    if cidlastline != "-1":
+        i=len(lastline_arr)-2
+        #search for nex aid entry
+        while i >= 0 and "AID" not in lastline_arr[i]:
+            i-=1
+        if i >= 0:
+            aid=getIDofLine(lastline_arr[i])
+            if aid != "-1":
+                aidkey=aid
+    #remove the last line and put the array back together
+    
+    lastline_arr_list=list(lastline_arr)
+    #lastline_arr_list.remove(lastline)
+    xml2="\n".join(lastline_arr_list)
+    tempfile.write(xml2)
+    #add the last tags
+    #tempfile.write("</Information></InformationList>")
+    tempfile.close()
+    parser.parse(open("tempfile","r"))
+    dic=handler.ergebnis
+    
+    #add the last line
+    #if cidlastline != "-1":
+    #    dic[aidkey].append(cidlastline)
+    return dic
+
+
+def main(args):
+    aid_cid_dict=getAllCidsForAssayActivity(args.target)
+    write_to_csv(aid_cid_dict, args.outfile)
+    
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--outfile', type=argparse.FileType('w'),
+        help="Specify output file")
+    parser.add_argument('--target', type=str,
+        help="Specify output file")
+    if len(sys.argv) < 2:
+        print "Too few arguments..."
+        parser.print_help()
+        exit(1)
+    args = parser.parse_args()
+    main( args )