# HG changeset patch
# User bernhardlutz
# Date 1396522257 14400
# Node ID 9ee84d9fd3a7efcf6e183f757081d2aaded21ab2
# Parent  1ad35668671798d92e0c2210e2b4fc0e609d31fb
Uploaded
diff -r 1ad356686717 -r 9ee84d9fd3a7 readfile.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/readfile.py	Thu Apr 03 06:50:57 2014 -0400
@@ -0,0 +1,41 @@
+#!/usr/bin/env python
+"""Shared helpers for the PubChem REST tools: read ID lists and fetch URLs."""
+
+import io
+import urllib2, urllib, httplib
+
+
+def getListFromFile(file):
+    """Return the IDs listed one per line in *file* as stripped strings.
+
+    *file* is any iterable of text lines, e.g. an open file handle.
+    Blank lines are skipped.  As before, an ID that parses to 0 is dropped
+    and a non-numeric line raises ValueError.
+    """
+    idlist = []
+    for line in file:
+        entry = line.strip()
+        if not entry:
+            # int() of an empty string raises ValueError; a stray empty
+            # line must not abort the whole run.
+            continue
+        if int(entry):
+            idlist.append(entry)
+    return idlist
+
+
+def getresult(url):
+    """Fetch *url* and return the body with trailing whitespace removed.
+
+    An HTTP error status (urllib2.HTTPError) yields an empty string so
+    callers simply write an empty result; other failures propagate.
+    """
+    try:
+        connection = urllib2.urlopen(url)
+    except urllib2.HTTPError:
+        return ""
+    try:
+        return connection.read().rstrip()
+    finally:
+        # Release the socket even when read() fails.
+        connection.close()
diff -r 1ad356686717 -r 9ee84d9fd3a7 readfile.pyc
Binary file readfile.pyc has changed
diff -r 1ad356686717 -r 9ee84d9fd3a7 rest_tool.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/rest_tool.py	Thu Apr 03 06:50:57 2014 -0400
@@ -0,0 +1,84 @@
+#!/usr/bin/env python
+"""Fetch data about PubChem assays, compounds or substances via PUG REST."""
+
+import sys, os
+import argparse
+
+import readfile
+
+# Operations fetched as plain text or CSV; every other operation uses XML.
+txt_output=["cids", "aids", "sids", "synonyms" ]
+csv_output=["assaysummary"]
+# Operations that accept an optional "<operation>_type" query parameter.
+check_for_id_type=["cids", "aids", "sids"]
+
+
+def _build_url(args):
+    """Return the PUG REST URL for the parsed command line *args*."""
+    url = "http://pubchem.ncbi.nlm.nih.gov/rest/pug/" + args.type + "/"
+    # Identifier namespace used for each input domain.
+    namespaces = {"assay": "aid/", "compound": "cid/", "substance": "sid/"}
+    url += namespaces.get(args.type, "")
+    if args.id_file is None:
+        idstring = str(args.id)
+    else:
+        idstring = ",".join(readfile.getListFromFile(args.id_file))
+        args.id_file.close()
+    url += idstring + "/" + args.operation + "/"
+    if args.operation == "property" and args.property_value is not None:
+        # The requested properties sit between the operation and the output
+        # format; previously --property-value was parsed but never used.
+        url += args.property_value + "/"
+    if args.operation in csv_output:
+        url += "csv"
+    elif args.operation in txt_output:
+        url += "txt"
+    else:
+        url += "xml"
+    if args.operation in check_for_id_type and args.id_type is not None:
+        url += "?" + args.operation + "_type=" + args.id_type
+    return url
+
+
+def main(args):
+    """Download the requested data and write it to args.outfile.
+
+    The URL is echoed to stdout first.  The output file is closed on exit,
+    also when the download or the write fails.
+    """
+    outfile = args.outfile
+    try:
+        url = _build_url(args)
+        print(url)
+        outfile.write(readfile.getresult(url))
+    finally:
+        outfile.close()
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--type', type=str, required=True,
+        help="What you want to query: assay, compound or substance")
+    parser.add_argument('--id', type=str,
+        help="Specify the ID")
+    parser.add_argument('--operation', type=str, required=True,
+        help="Specify the operation")
+    parser.add_argument('--property-value', dest="property_value", type=str,
+        help="Specify the property")
+    parser.add_argument('--id-type', dest="id_type", type=str,
+        help="Specify the ID type")
+    parser.add_argument('--outfile', type=argparse.FileType('w'), required=True,
+        help="Specify one output file")
+    parser.add_argument('--id-file', dest="id_file", type=argparse.FileType('r'),
+        help="Specify a file with a list of ids, one per line")
+    # Cheap early exit before argparse runs; a complete call carries three
+    # required "flag value" pairs plus an ID source.
+    if len(sys.argv) < 8:
+        print("Too few arguments...")
+        parser.print_help()
+        sys.exit(1)
+    args = parser.parse_args()
+    if args.id is None and args.id_file is None:
+        # Without this the literal string "None" would end up in the URL.
+        parser.error("one of --id or --id-file is required")
+    main( args )
diff -r 1ad356686717 -r 9ee84d9fd3a7 rest_tool.xml
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/rest_tool.xml	Thu Apr 03 06:50:57 2014 -0400
@@ -0,0 +1,256 @@
+
+    Fetch pubchem data
+    echo "0.1.0"
+    
+        REST_TOOL_SCRIPT_PATH
+    
+    
+        rest_tool_macros.xml
+    
+    
+        #if $choose_action.action == 'search':
+        rest_tool_search.py --type $choose_action.input_type_search --name $choose_action.search_name --outfile $output
+        
+        #elif $choose_action.action == 'specific_data':
+        rest_tool.py
+            #if $choose_action.field_or_file1.field_or_file1 == 'field':
+            --id $choose_action.field_or_file1.id1 
+            #else:
+            --id-file $choose_action.field_or_file1.file_ids_1
+            #end if
+            --type $choose_action.choose_acs.input_type
+            
+            #if $choose_action.choose_acs.input_type == 'assay':
+            --operation $choose_action.choose_acs.operation_assay.operation_assay
+                #if $choose_action.choose_acs.operation_assay.operation_assay == 'property':
+                    --property-value $choose_action.choose_acs.operation_assay.property_assay
+                #elif $choose_action.choose_acs.operation_assay.operation_assay == 'aids':
+                    --id-type $choose_action.choose_acs.operation_assay.aids_type_assay
+                #elif $choose_action.choose_acs.operation_assay.operation_assay == 'cids':
+                    --id-type $choose_action.choose_acs.operation_assay.cids_type_assay
+                #elif $choose_action.choose_acs.operation_assay.operation_assay == 'sids':
+                    --id-type $choose_action.choose_acs.operation_assay.sids_type_assay
+                #end if
+            #elif $choose_action.choose_acs.input_type == 'compound':
+            --operation $choose_action.choose_acs.operation_compound.operation_compound
+                #if $choose_action.choose_acs.operation_compound.operation_compound == 'property':
+                    --property-value $choose_action.choose_acs.operation_compound.property_compound
+                #elif $choose_action.choose_acs.operation_compound.operation_compound == 'aids':
+                    --id-type $choose_action.choose_acs.operation_compound.aids_type_compound
+                #elif $choose_action.choose_acs.operation_compound.operation_compound == 'cids':
+                    --id-type $choose_action.choose_acs.operation_compound.cids_type_compound
+                #elif $choose_action.choose_acs.operation_compound.operation_compound == 'sids':
+                    --id-type $choose_action.choose_acs.operation_compound.sids_type_compound
+                #end if
+            #else:
+            --operation $choose_action.choose_acs.operation_substance.operation_substance
+                #if $choose_action.choose_acs.operation_substance.operation_substance == 'property':
+                    --property-value $choose_action.choose_acs.operation_substance.property_substance
+                #elif $choose_action.choose_acs.operation_substance.operation_substance == 'aids':
+                    --id-type $choose_action.choose_acs.operation_substance.aids_type_substance
+                #elif $choose_action.choose_acs.operation_substance.operation_substance == 'cids':
+                    --id-type $choose_action.choose_acs.operation_substance.cids_type_substance
+                #elif $choose_action.choose_acs.operation_substance.operation_substance == 'sids':
+                    --id-type $choose_action.choose_acs.operation_substance.sids_type_substance
+                #end if
+            #end if
+            --outfile $output
+        #elif $choose_action.action == 'compounds_for_assay':
+            rest_tool_comp_for_assay.py
+            #if $choose_action.field_or_file2.field_or_file2 == 'field':
+            --aid $choose_action.field_or_file2.id2 
+            #else:
+            --aid-file $choose_action.field_or_file2.file_ids_2
+            #end if
+            --outfile $output
+        #elif $choose_action.action == 'assays_by_activity_or_target':
+            rest_tool_assay_by_activity_or_target.py
+            #if $choose_action.activity_or_target.activity_or_target == 'activity':
+                --activity $choose_action.activity_or_target.activity
+            #else:
+                --target-id $choose_action.activity_or_target.target_id
+                --target-type $choose_action.activity_or_target.target_identifier_type
+            #end if
+            --outfile $output
+
+        #end if
+    
+
+    
+        
+            
+                
+                
+                
+                
+            
+            
+                    
+                        
+                        
+                        
+                    
+                    
+                        
+                    
+                    
+            
+            
+                
+                        
+                            
+                            
+                        
+                        
+                            
+                        
+                        
+                            
+                        
+                
+                
+                    
+                        
+                        
+                        
+                    
+        
+                    
+                        
+                            
+                                
+                                
+                                
+                                
+                                
+                                
+                            
+                            
+                                
+                            
+                            
+                                
+                                    
+                                
+                            
+                            
+                                
+                                    
+                                    
+                                
+                            
+                            
+                                
+                                    
+                                    
+                                
+                            
+                        
+                    
+                    
+                        
+                            
+                                
+                                
+                                
+                            
+                            
+                                
+                            
+                            
+                                
+                                    
+                                
+                            
+                            
+                                
+
+                                    
+                                
+                            
+                            
+                                
+                                    
+                                
+                            
+                        
+                    
+                    
+                        
+                            
+                                
+                                
+                            
+                            
+                                
+                            
+                            
+                                
+                                    
+                                
+                            
+                            
+                                
+                                    
+                                
+                            
+                            
+                                
+                                    
+                                
+                            
+                            
+                        
+                    
+                
+            
+            
+            
+                
+                    
+                        
+                        
+                    
+                    
+                        
+                    
+                    
+                        
+                    
+                
+            
+            
+            
+                
+                    
+                        
+                        
+                    
+                    
+                        
+                    
+            
+                    
+                        
+                            
+                            
+                            
+                        
+                        
+                    
+                
+            
+            
+        
+    
+    
+        
+    
+    
+    
+    
+**What it does**
+
+This tool fetches data from PubChem.
+    
+
diff -r 1ad356686717 -r 9ee84d9fd3a7 rest_tool_alt.xml
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/rest_tool_alt.xml	Thu Apr 03 06:50:57 2014 -0400
@@ -0,0 +1,156 @@
+
+    Fetch pubchem data
+    echo "0.1.0"
+    
+        REST_TOOL_SCRIPT_PATH
+    
+    
+        #if $choose_action.action == 'specific_data':
+        rest_tool.py
+            #if $choose_action.field_or_file1.field_or_file1 == 'field':
+            --id $choose_action.field_or_file1.id1 
+            #else:
+            --id-file $choose_action.field_or_file1.file_ids_1
+            #end if
+            --type $choose_action.input_type 
+            
+            --operation $choose_action.operation_property.operation
+            #if $choose_action.operation_property.operation == 'property':
+                --property-value $choose_action.operation_property.property
+            #end if
+            
+            --outfile $output
+        #elif $choose_action.action == 'compounds_for_assay':
+            rest_tool_comp_for_assay.py
+            #if $choose_action.field_or_file2.field_or_file2 == 'field':
+            --aid $choose_action.field_or_file2.id2 
+            #else:
+            --aid-file $choose_action.field_or_file2.file_ids_2
+            #end if
+            --outfile $output
+        #elif $choose_action.action == 'assays_by_activity':
+            rest_tool_assay_by_activity_or_target.py --activity $choose_action.activity --outfile $output
+        #elif $choose_action.action == 'assays_by_targets':
+            rest_tool_assay_by_activity_or_target.py --target-type $choose_action.target_identifier_type --target-id $choose_action.target_id --outfile $output
+        #end if
+    
+
+    
+        
+            
+                
+                
+                
+            
+            
+                
+                    
+                        
+                        
+                        
+                    
+                    
+                    
+                        
+                            
+                            
+                        
+                        
+                            
+                        
+                        
+                            
+                        
+                    
+                    
+                    
+                        
+                            
+                                
+                                
+                                
+                                
+                                
+                                
+                                
+                            
+                            
+                                
+                            
+                        
+                    
+                    
+                        
+                            
+                                
+                                
+                                
+                                
+                                
+                                
+                                
+                                
+                            
+                            
+                                
+                            
+                        
+                    
+                    
+                        
+                            
+                                
+                                
+                                
+                                
+                                
+                                
+                                
+                                
+                            
+                            
+                                
+                            
+                        
+                    
+                
+            
+            
+                
+                    
+                        
+                        
+                    
+                    
+                        
+                    
+                    
+                        
+                    
+                
+            
+            
+                
+            
+            
+                
+                    
+                    
+                    
+                
+                
+            
+            
+        
+    
+    
+        
+    
+    
+    
+    
+**What it does**
+
+This tool fetches data from PubChem.
+    
+
diff -r 1ad356686717 -r 9ee84d9fd3a7 rest_tool_assay_by_activity_or_target.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/rest_tool_assay_by_activity_or_target.py	Thu Apr 03 06:50:57 2014 -0400
@@ -0,0 +1,47 @@
+#!/usr/bin/env python
+"""List the PubChem assay IDs (AIDs) for an activity or for a target."""
+
+import sys, os
+import argparse
+import readfile
+
+
+def main(args):
+    """Query the AIDs for args.activity or for the given target.
+
+    When args.activity is None the lookup uses args.target_type and
+    args.target_id.  The result is written to args.outfile, which is
+    closed afterwards.
+    """
+    url = "http://pubchem.ncbi.nlm.nih.gov/rest/pug/assay/"
+    if args.activity is None:
+        # The parser stores these options as target_type / target_id; the
+        # former args.targettype / args.targetid raised AttributeError.
+        url += "target/" + args.target_type + "/" + args.target_id
+    else:
+        url += "activity/" + args.activity
+    url += "/aids/txt"
+    try:
+        args.outfile.write(readfile.getresult(url))
+    finally:
+        args.outfile.close()
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--activity', type=str,
+        help="Activities you are looking for")
+    parser.add_argument('--target-type', dest="target_type", type=str,
+        help="The target identifier type")
+    parser.add_argument('--target-id', dest="target_id", type=str,
+        help="The specific target")
+    parser.add_argument('--outfile', type=argparse.FileType('w'), required=True,
+        help="Specify output file")
+    if len(sys.argv) < 2:
+        print("Too few arguments...")
+        parser.print_help()
+        sys.exit(1)
+    args = parser.parse_args()
+    if args.activity is None and None in (args.target_type, args.target_id):
+        parser.error("give --activity, or both --target-type and --target-id")
+    main( args )
diff -r 1ad356686717 -r 9ee84d9fd3a7 rest_tool_assays_with_cids_given_target.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/rest_tool_assays_with_cids_given_target.py	Thu Apr 03 06:50:57 2014 -0400
@@ -0,0 +1,85 @@
+#!/usr/bin/env python
+"""Write AID,CID pairs for the assays matching an activity as CSV."""
+
+import sys, os
+import argparse
+import tempfile
+
+import readfile
+import rest_tool_functions
+
+PUG_ASSAY_URL = "http://pubchem.ncbi.nlm.nih.gov/rest/pug/assay/"
+
+
+def getAllAssayIDs():
+    """Return the IDs of all PubChem assays as a list of strings."""
+    data = readfile.getresult(PUG_ASSAY_URL + "type/all/aids/TXT")
+    # readfile has no getListFromString(); split the response here.
+    # Assumes the TXT body holds one ID per line -- TODO confirm.
+    return [line.strip() for line in data.splitlines() if line.strip()]
+
+
+def getIDofLine(line):
+    """Return the text between the first ">" and the following "<" of *line*.
+
+    For a line such as "<AID>123</AID>" that is "123".  Returns "-1" when
+    the line contains no ">".
+    """
+    arr = line.split(">")
+    if len(arr) > 1:
+        return arr[1].split("<")[0]
+    return "-1"
+
+
+def getAllCidsForAssayActivity(activity):
+    """Return a dict mapping each AID with the given activity to its CIDs.
+
+    The AIDs are requested first with list_return=listkey; the CIDs of
+    that list are then fetched as XML and parsed.  Returns an empty dict
+    when either request yields no data.
+    """
+    url = PUG_ASSAY_URL + "activity/" + activity + "/aids/txt?list_return=listkey"
+    listkey = readfile.getresult(url)
+    if not listkey:
+        return {}
+    # The hard-coded test AIDs (25425,12345) that were requested here made
+    # the result independent of *activity*; use the list key just fetched.
+    # NOTE(review): assumes the txt response body is the bare list key --
+    # confirm against the PUG REST documentation.
+    xml = readfile.getresult(PUG_ASSAY_URL + "listkey/" + listkey + "/cids/xml")
+    if not xml:
+        return {}
+    # DictHandler and the sax parser live in rest_tool_functions; they were
+    # referenced here without being imported (NameError).
+    tmp = tempfile.TemporaryFile()
+    try:
+        tmp.write(xml)
+        tmp.seek(0)
+        return rest_tool_functions.give_aid_cid_dict_from_xml(tmp)
+    finally:
+        tmp.close()
+
+
+def main(args):
+    """Write one "AID,CID" row per pair to args.outfile and close it."""
+    # NOTE(review): the --target value is passed on as the activity of the
+    # query, as in the original code -- confirm that this is intended.
+    try:
+        aid_cid_dict = getAllCidsForAssayActivity(args.target)
+        rest_tool_functions.write_to_csv(aid_cid_dict, args.outfile)
+    finally:
+        args.outfile.close()
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--outfile', type=argparse.FileType('w'), required=True,
+        help="Specify output file")
+    parser.add_argument('--target', type=str, required=True,
+        help="Specify the value to query assays for")
+    if len(sys.argv) < 2:
+        print("Too few arguments...")
+        parser.print_help()
+        sys.exit(1)
+    args = parser.parse_args()
+    main( args )
diff -r 1ad356686717 -r 9ee84d9fd3a7 rest_tool_comp_for_assay.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/rest_tool_comp_for_assay.py	Thu Apr 03 06:50:57 2014 -0400
@@ -0,0 +1,65 @@
+#!/usr/bin/env python
+"""Write the compounds (CIDs) of the given BioAssays as AID,CID rows."""
+
+import sys, os
+import argparse
+import tempfile
+import readfile
+import rest_tool_functions
+
+
+def get_aid_cid_dict_for_list(aidlist):
+    """Return a dict mapping each AID in *aidlist* to the list of its CIDs.
+
+    Returns an empty dict when *aidlist* is empty or when the request
+    yields no data (readfile.getresult returns an empty string on an HTTP
+    error); the XML parser would otherwise fail on the empty document.
+    """
+    if not aidlist:
+        return {}
+    aidliststring = ",".join(aidlist)
+    url = "http://pubchem.ncbi.nlm.nih.gov/rest/pug/assay/aid/" + aidliststring + "/cids/xml"
+    xml = readfile.getresult(url)
+    if not xml:
+        return {}
+    tmp = tempfile.TemporaryFile()
+    try:
+        tmp.write(xml)
+        tmp.seek(0)
+        return rest_tool_functions.give_aid_cid_dict_from_xml(tmp)
+    finally:
+        # Close the temporary file even when parsing fails.
+        tmp.close()
+
+
+def main(args):
+    """Collect the AIDs from --aid or --aid-file and write the CSV output."""
+    if args.aid_file is None:
+        # Tolerate spaces and empty items, e.g. "1, 2,,3".
+        aidlist = [aid.strip() for aid in args.aid.split(",") if aid.strip()]
+    else:
+        aidlist = readfile.getListFromFile(args.aid_file)
+        args.aid_file.close()
+    try:
+        dic = get_aid_cid_dict_for_list(aidlist)
+        rest_tool_functions.write_to_csv(dic, args.outfile)
+    finally:
+        args.outfile.close()
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--aid', type=str,
+        help="AIDs of the BioAssay")
+    parser.add_argument('--aid-file', dest="aid_file", type=argparse.FileType('r'),
+        help="Specify a file with a list of aids, one per line")
+    parser.add_argument('--outfile', type=argparse.FileType('w'), required=True,
+        help="Specify output file")
+    if len(sys.argv) < 2:
+        print("Too few arguments...")
+        parser.print_help()
+        sys.exit(1)
+    args = parser.parse_args()
+    if args.aid is None and args.aid_file is None:
+        parser.error("one of --aid or --aid-file is required")
+    main( args )
diff -r 1ad356686717 -r 9ee84d9fd3a7 rest_tool_functions.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/rest_tool_functions.py	Thu Apr 03 06:50:57 2014 -0400
@@ -0,0 +1,92 @@
+#!/usr/bin/env python
+"""Helpers shared by the PubChem REST tools: XML parsing and CSV output."""
+
+import sys, os
+import argparse
+import urllib2, urllib, httplib
+import readfile
+import xml.sax as sax
+
+
+class DictHandler(sax.handler.ContentHandler):
+    """SAX handler that collects the CIDs listed after each AID.
+
+    After parsing, ``ergebnis`` maps every AID string to the list of CID
+    strings that followed it in the document.
+    """
+
+    def __init__(self):
+        sax.handler.ContentHandler.__init__(self)
+        self.ergebnis = {}      # result: AID -> list of CIDs
+        self.schluessel = ""    # key: text of the current AID element
+        self.wert = ""          # value: text of the current CID element
+        self.aktiv = None       # name of the element being read, if any
+
+    def startElement(self, name, attrs):
+        if name == "Information":
+            # A new record starts; forget the previous AID/CID text.
+            self.schluessel = ""
+            self.wert = ""
+        elif name == "AID":
+            # Start from scratch so two AIDs in a row are not concatenated.
+            self.schluessel = ""
+            self.aktiv = name
+        elif name == "CID":
+            self.aktiv = name
+
+    def endElement(self, name):
+        if name == "AID":
+            self.schluessel = self.schluessel.strip()
+            # Keep CIDs already collected if the same AID shows up again.
+            self.ergebnis.setdefault(self.schluessel, [])
+            self.aktiv = None
+        elif name == "CID":
+            self.aktiv = None
+            # setdefault: a CID seen before any AID must not raise KeyError.
+            cids = self.ergebnis.setdefault(self.schluessel, [])
+            cids.append(self.wert.strip())
+            self.wert = ""
+
+    def characters(self, content):
+        # Text may arrive in several chunks, hence the accumulation.
+        if self.aktiv == "AID":
+            self.schluessel += content
+        elif self.aktiv == "CID":
+            self.wert += content
+
+
+def give_aid_cid_dict_from_xml(xmlfile):
+    """Parse *xmlfile* with DictHandler and return its AID -> CIDs dict."""
+    handler = DictHandler()
+    parser = sax.make_parser()
+    parser.setContentHandler(handler)
+    parser.parse(xmlfile)
+    return handler.ergebnis
+
+
+def getAllAssayIDs():
+    """Return the IDs of all PubChem assays as a list of strings."""
+    url = "http://pubchem.ncbi.nlm.nih.gov/rest/pug/assay/type/all/aids/TXT"
+    data = readfile.getresult(url)
+    # readfile has no getListFromString(); split the response here.
+    # Assumes the TXT body holds one ID per line -- TODO confirm.
+    return [line.strip() for line in data.splitlines() if line.strip()]
+
+
+def getIDofLine(line):
+    """Return the text between the first ">" and the following "<" of *line*.
+
+    For a line such as "<AID>123</AID>" that is "123".  Returns "-1" when
+    the line contains no ">".
+    """
+    arr = line.split(">")
+    if len(arr) > 1:
+        return arr[1].split("<")[0]
+    return "-1"
+
+
+def write_to_csv(aid_cid_dict, outfile):
+    """Write one "AID,CID" line per pair in *aid_cid_dict* to *outfile*."""
+    for key in aid_cid_dict:
+        for cid in aid_cid_dict[key]:
+            outfile.write(key + "," + cid + "\n")
diff -r 1ad356686717 -r 9ee84d9fd3a7 rest_tool_macros.xml
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/rest_tool_macros.xml	Thu Apr 03 06:50:57 2014 -0400
@@ -0,0 +1,66 @@
+
+    
+        
+        
+        
+    
+    
+    
+        
+        
+        
+    
+    
+        
+        
+        
+        
+        
+        
+        
+        
+        
+        
+        
+        
+        
+        
+    
+    
+        
+        
+        
+    
+    
+        
+        
+        
+        
+        
+        
+        
+        
+        
+        
+        
+        
+        
+    
+    
+
+                
+                
+                
+    
+    
+        
+        
+        
+        
+        
+        
+    
+    
+
+ 
+
diff -r 1ad356686717 -r 9ee84d9fd3a7 rest_tool_search.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/rest_tool_search.py	Thu Apr 03 06:50:57 2014 -0400
@@ -0,0 +1,46 @@
+#!/usr/bin/env python
+"""Look up PubChem assay, compound or substance IDs by name."""
+
+import sys, os
+import argparse
+import urllib
+import readfile
+
+
+def main(args):
+    """Search records of args.type by args.name and write the IDs found.
+
+    Assays yield AIDs, compounds CIDs and every other type SIDs.  The
+    result is written to args.outfile, which is closed afterwards.
+    """
+    # Names may contain spaces or other characters that are not valid in
+    # a URL path, so percent-encode the name before inserting it.
+    name = urllib.quote(args.name, safe="")
+    url = "http://pubchem.ncbi.nlm.nih.gov/rest/pug/" + args.type + "/name/" + name
+    if args.type == "assay":
+        url += "/aids"
+    elif args.type == "compound":
+        url += "/cids"
+    else:
+        url += "/sids"
+    url += "/txt"
+    try:
+        args.outfile.write(readfile.getresult(url))
+    finally:
+        args.outfile.close()
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--name', type=str, required=True,
+        help="Enter the name")
+    parser.add_argument('--type', type=str, required=True,
+        help="What you want to search for")
+    parser.add_argument('--outfile', type=argparse.FileType('w'), required=True,
+        help="Specify output file")
+    if len(sys.argv) < 2:
+        print("Too few arguments...")
+        parser.print_help()
+        sys.exit(1)
+    args = parser.parse_args()
+    main( args )
diff -r 1ad356686717 -r 9ee84d9fd3a7 tool_dependencies.xml
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml	Thu Apr 03 06:50:57 2014 -0400
@@ -0,0 +1,5 @@
+
+    
+        $REPOSITORY_INSTALL_DIR
+    
+