# HG changeset patch
# User bernhardlutz
# Date 1396522257 14400
# Node ID 9ee84d9fd3a7efcf6e183f757081d2aaded21ab2
# Parent 1ad35668671798d92e0c2210e2b4fc0e609d31fb
Uploaded
diff -r 1ad356686717 -r 9ee84d9fd3a7 readfile.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/readfile.py Thu Apr 03 06:50:57 2014 -0400
@@ -0,0 +1,47 @@
+#!/usr/bin/env python
+"""Helpers shared by the PubChem REST scripts: ID-list parsing and URL fetching."""
+
+import io
+import urllib2, urllib, httplib
+
+
+def getListFromFile(file):
+    """Return the numeric IDs found in *file* as a list of stripped strings.
+
+    *file* is any iterable of text lines holding one ID per line. Blank
+    lines and lines that are not integers are skipped instead of aborting
+    with a ValueError; the value 0 is skipped as well.
+    """
+    idlist = []
+    for line in file:
+        entry = line.strip()
+        try:
+            is_valid_id = int(entry) != 0
+        except ValueError:
+            # int() rejects empty strings and non-numeric text alike.
+            is_valid_id = False
+        if is_valid_id:
+            idlist.append(entry)
+    return idlist
+
+
+def getListFromString(data):
+    """Return the numeric IDs contained in the newline-separated string *data*."""
+    return getListFromFile(data.splitlines())
+
+
+def getresult(url):
+    """Fetch *url* and return the response body without trailing whitespace.
+
+    Returns "" when the server answers with an HTTP error status.
+    """
+    try:
+        connection = urllib2.urlopen(url)
+    except urllib2.HTTPError:
+        # Best effort: an HTTP error yields an empty result, not a traceback.
+        return ""
+    try:
+        return connection.read().rstrip()
+    finally:
+        # Release the socket even if reading the body fails.
+        connection.close()
diff -r 1ad356686717 -r 9ee84d9fd3a7 readfile.pyc
Binary file readfile.pyc has changed
diff -r 1ad356686717 -r 9ee84d9fd3a7 rest_tool.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/rest_tool.py Thu Apr 03 06:50:57 2014 -0400
@@ -0,0 +1,85 @@
+#!/usr/bin/env python
+"""Fetch data for PubChem IDs through the PUG REST interface."""
+
+import sys, os
+import argparse
+
+import readfile
+
+# Operations requested as plain text or CSV; all others are requested as XML.
+txt_output=["cids", "aids", "sids", "synonyms" ]
+csv_output=["assaysummary"]
+# Operations that accept an optional "<operation>_type" query parameter.
+check_for_id_type=["cids", "aids", "sids"]
+
+
+def main(args):
+    """Build the PUG REST URL described by *args* and save the response.
+
+    args -- namespace with type, operation, outfile (writable file) and
+            either id (string) or id_file (iterable of lines); id_type and
+            property_value are optional.
+    The URL is printed to stdout and the response body is written to
+    args.outfile, which is closed afterwards.
+    """
+    if args.id_file is not None:
+        idstring = ",".join(readfile.getListFromFile(args.id_file))
+    elif args.id is not None:
+        idstring = str(args.id)
+    else:
+        # Without this check the literal text "None" ended up in the URL.
+        sys.exit("Either --id or --id-file is required.")
+
+    url = "http://pubchem.ncbi.nlm.nih.gov/rest/pug/" + args.type + "/"
+    if args.type == "assay":
+        url += "aid/"
+    elif args.type == "compound":
+        url += "cid/"
+    elif args.type == "substance":
+        url += "sid/"
+    url += idstring + "/" + args.operation + "/"
+
+    # The property operation needs the requested property names in the path.
+    property_value = getattr(args, "property_value", None)
+    if args.operation == "property" and property_value is not None:
+        url += property_value + "/"
+
+    if args.operation in csv_output:
+        url += "csv"
+    elif args.operation in txt_output:
+        url += "txt"
+    else:
+        url += "xml"
+    if args.operation in check_for_id_type and args.id_type is not None:
+        url += "?" + args.operation + "_type=" + args.id_type
+    print(url)
+
+    outfile = args.outfile
+    try:
+        outfile.write(readfile.getresult(url))
+    finally:
+        outfile.close()
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--type', type=str, required=True,
+        help="Record type to query: assay, compound or substance")
+    parser.add_argument('--id', type=str,
+        help="Specify the ID")
+    parser.add_argument('--operation', type=str, required=True,
+        help="Specify the operation")
+    parser.add_argument('--property-value', dest="property_value", type=str,
+        help="Specify the property")
+    parser.add_argument('--id-type', dest="id_type", type=str,
+        help="Specify the ID type used to filter cids/aids/sids results")
+    parser.add_argument('--outfile', type=argparse.FileType('w'), required=True,
+        help="Specify one output file")
+    parser.add_argument('--id-file', dest="id_file", type=argparse.FileType('r'),
+        help="Specify a file with a list of ids, one per line")
+    if len(sys.argv) < 2:
+        print("Too few arguments...")
+        parser.print_help()
+        sys.exit(1)
+    args = parser.parse_args()
+    main( args )
diff -r 1ad356686717 -r 9ee84d9fd3a7 rest_tool.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/rest_tool.xml Thu Apr 03 06:50:57 2014 -0400
@@ -0,0 +1,256 @@
+
+ Fetch pubchem data
+ echo "0.1.0"
+
+ REST_TOOL_SCRIPT_PATH
+
+
+ rest_tool_macros.xml
+
+
+ #if $choose_action.action == 'search':
+ rest_tool_search.py --type $choose_action.input_type_search --name $choose_action.search_name --outfile $output
+
+ #elif $choose_action.action == 'specific_data':
+ rest_tool.py
+ #if $choose_action.field_or_file1.field_or_file1 == 'field':
+ --id $choose_action.field_or_file1.id1
+ #else:
+ --id-file $choose_action.field_or_file1.file_ids_1
+ #end if
+ --type $choose_action.choose_acs.input_type
+
+ #if $choose_action.choose_acs.input_type == 'assay':
+ --operation $choose_action.choose_acs.operation_assay.operation_assay
+ #if $choose_action.choose_acs.operation_assay.operation_assay == 'property':
+ --property-value $choose_action.choose_acs.operation_assay.property_assay
+ #elif $choose_action.choose_acs.operation_assay.operation_assay == 'aids':
+ --id-type $choose_action.choose_acs.operation_assay.aids_type_assay
+ #elif $choose_action.choose_acs.operation_assay.operation_assay == 'cids':
+ --id-type $choose_action.choose_acs.operation_assay.cids_type_assay
+ #elif $choose_action.choose_acs.operation_assay.operation_assay == 'sids':
+ --id-type $choose_action.choose_acs.operation_assay.sids_type_assay
+ #end if
+ #elif $choose_action.choose_acs.input_type == 'compound':
+ --operation $choose_action.choose_acs.operation_compound.operation_compound
+ #if $choose_action.choose_acs.operation_compound.operation_compound == 'property':
+ --property-value $choose_action.choose_acs.operation_compound.property_compound
+ #elif $choose_action.choose_acs.operation_compound.operation_compound == 'aids':
+ --id-type $choose_action.choose_acs.operation_compound.aids_type_compound
+ #elif $choose_action.choose_acs.operation_compound.operation_compound == 'cids':
+ --id-type $choose_action.choose_acs.operation_compound.cids_type_compound
+ #elif $choose_action.choose_acs.operation_compound.operation_compound == 'sids':
+ --id-type $choose_action.choose_acs.operation_compound.sids_type_compound
+ #end if
+ #else:
+ --operation $choose_action.choose_acs.operation_substance.operation_substance
+ #if $choose_action.choose_acs.operation_substance.operation_substance == 'property':
+ --property-value $choose_action.choose_acs.operation_substance.property_substance
+ #elif $choose_action.choose_acs.operation_substance.operation_substance == 'aids':
+ --id-type $choose_action.choose_acs.operation_substance.aids_type_substance
+ #elif $choose_action.choose_acs.operation_substance.operation_substance == 'cids':
+ --id-type $choose_action.choose_acs.operation_substance.cids_type_substance
+ #elif $choose_action.choose_acs.operation_substance.operation_substance == 'sids':
+ --id-type $choose_action.choose_acs.operation_substance.sids_type_substance
+ #end if
+ #end if
+ --outfile $output
+ #elif $choose_action.action == 'compounds_for_assay':
+ rest_tool_comp_for_assay.py
+ #if $choose_action.field_or_file2.field_or_file2 == 'field':
+ --aid $choose_action.field_or_file2.id2
+ #else:
+ --aid-file $choose_action.field_or_file2.file_ids_2
+ #end if
+ --outfile $output
+ #elif $choose_action.action == 'assays_by_activity_or_target':
+ rest_tool_assay_by_activity_or_target.py
+ #if $choose_action.activity_or_target.activity_or_target == 'activity':
+ --activity $choose_action.activity_or_target.activity
+ #else:
+ --target-id $choose_action.activity_or_target.target_id
+ --target-type $choose_action.activity_or_target.target_identifier_type
+ #end if
+ --outfile $output
+
+ #end if
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+**What it does**
+
+This tool fetches data from PubChem.
+
+
diff -r 1ad356686717 -r 9ee84d9fd3a7 rest_tool_alt.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/rest_tool_alt.xml Thu Apr 03 06:50:57 2014 -0400
@@ -0,0 +1,156 @@
+
+ Fetch pubchem data
+ echo "0.1.0"
+
+ REST_TOOL_SCRIPT_PATH
+
+
+ #if $choose_action.action == 'specific_data':
+ rest_tool.py
+ #if $choose_action.field_or_file1.field_or_file1 == 'field':
+ --id $choose_action.field_or_file1.id1
+ #else:
+ --id-file $choose_action.field_or_file1.file_ids_1
+ #end if
+ --type $choose_action.input_type
+
+ --operation $choose_action.operation_property.operation
+ #if $choose_action.operation_property.operation == 'property':
+ --property-value $choose_action.operation_property.property
+ #end if
+
+ --outfile $output
+ #elif $choose_action.action == 'compounds_for_assay':
+ rest_tool_comp_for_assay.py
+ #if $choose_action.field_or_file2.field_or_file2 == 'field':
+ --aid $choose_action.field_or_file2.id2
+ #else:
+ --aid-file $choose_action.field_or_file2.file_ids_2
+ #end if
+ --outfile $output
+ #elif $choose_action.action == 'assays_by_activity':
+ rest_tool_assay_by_activity_or_target.py --activity $choose_action.activity --outfile $output
+ #elif $choose_action.action == 'assays_by_targets':
+ rest_tool_assay_by_activity_or_target.py --target-type $choose_action.target_identifier_type --target-id $choose_action.target_id --outfile $output
+ #end if
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+**What it does**
+
+This tool fetches data from PubChem.
+
+
diff -r 1ad356686717 -r 9ee84d9fd3a7 rest_tool_assay_by_activity_or_target.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/rest_tool_assay_by_activity_or_target.py Thu Apr 03 06:50:57 2014 -0400
@@ -0,0 +1,46 @@
+#!/usr/bin/env python
+"""List the PubChem assay IDs matching an activity or a target."""
+
+import sys, os
+import argparse
+import readfile
+
+
+def main(args):
+    """Query PUG REST for assay IDs and write them to args.outfile.
+
+    The lookup is by args.activity when it is given, otherwise by the
+    target described by args.target_type and args.target_id. The output
+    file is closed afterwards.
+    """
+    url = "http://pubchem.ncbi.nlm.nih.gov/rest/pug/assay/"
+    if args.activity is not None:
+        url += "activity/" + args.activity
+    elif args.target_type is not None and args.target_id is not None:
+        # The parser stores these options as target_type / target_id.
+        url += "target/" + args.target_type + "/" + args.target_id
+    else:
+        sys.exit("Give either --activity or both --target-type and --target-id.")
+    url += "/aids/txt"
+    try:
+        args.outfile.write(readfile.getresult(url))
+    finally:
+        args.outfile.close()
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--activity', type=str,
+        help="Activities you are looking for")
+    parser.add_argument('--target-type', dest="target_type", type=str,
+        help="The target identifier type")
+    parser.add_argument('--target-id', dest="target_id", type=str,
+        help="The specific target")
+    parser.add_argument('--outfile', type=argparse.FileType('w'), required=True,
+        help="Specify output file")
+    if len(sys.argv) < 2:
+        print("Too few arguments...")
+        parser.print_help()
+        sys.exit(1)
+    args = parser.parse_args()
+    main( args )
diff -r 1ad356686717 -r 9ee84d9fd3a7 rest_tool_assays_with_cids_given_target.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/rest_tool_assays_with_cids_given_target.py Thu Apr 03 06:50:57 2014 -0400
@@ -0,0 +1,80 @@
+#!/usr/bin/env python
+"""Write assay ID / compound ID pairs as CSV (work in progress, see NOTE below)."""
+
+
+import sys, os
+import argparse
+import tempfile
+import readfile
+# These helpers were used below without being imported or defined.
+from rest_tool_functions import give_aid_cid_dict_from_xml, write_to_csv
+
+
+def getAllAssayIDs():
+    """Return every PubChem assay ID as a list of strings."""
+    url="http://pubchem.ncbi.nlm.nih.gov/rest/pug/assay/type/all/aids/TXT"
+    data=readfile.getresult(url)
+    return readfile.getListFromFile(data.splitlines())
+
+
+def getIDofLine(line):
+    """Return the text between the first ">" and the next "<" of *line*.
+
+    Returns "-1" when *line* contains no ">".
+    """
+    arr=line.split(">")
+    if len(arr) > 1:
+        return arr[1].split("<")[0]
+    return "-1"
+
+
+def getAllCidsForAssayActivity(activity):
+    """Return a dict mapping assay IDs to lists of compound IDs.
+
+    NOTE(review): the result is fetched for the fixed assay IDs 25425 and
+    12345; the list key requested for *activity* is not used yet. The
+    list-key based URL was left disabled in the original code -- confirm
+    the intended query before relying on *activity*.
+    """
+    url="http://pubchem.ncbi.nlm.nih.gov/rest/pug/assay/activity/"+activity+"/aids/txt?list_return=listkey"
+    listkey=readfile.getresult(url)
+    # Disabled in the original: ".../rest/pug/assay/listkey/"+listkey+"/cids/xml"
+    url="http://pubchem.ncbi.nlm.nih.gov/rest/pug/assay/aid/25425,12345/cids/xml"
+    print("url: "+url)
+    xml=readfile.getresult(url)
+    if not xml:
+        # getresult() yields "" on HTTP errors; there is nothing to parse.
+        return {}
+
+    # Parse from an anonymous temporary file instead of leaving a file
+    # called "tempfile" in the working directory.
+    tmp = tempfile.TemporaryFile()
+    try:
+        tmp.write(xml)
+        tmp.seek(0)
+        return give_aid_cid_dict_from_xml(tmp)
+    finally:
+        tmp.close()
+
+
+def main(args):
+    """Fetch the AID -> CIDs mapping for args.target and write it to args.outfile."""
+    aid_cid_dict=getAllCidsForAssayActivity(args.target)
+    try:
+        write_to_csv(aid_cid_dict, args.outfile)
+    finally:
+        args.outfile.close()
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--outfile', type=argparse.FileType('w'), required=True,
+        help="Specify output file")
+    parser.add_argument('--target', type=str, required=True,
+        help="Specify the activity to look up")
+    if len(sys.argv) < 2:
+        print("Too few arguments...")
+        parser.print_help()
+        sys.exit(1)
+    args = parser.parse_args()
+    main( args )
diff -r 1ad356686717 -r 9ee84d9fd3a7 rest_tool_comp_for_assay.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/rest_tool_comp_for_assay.py Thu Apr 03 06:50:57 2014 -0400
@@ -0,0 +1,60 @@
+#!/usr/bin/env python
+"""Write the compound IDs (CIDs) returned for the given PubChem assays as CSV."""
+
+import sys, os
+import argparse
+import tempfile
+import readfile
+import rest_tool_functions
+
+
+def get_aid_cid_dict_for_list(aidlist):
+    """Return a dict mapping each assay ID in *aidlist* to its list of CIDs.
+
+    Returns an empty dict when the request yields no data.
+    """
+    aidliststring=",".join(aidlist)
+    url="http://pubchem.ncbi.nlm.nih.gov/rest/pug/assay/aid/"+aidliststring+"/cids/xml"
+    xml=readfile.getresult(url)
+    if not xml:
+        # An empty document would make the XML parser raise.
+        return {}
+    # The parsing helper reads from a file object, so spool the text first.
+    tmp = tempfile.TemporaryFile()
+    try:
+        tmp.write(xml)
+        tmp.seek(0)
+        return rest_tool_functions.give_aid_cid_dict_from_xml(tmp)
+    finally:
+        tmp.close()
+
+
+def main(args):
+    """Resolve the assay IDs from *args* and write "aid,cid" rows to args.outfile."""
+    if args.aid_file is not None:
+        aidlist=readfile.getListFromFile(args.aid_file)
+    elif args.aid is not None:
+        aidlist=args.aid.split(",")
+    else:
+        sys.exit("Either --aid or --aid-file is required.")
+    dic=get_aid_cid_dict_for_list(aidlist)
+    try:
+        rest_tool_functions.write_to_csv(dic, args.outfile)
+    finally:
+        args.outfile.close()
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--aid', type=str,
+        help="AIDs of the BioAssay")
+    parser.add_argument('--aid-file', dest="aid_file", type=argparse.FileType('r'),
+        help="Specify a file with a list of aids, one per line")
+    parser.add_argument('--outfile', type=argparse.FileType('w'), required=True,
+        help="Specify output file")
+    if len(sys.argv) < 2:
+        print("Too few arguments...")
+        parser.print_help()
+        sys.exit(1)
+    args = parser.parse_args()
+    main( args )
diff -r 1ad356686717 -r 9ee84d9fd3a7 rest_tool_functions.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/rest_tool_functions.py Thu Apr 03 06:50:57 2014 -0400
@@ -0,0 +1,90 @@
+#!/usr/bin/env python
+"""XML parsing and CSV output helpers shared by the PubChem REST scripts."""
+
+
+import sys, os
+import argparse
+import urllib2, urllib, httplib
+import readfile
+import xml.sax as sax
+
+
+class DictHandler(sax.handler.ContentHandler):
+    """SAX handler that collects the CIDs listed after each AID element.
+
+    After parsing, ``ergebnis`` maps every assay ID to the list of
+    compound IDs that followed it in the document.
+    """
+
+    def __init__(self):
+        sax.handler.ContentHandler.__init__(self)
+        self.ergebnis = {}    # result: AID -> list of CIDs
+        self.schluessel = ""  # text of the current AID (the dict key)
+        self.wert = ""        # text of the CID being read
+        self.aktiv = None     # element whose text is collected: "AID", "CID" or None
+
+    def startElement(self, name, attrs):
+        if name == "Information":
+            self.schluessel = ""
+            self.wert = ""
+        elif name == "AID":
+            # Start a fresh key so consecutive AIDs are not concatenated.
+            self.schluessel = ""
+            self.aktiv = name
+        elif name == "CID":
+            self.wert = ""
+            self.aktiv = name
+
+    def endElement(self, name):
+        if name == "AID":
+            self.schluessel = self.schluessel.strip()
+            # Keep CIDs already collected if the same AID shows up again.
+            self.ergebnis.setdefault(self.schluessel, [])
+            self.aktiv = None
+        elif name == "CID":
+            self.aktiv = None
+            # setdefault guards against a CID that precedes any AID.
+            self.ergebnis.setdefault(self.schluessel, []).append(self.wert.strip())
+            self.wert = ""
+
+    def characters(self, content):
+        # May be called several times per element, hence the accumulation.
+        if self.aktiv == "AID":
+            self.schluessel += content
+        elif self.aktiv == "CID":
+            self.wert += content
+
+
+def give_aid_cid_dict_from_xml(xmlfile):
+    """Parse *xmlfile* (file name or file object) and return the AID -> CIDs dict."""
+    handler = DictHandler()
+    parser = sax.make_parser()
+    parser.setContentHandler(handler)
+    parser.parse(xmlfile)
+    return handler.ergebnis
+
+
+def getAllAssayIDs():
+    """Return every PubChem assay ID as a list of strings."""
+    url="http://pubchem.ncbi.nlm.nih.gov/rest/pug/assay/type/all/aids/TXT"
+    data=readfile.getresult(url)
+    # getListFromFile accepts any iterable of lines.
+    return readfile.getListFromFile(data.splitlines())
+
+
+def getIDofLine(line):
+    """Return the text between the first ">" and the next "<" of *line*.
+
+    Returns "-1" when *line* contains no ">".
+    """
+    arr=line.split(">")
+    if len(arr) > 1:
+        return arr[1].split("<")[0]
+    return "-1"
+
+
+def write_to_csv(aid_cid_dict, outfile):
+    """Write one "aid,cid" line to *outfile* for every CID of every assay."""
+    for key in aid_cid_dict:
+        for cid in aid_cid_dict[key]:
+            outfile.write(key + "," + cid + "\n")
+
diff -r 1ad356686717 -r 9ee84d9fd3a7 rest_tool_macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/rest_tool_macros.xml Thu Apr 03 06:50:57 2014 -0400
@@ -0,0 +1,66 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff -r 1ad356686717 -r 9ee84d9fd3a7 rest_tool_search.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/rest_tool_search.py Thu Apr 03 06:50:57 2014 -0400
@@ -0,0 +1,46 @@
+#!/usr/bin/env python
+"""Look up PubChem assay, compound or substance IDs by name."""
+
+import sys, os
+import argparse
+import urllib
+import readfile
+
+
+def main(args):
+    """Search records of args.type by args.name and write the IDs to args.outfile.
+
+    "assay" yields AIDs, "compound" yields CIDs and any other type yields
+    SIDs. The output file is closed afterwards.
+    """
+    # Percent-encode the name: it may hold spaces or other characters that
+    # are not allowed in a URL path.
+    url = ("http://pubchem.ncbi.nlm.nih.gov/rest/pug/" + args.type
+           + "/name/" + urllib.quote(args.name))
+    if args.type == "assay":
+        url += "/aids"
+    elif args.type == "compound":
+        url += "/cids"
+    else:
+        url += "/sids"
+    url += "/txt"
+    try:
+        args.outfile.write(readfile.getresult(url))
+    finally:
+        args.outfile.close()
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--name', type=str, required=True,
+        help="Enter the name")
+    parser.add_argument('--type', type=str, required=True,
+        help="What you want to search for")
+    parser.add_argument('--outfile', type=argparse.FileType('w'), required=True,
+        help="Specify output file")
+    if len(sys.argv) < 2:
+        print("Too few arguments...")
+        parser.print_help()
+        sys.exit(1)
+    args = parser.parse_args()
+    main( args )
diff -r 1ad356686717 -r 9ee84d9fd3a7 tool_dependencies.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml Thu Apr 03 06:50:57 2014 -0400
@@ -0,0 +1,5 @@
+
+
+ $REPOSITORY_INSTALL_DIR
+
+