changeset 0:28d72b995c6b draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/dgidb_annotator commit 5a4e0ca9992af3a6e5ed2b533f04bb82ce761e0b
author devteam
date Mon, 09 Nov 2015 11:29:28 -0500
parents
children
files dgidb_annotator.py dgidb_annotator.xml test-data/in1.tabular test-data/out1.tabular
diffstat 4 files changed, 182 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/dgidb_annotator.py	Mon Nov 09 11:29:28 2015 -0500
@@ -0,0 +1,87 @@
+'''
+Annotates a tabular file with information from the Drug-Gene Interaction (DGI) database.
+'''
+
+import optparse, json, urllib2, sys
+
+def __main__():
+    '''
+    Annotate each row of a tabular file with drug-gene interactions from DGIdb.
+
+    The input is the file named by the first positional argument, or stdin.
+    Options: -g is the 1-based gene name column (required), -a also prints rows
+    without a result, -e restricts the query to expert curated sources. Output
+    goes to stdout, one row per interaction: the input row followed by gene name,
+    long name, categories, interaction type, drug name and source.
+    '''
+    from urllib import quote
+
+    # -- Parse command line. --
+    parser = optparse.OptionParser()
+    parser.add_option('-g', '--gene-name-col', dest='gene_name_col', help='column of gene names')
+    parser.add_option('-a', '--print-all', dest='print_all', action='store_true', help='print all lines, even though without a result')
+    parser.add_option('-e', '--expert-curated', dest='expert_curated', action='store_true', help='use only expert curated results')
+    (options, args) = parser.parse_args()
+    if options.gene_name_col is None:
+        parser.error('a gene name column (-g) is required')
+    gene_name_col = int(options.gene_name_col) - 1
+
+    # -- Read the input, pairing every row with its normalized gene name. --
+    input_file = open(args[0], 'r') if args else sys.stdin
+    rows = []
+    try:
+        for line in input_file:
+            line = line.rstrip('\r\n')  # keep empty edge columns in place
+            # Some annotations may be of the form
+            #    <gene_name>(<splicing_info>) or <gene_name>;<gene_name>(splicing_info)
+            try:
+                gene = line.split('\t')[gene_name_col].strip().split(';')[0].split('(')[0]
+            except IndexError:
+                # Row is too short to have a gene; it is only kept for --print-all.
+                gene = ''
+            rows.append((line, gene))
+    finally:
+        if input_file is not sys.stdin:
+            input_file.close()
+
+    # -- Set up gene list queries. --
+    # Pack whole, URL-quoted gene names into comma separated queries of at most
+    # ~8K characters because this is near the max HTTP request length.
+    MAX_QUERY_SIZE = 8000
+    queries = []
+    for gene in set(quote(gene) for line, gene in rows if gene):
+        if queries and len(queries[-1]) + len(gene) < MAX_QUERY_SIZE:
+            queries[-1] += ',' + gene
+        else:
+            queries.append(gene)
+
+    # -- Query and process results, keyed by search term. --
+    annotations = {}
+    for genes in queries:
+        query_str = 'http://dgidb.genome.wustl.edu/api/v1/interactions.json?genes=%s' % genes
+        if options.expert_curated:
+            query_str += '&source_trust_levels=Expert%20curated'
+        response = urllib2.urlopen(query_str)
+        try:
+            matched_terms = json.loads(response.read())['matchedTerms']
+        finally:
+            response.close()
+        for result in matched_terms:
+            # These three columns are shared by every interaction of a gene.
+            gene_fields = [ result['geneName'], result['geneLongName'], ','.join( result['geneCategories'] ) ]
+            annotations[ result['searchTerm'] ] = [
+                '\t'.join( gene_fields + [ interaction['interactionType'], interaction['drugName'], interaction['source'] ] )
+                for interaction in result['interactions']
+            ]
+
+    # -- Annotate input file and produce output. --
+    for line, gene in rows:
+        gene_annotations = annotations.get(gene)
+        if gene_annotations:
+            for annotation in gene_annotations:
+                print line + '\t' + annotation
+        elif options.print_all:
+            # Unmatched rows and matched genes without interactions end up here.
+            print line
+
+if __name__=="__main__": __main__()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/dgidb_annotator.xml	Mon Nov 09 11:29:28 2015 -0500
@@ -0,0 +1,45 @@
+<tool id="dgidb_annotate" name="Annotate with DGI" version="0.1">
+    <description>database info</description>
+    
+    <command interpreter="python">
+        dgidb_annotator.py
+        #if $expert_curated_only:
+            -e
+        #end if
+        #if $print_all:
+            -a
+        #end if
+        -g $gene_name_col
+        $input &gt; $output
+    </command>
+    <inputs>
+        <param name="input" label="Input" type="data" format="tabular"/>
+        <param name="expert_curated_only" label="Use Only Expert Curated Results" type="boolean"/>
+        <param name="print_all" label="Print All Rows, including those without Annotation" type="boolean"/>
+        <param name="gene_name_col" label="Gene Name Column" type="data_column" data_ref="input"/>
+    </inputs>
+
+    <outputs>
+        <data name="output" format="tabular"/>
+    </outputs>
+
+    <stdio>
+        <regex match=".*" source="both" level="log" description="tool progress"/>
+    </stdio>
+
+    <tests>
+        <test>
+            <param name="input" value="in1.tabular"/>
+            <param name="expert_curated_only" value="True"/>
+            <param name="print_all" value="False"/>
+            <param name="gene_name_col" value="1"/>
+            <output name="output" file="out1.tabular"/>
+        </test>
+    </tests>
+
+    <help>
+Annotates a tabular dataset with information from the `Drug-Gene Interaction database`__
+
+.. __: http://dgidb.genome.wustl.edu/
+    </help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/in1.tabular	Mon Nov 09 11:29:28 2015 -0500
@@ -0,0 +1,4 @@
+RET	gene1
+BIRC3	gene2
+ATM	gene3
+KRAS	gene4
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/out1.tabular	Mon Nov 09 11:29:28 2015 -0500
@@ -0,0 +1,46 @@
+RET	gene1	RET	ret proto-oncogene	TYROSINE KINASE,DRUGGABLE GENOME,KINASE,CLINICALLY ACTIONABLE	n/a	SUNITINIB	TEND
+RET	gene1	RET	ret proto-oncogene	TYROSINE KINASE,DRUGGABLE GENOME,KINASE,CLINICALLY ACTIONABLE	n/a	IMATINIB	TEND
+RET	gene1	RET	ret proto-oncogene	TYROSINE KINASE,DRUGGABLE GENOME,KINASE,CLINICALLY ACTIONABLE	inhibitor	MOTESANIB	MyCancerGenome
+RET	gene1	RET	ret proto-oncogene	TYROSINE KINASE,DRUGGABLE GENOME,KINASE,CLINICALLY ACTIONABLE	inhibitor	VANDETANIB	MyCancerGenome
+RET	gene1	RET	ret proto-oncogene	TYROSINE KINASE,DRUGGABLE GENOME,KINASE,CLINICALLY ACTIONABLE	inhibitor	SORAFENIB	MyCancerGenome
+RET	gene1	RET	ret proto-oncogene	TYROSINE KINASE,DRUGGABLE GENOME,KINASE,CLINICALLY ACTIONABLE	inhibitor	REGORAFENIB	MyCancerGenome
+RET	gene1	RET	ret proto-oncogene	TYROSINE KINASE,DRUGGABLE GENOME,KINASE,CLINICALLY ACTIONABLE	inhibitor	AMUVATINIB	MyCancerGenome
+RET	gene1	RET	ret proto-oncogene	TYROSINE KINASE,DRUGGABLE GENOME,KINASE,CLINICALLY ACTIONABLE	inhibitor	SUNITINIB	MyCancerGenome
+RET	gene1	RET	ret proto-oncogene	TYROSINE KINASE,DRUGGABLE GENOME,KINASE,CLINICALLY ACTIONABLE	inhibitor	CABOZANTINIB	MyCancerGenome
+RET	gene1	RET	ret proto-oncogene	TYROSINE KINASE,DRUGGABLE GENOME,KINASE,CLINICALLY ACTIONABLE	inhibitor	SUNITINIB	TALC
+RET	gene1	RET	ret proto-oncogene	TYROSINE KINASE,DRUGGABLE GENOME,KINASE,CLINICALLY ACTIONABLE	inhibitor	VANDETANIB	TALC
+RET	gene1	RET	ret proto-oncogene	TYROSINE KINASE,DRUGGABLE GENOME,KINASE,CLINICALLY ACTIONABLE	inhibitor	MOTESANIB	TALC
+RET	gene1	RET	ret proto-oncogene	TYROSINE KINASE,DRUGGABLE GENOME,KINASE,CLINICALLY ACTIONABLE	inhibitor	CABOZANTINIB	TALC
+RET	gene1	RET	ret proto-oncogene	TYROSINE KINASE,DRUGGABLE GENOME,KINASE,CLINICALLY ACTIONABLE	inhibitor	REGORAFENIB	TALC
+RET	gene1	RET	ret proto-oncogene	TYROSINE KINASE,DRUGGABLE GENOME,KINASE,CLINICALLY ACTIONABLE	inhibitor	LENVATINIB	TALC
+RET	gene1	RET	ret proto-oncogene	TYROSINE KINASE,DRUGGABLE GENOME,KINASE,CLINICALLY ACTIONABLE	inhibitor	AMUVATINIB	TALC
+RET	gene1	RET	ret proto-oncogene	TYROSINE KINASE,DRUGGABLE GENOME,KINASE,CLINICALLY ACTIONABLE	inhibitor	AT9283	TALC
+RET	gene1	RET	ret proto-oncogene	TYROSINE KINASE,DRUGGABLE GENOME,KINASE,CLINICALLY ACTIONABLE	n/a	VANDETANIB	ClearityFoundationClinicalTrial
+RET	gene1	RET	ret proto-oncogene	TYROSINE KINASE,DRUGGABLE GENOME,KINASE,CLINICALLY ACTIONABLE	n/a	DOVITINIB	ClearityFoundationClinicalTrial
+BIRC3	gene2	BIRC3	baculoviral IAP repeat containing 3		antagonist	AEG40826	TALC
+BIRC3	gene2	BIRC3	baculoviral IAP repeat containing 3		antagonist	TL 32711	TALC
+BIRC3	gene2	BIRC3	baculoviral IAP repeat containing 3		antagonist	AT-406	TALC
+BIRC3	gene2	BIRC3	baculoviral IAP repeat containing 3		antagonist	GDC0917	TALC
+BIRC3	gene2	BIRC3	baculoviral IAP repeat containing 3		antagonist	LCL161	TALC
+ATM	gene3	ATM	ataxia telangiectasia mutated	PHOSPHATIDYLINOSITOL 3 KINASE,DRUGGABLE GENOME,CLINICALLY ACTIONABLE,SERINE THREONINE KINASE,KINASE,TUMOR SUPPRESSOR,DNA REPAIR	n/a	E7449	ClearityFoundationBiomarkers
+ATM	gene3	ATM	ataxia telangiectasia mutated	PHOSPHATIDYLINOSITOL 3 KINASE,DRUGGABLE GENOME,CLINICALLY ACTIONABLE,SERINE THREONINE KINASE,KINASE,TUMOR SUPPRESSOR,DNA REPAIR	n/a	NIRAPARIB	ClearityFoundationBiomarkers
+ATM	gene3	ATM	ataxia telangiectasia mutated	PHOSPHATIDYLINOSITOL 3 KINASE,DRUGGABLE GENOME,CLINICALLY ACTIONABLE,SERINE THREONINE KINASE,KINASE,TUMOR SUPPRESSOR,DNA REPAIR	n/a	OLAPARIB	ClearityFoundationBiomarkers
+ATM	gene3	ATM	ataxia telangiectasia mutated	PHOSPHATIDYLINOSITOL 3 KINASE,DRUGGABLE GENOME,CLINICALLY ACTIONABLE,SERINE THREONINE KINASE,KINASE,TUMOR SUPPRESSOR,DNA REPAIR	n/a	BMN673	ClearityFoundationBiomarkers
+ATM	gene3	ATM	ataxia telangiectasia mutated	PHOSPHATIDYLINOSITOL 3 KINASE,DRUGGABLE GENOME,CLINICALLY ACTIONABLE,SERINE THREONINE KINASE,KINASE,TUMOR SUPPRESSOR,DNA REPAIR	n/a	RUCAPARIB	ClearityFoundationBiomarkers
+ATM	gene3	ATM	ataxia telangiectasia mutated	PHOSPHATIDYLINOSITOL 3 KINASE,DRUGGABLE GENOME,CLINICALLY ACTIONABLE,SERINE THREONINE KINASE,KINASE,TUMOR SUPPRESSOR,DNA REPAIR	n/a	VELIPARIB	ClearityFoundationBiomarkers
+KRAS	gene4	KRAS	v-Ki-ras2 Kirsten rat sarcoma viral oncogene homolog	CLINICALLY ACTIONABLE	n/a	VANDETANIB	ClearityFoundationBiomarkers
+KRAS	gene4	KRAS	v-Ki-ras2 Kirsten rat sarcoma viral oncogene homolog	CLINICALLY ACTIONABLE	n/a	EVEROLIMUS	ClearityFoundationBiomarkers
+KRAS	gene4	KRAS	v-Ki-ras2 Kirsten rat sarcoma viral oncogene homolog	CLINICALLY ACTIONABLE	n/a	TEMSIROLIMUS	ClearityFoundationBiomarkers
+KRAS	gene4	KRAS	v-Ki-ras2 Kirsten rat sarcoma viral oncogene homolog	CLINICALLY ACTIONABLE	n/a	GDC-0973	ClearityFoundationBiomarkers
+KRAS	gene4	KRAS	v-Ki-ras2 Kirsten rat sarcoma viral oncogene homolog	CLINICALLY ACTIONABLE	n/a	MEK162	ClearityFoundationBiomarkers
+KRAS	gene4	KRAS	v-Ki-ras2 Kirsten rat sarcoma viral oncogene homolog	CLINICALLY ACTIONABLE	n/a	PD-325901	ClearityFoundationBiomarkers
+KRAS	gene4	KRAS	v-Ki-ras2 Kirsten rat sarcoma viral oncogene homolog	CLINICALLY ACTIONABLE	n/a	RAFAMETINIB	ClearityFoundationBiomarkers
+KRAS	gene4	KRAS	v-Ki-ras2 Kirsten rat sarcoma viral oncogene homolog	CLINICALLY ACTIONABLE	n/a	SELUMETINIB	ClearityFoundationBiomarkers
+KRAS	gene4	KRAS	v-Ki-ras2 Kirsten rat sarcoma viral oncogene homolog	CLINICALLY ACTIONABLE	n/a	TRAMETINIB	ClearityFoundationBiomarkers
+KRAS	gene4	KRAS	v-Ki-ras2 Kirsten rat sarcoma viral oncogene homolog	CLINICALLY ACTIONABLE	n/a	CETUXIMAB	ClearityFoundationBiomarkers
+KRAS	gene4	KRAS	v-Ki-ras2 Kirsten rat sarcoma viral oncogene homolog	CLINICALLY ACTIONABLE	n/a	ERLOTINIB	ClearityFoundationBiomarkers
+KRAS	gene4	KRAS	v-Ki-ras2 Kirsten rat sarcoma viral oncogene homolog	CLINICALLY ACTIONABLE	n/a	GEFITINIB	ClearityFoundationBiomarkers
+KRAS	gene4	KRAS	v-Ki-ras2 Kirsten rat sarcoma viral oncogene homolog	CLINICALLY ACTIONABLE	n/a	PANITUMUMAB	ClearityFoundationBiomarkers
+KRAS	gene4	KRAS	v-Ki-ras2 Kirsten rat sarcoma viral oncogene homolog	CLINICALLY ACTIONABLE	n/a	PIMASERTIB	ClearityFoundationBiomarkers
+KRAS	gene4	KRAS	v-Ki-ras2 Kirsten rat sarcoma viral oncogene homolog	CLINICALLY ACTIONABLE	inhibitor	REOLYSIN	CancerCommons
+KRAS	gene4	KRAS	v-Ki-ras2 Kirsten rat sarcoma viral oncogene homolog	CLINICALLY ACTIONABLE	vaccine	RAS PEPTIDE CANCER VACCINE	TALC