Mercurial > repos > devteam > dgidb_annotator
changeset 0:28d72b995c6b draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/dgidb_annotator commit 5a4e0ca9992af3a6e5ed2b533f04bb82ce761e0b
author | devteam |
---|---|
date | Mon, 09 Nov 2015 11:29:28 -0500 |
parents | |
children | |
files | dgidb_annotator.py dgidb_annotator.xml test-data/in1.tabular test-data/out1.tabular |
diffstat | 4 files changed, 182 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
# dgidb_annotator.py
'''
Annotates a tabular file with information from the Drug-Gene Interaction (DGI) database.

Usage: dgidb_annotator.py -g <1-based gene name column> [-a] [-e] [input_file]

Input is read from ``input_file`` (or stdin when omitted) and the annotated
rows are written to stdout.  For every interaction reported for a row's gene,
the row is emitted once with six extra tab-separated columns: gene name,
gene long name, comma-joined gene categories, interaction type, drug name
and source.
'''

import json
import optparse
import sys
from contextlib import closing

try:  # Python 3
    from urllib.parse import quote
    from urllib.request import urlopen
except ImportError:  # Python 2
    from urllib import quote
    from urllib2 import urlopen

DGIDB_INTERACTIONS_URL = 'http://dgidb.genome.wustl.edu/api/v1/interactions.json'

# Gene lists are kept to ~8K characters because this is near the max HTTP
# request length.
MAX_QUERY_SIZE = 8000


def extract_gene_name(line, gene_name_col):
    '''
    Return the gene name found in column ``gene_name_col`` (0-based) of a
    tab-separated ``line``.

    Some annotations may be of the form <gene_name>(<splicing_info>) or
    <gene_name>;<gene_name>(splicing_info); only the first bare gene name is
    returned.  An empty string is returned when the line has too few columns.
    '''
    fields = line.split('\t')
    if gene_name_col >= len(fields):
        return ''
    entry = fields[gene_name_col].strip()
    return entry.split(';')[0].split('(')[0].strip()


def build_queries(gene_names, max_query_size=MAX_QUERY_SIZE):
    '''
    Group the unique, non-empty ``gene_names`` into comma-separated query
    strings of at most ``max_query_size`` characters each.

    Names are never split across queries; a single name longer than the limit
    gets a query of its own.  Returns a (possibly empty) list of strings.
    '''
    queries = []
    current = []
    current_len = 0
    # Sorted so that the generated requests are deterministic.
    for name in sorted(set(gene_names)):
        if not name:
            continue
        added = len(name) + (1 if current else 0)  # +1 for the joining comma
        if current and current_len + added > max_query_size:
            queries.append(','.join(current))
            current = [name]
            current_len = len(name)
        else:
            current.append(name)
            current_len += added
    if current:
        queries.append(','.join(current))
    return queries


def build_query_url(genes, expert_curated=False):
    '''
    Return the interactions URL for the comma-separated ``genes`` string,
    percent-encoding everything except the separating commas.
    '''
    url = '%s?genes=%s' % (DGIDB_INTERACTIONS_URL, quote(genes, safe=','))
    if expert_curated:
        url += '&source_trust_levels=Expert%20curated'
    return url


def fetch_matched_terms(queries, expert_curated=False):
    '''
    Run every query and return the concatenated ``matchedTerms`` lists of the
    JSON responses.  Network and JSON errors propagate to the caller.
    '''
    matched_terms = []
    for genes in queries:
        with closing(urlopen(build_query_url(genes, expert_curated))) as response:
            payload = response.read()
        matched_terms.extend(json.loads(payload.decode('utf-8'))['matchedTerms'])
    return matched_terms


def format_matched_terms(matched_terms):
    '''
    Map each result's ``searchTerm`` to a list of tab-joined annotation
    strings, one per interaction.
    '''
    annotations = {}
    for term in matched_terms:
        gene_fields = [
            term['geneName'],
            term['geneLongName'],
            ','.join(term['geneCategories']),
        ]
        annotations[term['searchTerm']] = [
            '\t'.join(gene_fields + [
                interaction['interactionType'],
                interaction['drugName'],
                interaction['source'],
            ])
            for interaction in term['interactions']
        ]
    return annotations


def annotate_rows(rows, annotations, print_all=False):
    '''
    Return the output lines for ``rows``, a sequence of ``(line, gene)`` pairs.

    A row is repeated once per annotation of its gene.  Rows without any
    annotation are dropped unless ``print_all`` is true, in which case they
    are passed through unchanged.
    '''
    output = []
    for line, gene in rows:
        results = annotations.get(gene)
        if results:
            output.extend(line + '\t' + result for result in results)
        elif print_all:
            output.append(line)
    return output


def __main__():
    '''Parse the command line, query DGIdb and print the annotated rows.'''
    # -- Parse command line. --
    parser = optparse.OptionParser()
    parser.add_option('-g', '--gene-name-col', dest='gene_name_col', type='int', help='column of gene names')
    parser.add_option('-a', '--print-all', dest='print_all', action='store_true', help='print all lines, even though without a result')
    parser.add_option('-e', '--expert-curated', dest='expert_curated', action='store_true', help='use only expert curated results')
    (options, args) = parser.parse_args()
    if options.gene_name_col is None or options.gene_name_col < 1:
        parser.error('a 1-based gene name column must be given with -g/--gene-name-col')
    gene_name_col = options.gene_name_col - 1

    # -- Read input, remembering the gene name of every row. --
    input_file = open(args[0], 'r') if args else sys.stdin
    try:
        rows = []
        for raw_line in input_file:
            # Only the line terminator is removed so that empty leading or
            # trailing columns keep their position.
            line = raw_line.rstrip('\r\n')
            rows.append((line, extract_gene_name(line, gene_name_col)))
    finally:
        if input_file is not sys.stdin:
            input_file.close()

    # -- Query and process results. --
    queries = build_queries(gene for _, gene in rows)
    matched_terms = fetch_matched_terms(queries, options.expert_curated)
    annotations = format_matched_terms(matched_terms)

    # -- Annotate input file and produce output. --
    for out_line in annotate_rows(rows, annotations, options.print_all):
        sys.stdout.write(out_line + '\n')


if __name__ == "__main__":
    __main__()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/dgidb_annotator.xml Mon Nov 09 11:29:28 2015 -0500 @@ -0,0 +1,45 @@ +<tool id="dgidb_annotate" name="Annotate with DGI" version="0.1"> + <description>database info</description> + + <command interpreter="python"> + dgidb_annotator.py + #if $expert_curated_only: + -e + #end if + #if $print_all: + -a + #end if + -g $gene_name_col + $input > $output + </command> + <inputs> + <param name="input" label="Input" type="data" format="tabular"/> + <param name="expert_curated_only" label="Use Only Expert Curated Results" type="boolean"/> + <param name="print_all" label="Print All Rows, including those without Annotation" type="boolean"/> + <param name="gene_name_col" label="Gene Name Column" type="data_column" data_ref="input"/> + </inputs> + + <outputs> + <data name="output" format="tabular"/> + </outputs> + + <stdio> + <regex match=".*" source="both" level="log" description="tool progress"/> + </stdio> + + <tests> + <test> + <param name="input" value="in1.tabular"/> + <param name="expert_curated_only" value="True"/> + <param name="print_all" value="False"/> + <param name="gene_name_col" value="1"/> + <output name="output" file="out1.tabular"/> + </test> + </tests> + + <help> +Annotates a tabular dataset with information from the `Drug-Gene Interactions database`__ + +.. __: http://dgidb.genome.wustl.edu/ + </help> +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/in1.tabular Mon Nov 09 11:29:28 2015 -0500 @@ -0,0 +1,4 @@ +RET gene1 +BIRC3 gene2 +ATM gene3 +KRAS gene4
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/out1.tabular Mon Nov 09 11:29:28 2015 -0500 @@ -0,0 +1,46 @@ +RET gene1 RET ret proto-oncogene TYROSINE KINASE,DRUGGABLE GENOME,KINASE,CLINICALLY ACTIONABLE n/a SUNITINIB TEND +RET gene1 RET ret proto-oncogene TYROSINE KINASE,DRUGGABLE GENOME,KINASE,CLINICALLY ACTIONABLE n/a IMATINIB TEND +RET gene1 RET ret proto-oncogene TYROSINE KINASE,DRUGGABLE GENOME,KINASE,CLINICALLY ACTIONABLE inhibitor MOTESANIB MyCancerGenome +RET gene1 RET ret proto-oncogene TYROSINE KINASE,DRUGGABLE GENOME,KINASE,CLINICALLY ACTIONABLE inhibitor VANDETANIB MyCancerGenome +RET gene1 RET ret proto-oncogene TYROSINE KINASE,DRUGGABLE GENOME,KINASE,CLINICALLY ACTIONABLE inhibitor SORAFENIB MyCancerGenome +RET gene1 RET ret proto-oncogene TYROSINE KINASE,DRUGGABLE GENOME,KINASE,CLINICALLY ACTIONABLE inhibitor REGORAFENIB MyCancerGenome +RET gene1 RET ret proto-oncogene TYROSINE KINASE,DRUGGABLE GENOME,KINASE,CLINICALLY ACTIONABLE inhibitor AMUVATINIB MyCancerGenome +RET gene1 RET ret proto-oncogene TYROSINE KINASE,DRUGGABLE GENOME,KINASE,CLINICALLY ACTIONABLE inhibitor SUNITINIB MyCancerGenome +RET gene1 RET ret proto-oncogene TYROSINE KINASE,DRUGGABLE GENOME,KINASE,CLINICALLY ACTIONABLE inhibitor CABOZANTINIB MyCancerGenome +RET gene1 RET ret proto-oncogene TYROSINE KINASE,DRUGGABLE GENOME,KINASE,CLINICALLY ACTIONABLE inhibitor SUNITINIB TALC +RET gene1 RET ret proto-oncogene TYROSINE KINASE,DRUGGABLE GENOME,KINASE,CLINICALLY ACTIONABLE inhibitor VANDETANIB TALC +RET gene1 RET ret proto-oncogene TYROSINE KINASE,DRUGGABLE GENOME,KINASE,CLINICALLY ACTIONABLE inhibitor MOTESANIB TALC +RET gene1 RET ret proto-oncogene TYROSINE KINASE,DRUGGABLE GENOME,KINASE,CLINICALLY ACTIONABLE inhibitor CABOZANTINIB TALC +RET gene1 RET ret proto-oncogene TYROSINE KINASE,DRUGGABLE GENOME,KINASE,CLINICALLY ACTIONABLE inhibitor REGORAFENIB TALC +RET gene1 RET ret proto-oncogene TYROSINE KINASE,DRUGGABLE GENOME,KINASE,CLINICALLY ACTIONABLE inhibitor 
LENVATINIB TALC +RET gene1 RET ret proto-oncogene TYROSINE KINASE,DRUGGABLE GENOME,KINASE,CLINICALLY ACTIONABLE inhibitor AMUVATINIB TALC +RET gene1 RET ret proto-oncogene TYROSINE KINASE,DRUGGABLE GENOME,KINASE,CLINICALLY ACTIONABLE inhibitor AT9283 TALC +RET gene1 RET ret proto-oncogene TYROSINE KINASE,DRUGGABLE GENOME,KINASE,CLINICALLY ACTIONABLE n/a VANDETANIB ClearityFoundationClinicalTrial +RET gene1 RET ret proto-oncogene TYROSINE KINASE,DRUGGABLE GENOME,KINASE,CLINICALLY ACTIONABLE n/a DOVITINIB ClearityFoundationClinicalTrial +BIRC3 gene2 BIRC3 baculoviral IAP repeat containing 3 antagonist AEG40826 TALC +BIRC3 gene2 BIRC3 baculoviral IAP repeat containing 3 antagonist TL 32711 TALC +BIRC3 gene2 BIRC3 baculoviral IAP repeat containing 3 antagonist AT-406 TALC +BIRC3 gene2 BIRC3 baculoviral IAP repeat containing 3 antagonist GDC0917 TALC +BIRC3 gene2 BIRC3 baculoviral IAP repeat containing 3 antagonist LCL161 TALC +ATM gene3 ATM ataxia telangiectasia mutated PHOSPHATIDYLINOSITOL 3 KINASE,DRUGGABLE GENOME,CLINICALLY ACTIONABLE,SERINE THREONINE KINASE,KINASE,TUMOR SUPPRESSOR,DNA REPAIR n/a E7449 ClearityFoundationBiomarkers +ATM gene3 ATM ataxia telangiectasia mutated PHOSPHATIDYLINOSITOL 3 KINASE,DRUGGABLE GENOME,CLINICALLY ACTIONABLE,SERINE THREONINE KINASE,KINASE,TUMOR SUPPRESSOR,DNA REPAIR n/a NIRAPARIB ClearityFoundationBiomarkers +ATM gene3 ATM ataxia telangiectasia mutated PHOSPHATIDYLINOSITOL 3 KINASE,DRUGGABLE GENOME,CLINICALLY ACTIONABLE,SERINE THREONINE KINASE,KINASE,TUMOR SUPPRESSOR,DNA REPAIR n/a OLAPARIB ClearityFoundationBiomarkers +ATM gene3 ATM ataxia telangiectasia mutated PHOSPHATIDYLINOSITOL 3 KINASE,DRUGGABLE GENOME,CLINICALLY ACTIONABLE,SERINE THREONINE KINASE,KINASE,TUMOR SUPPRESSOR,DNA REPAIR n/a BMN673 ClearityFoundationBiomarkers +ATM gene3 ATM ataxia telangiectasia mutated PHOSPHATIDYLINOSITOL 3 KINASE,DRUGGABLE GENOME,CLINICALLY ACTIONABLE,SERINE THREONINE KINASE,KINASE,TUMOR SUPPRESSOR,DNA REPAIR n/a RUCAPARIB 
ClearityFoundationBiomarkers +ATM gene3 ATM ataxia telangiectasia mutated PHOSPHATIDYLINOSITOL 3 KINASE,DRUGGABLE GENOME,CLINICALLY ACTIONABLE,SERINE THREONINE KINASE,KINASE,TUMOR SUPPRESSOR,DNA REPAIR n/a VELIPARIB ClearityFoundationBiomarkers +KRAS gene4 KRAS v-Ki-ras2 Kirsten rat sarcoma viral oncogene homolog CLINICALLY ACTIONABLE n/a VANDETANIB ClearityFoundationBiomarkers +KRAS gene4 KRAS v-Ki-ras2 Kirsten rat sarcoma viral oncogene homolog CLINICALLY ACTIONABLE n/a EVEROLIMUS ClearityFoundationBiomarkers +KRAS gene4 KRAS v-Ki-ras2 Kirsten rat sarcoma viral oncogene homolog CLINICALLY ACTIONABLE n/a TEMSIROLIMUS ClearityFoundationBiomarkers +KRAS gene4 KRAS v-Ki-ras2 Kirsten rat sarcoma viral oncogene homolog CLINICALLY ACTIONABLE n/a GDC-0973 ClearityFoundationBiomarkers +KRAS gene4 KRAS v-Ki-ras2 Kirsten rat sarcoma viral oncogene homolog CLINICALLY ACTIONABLE n/a MEK162 ClearityFoundationBiomarkers +KRAS gene4 KRAS v-Ki-ras2 Kirsten rat sarcoma viral oncogene homolog CLINICALLY ACTIONABLE n/a PD-325901 ClearityFoundationBiomarkers +KRAS gene4 KRAS v-Ki-ras2 Kirsten rat sarcoma viral oncogene homolog CLINICALLY ACTIONABLE n/a RAFAMETINIB ClearityFoundationBiomarkers +KRAS gene4 KRAS v-Ki-ras2 Kirsten rat sarcoma viral oncogene homolog CLINICALLY ACTIONABLE n/a SELUMETINIB ClearityFoundationBiomarkers +KRAS gene4 KRAS v-Ki-ras2 Kirsten rat sarcoma viral oncogene homolog CLINICALLY ACTIONABLE n/a TRAMETINIB ClearityFoundationBiomarkers +KRAS gene4 KRAS v-Ki-ras2 Kirsten rat sarcoma viral oncogene homolog CLINICALLY ACTIONABLE n/a CETUXIMAB ClearityFoundationBiomarkers +KRAS gene4 KRAS v-Ki-ras2 Kirsten rat sarcoma viral oncogene homolog CLINICALLY ACTIONABLE n/a ERLOTINIB ClearityFoundationBiomarkers +KRAS gene4 KRAS v-Ki-ras2 Kirsten rat sarcoma viral oncogene homolog CLINICALLY ACTIONABLE n/a GEFITINIB ClearityFoundationBiomarkers +KRAS gene4 KRAS v-Ki-ras2 Kirsten rat sarcoma viral oncogene homolog CLINICALLY ACTIONABLE n/a PANITUMUMAB 
ClearityFoundationBiomarkers +KRAS gene4 KRAS v-Ki-ras2 Kirsten rat sarcoma viral oncogene homolog CLINICALLY ACTIONABLE n/a PIMASERTIB ClearityFoundationBiomarkers +KRAS gene4 KRAS v-Ki-ras2 Kirsten rat sarcoma viral oncogene homolog CLINICALLY ACTIONABLE inhibitor REOLYSIN CancerCommons +KRAS gene4 KRAS v-Ki-ras2 Kirsten rat sarcoma viral oncogene homolog CLINICALLY ACTIONABLE vaccine RAS PEPTIDE CANCER VACCINE TALC