Mercurial > repos > melissacline > xena_get_dataset
changeset 0:aa2ed7c6c09b
Uploaded
author | melissacline |
---|---|
date | Mon, 15 Sep 2014 19:13:12 -0400 |
parents | |
children | bf4a5e41adbc |
files | xenaGetDataset/._xenaGetDataset.py xenaGetDataset/tool_dependencies.xml xenaGetDataset/xenaGetDataset.py xenaGetDataset/xenaGetDataset.xml |
diffstat | 4 files changed, 88 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
<?xml version="1.0"?>
<!--
  xenaGetDataset/tool_dependencies.xml

  Declares the "installXena" package (version 1.0) by pointing at changeset
  75c7d80df9c1 of the "start_xena" repository owned by melissacline on the
  test tool shed.
-->
<tool_dependency>
    <package name="installXena" version="1.0">
        <repository
            name="start_xena"
            owner="melissacline"
            toolshed="http://testtoolshed.g2.bx.psu.edu"
            changeset_revision="75c7d80df9c1"/>
    </package>
</tool_dependency>
#!/usr/bin/env python
"""Download one dataset from the public UCSC Xena hub into a local file.

Source file: xenaGetDataset/xenaGetDataset.py

Usage:
    xenaGetDataset.py datasetType cohortName datasetName outfile

The cohort and dataset names are joined as "%<cohortName>/<datasetName>" and
used as the name pattern in a Xena dataset query restricted to datasetType.
The pattern must identify exactly one dataset; otherwise the script exits
with an error message.
"""

import argparse
import json
import urllib2
import re
import shutil
import sys
from contextlib import closing
from functools import partial

# import xena query utilities
import xena_query as xena

# xena url for public datasets, at ucsc
xena_url = "https://genome-cancer.ucsc.edu/proj/public/xena"


def _select_single_dataset(matching_datasets, dataset_type, pattern):
    """Return the only entry of matching_datasets, or exit with an error.

    Args:
        matching_datasets: sequence of dataset names returned by the query.
        dataset_type: dataset type that was queried (used in error text only).
        pattern: name pattern that was queried (used in error text only).

    Returns:
        The single matching dataset name.

    Raises:
        SystemExit: if the sequence is empty or holds more than one entry.
            The message is written to stderr and the exit status is 1.
    """
    if not matching_datasets:
        # Previously this case fell through to matching_datasets[0] and
        # crashed with an IndexError traceback.
        sys.exit("Error: no datasets of type %s match %s"
                 % (dataset_type, pattern))
    if len(matching_datasets) > 1:
        names = " ".join("%s" % name for name in matching_datasets)
        sys.exit("Error: matches to multiple datasets: %s" % names)
    return matching_datasets[0]


def main():
    """Parse the command line, resolve the dataset, and save it to outfile.

    Positional command-line arguments:
        datasetType: dataset type passed to the Xena query.
        cohortName:  cohort part of the dataset name pattern.
        datasetName: dataset part of the dataset name pattern.
        outfile:     path of the file the downloaded dataset is written to.

    Raises:
        SystemExit: on bad arguments (argparse) or when the pattern does not
            match exactly one dataset.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("datasetType", type=str)
    parser.add_argument("cohortName", type=str)
    parser.add_argument("datasetName", type=str)
    parser.add_argument("outfile", type=str)
    args = parser.parse_args()

    # short-hand wrappers for xena methods
    # NOTE(review): assumes xena.compose chains its arguments so that the
    # response of xena.post is decoded by json.loads -- confirm in xena_query.
    post = xena.compose(json.loads, partial(xena.post, xena_url))
    find_datasets_type_pattern = xena.compose(post,
                                              xena.find_datasets_type_pattern)
    name_to_url = partial(xena.name_to_url, xena_url)

    # Find the datasets of the requested type whose name matches
    # "%<cohort>/<dataset>". The leading "%" is always prepended; presumably
    # it is a wildcard that lets any prefix precede the cohort name.
    full_dataset_name = "%s/%s" % (args.cohortName, args.datasetName)
    full_dataset_name = "%" + full_dataset_name
    matching_datasets = find_datasets_type_pattern(args.datasetType,
                                                   full_dataset_name)
    dataset = _select_single_dataset(matching_datasets, args.datasetType,
                                     full_dataset_name)

    # Open the URL first so that a failed request leaves no output file
    # behind. closing() guarantees the connection is released, and
    # copyfileobj streams the body instead of holding it all in memory.
    with closing(urllib2.urlopen(name_to_url(dataset))) as response:
        # Binary mode: the payload is copied byte-for-byte.
        with open(args.outfile, "wb") as out:
            shutil.copyfileobj(response, out)


if __name__ == "__main__":
    main()
<!--
  xenaGetDataset/xenaGetDataset.xml

  Galaxy tool wrapper that runs xenaGetDataset.py with the selected dataset
  type, cohort pattern and dataset pattern, and stores the result in outFile.
-->
<tool id="xenaGetDatasets" description="Fetch the indicated dataset from the Xena database" name="Get Datasets from Xena" version="0.0.1">
  <description>
    Retrieve the indicated dataset
  </description>
  <!--
    Every argument is single-quoted so that values containing spaces or
    shell metacharacters reach the script as one argument each.
  -->
  <command interpreter="python">
    xenaGetDataset.py '${datasetType}' '${cohortName}' '${datasetName}' '$outFile'
  </command>
  <inputs>
    <param name="datasetType" type="select" label="Dataset Type">
      <option value="genomicMatrix">Genomic Data</option>
      <option value="clinicalMatrix">Clinical Data</option>
      <option value="mutationVector">Mutation Data</option>
    </param>
    <!-- "%" is added to the valid characters so it can be used as a wildcard. -->
    <param type="text" name="cohortName" label="Cohort (wildcard is %)" optional="false">
      <sanitizer>
        <valid>
          <add value="%"/>
        </valid>
      </sanitizer>
    </param>
    <param type="text" name="datasetName" label="Dataset (wildcard is %)" optional="false">
      <sanitizer>
        <valid>
          <add value="%"/>
        </valid>
      </sanitizer>
    </param>
  </inputs>
  <outputs>
    <data format="tabular" name="outFile" />
  </outputs>
  <help>
**What it does**

Downloads a single dataset from the Xena database and adds it to the
history as a tabular file.

**Inputs**

- **Dataset Type**: Genomic Data, Clinical Data or Mutation Data.
- **Cohort**: name of the cohort; ``%`` may be used as a wildcard.
- **Dataset**: name of the dataset; ``%`` may be used as a wildcard.

The cohort and dataset together must identify exactly one dataset of the
selected type. If they match several datasets the tool stops with an error
that lists the matches; narrow the names and run it again.
  </help>
</tool>