Mercurial > repos > melissacline > xena_find_datasets
changeset 0:a4253c71f31d
Uploaded
author | melissacline |
---|---|
date | Tue, 09 Sep 2014 21:53:10 -0400 |
parents | |
children | a1979987ecf2 |
files | xenaFindDatasets/tool_dependencies.xml xenaFindDatasets/xenaFindDatasets.py xenaFindDatasets/xenaFindDatasets.xml |
diffstat | 3 files changed, 73 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
<?xml version="1.0"?>
<!--
  xenaFindDatasets/tool_dependencies.xml

  Declares the single package dependency of this tool: "installXena"
  version 1.0, pointing at revision 82755b0ee5a5 of the "start_xena"
  repository owned by melissacline on the test Tool Shed.
-->
<tool_dependency>
    <package name="installXena" version="1.0">
        <repository
            toolshed="http://testtoolshed.g2.bx.psu.edu"
            name="start_xena"
            owner="melissacline"
            changeset_revision="82755b0ee5a5"/>
    </package>
</tool_dependency>
#!/usr/bin/env python
"""List the Xena datasets of a given type for cohorts matching a pattern.

Source path: xenaFindDatasets/xenaFindDatasets.py

Usage:
    xenaFindDatasets.py DATASET_TYPE COHORT_PATTERN OUTFILE

The result is written to OUTFILE as a tab-separated table: a
``Cohort``/``Dataset`` header row followed by one row per dataset
returned by the query.
"""

import argparse
import json
from functools import partial
import re
import xena_query as xena

# xena url for public datasets, at ucsc
xena_url = "https://genome-cancer.ucsc.edu/proj/public/xena"


def _as_wildcard_pattern(cohort):
    """Return *cohort* with a ``%`` wildcard at each end.

    A wildcard is only added on a side that does not already have one,
    so an empty string becomes the single wildcard ``%``.
    """
    if not cohort.startswith("%"):
        cohort = "%" + cohort
    if not cohort.endswith("%"):
        cohort = cohort + "%"
    return cohort


def _cohort_and_dataset(dataset_path):
    """Split a slash-separated dataset path into ``(cohort, dataset)``.

    The dataset name is the last path component and the cohort is the
    component just before it.  A path without any ``/`` yields an empty
    cohort instead of raising ``IndexError``.
    """
    parent, _, dataset_name = dataset_path.rpartition("/")
    cohort = parent.rpartition("/")[2]
    return cohort, dataset_name


def main():
    """Parse the command line, query Xena and write the result table.

    Positional arguments:
        datasetType -- dataset type passed through to the Xena query
        cohortName  -- cohort pattern; ``%`` is the wildcard
        outfile     -- path of the tab-separated file to create
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("datasetType", type=str)
    parser.add_argument("cohortName", type=str)
    parser.add_argument("outfile", type=str)
    args = parser.parse_args()

    cohort_pattern = _as_wildcard_pattern(args.cohortName)

    # short-hand wrappers for xena methods
    # NOTE(review): presumably xena.compose chains the query builder, the
    # POST to xena_url and json.loads -- confirm against xena_query.
    post = xena.compose(json.loads, partial(xena.post, xena_url))
    find_datasets_type_pattern = xena.compose(
        post, xena.find_datasets_type_pattern)
    datasets = find_datasets_type_pattern(args.datasetType, cohort_pattern)

    # The context manager closes the file even if a write fails part-way.
    with open(args.outfile, "w") as fp:
        fp.write("Cohort\tDataset\n")
        for dataset_path in datasets:
            fp.write("%s\t%s\n" % _cohort_and_dataset(dataset_path))


if __name__ == "__main__":
    main()
<!--
  xenaFindDatasets/xenaFindDatasets.xml

  Galaxy tool wrapper for xenaFindDatasets.py: the user picks a dataset
  type and an optional cohort pattern, and the script writes a tabular
  output file.
-->
<tool id="xenaFindDatasets" description="Find datasets in the Xena database" name="Find Datasets in Xena" version="0.0.1">
  <description>
    Retrieve the cohorts and datasets that match the specified pattern.
  </description>
  <requirements>
    <requirement type="package" version="1.0">installXena</requirement>
  </requirements>
  <!--
    Every argument is quoted. cohortName is optional, and an unquoted blank
    value would vanish from the command line, leaving the script with two
    positional arguments instead of the three it requires. Quoting also
    keeps values containing spaces together as one argument.
  -->
  <command interpreter="python">
    xenaFindDatasets.py "${datasetType}" "${cohortName}" "${outFile}"
  </command>
  <inputs>
    <param name="datasetType" type="select" label="Dataset Type">
      <option value="genomicMatrix">Genomic Data</option>
      <option value="clinicalMatrix">Clinical Data</option>
      <option value="mutationVector">Mutation Data</option>
    </param>
    <param type="text" name="cohortName" label="Cohort (wildcard is %)" optional="true" />
  </inputs>
  <outputs>
    <data format="tabular" name="outFile" />
  </outputs>
  <help>
**What it does**

Queries the public UCSC Xena server for datasets of the selected type that
match the cohort pattern, and writes a tab-separated table with two columns:
Cohort and Dataset.

**Cohort pattern**

``%`` is the wildcard. A wildcard is added automatically to each end of the
pattern, so ``BRCA`` is searched as ``%BRCA%``.
  </help>
</tool>