Mercurial > repos > melissacline > ucsc_cancer_utilities
annotate synapseGetDataset.py @ 37:e81019e3ac99
Updated synapseGetDataset to look at the filename rather than the (no longer existent) content type field to determine if the data is in zip format
author | melissacline |
---|---|
date | Mon, 27 Jul 2015 16:29:24 -0700 |
parents | d1104ad3646a |
children |
rev | line source |
---|---|
0 | 1 #!/usr/bin/env python |
2
d1104ad3646a
Trying one more time to get this push to happen...
melissacline
parents:
1
diff
changeset
|
2 """Download a dataset from Synapse into Galaxy """ |
0 | 3 |
4 import argparse | |
5 import json | |
37
e81019e3ac99
Updated synapseGetDataset to look at the filename rather than the (no longer existent) content type field to determine if the data is in zip format
melissacline
parents:
2
diff
changeset
|
6 import re |
0 | 7 import synapseclient |
8 import sys | |
1
ae91153d3fc2
Updated synapseGetDataset.py to automatically unzip if the file downloaded is a zip archive.
melissacline
parents:
0
diff
changeset
|
9 import zipfile |
ae91153d3fc2
Updated synapseGetDataset.py to automatically unzip if the file downloaded is a zip archive.
melissacline
parents:
0
diff
changeset
|
10 |
ae91153d3fc2
Updated synapseGetDataset.py to automatically unzip if the file downloaded is a zip archive.
melissacline
parents:
0
diff
changeset
|
class InputError(Exception):
    """Raised when the downloaded input cannot be used as a single data file.

    Attributes:
        value: the payload supplied by the raiser (typically a message).
    """

    def __init__(self, value):
        # Hand the payload to Exception as well, so that ``args`` is
        # populated and the exception survives pickling and default
        # traceback formatting.
        super(InputError, self).__init__(value)
        self.value = value

    def __str__(self):
        """Return the repr() of the stored value."""
        return repr(self.value)
ae91153d3fc2
Updated synapseGetDataset.py to automatically unzip if the file downloaded is a zip archive.
melissacline
parents:
0
diff
changeset
|
16 |
0 | 17 |
def saveMetadata(entity, metadataPathname):
    """Write the entity's properties and annotations to a file as JSON.

    The two mappings are merged into one dictionary; when a key appears in
    both, the value from ``entity.annotations`` wins.  The result is written
    as a single JSON document followed by a newline.

    Args:
        entity: object exposing ``properties`` and ``annotations`` mappings.
        metadataPathname: path of the file to (over)write.
    """
    # Build the merged dict with update() rather than concatenating the
    # items() results: concatenation only works when items() returns lists.
    entityMetadata = dict(entity.properties.items())
    entityMetadata.update(entity.annotations.items())
    jsonMetadata = json.dumps(entityMetadata)
    # The context manager closes the file even if the write fails.
    with open(metadataPathname, "w") as fp:
        fp.write("%s\n" % (jsonMetadata))
25 | |
def saveData(entity, dataPathname):
    """Copy the entity's downloaded file to dataPathname, unzipping if needed.

    If ``entity.path`` ends in ``.zip`` the archive must contain exactly one
    member, whose contents are written to ``dataPathname``.  Any other file
    is copied through unchanged.

    Args:
        entity: object whose ``path`` attribute names the downloaded file.
        dataPathname: path of the output file to (over)write.

    Raises:
        InputError: if the zip archive holds no members or more than one.
    """
    if re.search(r"\.zip$", entity.path):
        with zipfile.ZipFile(entity.path) as archive:
            members = archive.namelist()
            if len(members) > 1:
                raise InputError("Error: more than one input file (%d found)"
                                 % len(members))
            if not members:
                raise InputError("Error: zip archive contains no files")
            data = archive.read(members[0])
        # ZipFile.read() returns raw bytes, so the output is opened in
        # binary mode to avoid newline translation or encoding errors.
        with open(dataPathname, "wb") as fpOut:
            fpOut.write(data)
    else:
        # Byte-for-byte copy in fixed-size chunks; binary mode keeps the
        # content intact regardless of platform or file type.
        with open(entity.path, "rb") as fpIn:
            with open(dataPathname, "wb") as fpOut:
                for chunk in iter(lambda: fpIn.read(65536), b""):
                    fpOut.write(chunk)
0 | 43 |
def main():
    """Log in to Synapse, fetch one entity, and save its metadata and data.

    Command-line arguments: entityId, email, outputMetadataFile,
    outputDataFile, plus one of --apiKey or --password.

    Exits with status 0 on success and -1 if the login or the fetch fails;
    a missing credential is reported through argparse's usage error.
    """
    # Imported here because the error handler below needs it and it is not
    # among the imports visible at the top of this file.
    import traceback

    parser = argparse.ArgumentParser()
    parser.add_argument("entityId", type=str)
    parser.add_argument("email", type=str)
    parser.add_argument("outputMetadataFile", type=str)
    parser.add_argument("outputDataFile", type=str)
    parser.add_argument("--apiKey", type=str, default=None)
    parser.add_argument("--password", type=str, default=None)
    args = parser.parse_args()

    # Explicit validation instead of assert, which is stripped under -O.
    if args.apiKey is None and args.password is None:
        parser.error("one of --apiKey or --password is required")

    syn = synapseclient.Synapse()
    try:
        if args.apiKey is not None:
            syn.login(email=args.email, apiKey=args.apiKey)
        else:
            syn.login(email=args.email, password=args.password)
    except Exception:
        # Exception (not a bare except) so Ctrl-C and SystemExit propagate.
        print("Login Unsuccessful\n")
        sys.exit(-1)

    try:
        entity = syn.get(args.entityId)
    except Exception:
        exc_type, exc_value, exc_traceback = sys.exc_info()
        lines = traceback.format_exception(exc_type, exc_value,
                                           exc_traceback)
        # Prefix each traceback line so it stands out in the tool output.
        allLines = ''.join('!! ' + line for line in lines)
        print("Unsuccessful: error %s\n" % allLines)
        sys.exit(-1)

    saveMetadata(entity, args.outputMetadataFile)
    saveData(entity, args.outputDataFile)
    sys.exit(0)
78 | |
79 if __name__ == "__main__": | |
80 main() | |
81 | |
82 |