Mercurial > repos > melissacline > ucsc_cancer_utilities
annotate synapseGetDataset.py @ 37:e81019e3ac99
Updated synapseGetDataset to look at the filename rather than the (no longer existent) content type field to determine if the data is in zip format
| author | melissacline |
|---|---|
| date | Mon, 27 Jul 2015 16:29:24 -0700 |
| parents | d1104ad3646a |
| children |
| rev | line source |
|---|---|
| 0 | 1 #!/usr/bin/env python |
|
2
d1104ad3646a
Trying one more time to get this push to happen...
melissacline
parents:
1
diff
changeset
|
2 """Download a dataset from Synapse into Galaxy """ |
| 0 | 3 |
| 4 import argparse | |
| 5 import json | |
|
37
e81019e3ac99
Updated synapseGetDataset to look at the filename rather than the (no longer existent) content type field to determine if the data is in zip format
melissacline
parents:
2
diff
changeset
|
6 import re |
| 0 | 7 import synapseclient |
| 8 import sys | |
|
1
ae91153d3fc2
Updated synapseGetDataset.py to automatically unzip if the file downloaded is a zip archive.
melissacline
parents:
0
diff
changeset
|
9 import zipfile |
|
ae91153d3fc2
Updated synapseGetDataset.py to automatically unzip if the file downloaded is a zip archive.
melissacline
parents:
0
diff
changeset
|
10 |
|
ae91153d3fc2
Updated synapseGetDataset.py to automatically unzip if the file downloaded is a zip archive.
melissacline
parents:
0
diff
changeset
|
class InputError(Exception):
    """Error raised for unusable input data.

    The offending value (or a message describing it) is kept on the
    ``value`` attribute, and ``str()`` of the error is ``repr(value)``.
    """

    def __init__(self, value):
        # Forward the value to Exception so that ``args`` is populated;
        # otherwise repr() and pickling of the error lose the payload.
        super(InputError, self).__init__(value)
        self.value = value

    def __str__(self):
        return repr(self.value)
|
ae91153d3fc2
Updated synapseGetDataset.py to automatically unzip if the file downloaded is a zip archive.
melissacline
parents:
0
diff
changeset
|
16 |
| 0 | 17 |
def saveMetadata(entity, metadataPathname):
    """Write the entity's properties and annotations to a file as JSON.

    entity           -- object with dict-like ``properties`` and
                        ``annotations`` attributes (both must offer
                        ``items()``).
    metadataPathname -- path of the file to create or overwrite.

    The two mappings are merged into a single JSON object (annotations
    win over properties when a key appears in both) and written as one
    line terminated by a newline.
    """
    # Build the merged dict with update() rather than by adding the two
    # items() results: the addition only works where items() returns a
    # list, and fails on Python 3 dict views.
    entityMetadata = dict(entity.properties.items())
    entityMetadata.update(entity.annotations.items())
    # Serialize before opening the output so that a serialization error
    # neither leaks an open handle nor leaves a truncated file behind.
    jsonMetadata = json.dumps(entityMetadata)
    with open(metadataPathname, "w") as fp:
        fp.write("%s\n" % (jsonMetadata))
| 25 | |
def saveData(entity, dataPathname):
    """Copy the downloaded entity file to dataPathname, unzipping if needed.

    entity       -- object whose ``path`` attribute names the downloaded file.
    dataPathname -- path of the output file to create or overwrite.

    A file whose name ends in ".zip" is opened as a zip archive that must
    hold exactly one member; that member's contents become the output.
    Any other file is copied unchanged.

    Raises InputError if the zip archive does not hold exactly one member.
    """
    # Local import: shutil is not among the module-level imports.
    import shutil

    # Files are opened in binary mode so the bytes are copied verbatim,
    # whatever the platform or Python version.
    if not entity.path.endswith(".zip"):
        with open(entity.path, "rb") as fpIn, open(dataPathname, "wb") as fpOut:
            shutil.copyfileobj(fpIn, fpOut)
        return

    with zipfile.ZipFile(entity.path) as zf:
        members = zf.namelist()
        # Check the member count before touching the output file, so a
        # rejected archive never creates or truncates it.
        if len(members) > 1:
            raise InputError("Error: more than one input file (%d found)"
                             % len(members))
        if not members:
            raise InputError("Error: no input file in zip archive")
        # Stream the member instead of reading it into memory in one go.
        with zf.open(members[0]) as fpIn, open(dataPathname, "wb") as fpOut:
            shutil.copyfileobj(fpIn, fpOut)
| 0 | 43 |
def main():
    """Log in to Synapse, fetch one entity, and save its metadata and data.

    Command-line arguments: entityId, email, outputMetadataFile and
    outputDataFile, plus at least one of --apiKey or --password (the API
    key is used when both are given).

    Calls sys.exit(0) on success and sys.exit(-1) when the login or the
    download fails; a missing credential is reported as an argparse
    usage error.
    """
    # Local import: traceback is needed for the download-failure report
    # but is not among the module-level imports.
    import traceback

    parser = argparse.ArgumentParser()
    parser.add_argument("entityId", type=str)
    parser.add_argument("email", type=str)
    parser.add_argument("outputMetadataFile", type=str)
    parser.add_argument("outputDataFile", type=str)
    parser.add_argument("--apiKey", type=str, default=None)
    parser.add_argument("--password", type=str, default=None)
    args = parser.parse_args()

    # Validate explicitly instead of with assert, which "python -O" strips.
    if args.apiKey is None and args.password is None:
        parser.error("one of --apiKey or --password is required")

    syn = synapseclient.Synapse()
    try:
        if args.apiKey is not None:
            syn.login(email=args.email, apiKey=args.apiKey)
        else:
            syn.login(email=args.email, password=args.password)
    except Exception:
        # Single-argument print(...) emits the same text on Python 2 and 3.
        print("Login Unsuccessful\n")
        sys.exit(-1)

    try:
        entity = syn.get(args.entityId)
    except Exception:
        # Prefix every traceback line with "!! " in the report.
        lines = traceback.format_exception(*sys.exc_info())
        allLines = ''.join('!! ' + line for line in lines)
        print("Unsuccessful: error %s\n" % allLines)
        sys.exit(-1)

    saveMetadata(entity, args.outputMetadataFile)
    saveData(entity, args.outputDataFile)
    sys.exit(0)
| 78 | |
# Script entry point: run the download only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()
| 81 | |
| 82 |
