annotate synapseGetDataset.py @ 52:3a036a34c362

Better handling of the input file
author jingchunzhu
date Thu, 17 Sep 2015 15:00:45 -0700
parents e81019e3ac99
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
60efb9214eaa Uploaded
melissacline
parents:
diff changeset
1 #!/usr/bin/env python
2
d1104ad3646a Trying one more time to get this push to happen...
melissacline
parents: 1
diff changeset
2 """Download a dataset from Synapse into Galaxy """
0
60efb9214eaa Uploaded
melissacline
parents:
diff changeset
3
60efb9214eaa Uploaded
melissacline
parents:
diff changeset
4 import argparse
60efb9214eaa Uploaded
melissacline
parents:
diff changeset
5 import json
37
e81019e3ac99 Updated synapseGetDataset to look at the filename rather than the (no longer existent) content type field to determine if the data is in zip format
melissacline
parents: 2
diff changeset
6 import re
0
60efb9214eaa Uploaded
melissacline
parents:
diff changeset
7 import synapseclient
60efb9214eaa Uploaded
melissacline
parents:
diff changeset
8 import sys
1
ae91153d3fc2 Updated synapseGetDataset.py to automatically unzip if the file downloaded is a zip archive.
melissacline
parents: 0
diff changeset
9 import zipfile
ae91153d3fc2 Updated synapseGetDataset.py to automatically unzip if the file downloaded is a zip archive.
melissacline
parents: 0
diff changeset
10
ae91153d3fc2 Updated synapseGetDataset.py to automatically unzip if the file downloaded is a zip archive.
melissacline
parents: 0
diff changeset
class InputError(Exception):
    """Raised when the downloaded input does not have the expected layout.

    The offending value is kept in ``value``; ``str()`` of the exception is
    the ``repr`` of that value.
    """

    def __init__(self, value):
        # Initialise the base class so ``args`` is populated; this keeps
        # pickling and generic exception reporting working.
        super(InputError, self).__init__(value)
        self.value = value

    def __str__(self):
        return repr(self.value)
ae91153d3fc2 Updated synapseGetDataset.py to automatically unzip if the file downloaded is a zip archive.
melissacline
parents: 0
diff changeset
16
0
60efb9214eaa Uploaded
melissacline
parents:
diff changeset
17
60efb9214eaa Uploaded
melissacline
parents:
diff changeset
def saveMetadata(entity, metadataPathname):
    """Write the entity's properties and annotations as one line of JSON.

    ``entity.properties`` and ``entity.annotations`` are merged into a single
    dictionary (an annotation wins when both define the same key), serialised
    with ``json.dumps`` and written to ``metadataPathname`` followed by a
    newline.
    """
    # Build the merged mapping without concatenating ``items()`` results:
    # that only works when ``items()`` returns a list.
    entityMetadata = dict(entity.properties.items())
    entityMetadata.update(entity.annotations.items())
    jsonMetadata = json.dumps(entityMetadata)
    # ``with`` guarantees the file is closed even if the write fails.
    with open(metadataPathname, "w") as fp:
        fp.write("%s\n" % (jsonMetadata))
60efb9214eaa Uploaded
melissacline
parents:
diff changeset
25
60efb9214eaa Uploaded
melissacline
parents:
diff changeset
def saveData(entity, dataPathname):
    """Copy the downloaded entity file to ``dataPathname``, unzipping if needed.

    When ``entity.path`` ends in ``.zip`` the archive must contain exactly
    one member, whose contents are written to ``dataPathname``.  Any other
    file is copied byte-for-byte.

    Raises:
        InputError: if the zip archive does not contain exactly one member.
    """
    # Imported locally because the module's import block does not provide it.
    import shutil

    if entity.path.endswith(".zip"):
        with zipfile.ZipFile(entity.path) as zf:
            members = zf.namelist()
            if len(members) > 1:
                # ``raise Instance, "text"`` is not a valid way to attach a
                # message; put the message in the exception itself.
                raise InputError(
                    "Error: more than one input file (%d)" % len(members))
            if not members:
                raise InputError("Error: zip archive contains no input file")
            # Stream the member in binary mode instead of loading it whole
            # and writing it through a text-mode handle.
            with zf.open(members[0]) as fpIn:
                with open(dataPathname, "wb") as fpOut:
                    shutil.copyfileobj(fpIn, fpOut)
    else:
        # Binary mode on both sides so the copy never alters the payload.
        with open(entity.path, "rb") as fpIn:
            with open(dataPathname, "wb") as fpOut:
                shutil.copyfileobj(fpIn, fpOut)
0
60efb9214eaa Uploaded
melissacline
parents:
diff changeset
43
60efb9214eaa Uploaded
melissacline
parents:
diff changeset
def main():
    """Fetch a Synapse entity and write its metadata and data files.

    Command-line arguments:
        entityId            ID passed to ``syn.get``.
        email               E-mail address used for the login.
        outputMetadataFile  Path that receives the entity metadata as JSON.
        outputDataFile      Path that receives the entity data.
        --apiKey            API key for the login; used in preference to the
                            password when both are given.
        --password          Password for the login.

    Exits with status 0 on success and with a non-zero status when no
    credential is supplied, the login fails, or the entity cannot be fetched.
    """
    # Imported locally: the error handler below needs it and the module's
    # import block does not provide it.
    import traceback

    parser = argparse.ArgumentParser()
    parser.add_argument("entityId", type=str)
    parser.add_argument("email", type=str)
    parser.add_argument("outputMetadataFile", type=str)
    parser.add_argument("outputDataFile", type=str)
    parser.add_argument("--apiKey", type=str, default=None)
    parser.add_argument("--password", type=str, default=None)
    args = parser.parse_args()

    # Validate explicitly rather than with ``assert``, which is stripped when
    # Python runs with -O.
    if args.apiKey is None and args.password is None:
        parser.error("one of --apiKey or --password is required")

    syn = synapseclient.Synapse()
    try:
        if args.apiKey is not None:
            syn.login(email=args.email, apiKey=args.apiKey)
        else:
            syn.login(email=args.email, password=args.password)
    except Exception:
        # ``Exception`` rather than a bare ``except`` so that Ctrl-C and
        # ``SystemExit`` are not reported as login failures.
        print("Login Unsuccessful\n")
        sys.exit(-1)

    try:
        entity = syn.get(args.entityId)
    except Exception:
        exc_type, exc_value, exc_traceback = sys.exc_info()
        lines = traceback.format_exception(exc_type, exc_value,
                                           exc_traceback)
        allLines = ''.join('!! ' + line for line in lines)
        print("Unsuccessful: error %s\n" % allLines)
        sys.exit(-1)

    saveMetadata(entity, args.outputMetadataFile)
    saveData(entity, args.outputDataFile)
    sys.exit(0)
60efb9214eaa Uploaded
melissacline
parents:
diff changeset
78
60efb9214eaa Uploaded
melissacline
parents:
diff changeset
# Run the downloader only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
60efb9214eaa Uploaded
melissacline
parents:
diff changeset
81
60efb9214eaa Uploaded
melissacline
parents:
diff changeset
82