annotate refinery_file_splitter.py @ 1:b87749d7a24c draft default tip

planemo upload commit 4fb0a789956149e5a58f4e370d7fe14f4e8bcf79
author refinery-platform
date Thu, 22 Feb 2018 14:14:03 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
b87749d7a24c planemo upload commit 4fb0a789956149e5a58f4e370d7fe14f4e8bcf79
refinery-platform
parents:
diff changeset
1 #!/usr/bin/env python
b87749d7a24c planemo upload commit 4fb0a789956149e5a58f4e370d7fe14f4e8bcf79
refinery-platform
parents:
diff changeset
2
b87749d7a24c planemo upload commit 4fb0a789956149e5a58f4e370d7fe14f4e8bcf79
refinery-platform
parents:
diff changeset
3 '''
b87749d7a24c planemo upload commit 4fb0a789956149e5a58f4e370d7fe14f4e8bcf79
refinery-platform
parents:
diff changeset
4 Test tool for splitting output files from Refinery Test Tools
b87749d7a24c planemo upload commit 4fb0a789956149e5a58f4e370d7fe14f4e8bcf79
refinery-platform
parents:
diff changeset
5
b87749d7a24c planemo upload commit 4fb0a789956149e5a58f4e370d7fe14f4e8bcf79
refinery-platform
parents:
diff changeset
6 @author: Scott Ouellette
b87749d7a24c planemo upload commit 4fb0a789956149e5a58f4e370d7fe14f4e8bcf79
refinery-platform
parents:
diff changeset
7
b87749d7a24c planemo upload commit 4fb0a789956149e5a58f4e370d7fe14f4e8bcf79
refinery-platform
parents:
diff changeset
8 Input: one text file
b87749d7a24c planemo upload commit 4fb0a789956149e5a58f4e370d7fe14f4e8bcf79
refinery-platform
parents:
diff changeset
9 Output: N output files based on the number of input files that
b87749d7a24c planemo upload commit 4fb0a789956149e5a58f4e370d7fe14f4e8bcf79
refinery-platform
parents:
diff changeset
10 got concatenated from Refinery test tool runs
b87749d7a24c planemo upload commit 4fb0a789956149e5a58f4e370d7fe14f4e8bcf79
refinery-platform
parents:
diff changeset
11
b87749d7a24c planemo upload commit 4fb0a789956149e5a58f4e370d7fe14f4e8bcf79
refinery-platform
parents:
diff changeset
12 Requires Python v2.7
b87749d7a24c planemo upload commit 4fb0a789956149e5a58f4e370d7fe14f4e8bcf79
refinery-platform
parents:
diff changeset
13
b87749d7a24c planemo upload commit 4fb0a789956149e5a58f4e370d7fe14f4e8bcf79
refinery-platform
parents:
diff changeset
14 '''
b87749d7a24c planemo upload commit 4fb0a789956149e5a58f4e370d7fe14f4e8bcf79
refinery-platform
parents:
diff changeset
15
b87749d7a24c planemo upload commit 4fb0a789956149e5a58f4e370d7fe14f4e8bcf79
refinery-platform
parents:
diff changeset
16 import re
b87749d7a24c planemo upload commit 4fb0a789956149e5a58f4e370d7fe14f4e8bcf79
refinery-platform
parents:
diff changeset
17 import argparse
b87749d7a24c planemo upload commit 4fb0a789956149e5a58f4e370d7fe14f4e8bcf79
refinery-platform
parents:
diff changeset
18
b87749d7a24c planemo upload commit 4fb0a789956149e5a58f4e370d7fe14f4e8bcf79
refinery-platform
parents:
diff changeset
19
b87749d7a24c planemo upload commit 4fb0a789956149e5a58f4e370d7fe14f4e8bcf79
refinery-platform
parents:
diff changeset
def main(args):
    '''
    Run the splitter on the input file carried by the parsed arguments.

    args: parsed argument namespace; only its ``input_file`` attribute is
    read, and it is passed straight to ``create_many_files``.
    '''
    source = args.input_file
    create_many_files(source)
b87749d7a24c planemo upload commit 4fb0a789956149e5a58f4e370d7fe14f4e8bcf79
refinery-platform
parents:
diff changeset
22
b87749d7a24c planemo upload commit 4fb0a789956149e5a58f4e370d7fe14f4e8bcf79
refinery-platform
parents:
diff changeset
23
b87749d7a24c planemo upload commit 4fb0a789956149e5a58f4e370d7fe14f4e8bcf79
refinery-platform
parents:
diff changeset
def create_many_files(input_file):
    '''
    Split concatenated test tool output into numbered text files.

    The whole of ``input_file`` is read and cut at every occurrence of
    "Output" or "Input"; the separator runs from that word to the end of its
    line. Each remaining chunk that is not empty or newline-only is written
    to "Output file <N>.txt" in the current working directory, with N
    counting up from 1 in order of appearance.

    input_file: readable file-like object whose ``read()`` returns text. It
    is not closed here.
    '''
    # "." does not match a newline, so each separator stops at end of line
    chunks = re.split(r"Output.*|Input.*", input_file.read())

    # Skip chunks holding nothing but newlines; drop leading newlines (the
    # one left behind by the separator line) from the chunks that are kept
    sanitized_data = [
        chunk.lstrip("\n") for chunk in chunks if chunk.rstrip("\n")]

    # Create N output files based on the number of inputs run through test
    # tools; the context manager guarantees each file is flushed and closed
    for num, content in enumerate(sanitized_data, start=1):
        with open("Output file {}.txt".format(num), 'w') as output_file:
            output_file.write(content)
b87749d7a24c planemo upload commit 4fb0a789956149e5a58f4e370d7fe14f4e8bcf79
refinery-platform
parents:
diff changeset
34
b87749d7a24c planemo upload commit 4fb0a789956149e5a58f4e370d7fe14f4e8bcf79
refinery-platform
parents:
diff changeset
35
b87749d7a24c planemo upload commit 4fb0a789956149e5a58f4e370d7fe14f4e8bcf79
refinery-platform
parents:
diff changeset
if __name__ == '__main__':
    # Command-line entry point: parse the options, then split the input file.
    version = "%(prog)s 0.1"
    description = ("Test tool for running workflows on Galaxy platform "
                   "from Refinery")
    parser = argparse.ArgumentParser(description=description)

    # ArgumentParser(version=...) was removed in Python 3; the explicit
    # "version" action keeps the same -v/--version flags on 2.7 and 3.x
    parser.add_argument('-v', '--version', action='version', version=version)

    # argparse.FileType replaces the Python-2-only ``file`` builtin
    parser.add_argument('-i', '--in-file', dest='input_file',
                        type=argparse.FileType('r'), metavar='INPUT_FILE',
                        required=True, help='name of the input file')

    # check argument values for errors
    try:
        args = parser.parse_args()
    except IOError as e:
        # parser.error expects a message string, not an exception object
        parser.error(str(e))

    # Close the input handle that argparse opened, even if splitting fails
    try:
        main(args)
    finally:
        args.input_file.close()