Mercurial > repos > bgruening > split_file_to_collection
annotate split_file_to_collection.py @ 3:128fb354ed42 draft
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 8d069684e155d2f5b6fae06d14d98ce41321da53"
author | bgruening |
---|---|
date | Tue, 10 Sep 2019 12:30:46 -0400 |
parents | 0cf37301f754 |
children | b2ad7eb9bab7 |
rev | line source |
---|---|
0
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
1 #!/usr/bin/env python |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
2 |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
3 import argparse |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
4 import os |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
5 import re |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
6 import random |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
7 import math |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
8 |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
9 |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
10 """ |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
11 regexes that indicate the *beginning* of a record |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
12 new file types can be added by appending to this dict, |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
13 updating the parser, and adding a new type option in the Galaxy wrapper |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
14 """ |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
# Record-start pattern for each built-in file type.  '^.*' matches every
# line, which is why 'tabular' and 'txt' share it.
FILETYPES = dict(
    fasta=r'^>',
    fastq=r'^@',
    tabular=r'^.*',
    txt=r'^.*',
    mgf=r'^BEGIN IONS',
)
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
20 |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
21 |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
def main():
    """Parse the command line, validate the arguments and run the split.

    Splitting is done by column when ``--ftype tabular --by col`` is given
    and by record in every other case.

    Raises:
        FileNotFoundError: the input file does not exist, or ``out_dir`` is
            not a directory.
        ValueError: ``--top`` is negative, or ``--ftype generic`` was used
            without ``--generic_re``.
    """
    args = vars(parser_cli().parse_args())

    # get args and validate
    in_file = args["in"]
    if not os.path.isfile(in_file):
        raise FileNotFoundError('Input file does not exist')

    out_dir = args["out_dir"]
    if not os.path.isdir(out_dir):
        raise FileNotFoundError('out_dir is not a directory')

    top = args["top"]
    if top < 0:
        raise ValueError("Number of header lines cannot be negative")

    ftype = args["ftype"]

    # Raised explicitly instead of asserted: assert statements are removed
    # when Python runs with -O, which would let a missing regex through.
    if ftype == "generic" and args["generic_re"] is None:
        raise ValueError("--generic_re needs to be given for generic input")

    if ftype == "tabular" and args["by"] == "col":
        # undo the placeholder tokens before the patterns are used
        args["match"] = replace_mapped_chars(args["match"])
        args["sub"] = replace_mapped_chars(args["sub"])
        split_by_column(args, in_file, out_dir, top)
    else:
        split_by_record(args, in_file, out_dir, top, ftype)
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
50 |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
51 |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
def parser_cli():
    """Build the command-line parser for the file splitter.

    Returns:
        argparse.ArgumentParser: the configured parser; callers run
        ``parse_args()`` on it themselves.
    """
    parser = argparse.ArgumentParser(
        description="split a file into multiple files. "
                    "Can split on the column of a tabular file, "
                    "with custom and useful names based on column value.")
    parser.add_argument('--in', '-i', required=True, help="The input file")
    # Required option, so no default is declared: it could never be used.
    parser.add_argument('--out_dir', '-o', required=True, help="The output directory")
    parser.add_argument('--file_names', '-a',
                        help="If not splitting by column, the base name of the new files")
    parser.add_argument('--file_ext', '-e',
                        help="If not splitting by column,"
                             " the extension of the new files (without a period)")
    parser.add_argument('--ftype', '-f', required=True,
                        help="The type of the file to split",
                        choices=["mgf", "fastq", "fasta", "tabular", "txt", "generic"])
    parser.add_argument('--generic_re', '-g',
                        help="Regular expression indicating the start of a new record (only for generic)")
    parser.add_argument('--by', '-b', default="row", choices=["col", "row"],
                        help="Split by line or by column (tabular only)")
    parser.add_argument('--top', '-t', type=int, default=0,
                        help="Number of header lines to carry over to new files. "
                             "(tabular only).")
    parser.add_argument('--rand', '-r', action='store_true',
                        help="Divide records randomly into new files")
    parser.add_argument('--seed', '-x', type=int,
                        help="Provide a seed for the random number generator. "
                             "If not provided and args[\"rand\"]==True, then date is used")
    parser.add_argument('--numnew', '-n', type=int, default=1,
                        help="Number of output files desired. Not valid for splitting on a column")
    parser.add_argument('--batch', action='store_true',
                        help="Distribute files to collection while maintaining order. Ignored if splitting on column.")

    # options that only apply when splitting on a column
    bycol = parser.add_argument_group('If splitting on a column')
    bycol.add_argument('--match', '-m', default="(.*)",
                       help="The regular expression to match id column entries")
    bycol.add_argument('--sub', '-s', default=r'\1',
                       help="The regular expression to substitute in for the matched pattern.")
    # int default to agree with type=int (the old string "1" parsed to the same value)
    bycol.add_argument('--id_column', '-c', type=int, default=1,
                       help="Column that is used to name output files. Indexed starting from 1.")
    return parser
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
83 |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
84 |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
def close_files(file_list):
    """Close every file object in ``file_list``.

    Each file is attempted even when an earlier ``close()`` fails, so one
    error cannot leave the remaining handles open.

    Args:
        file_list: iterable of objects exposing a ``close()`` method.

    Raises:
        Exception: the first exception raised by any ``close()`` call,
            re-raised once all files have been attempted.
    """
    first_error = None
    for open_file in file_list:
        try:
            open_file.close()
        except Exception as err:
            # keep going so later files still get closed; report afterwards
            if first_error is None:
                first_error = err
    if first_error is not None:
        raise first_error
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
89 |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
90 |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
def replace_mapped_chars(pattern):
    """
    Undo the placeholder tokens used for special characters coming from
    galaxy: ``__sq__`` becomes a single quote and ``__backslash__`` a
    backslash.
    """
    # '__sq__' goes first, as in the original mapping order; the two tokens
    # can overlap (e.g. '__sq__backslash__'), so the order is significant.
    unquoted = pattern.replace('__sq__', '\'')
    return unquoted.replace('__backslash__', '\\')
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
99 |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
100 |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
101 def split_by_record(args, in_file, out_dir, top, ftype): |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
102 # get record separator for given filetype |
2
0cf37301f754
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 06ffe450bafa280eee8a4331c9cfc9e1ece7c522"
bgruening
parents:
0
diff
changeset
|
103 sep = re.compile(FILETYPES.get(ftype, args["generic_re"])) |
0
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
104 |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
105 numnew = args["numnew"] |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
106 |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
107 # random division |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
108 rand = args["rand"] |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
109 seed = args["seed"] |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
110 if seed: |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
111 random.seed(seed) |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
112 else: |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
113 random.seed() |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
114 |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
115 # batched division (maintains order) |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
116 batch = args["batch"] |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
117 # define n_per_file so we don't get a warning about ref before assignment |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
118 n_per_file = math.inf |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
119 if batch: |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
120 # number of records |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
121 with open(in_file) as f: |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
122 i = 0 |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
123 for line in f: |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
124 if re.match(sep, line) is not None: |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
125 i+=1 |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
126 n_records = i + 1 |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
127 if top: |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
128 n_records -= top # don't count the top lines |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
129 |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
130 # approx. number of lines per file |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
131 n_per_file = n_records // numnew |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
132 |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
133 # make new files |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
134 # strip extension of old file and add number |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
135 custom_new_file_name = args["file_names"] |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
136 custom_new_file_ext = "." + args["file_ext"] |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
137 if custom_new_file_name is None: |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
138 new_file_base = os.path.splitext(os.path.basename(in_file)) |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
139 else: |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
140 new_file_base = [custom_new_file_name, custom_new_file_ext] |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
141 |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
142 newfiles = [ |
3
128fb354ed42
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 8d069684e155d2f5b6fae06d14d98ce41321da53"
bgruening
parents:
2
diff
changeset
|
143 open(os.path.join(out_dir, "%s_%06d%s" % (new_file_base[0], count, new_file_base[1])) , "w") |
0
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
144 for count in range(0, numnew) |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
145 ] |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
146 |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
147 # bunch o' counters |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
148 # index to list of new files |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
149 new_file_counter = 0 |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
150 |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
151 # used for top |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
152 # number of lines read so far |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
153 n_read = 0 |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
154 # to contain header specified by top |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
155 header = "" |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
156 # keep track of the files that have been opened so far |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
157 fresh_files = {i for i in range(0, numnew)} |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
158 |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
159 # keep track in loop of number of records in each file |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
160 # only used in batch |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
161 records_in_file = 0 |
3
128fb354ed42
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 8d069684e155d2f5b6fae06d14d98ce41321da53"
bgruening
parents:
2
diff
changeset
|
162 |
0
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
163 # open file |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
164 with open(in_file, "r") as file: |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
165 record = "" |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
166 for line in file: |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
167 n_read += 1 |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
168 if n_read <= top: |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
169 header += line |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
170 continue |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
171 # check if beginning of line is record sep |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
172 # if beginning of line is record sep, either start record or finish one |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
173 if re.match(sep, line) is not None: |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
174 # this only happens first time through |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
175 if record == "": |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
176 record += line |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
177 else: |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
178 # if is in fresh_files, write header and drop from freshFiles |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
179 if new_file_counter in fresh_files: |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
180 newfiles[new_file_counter].write(header) |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
181 fresh_files.remove(new_file_counter) |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
182 |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
183 # write record to file |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
184 newfiles[new_file_counter].write(record) |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
185 |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
186 # if not the first time through, we assign the new record |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
187 record = line |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
188 |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
189 # change destination file |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
190 if rand: |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
191 new_file_counter = int(math.floor(random.random() * numnew)) |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
192 elif batch: |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
193 # number of records read per file |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
194 records_in_file += 1 |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
195 # have we reached the max for each file? |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
196 # if so, switch file |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
197 if records_in_file >= n_per_file: |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
198 new_file_counter = (new_file_counter + 1) % numnew |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
199 records_in_file = 0 # reset to 0 |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
200 else: |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
201 new_file_counter = (new_file_counter + 1) % numnew |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
202 # if beginning of line is not record sep, we must be inside a record |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
203 # so just append |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
204 else: |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
205 record += line |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
206 # after loop, write final record to file |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
207 newfiles[new_file_counter].write(record) |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
208 # close new files |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
209 close_files(newfiles) |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
210 |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
211 |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
212 def split_by_column(args, in_file, out_dir, top): |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
213 |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
214 # shift to 0-based indexing |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
215 id_col = int(args["id_column"]) - 1 |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
216 |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
217 try: |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
218 match = re.compile(args["match"]) |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
219 except re.error: |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
220 print("ERROR: Match (-m) supplied is not valid regex.") |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
221 raise |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
222 |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
223 sub = args["sub"] |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
224 |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
225 # set of file names |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
226 new_files = dict() |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
227 |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
228 # keep track of how many lines have been read |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
229 n_read = 0 |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
230 header = "" |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
231 with open(in_file) as file: |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
232 for line in file: |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
233 # if still in top, save to header |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
234 n_read += 1 |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
235 if n_read <= top: |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
236 header += line |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
237 continue |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
238 # split into columns, on tab |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
239 fields = re.split(r'\t', line.strip('\n')) |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
240 |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
241 # get id column value |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
242 id_col_val = fields[id_col] |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
243 |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
244 # use regex to get new file name |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
245 out_file_name = re.sub(match, sub, id_col_val) |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
246 out_file_path = os.path.join(out_dir, out_file_name) |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
247 |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
248 # write |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
249 if out_file_name not in new_files.keys(): |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
250 # open file (new, so not already open) |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
251 current_new_file = open(out_file_path, "w") |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
252 current_new_file.write(header) |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
253 current_new_file.write(line) |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
254 # add to dict |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
255 new_files[out_file_name] = current_new_file |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
256 else: |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
257 # file is already open, so just write to it |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
258 new_files[out_file_name].write(line) |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
259 |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
260 # finally, close all files |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
261 close_files(new_files.values()) |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
262 |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
263 |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
264 if __name__ == "__main__": |
e9d56b4c3209
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 85015046a6d8a9dc0f4b54611986676aceeeadd7
bgruening
parents:
diff
changeset
|
265 main() |