Mercurial > repos > tomnl > dma_filelist_generation
comparison msp_split.py @ 8:8de790489301 draft
planemo upload for repository https://github.com/computational-metabolomics/dma-tools-galaxy commit af689d3f20c86f69aa824545e668280bcd5e0cca
author | tomnl |
---|---|
date | Mon, 14 May 2018 08:15:07 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
7:1aca78735588 | 8:8de790489301 |
---|---|
1 from __future__ import print_function | |
2 import argparse | |
3 import textwrap | |
4 import os | |
5 import re | |
6 import csv | |
7 import math | |
8 | |
def msp_split(i, o, n):
    """Split an MSP file into ``n`` files holding roughly equal numbers of spectra.

    A spectrum starts at a line matching ``NAME:`` (case-insensitive, anchored
    at the start of the line) and runs until the next such line. Output files
    are written to directory ``o`` as ``file<k>.msp`` where ``<k>`` is the
    1-based index zero-padded to the width of ``n``. Any lines preceding the
    first spectrum header are written to the first output file. All ``n``
    files are always created; trailing ones are empty if there are fewer
    spectra than files.

    Args:
        i: path of the input MSP file.
        o: path of an existing output directory.
        n: number of output files to create (positive integer).

    Returns:
        List of the output file paths, in the order they were written.

    Raises:
        ValueError: if ``n`` is smaller than 1.
    """
    if n < 1:
        raise ValueError('n must be a positive integer, got {!r}'.format(n))

    # The same pattern is used for counting and for splitting so the per-file
    # limit is consistent with what the split loop treats as a spectrum.
    name_re = re.compile(r'^NAME:', re.IGNORECASE)

    with open(i, 'r') as msp_in:
        spec_total = sum(1 for line in msp_in if name_re.match(line))

    # Maximum number of spectra per output file.
    spec_lim = int(math.ceil(spec_total / float(n)))
    print('spec_lim', spec_lim)

    pad = len(str(n))
    filelist = []
    header = ''  # NAME line read while filling one file but owned by the next
    spec_c = 0   # spectra written to the current output file so far

    with open(i, 'r') as msp_in:
        for file_idx in range(1, n + 1):
            out_path = os.path.join(
                o, 'file{}.msp'.format(str(file_idx).zfill(pad)))
            filelist.append(out_path)
            is_last = file_idx == n

            with open(out_path, 'w') as msp_out:
                if header:
                    msp_out.write(header)
                    header = ''

                while True:
                    line = msp_in.readline()
                    if not line:
                        break  # end of file

                    if name_re.match(line):
                        # The last file takes everything that is left so no
                        # spectrum is ever dropped.
                        if spec_c >= spec_lim and not is_last:
                            header = line
                            break
                        spec_c += 1

                    msp_out.write(line)

            # A carried-over header already counts towards the next file.
            spec_c = 1 if header else 0

    return filelist
36 | |
def lcount(keyword, fname):
    """Return how many lines of the file ``fname`` contain ``keyword``.

    The check is a plain, case-sensitive substring test applied to each line.
    """
    hits = 0
    with open(fname, 'r') as handle:
        for text in handle:
            if keyword in text:
                hits += 1
    return hits
40 | |
def main():
    """Command-line entry point: split one MSP file into several files.

    Parses ``-i`` (input MSP file), ``-o`` (output directory, created if it
    does not exist) and ``-n`` (number of output files), echoes them, and
    hands them to ``msp_split``.
    """
    p = argparse.ArgumentParser(
        prog='PROG',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description='''Split an MSP file into a given number of smaller MSP files''',
        epilog=textwrap.dedent('''
        -------------------------------------------------------------------------

        Example Usage

        python msp_split.py -i [input msp file] -o [out dir] -n [number of files]

        '''))

    p.add_argument('-i', dest='i', help='input msp file', required=True)
    p.add_argument('-o', dest='o', help='out dir', required=True)
    # Previously optional with no default, which crashed on int(None).
    p.add_argument('-n', dest='n', type=int, required=True,
                   help='number of files to split the msp file into')

    args = p.parse_args()

    if args.n < 1:
        p.error('-n must be a positive integer')

    if not os.path.exists(args.o):
        os.makedirs(args.o)
    print('in file', args.i)
    print('out dir', args.o)
    print('nm files', args.n)

    msp_split(args.i, args.o, args.n)


# Run the command-line interface only when executed as a script.
if __name__ == '__main__':
    main()
73 |