comparison msp_split.py @ 26:e25e717386a6 draft

planemo upload for repository https://github.com/computational-metabolomics/mspurity-galaxy commit f79fa34772bbab836d89cf8bad52d49285409a98
author tomnl
date Thu, 14 Jun 2018 09:21:59 -0400
parents
children
comparison
equal deleted inserted replaced
25:bc86834acaed 26:e25e717386a6
1 from __future__ import print_function
2 import argparse
3 import textwrap
4 import os
5 import re
6 import csv
7 import math
8
def msp_split(i, o, n):
    """Split an MSP spectral library into ``n`` files.

    Each spectrum starts at a line matching ``NAME:`` at the beginning of the
    line (case-insensitive, so both ``NAME:`` and ``Name:`` are recognised).
    Every output file receives at most ``ceil(total / n)`` spectra, in input
    order; trailing files are left empty when the spectra run out. Any lines
    that precede the first spectrum are written to the first file.

    Args:
        i: Path of the MSP file to read.
        o: Existing directory in which the output files are created.
        n: Number of output files to create (must be >= 1).

    Returns:
        List of the ``n`` output file paths, in order
        (``file1.msp``, ``file2.msp``, ... zero-padded to the width of ``n``).

    Raises:
        ValueError: If ``n`` is smaller than 1.
    """
    if n < 1:
        raise ValueError('n must be a positive integer, got {!r}'.format(n))

    name_line = re.compile(r'^NAME:', re.IGNORECASE)

    # Count spectra with exactly the rule used for splitting below. A looser
    # or stricter count yields a wrong per-file limit and, when it
    # undercounts, spectra that never get written to any file.
    with open(i, 'r') as msp_in:
        spec_total = sum(1 for line in msp_in if name_line.match(line))

    spec_lim = math.ceil(spec_total / float(n))
    print('spec_lim', spec_lim)

    width = len(str(n))
    filelist = [
        os.path.join(o, 'file{}.msp'.format(str(idx).zfill(width)))
        for idx in range(1, n + 1)
    ]

    # NAME line that was read while filling the previous file but belongs to
    # the next one.
    pending = ''
    with open(i, 'r') as msp_in:
        for path in filelist:
            # A carried-over NAME line already counts as this file's first
            # spectrum.
            spec_c = 1 if pending else 0
            with open(path, 'w') as msp_out:
                msp_out.write(pending)
                pending = ''
                # Iteration resumes where the previous file stopped reading.
                for line in msp_in:
                    if name_line.match(line):
                        spec_c += 1
                        if spec_c > spec_lim:
                            pending = line
                            break
                    msp_out.write(line)

    return filelist
36
def lcount(keyword, fname):
    """Return how many lines of the file ``fname`` contain ``keyword``.

    The match is a plain, case-sensitive substring test applied to each line.
    """
    hits = 0
    with open(fname, 'r') as handle:
        for text in handle:
            if keyword in text:
                hits += 1
    return hits
40
def main():
    """Command-line entry point: parse arguments and split the MSP file.

    Options:
        -i  path of the MSP file to split (required)
        -o  output directory, created if it does not exist (required)
        -n  number of files to split into, a positive integer (required)

    Invalid or missing options end the program through argparse's usual
    usage error (exit status 2) before anything is written to disk.
    """
    p = argparse.ArgumentParser(
        prog='PROG',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description='''split msp files''',
    )

    p.add_argument('-i', dest='i', help='msp file', required=True)
    p.add_argument('-o', dest='o', help='out dir', required=True)
    # Let argparse convert and require -n so that a missing or non-numeric
    # value produces a usage error instead of a traceback from int().
    p.add_argument('-n', dest='n', type=int, required=True,
                   help='number of files to split into')

    args = p.parse_args()

    # Reject non-positive counts up front; they cannot be split into.
    if args.n < 1:
        p.error('-n must be a positive integer')

    if not os.path.exists(args.o):
        os.makedirs(args.o)
    print('in file', args.i)
    print('out dir', args.o)
    print('nm files', args.n)

    msp_split(args.i, args.o, args.n)


if __name__ == '__main__':
    main()
66