Mercurial > repos > tomnl > create_sqlite_db
comparison msp_split.py @ 26:e25e717386a6 draft
planemo upload for repository https://github.com/computational-metabolomics/mspurity-galaxy commit f79fa34772bbab836d89cf8bad52d49285409a98
author | tomnl |
---|---|
date | Thu, 14 Jun 2018 09:21:59 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
25:bc86834acaed | 26:e25e717386a6 |
---|---|
1 from __future__ import print_function | |
2 import argparse | |
3 import textwrap | |
4 import os | |
5 import re | |
6 import csv | |
7 import math | |
8 | |
def msp_split(i, o, n):
    """Split an MSP spectral library into ``n`` files of roughly equal size.

    A spectrum starts at a line matching ``NAME:`` (case-insensitive, at the
    start of the line) and runs until the next such line. Spectra are written
    in input order to ``file<k>.msp`` inside ``o``, where ``k`` runs from 1 to
    ``n`` and is zero-padded to the width of ``n``. Each file receives at most
    ``ceil(total / n)`` spectra, so trailing files may be empty when there are
    fewer spectra than files. Any lines before the first spectrum go to the
    first file.

    Args:
        i: Path of the input MSP file.
        o: Existing directory in which the output files are created.
        n: Number of output files to create.

    Returns:
        List of the output file paths, in the order they were written.

    Raises:
        ZeroDivisionError: If ``n`` is 0.
    """
    # One compiled pattern is used both for counting and for splitting.
    # Counting with a different rule than the splitter (e.g. a case-sensitive
    # substring test) under-counts files that use "Name:", which makes the
    # per-file limit too small and silently drops spectra.
    name_re = re.compile(r'^NAME:', re.IGNORECASE)

    with open(i, 'r') as msp_in:
        spec_total = sum(1 for line in msp_in if name_re.match(line))

    spec_lim = math.ceil(spec_total / float(n))
    print('spec_lim', spec_lim)

    width = len(str(n))
    filelist = []
    # NAME line that was read while filling one file but belongs to the next.
    pending = ''

    with open(i, 'r') as msp_in:
        for idx in range(1, n + 1):
            path = os.path.join(o, 'file{}.msp'.format(str(idx).zfill(width)))
            filelist.append(path)
            with open(path, 'w') as msp_out:
                spec_c = 0
                if pending:
                    msp_out.write(pending)
                    pending = ''
                    spec_c = 1
                # Iteration resumes where the previous output file stopped.
                for line in msp_in:
                    if name_re.match(line):
                        if spec_c >= spec_lim:
                            # This file is full; carry the header over.
                            pending = line
                            break
                        spec_c += 1
                    msp_out.write(line)

    return filelist
36 | |
def lcount(keyword, fname):
    """Count the lines of a text file that contain a keyword.

    Args:
        keyword: Substring to look for. The test is case-sensitive and the
            substring may occur anywhere in the line.
        fname: Path of the file to scan.

    Returns:
        Number of lines in ``fname`` that contain ``keyword``.
    """
    with open(fname, 'r') as fin:
        # Generator expression: count while streaming the file instead of
        # materialising a throwaway list of ones.
        return sum(1 for line in fin if keyword in line)
40 | |
def main():
    """Command-line entry point: split an MSP file into several files.

    Parses ``-i`` (input MSP file), ``-o`` (output directory, created if it
    does not exist) and ``-n`` (number of output files), echoes them, and
    calls ``msp_split``. Exits with an argparse usage error when ``-n`` is
    missing, not an integer, or smaller than 1.
    """
    p = argparse.ArgumentParser(prog='PROG',
                                formatter_class=argparse.RawDescriptionHelpFormatter,
                                description='''split msp files''',
                                )

    p.add_argument('-i', dest='i', help='msp file', required=True)
    p.add_argument('-o', dest='o', help='out dir', required=True)
    # Parse and validate -n up front so a missing or malformed value yields a
    # usage message instead of a TypeError/ValueError traceback later on.
    p.add_argument('-n', dest='n', type=int, required=True,
                   help='number of files to split into')

    args = p.parse_args()

    if args.n < 1:
        # n == 0 would otherwise fail with ZeroDivisionError in the split.
        p.error('-n must be a positive integer')

    if not os.path.exists(args.o):
        os.makedirs(args.o)
    print('in file', args.i)
    print('out dir', args.o)
    print('nm files', args.n)

    msp_split(args.i, args.o, args.n)


if __name__ == '__main__':
    main()
66 |