annotate msp_split.py @ 5:f2683ec717fe draft default tip

planemo upload for repository https://github.com/computational-metabolomics/mspurity-galaxy commit a164f06c09dc1614c2909c247ebf390aab433527-dirty
author tomnl
date Wed, 18 Sep 2019 05:46:09 -0400
parents 89f33758ad22
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
aa55ff1d76d4 planemo upload for repository https://github.com/computational-metabolomics/dma-tools-galaxy commit dcfc95273101a7ef0405c2efb8d83f5d456ccd15
tomnl
parents:
diff changeset
1 from __future__ import print_function
aa55ff1d76d4 planemo upload for repository https://github.com/computational-metabolomics/dma-tools-galaxy commit dcfc95273101a7ef0405c2efb8d83f5d456ccd15
tomnl
parents:
diff changeset
2 import argparse
aa55ff1d76d4 planemo upload for repository https://github.com/computational-metabolomics/dma-tools-galaxy commit dcfc95273101a7ef0405c2efb8d83f5d456ccd15
tomnl
parents:
diff changeset
3 import textwrap
aa55ff1d76d4 planemo upload for repository https://github.com/computational-metabolomics/dma-tools-galaxy commit dcfc95273101a7ef0405c2efb8d83f5d456ccd15
tomnl
parents:
diff changeset
4 import os
aa55ff1d76d4 planemo upload for repository https://github.com/computational-metabolomics/dma-tools-galaxy commit dcfc95273101a7ef0405c2efb8d83f5d456ccd15
tomnl
parents:
diff changeset
5 import re
aa55ff1d76d4 planemo upload for repository https://github.com/computational-metabolomics/dma-tools-galaxy commit dcfc95273101a7ef0405c2efb8d83f5d456ccd15
tomnl
parents:
diff changeset
6 import csv
aa55ff1d76d4 planemo upload for repository https://github.com/computational-metabolomics/dma-tools-galaxy commit dcfc95273101a7ef0405c2efb8d83f5d456ccd15
tomnl
parents:
diff changeset
7 import math
aa55ff1d76d4 planemo upload for repository https://github.com/computational-metabolomics/dma-tools-galaxy commit dcfc95273101a7ef0405c2efb8d83f5d456ccd15
tomnl
parents:
diff changeset
8
5
f2683ec717fe planemo upload for repository https://github.com/computational-metabolomics/mspurity-galaxy commit a164f06c09dc1614c2909c247ebf390aab433527-dirty
tomnl
parents: 2
diff changeset
def msp_split(i, o, x, n):
    """Split one MSP file into ``n`` files holding roughly equal spectra.

    A spectrum starts at every line matched by the pattern ``'^' + x + '.*$'``
    (case-insensitive; ``x`` is interpolated into the regular expression
    unescaped). Each output file receives at most ``ceil(total / n)``
    spectra; lines that precede the first spectrum go to the first file.
    All ``n`` output files are always created, so trailing files may be
    empty when there are fewer spectra than files.

    Args:
        i: Path of the MSP file to read.
        o: Existing directory in which ``file<k>.msp`` outputs are written;
            ``<k>`` is zero-padded to the number of digits in ``n``.
        x: Prefix (regular expression) marking the first line of a spectrum.
        n: Number of output files; must be an integer >= 1.

    Returns:
        The list of output file paths, in the order they were written.

    Raises:
        ValueError: If ``n`` is smaller than 1.
    """
    if n < 1:
        raise ValueError('n must be a positive integer, got {!r}'.format(n))

    # Compile once; the same matcher is used for counting and for splitting
    # so the per-file limit always agrees with what the splitter detects.
    record_start = re.compile('^{}.*$'.format(x), re.IGNORECASE)

    with open(i, 'r') as msp_in:
        spec_total = sum(1 for line in msp_in if record_start.match(line))

    spec_lim = math.ceil(spec_total / float(n))
    print('spec_lim', spec_lim)

    pad = len(str(n))
    filelist = []
    header = ''  # record-start line read ahead but not yet written
    spec_c = 0   # record starts seen for the current output file

    with open(i, 'r') as msp_in:
        for file_no in range(1, n + 1):
            out_path = os.path.join(
                o, 'file{}.msp'.format(str(file_no).zfill(pad)))
            with open(out_path, 'w') as msp_out:
                while spec_c <= spec_lim:
                    if header:
                        msp_out.write(header)
                        header = ''
                    line = msp_in.readline()

                    if not line:
                        break  # end of file

                    if record_start.match(line):
                        # Hold the header back: if it exceeds the limit it
                        # belongs to the next output file.
                        header = line
                        spec_c += 1
                    else:
                        msp_out.write(line)
            filelist.append(out_path)
            # The held-back header already counts towards the next file.
            spec_c = 1

    return filelist
aa55ff1d76d4 planemo upload for repository https://github.com/computational-metabolomics/dma-tools-galaxy commit dcfc95273101a7ef0405c2efb8d83f5d456ccd15
tomnl
parents:
diff changeset
36
aa55ff1d76d4 planemo upload for repository https://github.com/computational-metabolomics/dma-tools-galaxy commit dcfc95273101a7ef0405c2efb8d83f5d456ccd15
tomnl
parents:
diff changeset
def lcount(keyword, fname):
    """Return how many lines of ``fname`` contain ``keyword``.

    The check is a plain, case-sensitive substring test, so the keyword
    may appear anywhere in the line.
    """
    hits = 0
    with open(fname, 'r') as handle:
        for row in handle:
            if keyword in row:
                hits += 1
    return hits
aa55ff1d76d4 planemo upload for repository https://github.com/computational-metabolomics/dma-tools-galaxy commit dcfc95273101a7ef0405c2efb8d83f5d456ccd15
tomnl
parents:
diff changeset
40
aa55ff1d76d4 planemo upload for repository https://github.com/computational-metabolomics/dma-tools-galaxy commit dcfc95273101a7ef0405c2efb8d83f5d456ccd15
tomnl
parents:
diff changeset
def main(argv=None):
    """Command-line entry point: parse options and split an MSP file.

    Args:
        argv: Optional list of argument strings. When ``None`` (the
            default) argparse reads the arguments from ``sys.argv``.

    Exits with an argparse usage error when a required option is missing
    or when ``-n`` is not a positive integer; this happens before the
    output directory is created.
    """
    p = argparse.ArgumentParser(
        prog='PROG',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description='''split msp files''',
    )

    p.add_argument('-i', dest='i', help='msp file', required=True)
    p.add_argument('-x', dest='x', help='name', default='RECORD_TITLE:')
    p.add_argument('-o', dest='o', help='out dir', required=True)
    # Required and typed: without this a missing -n only surfaced later as
    # a TypeError from int(None).
    p.add_argument('-n', dest='n', type=int, required=True,
                   help='number of files to split into')

    args = p.parse_args(argv)

    if args.n < 1:
        p.error('-n must be a positive integer')

    if not os.path.exists(args.o):
        os.makedirs(args.o)
    print('in file', args.i)
    print('out dir', args.o)
    print('Name of starting value in MSP', args.x)
    print('nm files', args.n)

    msp_split(args.i, args.o, args.x, args.n)


if __name__ == '__main__':
    main()
aa55ff1d76d4 planemo upload for repository https://github.com/computational-metabolomics/dma-tools-galaxy commit dcfc95273101a7ef0405c2efb8d83f5d456ccd15
tomnl
parents:
diff changeset
68