Mercurial > repos > tomnl > msp_split
annotate msp_split.py @ 5:f2683ec717fe draft default tip
planemo upload for repository https://github.com/computational-metabolomics/mspurity-galaxy commit a164f06c09dc1614c2909c247ebf390aab433527-dirty
| author | tomnl |
|---|---|
| date | Wed, 18 Sep 2019 05:46:09 -0400 |
| parents | 89f33758ad22 |
| children |
| rev | line source |
|---|---|
|
0
aa55ff1d76d4
planemo upload for repository https://github.com/computational-metabolomics/dma-tools-galaxy commit dcfc95273101a7ef0405c2efb8d83f5d456ccd15
tomnl
parents:
diff
changeset
|
1 from __future__ import print_function |
|
aa55ff1d76d4
planemo upload for repository https://github.com/computational-metabolomics/dma-tools-galaxy commit dcfc95273101a7ef0405c2efb8d83f5d456ccd15
tomnl
parents:
diff
changeset
|
2 import argparse |
|
aa55ff1d76d4
planemo upload for repository https://github.com/computational-metabolomics/dma-tools-galaxy commit dcfc95273101a7ef0405c2efb8d83f5d456ccd15
tomnl
parents:
diff
changeset
|
3 import textwrap |
|
aa55ff1d76d4
planemo upload for repository https://github.com/computational-metabolomics/dma-tools-galaxy commit dcfc95273101a7ef0405c2efb8d83f5d456ccd15
tomnl
parents:
diff
changeset
|
4 import os |
|
aa55ff1d76d4
planemo upload for repository https://github.com/computational-metabolomics/dma-tools-galaxy commit dcfc95273101a7ef0405c2efb8d83f5d456ccd15
tomnl
parents:
diff
changeset
|
5 import re |
|
aa55ff1d76d4
planemo upload for repository https://github.com/computational-metabolomics/dma-tools-galaxy commit dcfc95273101a7ef0405c2efb8d83f5d456ccd15
tomnl
parents:
diff
changeset
|
6 import csv |
|
aa55ff1d76d4
planemo upload for repository https://github.com/computational-metabolomics/dma-tools-galaxy commit dcfc95273101a7ef0405c2efb8d83f5d456ccd15
tomnl
parents:
diff
changeset
|
7 import math |
|
aa55ff1d76d4
planemo upload for repository https://github.com/computational-metabolomics/dma-tools-galaxy commit dcfc95273101a7ef0405c2efb8d83f5d456ccd15
tomnl
parents:
diff
changeset
|
8 |
|
5
f2683ec717fe
planemo upload for repository https://github.com/computational-metabolomics/mspurity-galaxy commit a164f06c09dc1614c2909c247ebf390aab433527-dirty
tomnl
parents:
2
diff
changeset
|
def msp_split(i, o, x, n):
    """Split one MSP file into ``n`` files holding about the same number of spectra.

    A spectrum starts at every line matched by ``x``. The input is scanned
    once to count those lines, the per-file limit is set to
    ``ceil(count / n)``, and the input is then streamed into
    ``file<k>.msp`` files inside ``o`` (``k`` is zero-padded to the width of
    ``n``). All ``n`` files are always created; the trailing ones are empty
    when there are fewer spectra than files. Lines that precede the first
    spectrum start are written to the first file.

    Parameters
    ----------
    i : str
        Path of the MSP file to read.
    o : str
        Existing directory that receives the output files.
    x : str
        Text that begins the first line of each spectrum. It is inserted
        into a regular expression anchored at the start of the line and
        matched case-insensitively.
    n : int
        Number of output files; must be at least 1.

    Returns
    -------
    list of str
        Paths of the files written, in order.

    Raises
    ------
    ValueError
        If ``n`` is smaller than 1.
    """
    if n < 1:
        raise ValueError('n must be a positive integer, got {!r}'.format(n))

    # Compile once; the same matcher is used for counting and for splitting
    # so that the per-file limit always agrees with what the split detects.
    start_re = re.compile('^{}.*$'.format(x), re.IGNORECASE)

    with open(i, 'r') as msp_in:
        spec_total = sum(1 for line in msp_in if start_re.match(line))

    spec_lim = math.ceil(spec_total / float(n))
    print('spec_lim', spec_lim)

    width = len(str(n))
    filelist = []
    header = ''  # spectrum start line read ahead, owed to the next write
    spec_c = 0
    with open(i, 'r') as msp_in:
        for idx in range(1, n + 1):
            out_path = os.path.join(
                o, 'file{}.msp'.format(str(idx).zfill(width)))
            with open(out_path, 'w') as msp_out:
                while spec_c <= spec_lim:
                    if header:
                        msp_out.write(header)
                        header = ''
                    line = msp_in.readline()

                    if not line:
                        break  # end of file

                    if start_re.match(line):
                        # Hold the start line back: if it pushes the count
                        # over the limit it belongs to the next file.
                        header = line
                        spec_c += 1
                    else:
                        msp_out.write(line)
            filelist.append(out_path)
            # The held-back start line already counts for the next file.
            spec_c = 1

    return filelist
|
aa55ff1d76d4
planemo upload for repository https://github.com/computational-metabolomics/dma-tools-galaxy commit dcfc95273101a7ef0405c2efb8d83f5d456ccd15
tomnl
parents:
diff
changeset
|
36 |
|
aa55ff1d76d4
planemo upload for repository https://github.com/computational-metabolomics/dma-tools-galaxy commit dcfc95273101a7ef0405c2efb8d83f5d456ccd15
tomnl
parents:
diff
changeset
|
def lcount(keyword, fname):
    """Return how many lines of the file ``fname`` contain ``keyword``.

    The check is a plain, case-sensitive substring test applied to each
    line as it is read from the file.
    """
    hits = 0
    with open(fname, 'r') as handle:
        for text in handle:
            if keyword in text:
                hits += 1
    return hits
|
aa55ff1d76d4
planemo upload for repository https://github.com/computational-metabolomics/dma-tools-galaxy commit dcfc95273101a7ef0405c2efb8d83f5d456ccd15
tomnl
parents:
diff
changeset
|
40 |
|
aa55ff1d76d4
planemo upload for repository https://github.com/computational-metabolomics/dma-tools-galaxy commit dcfc95273101a7ef0405c2efb8d83f5d456ccd15
tomnl
parents:
diff
changeset
|
def main():
    """Command-line entry point: parse the options and split the MSP file.

    Options:
      -i  input MSP file (required)
      -x  text that starts each spectrum (default ``RECORD_TITLE:``)
      -o  output directory, created when missing (required)
      -n  number of files to split into (required, integer)

    Invalid or missing options end the program through argparse's usual
    usage error before anything is written to disk.
    """
    p = argparse.ArgumentParser(
        prog='PROG',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description='''split msp files''',
    )

    p.add_argument('-i', dest='i', help='msp file', required=True)
    p.add_argument('-x', dest='x', help='name', default='RECORD_TITLE:')
    p.add_argument('-o', dest='o', help='out dir', required=True)
    # Let argparse validate -n so a missing or non-numeric value gives a
    # usage error instead of a TypeError/ValueError traceback later on.
    p.add_argument('-n', dest='n', type=int, required=True,
                   help='number of files to split into')

    args = p.parse_args()

    # Create the directory directly and tolerate "already exists"; this
    # avoids the window between a separate existence check and the creation.
    try:
        os.makedirs(args.o)
    except OSError:
        if not os.path.isdir(args.o):
            raise

    print('in file', args.i)
    print('out dir', args.o)
    print('Name of starting value in MSP', args.x)
    print('nm files', args.n)

    msp_split(args.i, args.o, args.x, args.n)
|
0
aa55ff1d76d4
planemo upload for repository https://github.com/computational-metabolomics/dma-tools-galaxy commit dcfc95273101a7ef0405c2efb8d83f5d456ccd15
tomnl
parents:
diff
changeset
|
64 |
|
aa55ff1d76d4
planemo upload for repository https://github.com/computational-metabolomics/dma-tools-galaxy commit dcfc95273101a7ef0405c2efb8d83f5d456ccd15
tomnl
parents:
diff
changeset
|
65 |
|
aa55ff1d76d4
planemo upload for repository https://github.com/computational-metabolomics/dma-tools-galaxy commit dcfc95273101a7ef0405c2efb8d83f5d456ccd15
tomnl
parents:
diff
changeset
|
# Run the command-line interface only when the file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
|
aa55ff1d76d4
planemo upload for repository https://github.com/computational-metabolomics/dma-tools-galaxy commit dcfc95273101a7ef0405c2efb8d83f5d456ccd15
tomnl
parents:
diff
changeset
|
68 |
