Mercurial > repos > tomnl > create_sqlite_db
annotate msp_split.py @ 28:be54f396ecfe draft default tip
planemo upload for repository https://github.com/computational-metabolomics/mspurity-galaxy commit 1800a3818988f21ef129e500818c9a087fce5875
author | tomnl |
---|---|
date | Wed, 18 Jul 2018 05:47:49 -0400 |
parents | e25e717386a6 |
children |
rev | line source |
---|---|
26
e25e717386a6
planemo upload for repository https://github.com/computational-metabolomics/mspurity-galaxy commit f79fa34772bbab836d89cf8bad52d49285409a98
tomnl
parents:
diff
changeset
|
1 from __future__ import print_function |
e25e717386a6
planemo upload for repository https://github.com/computational-metabolomics/mspurity-galaxy commit f79fa34772bbab836d89cf8bad52d49285409a98
tomnl
parents:
diff
changeset
|
2 import argparse |
e25e717386a6
planemo upload for repository https://github.com/computational-metabolomics/mspurity-galaxy commit f79fa34772bbab836d89cf8bad52d49285409a98
tomnl
parents:
diff
changeset
|
3 import textwrap |
e25e717386a6
planemo upload for repository https://github.com/computational-metabolomics/mspurity-galaxy commit f79fa34772bbab836d89cf8bad52d49285409a98
tomnl
parents:
diff
changeset
|
4 import os |
e25e717386a6
planemo upload for repository https://github.com/computational-metabolomics/mspurity-galaxy commit f79fa34772bbab836d89cf8bad52d49285409a98
tomnl
parents:
diff
changeset
|
5 import re |
e25e717386a6
planemo upload for repository https://github.com/computational-metabolomics/mspurity-galaxy commit f79fa34772bbab836d89cf8bad52d49285409a98
tomnl
parents:
diff
changeset
|
6 import csv |
e25e717386a6
planemo upload for repository https://github.com/computational-metabolomics/mspurity-galaxy commit f79fa34772bbab836d89cf8bad52d49285409a98
tomnl
parents:
diff
changeset
|
7 import math |
e25e717386a6
planemo upload for repository https://github.com/computational-metabolomics/mspurity-galaxy commit f79fa34772bbab836d89cf8bad52d49285409a98
tomnl
parents:
diff
changeset
|
8 |
e25e717386a6
planemo upload for repository https://github.com/computational-metabolomics/mspurity-galaxy commit f79fa34772bbab836d89cf8bad52d49285409a98
tomnl
parents:
diff
changeset
|
def msp_split(i, o, n):
    """Split an MSP spectral library into ``n`` files of roughly equal size.

    A spectrum starts at a line beginning with ``NAME:`` (matched
    case-insensitively, so both ``NAME:`` and ``Name:`` records are
    recognised) and runs until the next such line.  Spectra are never cut in
    half: each output file receives at most ``ceil(total / n)`` whole spectra.
    Any lines that precede the first ``NAME:`` line go to the first file.

    Args:
        i: Path of the MSP file to read.
        o: Existing directory in which the output files are created.
        n: Number of output files to create (positive integer).

    Returns:
        List of the output file paths, in order.  Exactly ``n`` files are
        always created, named ``file<k>.msp`` with ``k`` zero-padded to the
        width of ``n``; trailing files are empty when there are fewer
        spectra than files.

    Raises:
        ValueError: If ``n`` is smaller than 1.
    """
    if n < 1:
        raise ValueError('n must be a positive integer, got {}'.format(n))

    name_re = re.compile(r'^NAME:', re.IGNORECASE)

    # Count spectra with the very same pattern that is used for splitting.
    # A looser or stricter count (e.g. a case-sensitive substring test) gives
    # a wrong per-file limit and silently drops spectra from the output.
    with open(i, 'r') as msp_in:
        spec_total = sum(1 for line in msp_in if name_re.match(line))

    spec_lim = int(math.ceil(spec_total / float(n)))
    print('spec_lim', spec_lim)

    pad = len(str(n))
    filelist = []
    header = ''  # NAME line read while filling the previous file, if any

    with open(i, 'r') as msp_in:
        for idx in range(1, n + 1):
            out_path = os.path.join(
                o, 'file{}.msp'.format(str(idx).zfill(pad)))
            filelist.append(out_path)

            with open(out_path, 'w') as msp_out:
                # A carried-over header is the first spectrum of this file.
                spec_c = 0
                if header:
                    msp_out.write(header)
                    header = ''
                    spec_c = 1

                # readline-based iteration keeps the file position exact, so
                # reading can resume in the next output file after a break.
                for line in iter(msp_in.readline, ''):
                    if name_re.match(line):
                        spec_c += 1
                        if spec_c > spec_lim:
                            # This spectrum belongs to the next file.
                            header = line
                            break
                    msp_out.write(line)

    return filelist
e25e717386a6
planemo upload for repository https://github.com/computational-metabolomics/mspurity-galaxy commit f79fa34772bbab836d89cf8bad52d49285409a98
tomnl
parents:
diff
changeset
|
36 |
e25e717386a6
planemo upload for repository https://github.com/computational-metabolomics/mspurity-galaxy commit f79fa34772bbab836d89cf8bad52d49285409a98
tomnl
parents:
diff
changeset
|
def lcount(keyword, fname):
    """Return the number of lines in ``fname`` that contain ``keyword``.

    The check is a plain, case-sensitive substring test against each line,
    so ``keyword`` may appear anywhere in the line.

    Args:
        keyword: Text to look for.
        fname: Path of the text file to scan.

    Returns:
        Count of matching lines (0 for an empty file).
    """
    with open(fname, 'r') as fin:
        # Generator expression: count while streaming, without building an
        # intermediate list of ones.
        return sum(1 for line in fin if keyword in line)
e25e717386a6
planemo upload for repository https://github.com/computational-metabolomics/mspurity-galaxy commit f79fa34772bbab836d89cf8bad52d49285409a98
tomnl
parents:
diff
changeset
|
40 |
e25e717386a6
planemo upload for repository https://github.com/computational-metabolomics/mspurity-galaxy commit f79fa34772bbab836d89cf8bad52d49285409a98
tomnl
parents:
diff
changeset
|
def main():
    """Command-line entry point: parse arguments and split the MSP file.

    Options:
        -i  path of the MSP file to split (required)
        -o  output directory, created if it does not exist (required)
        -n  number of files to split into (required, integer)
    """
    p = argparse.ArgumentParser(
        prog='PROG',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description='''split msp files''',
    )

    p.add_argument('-i', dest='i', help='msp file', required=True)
    p.add_argument('-o', dest='o', help='out dir', required=True)
    # Let argparse validate -n: a missing or non-numeric value now produces a
    # usage error instead of a TypeError/ValueError traceback from int().
    p.add_argument('-n', dest='n', type=int, required=True,
                   help='number of files to split into')

    args = p.parse_args()

    if not os.path.exists(args.o):
        os.makedirs(args.o)

    print('in file', args.i)
    print('out dir', args.o)
    print('nm files', args.n)

    msp_split(args.i, args.o, args.n)
e25e717386a6
planemo upload for repository https://github.com/computational-metabolomics/mspurity-galaxy commit f79fa34772bbab836d89cf8bad52d49285409a98
tomnl
parents:
diff
changeset
|
62 |
e25e717386a6
planemo upload for repository https://github.com/computational-metabolomics/mspurity-galaxy commit f79fa34772bbab836d89cf8bad52d49285409a98
tomnl
parents:
diff
changeset
|
63 |
e25e717386a6
planemo upload for repository https://github.com/computational-metabolomics/mspurity-galaxy commit f79fa34772bbab836d89cf8bad52d49285409a98
tomnl
parents:
diff
changeset
|
# Run the command-line interface only when this file is executed as a
# script; importing the module must not trigger any work.
if __name__ == '__main__':
    main()
e25e717386a6
planemo upload for repository https://github.com/computational-metabolomics/mspurity-galaxy commit f79fa34772bbab836d89cf8bad52d49285409a98
tomnl
parents:
diff
changeset
|
66 |