Mercurial > repos > tomnl > dma_filelist_generation
annotate msp_split.py @ 8:8de790489301 draft
planemo upload for repository https://github.com/computational-metabolomics/dma-tools-galaxy commit af689d3f20c86f69aa824545e668280bcd5e0cca
author | tomnl |
---|---|
date | Mon, 14 May 2018 08:15:07 -0400 |
parents | |
children |
rev | line source |
---|---|
8
8de790489301
planemo upload for repository https://github.com/computational-metabolomics/dma-tools-galaxy commit af689d3f20c86f69aa824545e668280bcd5e0cca
tomnl
parents:
diff
changeset
|
1 from __future__ import print_function |
8de790489301
planemo upload for repository https://github.com/computational-metabolomics/dma-tools-galaxy commit af689d3f20c86f69aa824545e668280bcd5e0cca
tomnl
parents:
diff
changeset
|
2 import argparse |
8de790489301
planemo upload for repository https://github.com/computational-metabolomics/dma-tools-galaxy commit af689d3f20c86f69aa824545e668280bcd5e0cca
tomnl
parents:
diff
changeset
|
3 import textwrap |
8de790489301
planemo upload for repository https://github.com/computational-metabolomics/dma-tools-galaxy commit af689d3f20c86f69aa824545e668280bcd5e0cca
tomnl
parents:
diff
changeset
|
4 import os |
8de790489301
planemo upload for repository https://github.com/computational-metabolomics/dma-tools-galaxy commit af689d3f20c86f69aa824545e668280bcd5e0cca
tomnl
parents:
diff
changeset
|
5 import re |
8de790489301
planemo upload for repository https://github.com/computational-metabolomics/dma-tools-galaxy commit af689d3f20c86f69aa824545e668280bcd5e0cca
tomnl
parents:
diff
changeset
|
6 import csv |
8de790489301
planemo upload for repository https://github.com/computational-metabolomics/dma-tools-galaxy commit af689d3f20c86f69aa824545e668280bcd5e0cca
tomnl
parents:
diff
changeset
|
7 import math |
8de790489301
planemo upload for repository https://github.com/computational-metabolomics/dma-tools-galaxy commit af689d3f20c86f69aa824545e668280bcd5e0cca
tomnl
parents:
diff
changeset
|
8 |
8de790489301
planemo upload for repository https://github.com/computational-metabolomics/dma-tools-galaxy commit af689d3f20c86f69aa824545e668280bcd5e0cca
tomnl
parents:
diff
changeset
|
def msp_split(i, o, n):
    """Split the MSP file `i` into `n` files inside directory `o`.

    A spectrum is taken to start at a line matching ``NAME:`` (case
    insensitive) at the beginning of the line. Each output file receives at
    most ``ceil(total_spectra / n)`` spectra; all `n` files are always
    created, so trailing files may be empty when there are fewer spectra
    than files. Any lines that precede the first ``NAME:`` line are written
    to the first output file.

    Args:
        i: path of the MSP file to split.
        o: existing directory the output files are written to.
        n: number of output files to create (integer).

    Returns:
        List of the output file paths, in creation order
        (``file1.msp`` ... zero-padded to the width of `n`).
    """
    name_re = re.compile(r'^NAME:', re.IGNORECASE)

    # Count spectra with exactly the same rule that is used for splitting
    # below; a case-sensitive substring count would report zero spectra for
    # files using "Name:" and cause most of the input to be dropped.
    with open(i, 'r') as msp_in:
        spec_total = sum(1 for line in msp_in if name_re.match(line))

    spec_lim = math.ceil(spec_total / float(n))
    print('spec_lim', spec_lim)

    filelist = []
    header = ''   # NAME line read at the end of one file, owed to the next
    spec_c = 0    # spectra seen for the current output file
    width = len(str(n))

    with open(i, 'r') as msp_in:
        for idx in range(1, n + 1):
            out_path = os.path.join(
                o, 'file{}.msp'.format(str(idx).zfill(width)))
            with open(out_path, 'w+') as msp_out:
                while spec_c <= spec_lim:
                    if header:
                        msp_out.write(header)
                        header = ''
                    line = msp_in.readline()

                    if not line:
                        break  # end of file

                    if name_re.match(line):
                        # Hold the header back: it is written either on the
                        # next pass of this loop or at the top of the next
                        # file once the per-file limit has been exceeded.
                        header = line
                        spec_c += 1
                    else:
                        msp_out.write(line)
            filelist.append(out_path)
            # The pending header (if any) already counts as the first
            # spectrum of the next file.
            spec_c = 1

    return filelist
8de790489301
planemo upload for repository https://github.com/computational-metabolomics/dma-tools-galaxy commit af689d3f20c86f69aa824545e668280bcd5e0cca
tomnl
parents:
diff
changeset
|
36 |
8de790489301
planemo upload for repository https://github.com/computational-metabolomics/dma-tools-galaxy commit af689d3f20c86f69aa824545e668280bcd5e0cca
tomnl
parents:
diff
changeset
|
def lcount(keyword, fname):
    """Return the number of lines in `fname` that contain `keyword`.

    The check is a plain, case-sensitive substring test, so the keyword may
    appear anywhere in the line.

    Args:
        keyword: substring to look for.
        fname: path of the text file to scan.

    Returns:
        Count of matching lines (0 if none match).
    """
    with open(fname, 'r') as fin:
        # Generator expression: no intermediate list is built.
        return sum(1 for line in fin if keyword in line)
8de790489301
planemo upload for repository https://github.com/computational-metabolomics/dma-tools-galaxy commit af689d3f20c86f69aa824545e668280bcd5e0cca
tomnl
parents:
diff
changeset
|
40 |
8de790489301
planemo upload for repository https://github.com/computational-metabolomics/dma-tools-galaxy commit af689d3f20c86f69aa824545e668280bcd5e0cca
tomnl
parents:
diff
changeset
|
def main():
    """Command-line entry point: split one MSP file into N smaller files.

    Parses ``-i`` (input MSP file), ``-o`` (output directory, created if it
    does not exist) and ``-n`` (number of output files), echoes them to
    stdout and calls ``msp_split``.
    """

    p = argparse.ArgumentParser(prog='PROG',
                                formatter_class=argparse.RawDescriptionHelpFormatter,
                                description='''Split an MSP file into a number of smaller MSP files''',
                                epilog=textwrap.dedent('''
                                -------------------------------------------------------------------------

                                Example Usage

                                python msp_split.py -i [msp file] -o [out dir] -n [number of files]

                                '''))

    p.add_argument('-i', dest='i', help='input MSP file', required=True)
    p.add_argument('-o', dest='o', help='out dir', required=True)
    # Required and typed so a missing or non-numeric value yields a normal
    # argparse usage error instead of a TypeError/ValueError traceback.
    p.add_argument('-n', dest='n', type=int, required=True,
                   help='number of files to split into')

    args = p.parse_args()

    if not os.path.exists(args.o):
        os.makedirs(args.o)
    print('in file', args.i)
    print('out dir', args.o)
    print('nm files', args.n)

    msp_split(args.i, args.o, args.n)


if __name__ == '__main__':
    main()
8de790489301
planemo upload for repository https://github.com/computational-metabolomics/dma-tools-galaxy commit af689d3f20c86f69aa824545e668280bcd5e0cca
tomnl
parents:
diff
changeset
|
73 |