Mercurial > repos > tomnl > lcms_interval_scheduling
comparison msp_split.py @ 8:9cd1921e21b2 draft
planemo upload for repository https://github.com/computational-metabolomics/dma-tools-galaxy commit af689d3f20c86f69aa824545e668280bcd5e0cca
| author | tomnl |
|---|---|
| date | Mon, 14 May 2018 08:14:26 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 7:692803fe646e | 8:9cd1921e21b2 |
|---|---|
| 1 from __future__ import print_function | |
| 2 import argparse | |
| 3 import textwrap | |
| 4 import os | |
| 5 import re | |
| 6 import csv | |
| 7 import math | |
| 8 | |
def _count_spectra(fname, name_re):
    """Return the number of lines in *fname* that start a spectrum record."""
    with open(fname, 'r') as fin:
        return sum(1 for line in fin if name_re.match(line))


def msp_split(i, o, n):
    """Split the MSP file *i* into *n* files written to directory *o*.

    A spectrum record starts at a line beginning with ``NAME:`` (matched
    case-insensitively) and runs up to the next such line. Each output file
    receives at most ``ceil(total_spectra / n)`` consecutive spectra. All
    *n* files are always created, so trailing files may be empty when the
    input holds fewer spectra than *n*. Any lines preceding the first
    ``NAME:`` line are copied into the first output file.

    Args:
        i: Path of the input MSP file.
        o: Existing directory that receives the ``file<k>.msp`` outputs.
        n: Number of output files to create (positive integer).

    Returns:
        List of the output file paths, in the order they were written.

    Raises:
        ValueError: If *n* is smaller than 1.
    """
    if n < 1:
        raise ValueError('n must be a positive integer, got {!r}'.format(n))

    # One pattern is used for both counting and splitting so that the
    # per-file limit always agrees with what the split loop treats as a
    # record start (re.match already anchors at the start of the line).
    name_re = re.compile(r'NAME:', re.IGNORECASE)

    spec_total = _count_spectra(i, name_re)
    spec_lim = math.ceil(spec_total / float(n))
    print('spec_lim', spec_lim)

    pad = len(str(n))  # zero-pad indices so the file names sort naturally
    filelist = []
    header = ''  # NAME line read ahead that belongs to the next spectrum
    spec_c = 0   # spectra started for the current output file

    with open(i, 'r') as msp_in:
        for idx in range(1, n + 1):
            out_path = os.path.join(o, 'file{}.msp'.format(str(idx).zfill(pad)))
            filelist.append(out_path)
            with open(out_path, 'w+') as msp_out:
                # The loop ends once the NAME line of spectrum spec_lim + 1
                # has been read; that line stays in `header` and is written
                # as the first line of the next output file.
                while spec_c <= spec_lim:
                    if header:
                        msp_out.write(header)
                        header = ''
                    line = msp_in.readline()

                    if not line:
                        break  # end of file

                    if name_re.match(line):
                        header = line
                        spec_c += 1
                    else:
                        msp_out.write(line)
            # The pending header already counts as the first spectrum of
            # the next file.
            spec_c = 1

    return filelist
| 36 | |
def lcount(keyword, fname):
    """Return how many lines of the file *fname* contain *keyword*.

    The check is a plain, case-sensitive substring test applied to every
    line of the file.
    """
    hits = 0
    with open(fname, 'r') as handle:
        for row in handle:
            if keyword in row:
                hits += 1
    return hits
| 40 | |
def main():
    """Command-line entry point: split one MSP file into several files.

    Parses ``-i`` (input MSP file), ``-o`` (output directory, created when
    missing) and ``-n`` (number of output files), echoes them, and calls
    ``msp_split``. Invalid or missing arguments terminate the program
    through argparse's normal error reporting.
    """
    # No explicit `prog`: argparse then shows the real script name in the
    # usage line instead of a placeholder.
    p = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description='''Split an MSP file into multiple smaller MSP files''',
        epilog=textwrap.dedent('''
        -------------------------------------------------------------------------

        Example Usage

        python msp_split.py -i [input msp file] -o [out dir] -n [number of files]

        '''))

    p.add_argument('-i', dest='i', help='input msp file', required=True)
    p.add_argument('-o', dest='o', help='out dir', required=True)
    # Required and parsed as int, so a missing or non-numeric value gives a
    # usage error rather than a TypeError/ValueError traceback later on.
    p.add_argument('-n', dest='n', type=int, required=True,
                   help='number of files to split the input into')

    args = p.parse_args()

    if args.n < 1:
        p.error('-n must be a positive integer')

    if not os.path.exists(args.o):
        os.makedirs(args.o)
    print('in file', args.i)
    print('out dir', args.o)
    print('nm files', args.n)

    msp_split(args.i, args.o, args.n)
| 69 | |
| 70 | |
# Run the command-line interface only when executed as a script, not on import.
if __name__ == '__main__':
    main()
| 73 |
