view msp_split.py @ 16:5b3657390d91 draft

planemo upload for repository https://github.com/computational-metabolomics/dma-tools-galaxy commit af689d3f20c86f69aa824545e668280bcd5e0cca
author tomnl
date Mon, 14 May 2018 08:09:57 -0400
parents
children
line wrap: on
line source

from __future__ import print_function
import argparse
import textwrap
import os
import re
import csv
import math

def msp_split(i, o, n):
    """Split an MSP file into ``n`` files holding roughly equal spectrum counts.

    A spectrum starts at a line matching ``NAME:`` (case-insensitive, anchored
    at the start of the line).  Every output file receives at most
    ``ceil(total_spectra / n)`` spectra, in input order.  Exactly ``n`` files
    named ``file<k>.msp`` (``k`` zero-padded to the width of ``n``) are
    created in ``o``; trailing files are left empty when there are fewer
    spectra than slots.  Lines that precede the first ``NAME:`` line are
    written to the first file.

    :param i: path of the MSP file to split
    :param o: existing directory in which the output files are created
    :param n: number of output files (positive integer)
    :returns: list of the output file paths, in creation order
    :raises ValueError: if ``n`` is smaller than 1
    """
    if n < 1:
        raise ValueError('n must be a positive integer, got {!r}'.format(n))

    name_re = re.compile(r'^NAME:.*$', re.IGNORECASE)

    # Count spectra with the very same pattern that is used for splitting;
    # a looser or stricter count yields a wrong per-file limit.
    with open(i, 'r') as msp_in:
        spec_total = sum(1 for line in msp_in if name_re.match(line))

    spec_lim = math.ceil(spec_total/float(n))
    print('spec_lim', spec_lim)

    width = len(str(n))
    filelist = []
    pending = ''  # NAME line that closed the previous file and opens the next
    with open(i, 'r') as msp_in:
        for idx in range(1, n+1):
            out_path = os.path.join(o, 'file{}.msp'.format(str(idx).zfill(width)))
            filelist.append(out_path)
            with open(out_path, 'w') as msp_out:
                spec_c = 0
                if pending:
                    msp_out.write(pending)
                    pending = ''
                    spec_c = 1

                for line in iter(msp_in.readline, ''):
                    if name_re.match(line):
                        # The last file takes whatever is left so that no
                        # spectrum can be dropped.
                        if spec_c >= spec_lim and idx < n:
                            pending = line
                            break
                        spec_c += 1
                    msp_out.write(line)

    return filelist

def lcount(keyword, fname):
    """Return how many lines of ``fname`` contain ``keyword`` as a substring.

    The match is a plain, case-sensitive ``in`` test on each line.
    """
    hits = 0
    with open(fname, 'r') as handle:
        for row in handle:
            if keyword in row:
                hits += 1
    return hits

def main():
    """Command-line entry point: split one MSP file into several smaller ones.

    Parses ``-i`` (input MSP file), ``-o`` (output directory, created when it
    does not exist) and ``-n`` (number of output files), echoes them to
    stdout and delegates the work to ``msp_split``.  Invalid or missing
    arguments terminate the program through argparse's usual error exit.
    """

    p = argparse.ArgumentParser(formatter_class=argparse.RawDescriptionHelpFormatter,
                                description='''Split an MSP file into a fixed number of smaller MSP files''',
                                epilog=textwrap.dedent('''
                            -------------------------------------------------------------------------

                            Example Usage

                            python msp_split.py -i library.msp -o out_dir -n 4

                            '''))

    p.add_argument('-i', dest='i', help='input MSP file to split', required=True)
    p.add_argument('-o', dest='o', help='out dir', required=True)
    p.add_argument('-n', dest='n', help='number of output files', type=int, required=True)

    args = p.parse_args()

    # Reject a non-positive file count before touching the file system.
    if args.n < 1:
        p.error('-n must be a positive integer')

    if not os.path.exists(args.o):
        os.makedirs(args.o)
    print('in file', args.i)
    print('out dir', args.o)
    print('nm files', args.n)

    msp_split(args.i, args.o, int(args.n))


# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()