Mercurial > repos > tomnl > create_sqlite_db
diff msp_split.py @ 26:e25e717386a6 draft
planemo upload for repository https://github.com/computational-metabolomics/mspurity-galaxy commit f79fa34772bbab836d89cf8bad52d49285409a98
author | tomnl |
---|---|
date | Thu, 14 Jun 2018 09:21:59 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/msp_split.py Thu Jun 14 09:21:59 2018 -0400 @@ -0,0 +1,66 @@ +from __future__ import print_function +import argparse +import textwrap +import os +import re +import csv +import math + +def msp_split(i, o, n): + spec_total = lcount('NAME', i) + spec_lim = math.ceil(spec_total/float(n)) + spec_c = 0 + filelist = [] + header = '' + print('spec_lim', spec_lim) + with open(i, 'r') as msp_in: + for i in range(1, n+1): + with open(os.path.join(o, 'file{}.msp'.format(str(i).zfill(len(str(n))))), 'w+') as msp_out: + while spec_c <= spec_lim: + if header: + msp_out.write(header) + header = '' + line = msp_in.readline() + + if not line: + break # end of file + + if re.match('^NAME:.*$', line, re.IGNORECASE): + header = line + spec_c += 1 + else: + msp_out.write(line) + spec_c = 1 + + return filelist + +def lcount(keyword, fname): + with open(fname, 'r') as fin: + return sum([1 for line in fin if keyword in line]) + +def main(): + + p = argparse.ArgumentParser(prog='PROG', + formatter_class=argparse.RawDescriptionHelpFormatter, + description='''split msp files''', + ) + + p.add_argument('-i', dest='i', help='msp file', required=True) + p.add_argument('-o', dest='o', help='out dir', required=True) + p.add_argument('-n', dest='n',) + + + args = p.parse_args() + + if not os.path.exists(args.o): + os.makedirs(args.o) + print('in file', args.i) + print('out dir', args.o) + print('nm files', args.n) + + msp_split(args.i, args.o, int(args.n)) + + +if __name__ == '__main__': + main() +