Mercurial > repos > tomnl > cameradims
diff dma-filelist-generation.py @ 0:55aa2dd24828 draft
planemo upload for repository https://github.com/computational-metabolomics/dma-tools-galaxy commit 6c48bd51987a28401de6cf5e49b1b30e5e73fe16-dirty
author | tomnl |
---|---|
date | Tue, 27 Mar 2018 06:52:39 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/dma-filelist-generation.py Tue Mar 27 06:52:39 2018 -0400 @@ -0,0 +1,154 @@ +import argparse +import textwrap +import os +import re +import collections +import csv +from operator import itemgetter + +def check_folder(in_dir, reg_string): + filelist = [] + for f in os.listdir(in_dir): + + if re.match(reg_string, f, re.IGNORECASE): + + fn, fe = os.path.splitext(f) + fnl = fn.split("_") + filename = fn + fullpth = os.path.join(in_dir, f) + wells = fnl[0] + + + + if re.match("^.*pos.*", f, re.IGNORECASE): + polarity = "pos" + elif re.match("^.*neg.*", f, re.IGNORECASE): + polarity = "neg" + else: + error_message = "Files need to have either 'pos' or 'neg' in the file name" + print error_message + return 1, error_message, "", "" + if re.match("^.*blank.*", f, re.IGNORECASE): + sample_type = "blank" + else: + sample_type = "sample" + + + filelist.append([wells, sample_type, polarity, filename, fullpth]) + + return filelist + +def get_filelist(filesin, o, file_type='mzML', create_filelist=True): + + filedict = collections.OrderedDict() + + + if file_type=="mzML": + reg_string = "^.*mzML$" + else: + reg_string = "^.*raw$" + + if isinstance(filesin, list): + filelist = [] + for f in filesin: + filelist.extend(check_folder(f, reg_string)) + else: + filelist = check_folder(filesin, reg_string) + + # Turn filelist into a dictionary + for f1 in filelist: + well = f1[0] + filedict[well] = [] + for f2 in filelist: + if well == f2[0]: + filedict[well].append(f2[1:len(f2)+1]) + + + + filelist = sorted(filelist, key=itemgetter(0, 1)) + + for k, v in filedict.iteritems(): + classes = [i[0] for i in v] + classes.sort() + if not classes == ['blank', 'sample']: + error_message = "!!!!ERROR!!!! Blank and sample required for each well, file type {}".format(file_type) + print error_message + return 1, error_message, "", "" + + print 'files of type {} checked, files OK'.format(file_type) + + if create_filelist: + + outname = write_filedict(filedict, o, file_type) + print 'filelist created in folder {}, using file type {}, full path {}'.format(o, file_type, outname) + + return 0, "files OK", filedict, filelist + +def write_filedict(filedict, out_dir, file_type, file_spacing='tsv'): + + outname = os.path.join(out_dir,'filelist_{}.{}'.format(file_type, file_spacing)) + + if file_spacing=='tsv': + delim = '\t' + elif file_spacing=='csv': + delim = ',' + else: + delim = ',' + + + with open(outname, 'wb') as csvfile: + w = csv.writer(csvfile, delimiter=delim) + + w.writerow(['filename','classLabel', 'multilist', 'multilistLabel']) + c = 1 + for k, v in filedict.iteritems(): + for i in v: + w.writerow([os.path.basename(i[3]), i[0], c, k]) + c +=1 + + return outname + + +def main(): + + p = argparse.ArgumentParser(prog='PROG', + formatter_class=argparse.RawDescriptionHelpFormatter, + description='''Create filelist for DMA DIMS nearline workflow''', + epilog=textwrap.dedent(''' + ------------------------------------------------------------------------- + + Example Usage + + python dma-filelist-generation.py -i [dir with sample files], [dir with blank files] -o . + + ''')) + + p.add_argument('-i', dest='i', help='dir with sample files', nargs = '*', required=True) + p.add_argument('-o', dest='o', help='out dir', required=True) + p.add_argument('--check_mzml', dest='check_mzml', action='store_true') + p.add_argument('--check_raw', dest='check_raw', action='store_true') + p.add_argument('--create_filelist_mzml', dest='create_filelist_mzml', action='store_true') + p.add_argument('--create_filelist_raw', dest='create_filelist_raw', action='store_true') + + args = p.parse_args() + + if not os.path.exists(args.o): + os.makedirs(args.o) + print args.o + + if not args.check_mzml and not args.check_raw: + print '--check_mzml or --check_raw (or both) are required as inputs' + exit() + + if args.check_mzml: + get_filelist(args.i, args.o, file_type='mzML', create_filelist=args.create_filelist_mzml) + + if args.check_raw: + + get_filelist(args.i, args.o, file_type='raw', create_filelist=args.create_filelist_raw) + + + +if __name__ == '__main__': + main() +