comparison dma-filelist-generation.py @ 1:defa57c7775e draft

planemo upload for repository https://github.com/computational-metabolomics/dma-tools-galaxy commit 6c48bd51987a28401de6cf5e49b1b30e5e73fe16-dirty
author tomnl
date Tue, 27 Mar 2018 07:18:42 -0400
parents
children
comparison
equal deleted inserted replaced
0:d2940fcb7104 1:defa57c7775e
1 import argparse
2 import textwrap
3 import os
4 import re
5 import collections
6 import csv
7 from operator import itemgetter
8
def check_folder(in_dir, reg_string):
    """Collect metadata for the files in ``in_dir`` whose name matches ``reg_string``.

    The file name (not the directory path) is inspected case-insensitively:

    * the well is the part of the name before the first underscore,
    * the polarity is "pos" or "neg", depending on which of those substrings
      occurs anywhere in the name ("pos" is tested first),
    * the sample type is "blank" if the name contains "blank", else "sample".

    Args:
        in_dir: directory to scan (not recursive).
        reg_string: regular expression applied with ``re.match`` and
            ``re.IGNORECASE`` to each directory entry.

    Returns:
        On success, a list with one
        ``[well, sample_type, polarity, filename_without_extension, full_path]``
        entry per matching file, in sorted file-name order.
        If a matching file has neither "pos" nor "neg" in its name, the error
        message is printed and the tuple ``(1, error_message, "", "")`` is
        returned instead of a list.
    """
    filelist = []

    # os.listdir() yields entries in arbitrary order; sort them so the result
    # (and anything numbered from it) is reproducible between runs.
    for f in sorted(os.listdir(in_dir)):
        if not re.match(reg_string, f, re.IGNORECASE):
            continue

        filename = os.path.splitext(f)[0]
        fullpth = os.path.join(in_dir, f)
        wells = filename.split("_")[0]

        if re.match("^.*pos.*", f, re.IGNORECASE):
            polarity = "pos"
        elif re.match("^.*neg.*", f, re.IGNORECASE):
            polarity = "neg"
        else:
            error_message = "Files need to have either 'pos' or 'neg' in the file name"
            # Parenthesised single-argument print behaves the same on
            # Python 2 and Python 3.
            print(error_message)
            return 1, error_message, "", ""

        if re.match("^.*blank.*", f, re.IGNORECASE):
            sample_type = "blank"
        else:
            sample_type = "sample"

        filelist.append([wells, sample_type, polarity, filename, fullpth])

    return filelist
40
def get_filelist(filesin, o, file_type='mzML', create_filelist=True):
    """Validate the data files in one or more folders and optionally write a filelist.

    Every folder is scanned with ``check_folder`` for files ending in
    ``mzML`` (when ``file_type == "mzML"``) or ``raw`` (any other
    ``file_type``). The files are grouped by well, and each well must contain
    exactly one "blank" and one "sample" file.

    Args:
        filesin: a single folder path, or a list/tuple of folder paths.
        o: output folder passed to ``write_filedict``.
        file_type: "mzML" selects mzML files; any other value selects raw files.
        create_filelist: when true, write the filelist via ``write_filedict``.

    Returns:
        ``(0, "files OK", filedict, filelist)`` on success, where ``filedict``
        is an ``OrderedDict`` mapping each well to a list of
        ``[sample_type, polarity, filename, full_path]`` entries and
        ``filelist`` is the flat list of ``check_folder`` entries sorted by
        well and sample type.
        ``(1, error_message, "", "")`` if a folder could not be parsed or a
        well does not have exactly one blank and one sample.
    """
    if file_type == "mzML":
        reg_string = "^.*mzML$"
    else:
        reg_string = "^.*raw$"

    if isinstance(filesin, (list, tuple)):
        folders = filesin
    else:
        folders = [filesin]

    filelist = []
    for folder in folders:
        found = check_folder(folder, reg_string)
        # check_folder() reports failure by returning a
        # (status, message, "", "") tuple instead of a list; pass it on rather
        # than treating its elements as file entries.
        if isinstance(found, tuple):
            return found
        filelist.extend(found)

    # Group the entries by well in a single pass, keeping first-seen well order.
    filedict = collections.OrderedDict()
    for entry in filelist:
        filedict.setdefault(entry[0], []).append(entry[1:])

    filelist = sorted(filelist, key=itemgetter(0, 1))

    for entries in filedict.values():
        classes = sorted(e[0] for e in entries)
        if classes != ['blank', 'sample']:
            error_message = "!!!!ERROR!!!! Blank and sample required for each well, file type {}".format(file_type)
            print(error_message)
            return 1, error_message, "", ""

    print('files of type {} checked, files OK'.format(file_type))

    if create_filelist:
        outname = write_filedict(filedict, o, file_type)
        print('filelist created in folder {}, using file type {}, full path {}'.format(o, file_type, outname))

    return 0, "files OK", filedict, filelist
86
def write_filedict(filedict, out_dir, file_type, file_spacing='tsv'):
    """Write the per-well file dictionary to ``filelist_<file_type>.<file_spacing>``.

    The file starts with the header row
    ``filename, classLabel, multilist, multilistLabel``, followed by one row
    per file: the base name of the file path, its sample type, the 1-based
    index of its well within ``filedict``, and the well key.

    Args:
        filedict: mapping of well -> list of
            ``[sample_type, polarity, filename, full_path]`` entries; wells
            are numbered in iteration order.
        out_dir: existing directory in which the file is created.
        file_type: used only to build the output file name.
        file_spacing: output file extension; "tsv" selects a tab delimiter,
            any other value a comma.

    Returns:
        The path of the file that was written.
    """
    import sys

    outname = os.path.join(out_dir, 'filelist_{}.{}'.format(file_type, file_spacing))
    delim = '\t' if file_spacing == 'tsv' else ','

    # The csv module wants a binary file on Python 2 but a text file opened
    # with newline='' on Python 3.
    if sys.version_info[0] < 3:
        csvfile = open(outname, 'wb')
    else:
        csvfile = open(outname, 'w', newline='')

    with csvfile:
        w = csv.writer(csvfile, delimiter=delim)
        w.writerow(['filename', 'classLabel', 'multilist', 'multilistLabel'])
        # All files of one well share the same multilist number.
        for c, (k, v) in enumerate(filedict.items(), 1):
            for i in v:
                w.writerow([os.path.basename(i[3]), i[0], c, k])

    return outname
110
111
def main():
    """Command-line entry point: validate input folders and optionally write filelists.

    Parses the command line, creates the output directory if it does not
    exist, and runs ``get_filelist`` for mzML and/or raw files depending on
    ``--check_mzml`` / ``--check_raw``. A filelist is written for a file type
    only when the matching ``--create_filelist_*`` flag is given.

    Exits with status 1 if neither check flag is given or if any requested
    check reports a failure.
    """
    import sys

    p = argparse.ArgumentParser(
        prog='PROG',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description='''Create filelist for DMA DIMS nearline workflow''',
        epilog=textwrap.dedent('''
            -------------------------------------------------------------------------

            Example Usage

            python dma-filelist-generation.py -i [dir with sample files] [dir with blank files] -o . --check_mzml --create_filelist_mzml

            '''))

    # nargs='+': at least one input directory is needed for the checks to
    # have anything to look at.
    p.add_argument('-i', dest='i', help='dir with sample files', nargs='+', required=True)
    p.add_argument('-o', dest='o', help='out dir', required=True)
    p.add_argument('--check_mzml', dest='check_mzml', action='store_true')
    p.add_argument('--check_raw', dest='check_raw', action='store_true')
    p.add_argument('--create_filelist_mzml', dest='create_filelist_mzml', action='store_true')
    p.add_argument('--create_filelist_raw', dest='create_filelist_raw', action='store_true')

    args = p.parse_args()

    if not os.path.exists(args.o):
        os.makedirs(args.o)
    print(args.o)

    if not args.check_mzml and not args.check_raw:
        print('--check_mzml or --check_raw (or both) are required as inputs')
        sys.exit(1)

    # Run every requested check before exiting so that all problems are
    # reported; get_filelist() returns a non-zero status as its first element
    # on failure.
    failed = False

    if args.check_mzml:
        status = get_filelist(args.i, args.o, file_type='mzML',
                              create_filelist=args.create_filelist_mzml)[0]
        failed = failed or status != 0

    if args.check_raw:
        status = get_filelist(args.i, args.o, file_type='raw',
                              create_filelist=args.create_filelist_raw)[0]
        failed = failed or status != 0

    if failed:
        sys.exit(1)
149
150
151
# Run the command-line interface only when executed as a script, not on import.
if __name__ == '__main__':
    main()
154