Mercurial > repos > pieterlukasse > prims_metabolomics
comparison combine_output.py @ 1:071a185c2ced
new tools
| author | pieter.lukasse@wur.nl |
|---|---|
| date | Fri, 24 Oct 2014 12:52:56 +0200 |
| parents | 4b94bb2d381c |
| children |
comparison
equal
deleted
inserted
replaced
| 0:4b94bb2d381c | 1:071a185c2ced |
|---|---|
| 153 ''' | 153 ''' |
| 154 Writes tab-separated data to file | 154 Writes tab-separated data to file |
| 155 @param data: dictionary containing merged dataset | 155 @param data: dictionary containing merged dataset |
| 156 @param out_csv: output csv file | 156 @param out_csv: output csv file |
| 157 ''' | 157 ''' |
| 158 header = ['Centrotype', | 158 # Columns we don't repeat: |
| | 159 header_part1 = ['Centrotype', |
| 159 'cent.Factor', | 160 'cent.Factor', |
| 160 'scan nr.', | 161 'scan nr.', |
| 161 'R.T. (umin)', | 162 'R.T. (umin)', |
| 162 'nr. Peaks', | 163 'nr. Peaks', |
| 163 'R.T.', | 164 'R.T.'] |
| | 165 # These are the headers/columns we repeat in case of |
| | 166 # combining hits in one line (see alternative_headers method below): |
| | 167 header_part2 = [ |
| 164 'Name', | 168 'Name', |
| 165 'FORMULA', | 169 'FORMULA', |
| 166 'Library', | 170 'Library', |
| 167 'CAS', | 171 'CAS', |
| 168 'Forward', | 172 'Forward', |
| 188 outfile_multi_handle = open(out_csv_multi, 'wb') | 192 outfile_multi_handle = open(out_csv_multi, 'wb') |
| 189 output_single_handle = csv.writer(outfile_single_handle, delimiter="\t") | 193 output_single_handle = csv.writer(outfile_single_handle, delimiter="\t") |
| 190 output_multi_handle = csv.writer(outfile_multi_handle, delimiter="\t") | 194 output_multi_handle = csv.writer(outfile_multi_handle, delimiter="\t") |
| 191 | 195 |
| 192 # Write headers | 196 # Write headers |
| 193 output_single_handle.writerow(header) | 197 output_single_handle.writerow(header_part1 + header_part2) |
| 194 output_multi_handle.writerow(header * nhits) | 198 output_multi_handle.writerow(header_part1 + header_part2 + alternative_headers(header_part2, nhits-1)) |
| 195 # Combine all hits for each centrotype into one line | 199 # Combine all hits for each centrotype into one line |
| 196 line = [] | 200 line = [] |
| 197 for centrotype_idx in xrange(len(data)): | 201 for centrotype_idx in xrange(len(data)): |
| | 202 i = 0 |
| 198 for hit in data[centrotype_idx]: | 203 for hit in data[centrotype_idx]: |
| 199 line.extend(hit) | 204 if i==0: |
| | 205 line.extend(hit) |
| | 206 else: |
| | 207 line.extend(hit[6:]) |
| | 208 i = i+1 |
| | 209 # small validation (if error, it is a programming error): |
| | 210 if i > nhits: |
| | 211 raise Exception('Error: more hits that expected for centrotype_idx ' + centrotype_idx) |
| 200 output_multi_handle.writerow(line) | 212 output_multi_handle.writerow(line) |
| 201 line = [] | 213 line = [] |
| 202 | 214 |
| 203 # Write one line for each centrotype | 215 # Write one line for each centrotype |
| 204 for centrotype_idx in xrange(len(data)): | 216 for centrotype_idx in xrange(len(data)): |
| 205 for hit in data[centrotype_idx]: | 217 for hit in data[centrotype_idx]: |
| 206 output_single_handle.writerow(hit) | 218 output_single_handle.writerow(hit) |
| 207 | 219 |
| | 220 def alternative_headers(header_part2, nr_alternative_hits): |
| | 221 ''' |
| | 222 This method will iterate over the header names and add the string 'ALT#_' before each, |
| | 223 where # is the number of the alternative, according to number of alternative hits we want to add |
| | 224 to final csv/tsv |
| | 225 ''' |
| | 226 result = [] |
| | 227 for i in xrange(nr_alternative_hits): |
| | 228 for header_name in header_part2: |
| | 229 result.append("ALT" + str(i+1) + "_" + header_name) |
| | 230 return result |
| 208 | 231 |
| 209 def main(): | 232 def main(): |
| 210 ''' | 233 ''' |
| 211 Combine Output main function | 234 Combine Output main function |
| 212 It will merge the result files from "RankFilter" and "Lookup RI for CAS numbers" | 235 It will merge the result files from "RankFilter" and "Lookup RI for CAS numbers" |
