annotate combine_output.py @ 0:4b94bb2d381c

Initial commit to toolshed
author pieter.lukasse@wur.nl
date Thu, 16 Jan 2014 13:22:38 +0100
parents
children 071a185c2ced
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
4b94bb2d381c Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
1 #!/usr/bin/env python
4b94bb2d381c Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
2 # encoding: utf-8
4b94bb2d381c Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
3 '''
4b94bb2d381c Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
4 Module to combine output from two GCMS Galaxy tools (RankFilter and CasLookup)
4b94bb2d381c Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
5 '''
4b94bb2d381c Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
6
4b94bb2d381c Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
7 import csv
4b94bb2d381c Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
8 import re
4b94bb2d381c Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
9 import sys
4b94bb2d381c Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
10 import math
4b94bb2d381c Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
11 import pprint
4b94bb2d381c Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
12
4b94bb2d381c Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
13 __author__ = "Marcel Kempenaar"
4b94bb2d381c Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
14 __contact__ = "brs@nbic.nl"
4b94bb2d381c Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
15 __copyright__ = "Copyright, 2012, Netherlands Bioinformatics Centre"
4b94bb2d381c Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
16 __license__ = "MIT"
4b94bb2d381c Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
17
4b94bb2d381c Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
def _process_data(in_csv):
    '''
    Generic method to parse a tab-separated file returning a dictionary with named columns.

    The first row of the file is used as the header. Each header name becomes a key whose
    value is the list of that column's values (as strings), in row order.

    @param in_csv: input filename to be parsed
    @return: dictionary mapping column name -> list of the values in that column
    @raise IndexError: if the file is empty or a data row has fewer fields than the header
    '''
    # Python 2's csv module needs universal-newline mode ('rU'); on Python 3 plain text
    # mode already translates newlines and the 'U' flag is rejected from 3.11 onwards.
    mode = 'rU' if sys.version_info[0] < 3 else 'r'
    # The context manager closes the handle even when parsing fails.
    with open(in_csv, mode) as handle:
        data = list(csv.reader(handle, delimiter='\t'))
    header = data.pop(0)
    # Create dictionary with column name as key
    output = {}
    for index, column_name in enumerate(header):
        output[column_name] = [row[index] for row in data]
    return output
4b94bb2d381c Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
30
4b94bb2d381c Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
31
4b94bb2d381c Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
def _merge_data(rankfilter, caslookup):
    '''
    Merges data from both input dictionaries based on the Centrotype field. This method will
    build up a new list containing the merged hits as the items.

    Rows are grouped per distinct RankFilter ID, in order of first appearance. The centrotype
    is the part of the ID before the first '-'; the n-th RankFilter row of a compound is
    paired with the n-th CasLookup row carrying that centrotype.

    @param rankfilter: dictionary holding RankFilter output in the form of N lists (one list per attribute name)
    @param caslookup: dictionary holding CasLookup output in the form of N lists (one list per attribute name)
    @return: tuple (merged, nhits). merged is a list with one item per distinct compound ID,
             each item being the list of merged hit rows for that compound. nhits is the
             largest number of hits found for any single compound (0 for empty input).
    @raise Exception: if both inputs do not have the same number of rows
    @raise IndexError: if a compound has more RankFilter rows than there are CasLookup rows
                       for its centrotype
    '''
    # Both inputs are expected to have the same length:
    if len(rankfilter['ID']) != len(caslookup['Centrotype']):
        raise Exception('rankfilter and caslookup files should have the same nr of rows/records ')

    # Group the row numbers per key in one pass, instead of rescanning both tables
    # for every compound.
    rf_rows = {}
    compound_order = []
    for row_nr, compound_id in enumerate(rankfilter['ID']):
        if compound_id not in rf_rows:
            rf_rows[compound_id] = []
            compound_order.append(compound_id)
        rf_rows[compound_id].append(row_nr)

    cl_rows = {}
    for row_nr, centrotype in enumerate(caslookup['Centrotype']):
        cl_rows.setdefault(centrotype, []).append(row_nr)

    rf_keys = list(rankfilter.keys())
    cl_keys = list(caslookup.keys())

    merged = []
    # Track the widest row: the multi-hit header is repeated nhits times, so it has to
    # cover the compound with the most hits, not just the last compound processed.
    max_hits = 0
    for compound_id in compound_order:
        # get centrotype nr
        centrotype = compound_id.split('-')[0]
        rindex = rf_rows[compound_id]
        cindex = cl_rows.get(centrotype, [])

        merged_hits = []
        for hit, rf_row in enumerate(rindex):
            # Build one record (attribute name -> value) per table for this hit
            rf_record = dict((key, rankfilter[key][rf_row]) for key in rf_keys)
            # NOTE(review): raises IndexError when CasLookup has too few rows for this
            # centrotype; _get_default_caslookup looks intended for that case but is not
            # called anywhere in the visible code - confirm the desired behaviour.
            cl_row = cindex[hit]
            cl_record = dict((key, caslookup[key][cl_row]) for key in cl_keys)
            merged_hits.append(_add_hit(rf_record, cl_record))

        merged.append(merged_hits)
        max_hits = max(max_hits, len(rindex))

    return merged, max_hits
4b94bb2d381c Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
73
4b94bb2d381c Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
74
4b94bb2d381c Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
def _add_hit(rankfilter, caslookup):
    '''
    Combines single records from both the RankFilter- and CasLookup-tools into one output
    row: a flat list of 27 values in the same order as the header written by _save_data.

    @param rankfilter: record (dictionary) of one compound in the RankFilter output
    @param caslookup: matching record (dictionary) of one compound in the CasLookup output
    @return: list with the merged values of this hit
    @raise KeyError: if an expected attribute is missing from either record; both records
                     are written to stdout before the error is re-raised
    '''
    # The ID in the RankFilter output contains the following 5 '-'-separated fields:
    # Centrotype, cent.Factor, scan nr, R.T. (umin) and nr. Peaks
    rf_id = rankfilter['ID'].split('-')
    try:
        name, formula = _remove_formula(rankfilter['Name'])
        hit = [rf_id[0],  # Centrotype
               rf_id[1],  # cent.Factor
               rf_id[2],  # scan nr
               rf_id[3],  # R.T. (umin)
               rf_id[4],  # nr. Peaks
               # Appending other fields
               rankfilter['R.T.'],
               name,
               # Prefer the formula embedded in the name; fall back to the CasLookup one
               caslookup['FORMULA'] if not formula else formula,
               rankfilter['Library'].strip(),
               rankfilter['CAS'].strip(),
               rankfilter['Forward'],
               rankfilter['Reverse'],
               ((float(rankfilter['Forward']) + float(rankfilter['Reverse'])) / 2),
               rankfilter['RIexp'],
               caslookup['RI'],
               rankfilter['RIsvr'],
               # Calculate absolute differences
               math.fabs(float(rankfilter['RIexp']) - float(rankfilter['RIsvr'])),
               math.fabs(float(caslookup['RI']) - float(rankfilter['RIexp'])),
               caslookup['Regression.Column.Name'],
               caslookup['min'],
               caslookup['max'],
               caslookup['nr.duplicates'],
               caslookup['Column.phase.type'],
               caslookup['Column.name'],
               rankfilter['Rank'],
               rankfilter['%rel.err'],
               rankfilter['Synonyms']]
    except KeyError:
        # pprint.pformat returns the formatted text; pprint.pprint would print the record
        # straight away and return None, garbling the diagnostic message.
        sys.stdout.write("Problem reading in data from input file(s):\n")
        sys.stdout.write("Respective CasLookup entry: \n%s\n" % pprint.pformat(caslookup))
        sys.stdout.write("Respective RankFilter entry: \n%s\n" % pprint.pformat(rankfilter))
        # Bare raise keeps the original traceback
        raise

    return hit
4b94bb2d381c Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
121
4b94bb2d381c Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
122
4b94bb2d381c Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
def _remove_formula(name):
    '''
    The RankFilter Name field often contains the Formula as well, this function removes it from the Name.

    The last whitespace-separated token is treated as the formula when it consists only of
    element-like symbols (a capital followed by up to two lowercase letters), each optionally
    followed by a count. Whitespace runs in the name are collapsed to single spaces.

    @param name: complete name of the compound from the RankFilter output
    @return: tuple (name, formula); formula is False when no trailing formula was recognised
    '''
    tokens = name.split()
    if not tokens:
        # Empty or whitespace-only name: there is no last token to inspect
        return '', False
    poss_formula = tokens[-1]
    # Raw string so that the regex escape is not interpreted as a string escape
    if re.match(r"^(([A-Z][a-z]{0,2})(\d*))+$", poss_formula):
        return ' '.join(tokens[:-1]), poss_formula
    return ' '.join(tokens), False
4b94bb2d381c Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
135
4b94bb2d381c Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
136
4b94bb2d381c Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
def _get_default_caslookup():
    '''
    Builds a placeholder CasLookup record.

    The Cas Lookup tool might not have found all compounds in the library searched;
    this default record is meant to be combined with the Rank Filter output in that case.
    A new dictionary is created on every call.

    @return: dictionary with neutral values for the CasLookup attributes
    '''
    placeholder_fields = [
        ('FORMULA', 'N/A'),
        ('RI', '0.0'),
        ('Regression.Column.Name', 'None'),
        ('min', '0.0'),
        ('max', '0.0'),
        ('nr.duplicates', '0'),
        ('Column.phase.type', 'N/A'),
        ('Column.name', 'N/A'),
    ]
    return dict(placeholder_fields)
4b94bb2d381c Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
150
4b94bb2d381c Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
151
4b94bb2d381c Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
def _open_tsv_for_writing(path):
    '''
    Opens path in the mode csv.writer needs: binary on Python 2, text mode with
    newline translation disabled on Python 3.
    '''
    if sys.version_info[0] < 3:
        return open(path, 'wb')
    return open(path, 'w', newline='')


def _save_data(data, nhits, out_csv_single, out_csv_multi):
    '''
    Writes the merged hits as tab-separated data to two files.

    The "single" file gets the header followed by one line per hit. The "multi" file gets
    the header repeated nhits times, followed by one line per centrotype holding all of
    its hits side by side.

    @param data: list with one item per centrotype, each item being a list of hits
                 (a hit is a list of values in header order)
    @param nhits: number of times the header is repeated in the multi-hit file
    @param out_csv_single: output filename for the one-hit-per-line file
    @param out_csv_multi: output filename for the one-centrotype-per-line file
    '''
    header = ['Centrotype',
              'cent.Factor',
              'scan nr.',
              'R.T. (umin)',
              'nr. Peaks',
              'R.T.',
              'Name',
              'FORMULA',
              'Library',
              'CAS',
              'Forward',
              'Reverse',
              'Avg. (Forward, Reverse)',
              'RIexp',
              'RI',
              'RIsvr',
              'RIexp - RIsvr',
              'RI - RIexp',
              'Regression.Column.Name',
              'min',
              'max',
              'nr.duplicates',
              'Column.phase.type',
              'Column.name',
              'Rank',
              '%rel.err',
              'Synonyms']

    # The context managers flush and close both files, also when writing fails halfway
    with _open_tsv_for_writing(out_csv_single) as outfile_single_handle:
        with _open_tsv_for_writing(out_csv_multi) as outfile_multi_handle:
            output_single_handle = csv.writer(outfile_single_handle, delimiter="\t")
            output_multi_handle = csv.writer(outfile_multi_handle, delimiter="\t")

            # Write headers
            output_single_handle.writerow(header)
            output_multi_handle.writerow(header * nhits)

            for hits in data:
                # Combine all hits for each centrotype into one line
                output_multi_handle.writerow([value for hit in hits for value in hit])
                # Write one line for each hit of this centrotype
                for hit in hits:
                    output_single_handle.writerow(hit)
4b94bb2d381c Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
207
4b94bb2d381c Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
208
4b94bb2d381c Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
def main():
    '''
    Combine Output main function.

    Merges the result files from "RankFilter" and "Lookup RI for CAS numbers" and writes
    the merged hits to two tab-separated files. Expects four command-line arguments:
    the RankFilter result file, the CasLookup result file, the single-hit output file
    and the multi-hit output file (in that order).

    NOTE(review): the original note here said the caslookup_result_file will typically
    have fewer lines than rankfilter_result_file, but _merge_data raises when the row
    counts differ - confirm which behaviour is intended.
    '''
    # Fetch all four arguments first so a missing one fails before any file is read
    rankfilter_path, caslookup_path, single_out, multi_out = [sys.argv[pos] for pos in (1, 2, 3, 4)]

    # Read RankFilter and CasLookup output files
    rankfilter_table = _process_data(rankfilter_path)
    caslookup_table = _process_data(caslookup_path)

    # Merge both tables and store the result
    merged_hits, hits_per_line = _merge_data(rankfilter_table, caslookup_table)
    _save_data(merged_hits, hits_per_line, single_out, multi_out)
4b94bb2d381c Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
227
4b94bb2d381c Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
228
4b94bb2d381c Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
# Run the merge only when this file is executed as a script, not when it is imported
if __name__ == "__main__":
    main()