comparison cwpair2_util.py @ 2:279cdc63bcff draft

Uploaded
author greg
date Sat, 21 Nov 2015 09:01:24 -0500
parents 4d86371aafa8
children 2e0ddcc726f9
comparison
Legend: equal, deleted, inserted, replaced
1:4d86371aafa8 2:279cdc63bcff
20 COLORS = 'krg' 20 COLORS = 'krg'
21 Y_LABEL = 'Peak-pair counts' 21 Y_LABEL = 'Peak-pair counts'
22 X_LABEL = 'Peak-pair distance (bp)' 22 X_LABEL = 'Peak-pair distance (bp)'
23 TICK_WIDTH = 3 23 TICK_WIDTH = 3
24 ADJUST = [0.140, 0.9, 0.9, 0.1] 24 ADJUST = [0.140, 0.9, 0.9, 0.1]
25 PLOT_FORMAT = 'pdf'
25 pyplot.rc('xtick.major', size=10.00) 26 pyplot.rc('xtick.major', size=10.00)
26 pyplot.rc('ytick.major', size=10.00) 27 pyplot.rc('ytick.major', size=10.00)
27 pyplot.rc('lines', linewidth=4.00) 28 pyplot.rc('lines', linewidth=4.00)
28 pyplot.rc('axes', linewidth=3.00) 29 pyplot.rc('axes', linewidth=3.00)
29 pyplot.rc('font', family='Bitstream Vera Sans', size=32.0) 30 pyplot.rc('font', family='Bitstream Vera Sans', size=32.0)
221 os.mkdir('%s_%s' % (match_method, PREVIEW_PLOTS)) 222 os.mkdir('%s_%s' % (match_method, PREVIEW_PLOTS))
222 os.mkdir('%s_%s' % (match_method, SIMPLES)) 223 os.mkdir('%s_%s' % (match_method, SIMPLES))
223 os.mkdir('%s_%s' % (match_method, STATS_GRAPH)) 224 os.mkdir('%s_%s' % (match_method, STATS_GRAPH))
224 225
225 226
226 def process_file(dataset_path, galaxy_hid, method, threshold, up_distance, down_distance, 227 def process_file(dataset_path, galaxy_hid, method, threshold, up_distance,
227 binsize, output_files, plot_format, sort_chromosome, sort_score): 228 down_distance, binsize, output_files, sort_chromosome, sort_score):
228 if method == 'all': 229 if method == 'all':
229 match_methods = METHODS.keys() 230 match_methods = METHODS.keys()
230 else: 231 else:
231 match_methods = [method] 232 match_methods = [method]
232 statistics = [] 233 statistics = []
237 threshold, 238 threshold,
238 up_distance, 239 up_distance,
239 down_distance, 240 down_distance,
240 binsize, 241 binsize,
241 output_files, 242 output_files,
242 plot_format,
243 sort_chromosome, 243 sort_chromosome,
244 sort_score) 244 sort_score)
245 statistics.append(stats) 245 statistics.append(stats)
246 if output_files == 'all' and method == 'all': 246 if output_files == 'all' and method == 'all':
247 frequency_plot([s['dist'] for s in statistics], 247 frequency_plot([s['dist'] for s in statistics],
249 labels=METHODS.keys()) 249 labels=METHODS.keys())
250 return statistics 250 return statistics
251 251
252 252
253 def perform_process(dataset_path, galaxy_hid, method, threshold, up_distance, 253 def perform_process(dataset_path, galaxy_hid, method, threshold, up_distance,
254 down_distance, binsize, output_files, plot_format, sort_chromosome, sort_score): 254 down_distance, binsize, output_files, sort_chromosome, sort_score):
255 output_details = output_files in ["all", "simple_orphan_detail"] 255 output_details = output_files in ["all", "simple_orphan_detail"]
256 output_plots = output_files in ["all"] 256 output_plots = output_files in ["all"]
257 output_orphans = output_files in ["all", "simple_orphan", "simple_orphan_detail"] 257 output_orphans = output_files in ["all", "simple_orphan", "simple_orphan_detail"]
258 # Keep track of statistics for the output file 258 # Keep track of statistics for the output file
259 statistics = {} 259 statistics = {}
284 # Details 284 # Details
285 detailed_output = td_writer('%s_%s' % (method, DETAILS), extension=TABULAR_EXT) 285 detailed_output = td_writer('%s_%s' % (method, DETAILS), extension=TABULAR_EXT)
286 detailed_output.writerow(('chrom', 'start', 'end', 'value', 'strand') * 2 + ('midpoint', 'c-w reads sum', 'c-w distance (bp)')) 286 detailed_output.writerow(('chrom', 'start', 'end', 'value', 'strand') * 2 + ('midpoint', 'c-w reads sum', 'c-w distance (bp)'))
287 if output_plots: 287 if output_plots:
288 # Final Plot 288 # Final Plot
289 final_plot_path = make_path('%s_%s' % (method, FINAL_PLOTS), plot_format) 289 final_plot_path = make_path('%s_%s' % (method, FINAL_PLOTS), PLOT_FORMAT)
290 if output_orphans: 290 if output_orphans:
291 # Orphans 291 # Orphans
292 orphan_output = td_writer('%s_%s' % (method, ORPHANS), extension=TABULAR_EXT) 292 orphan_output = td_writer('%s_%s' % (method, ORPHANS), extension=TABULAR_EXT)
293 orphan_output.writerow(('chrom', 'strand', 'start', 'end', 'value')) 293 orphan_output.writerow(('chrom', 'strand', 'start', 'end', 'value'))
294 if output_plots: 294 if output_plots:
295 # Preview Plot 295 # Preview Plot
296 preview_plot_path = make_path('%s_%s' % (method, PREVIEW_PLOTS), plot_format) 296 preview_plot_path = make_path('%s_%s' % (method, PREVIEW_PLOTS), PLOT_FORMAT)
297 # Simple 297 # Simple
298 simple_output = td_writer('%s_%s' % (method, SIMPLES), extension=GFF_EXT) 298 simple_output = td_writer('%s_%s' % (method, SIMPLES), extension=GFF_EXT)
299 statistics['stats_path'] = 'statistics.%s' % TABULAR_EXT 299 statistics['stats_path'] = 'statistics.%s' % TABULAR_EXT
300 if output_plots: 300 if output_plots:
301 statistics['graph_path'] = make_path('%s_%s' % (method, STATS_GRAPH), plot_format) 301 statistics['graph_path'] = make_path('%s_%s' % (method, STATS_GRAPH), PLOT_FORMAT)
302 statistics['perc95'] = perc95(chromosomes) 302 statistics['perc95'] = perc95(chromosomes)
303 if threshold > 0: 303 if threshold > 0:
304 # Apply filter 304 # Apply filter
305 filter(chromosomes, threshold) 305 filter(chromosomes, threshold)
306 if method == 'mode': 306 if method == 'mode':