Mercurial > repos > greg > cwpair2
changeset 8:1fc26b8e618d draft
Uploaded
author | greg |
---|---|
date | Wed, 02 Dec 2015 16:13:51 -0500 |
parents | d455f14530dc |
children | 6469dda597d6 |
files | cwpair2_util.py |
diffstat | 1 files changed, 38 insertions(+), 40 deletions(-) [+] |
line wrap: on
line diff
--- a/cwpair2_util.py Tue Nov 24 08:15:57 2015 -0500 +++ b/cwpair2_util.py Wed Dec 02 16:13:51 2015 -0500 @@ -7,14 +7,19 @@ matplotlib.use('Agg') from matplotlib import pyplot +# Data outputs DETAILS = 'D' -FINAL_PLOTS = 'F' +MATCHED_PAIRS = 'MP' ORPHANS = 'O' -PREVIEW_PLOTS = 'P' -SIMPLES = 'S' -STATS_GRAPH = 'C' +# Data output formats GFF_EXT = 'gff' TABULAR_EXT = 'tabular' +# Statistics histograms output directory. +HISTOGRAM = 'H' +# Statistics outputs +FINAL_PLOTS = 'F' +PREVIEW_PLOTS = 'P' +STATS_GRAPH = 'C' # Graph settings. COLORS = 'krg' @@ -210,22 +215,16 @@ pyplot.savefig(fname) -def create_directories(method): - if method == 'all': - match_methods = METHODS.keys() - else: - match_methods = [method] - for match_method in match_methods: - os.mkdir('%s_%s' % (match_method, DETAILS)) - os.mkdir('%s_%s' % (match_method, FINAL_PLOTS)) - os.mkdir('%s_%s' % (match_method, ORPHANS)) - os.mkdir('%s_%s' % (match_method, PREVIEW_PLOTS)) - os.mkdir('%s_%s' % (match_method, SIMPLES)) - os.mkdir('%s_%s' % (match_method, STATS_GRAPH)) +def create_directories(): + # Output histograms in pdf. 
+ os.mkdir(HISTOGRAM) + os.mkdir('data_%s' % DETAILS) + os.mkdir('data_%s' % ORPHANS) + os.mkdir('data_%s' % MATCHED_PAIRS) def process_file(dataset_path, galaxy_hid, method, threshold, up_distance, - down_distance, binsize, output_files, sort_score): + down_distance, binsize, output_files): if method == 'all': match_methods = METHODS.keys() else: @@ -239,8 +238,7 @@ up_distance, down_distance, binsize, - output_files, - sort_score) + output_files) statistics.append(stats) if output_files == 'all' and method == 'all': frequency_plot([s['dist'] for s in statistics], @@ -250,10 +248,10 @@ def perform_process(dataset_path, galaxy_hid, method, threshold, up_distance, - down_distance, binsize, output_files, sort_score): - output_details = output_files in ["all", "simple_orphan_detail"] + down_distance, binsize, output_files): + output_details = output_files in ["all", "matched_pair_orphan_detail"] output_plots = output_files in ["all"] - output_orphans = output_files in ["all", "simple_orphan", "simple_orphan_detail"] + output_orphans = output_files in ["all", "matched_pair_orphan", "matched_pair_orphan_detail"] # Keep track of statistics for the output file statistics = {} input = csv.reader(open(dataset_path, 'rt'), delimiter='\t') @@ -264,15 +262,18 @@ filter_string = 'fa%d' % threshold else: filter_string = 'f%d' % (threshold * 100) - fname = 'data_%s_%su%dd%db%d' % (galaxy_hid, filter_string, up_distance, down_distance, binsize) + fname = '%s_%su%dd%d_on_data_%s' % (method, filter_string, up_distance, down_distance, galaxy_hid) - def make_path(output_type, extension=TABULAR_EXT): - # Returns the full path for a certain output. + def make_histogram_path(output_type, fname): + return os.path.join(HISTOGRAM, 'histogram_%s_%s.%s' % (output_type, fname, PLOT_FORMAT)) + + def make_path(output_type, extension, fname): + # Returns the full path for an output. 
return os.path.join(output_type, '%s_%s.%s' % (output_type, fname, extension)) - def td_writer(output_type, extension=TABULAR_EXT): + def td_writer(output_type, extension, fname): # Returns a tab-delimited writer for a specified output. - output_file_path = make_path(output_type, extension) + output_file_path = make_path(output_type, extension, fname) return csv.writer(open(output_file_path, 'wt'), delimiter='\t') try: @@ -281,23 +282,23 @@ stop_err('Unable to parse file "%s".\n%s' % (dataset_path, traceback.format_exc())) if output_details: # Details - detailed_output = td_writer('%s_%s' % (method, DETAILS), extension=TABULAR_EXT) + detailed_output = td_writer('data_%s' % DETAILS, TABULAR_EXT, fname) detailed_output.writerow(('chrom', 'start', 'end', 'value', 'strand') * 2 + ('midpoint', 'c-w reads sum', 'c-w distance (bp)')) if output_plots: # Final Plot - final_plot_path = make_path('%s_%s' % (method, FINAL_PLOTS), PLOT_FORMAT) + final_plot_path = make_histogram_path(FINAL_PLOTS, fname) if output_orphans: # Orphans - orphan_output = td_writer('%s_%s' % (method, ORPHANS), extension=TABULAR_EXT) + orphan_output = td_writer('data_%s' % ORPHANS, TABULAR_EXT, fname) orphan_output.writerow(('chrom', 'strand', 'start', 'end', 'value')) if output_plots: # Preview Plot - preview_plot_path = make_path('%s_%s' % (method, PREVIEW_PLOTS), PLOT_FORMAT) - # Simple - simple_output = td_writer('%s_%s' % (method, SIMPLES), extension=GFF_EXT) + preview_plot_path = make_histogram_path(PREVIEW_PLOTS, fname) + # Matched Pairs. 
+ matched_pairs_output = td_writer('data_%s' % MATCHED_PAIRS, GFF_EXT, fname) statistics['stats_path'] = 'statistics.%s' % TABULAR_EXT if output_plots: - statistics['graph_path'] = make_path('%s_%s' % (method, STATS_GRAPH), PLOT_FORMAT) + statistics['graph_path'] = make_histogram_path(STATS_GRAPH, fname) statistics['perc95'] = perc95(chromosomes) if threshold > 0: # Apply filter @@ -366,11 +367,8 @@ orphan_output.writerow((cname, cpeak[0], cpeak[1], cpeak[2], cpeak[3])) # Keep track of orphans for statistics. orphans += len(crick) - # Sort output by score if specified. - if sort_score == "desc": - x.sort(key=lambda data: float(data[5]), reverse=True) - elif sort_score == "asc": - x.sort(key=lambda data: float(data[5])) + # Sort output descending by score. + x.sort(key=lambda data: float(data[5]), reverse=True) # Writing a summary to gff format file for row in x: row_tmp = list(row) @@ -385,7 +383,7 @@ else: row_tmp[0] = row_tmp[0] # Print row_tmp. - simple_output.writerow(row_tmp) + matched_pairs_output.writerow(row_tmp) statistics['paired'] = dist.size() * 2 statistics['orphans'] = orphans statistics['final_mode'] = dist.mode()