cwpair2: cwpair2_util.py comparison

comparison cwpair2_util.py @ 8:1fc26b8e618d draft

Uploaded

author	greg
date	Wed, 02 Dec 2015 16:13:51 -0500
parents	2e0ddcc726f9
children

comparison

equal deleted inserted replaced

-:d455f14530dc
+:1fc26b8e618d
 import traceback
 import matplotlib
 matplotlib.use('Agg')
 from matplotlib import pyplot
+# Data outputs
 DETAILS = 'D'
-FINAL_PLOTS = 'F'
+MATCHED_PAIRS = 'MP'
 ORPHANS = 'O'
-PREVIEW_PLOTS = 'P'
+# Data output formats
-SIMPLES = 'S'
-STATS_GRAPH = 'C'
 GFF_EXT = 'gff'
 TABULAR_EXT = 'tabular'
+# Statistics histograms output directory.
+HISTOGRAM = 'H'
+# Statistics outputs
+FINAL_PLOTS = 'F'
+PREVIEW_PLOTS = 'P'
+STATS_GRAPH = 'C'
 # Graph settings.
 COLORS = 'krg'
 Y_LABEL = 'Peak-pair counts'
 X_LABEL = 'Peak-pair distance (bp)'
 for l in ax.get_xticklines() + ax.get_yticklines():
 l.set_markeredgewidth(TICK_WIDTH)
 pyplot.savefig(fname)
-def create_directories(method):
+def create_directories():
-if method == 'all':
+# Output histograms in pdf.
-match_methods = METHODS.keys()
+os.mkdir(HISTOGRAM)
-else:
+os.mkdir('data_%s' % DETAILS)
-match_methods = [method]
+os.mkdir('data_%s' % ORPHANS)
-for match_method in match_methods:
+os.mkdir('data_%s' % MATCHED_PAIRS)
-os.mkdir('%s_%s' % (match_method, DETAILS))
-os.mkdir('%s_%s' % (match_method, FINAL_PLOTS))
-os.mkdir('%s_%s' % (match_method, ORPHANS))
-os.mkdir('%s_%s' % (match_method, PREVIEW_PLOTS))
-os.mkdir('%s_%s' % (match_method, SIMPLES))
-os.mkdir('%s_%s' % (match_method, STATS_GRAPH))
 def process_file(dataset_path, galaxy_hid, method, threshold, up_distance,
-down_distance, binsize, output_files, sort_score):
+down_distance, binsize, output_files):
 if method == 'all':
 match_methods = METHODS.keys()
 else:
 match_methods = [method]
 statistics = []
 match_method,
 threshold,
 up_distance,
 down_distance,
 binsize,
-output_files,
+output_files)
-sort_score)
 statistics.append(stats)
 if output_files == 'all' and method == 'all':
 frequency_plot([s['dist'] for s in statistics],
 statistics[0]['graph_path'],
 labels=METHODS.keys())
 return statistics
 def perform_process(dataset_path, galaxy_hid, method, threshold, up_distance,
-down_distance, binsize, output_files, sort_score):
+down_distance, binsize, output_files):
-output_details = output_files in ["all", "simple_orphan_detail"]
+output_details = output_files in ["all", "matched_pair_orphan_detail"]
 output_plots = output_files in ["all"]
-output_orphans = output_files in ["all", "simple_orphan", "simple_orphan_detail"]
+output_orphans = output_files in ["all", "matched_pair_orphan", "matched_pair_orphan_detail"]
 # Keep track of statistics for the output file
 statistics = {}
 input = csv.reader(open(dataset_path, 'rt'), delimiter='\t')
 fpath, fname = os.path.split(dataset_path)
 statistics['fname'] = '%s: data %s' % (method, str(galaxy_hid))
 statistics['dir'] = fpath
 if threshold >= 1:
 filter_string = 'fa%d' % threshold
 else:
 filter_string = 'f%d' % (threshold * 100)
-fname = 'data_%s_%su%dd%db%d' % (galaxy_hid, filter_string, up_distance, down_distance, binsize)
+fname = '%s_%su%dd%d_on_data_%s' % (method, filter_string, up_distance, down_distance, galaxy_hid)
-def make_path(output_type, extension=TABULAR_EXT):
+def make_histogram_path(output_type, fname):
-# Returns the full path for a certain output.
+return os.path.join(HISTOGRAM, 'histogram_%s_%s.%s' % (output_type, fname, PLOT_FORMAT))
+def make_path(output_type, extension, fname):
+# Returns the full path for an output.
 return os.path.join(output_type, '%s_%s.%s' % (output_type, fname, extension))
-def td_writer(output_type, extension=TABULAR_EXT):
+def td_writer(output_type, extension, fname):
 # Returns a tab-delimited writer for a specified output.
-output_file_path = make_path(output_type, extension)
+output_file_path = make_path(output_type, extension, fname)
 return csv.writer(open(output_file_path, 'wt'), delimiter='\t')
 try:
 chromosomes = parse_chromosomes(input)
 except Exception:
 stop_err('Unable to parse file "%s".\n%s' % (dataset_path, traceback.format_exc()))
 if output_details:
 # Details
-detailed_output = td_writer('%s_%s' % (method, DETAILS), extension=TABULAR_EXT)
+detailed_output = td_writer('data_%s' % DETAILS, TABULAR_EXT, fname)
 detailed_output.writerow(('chrom', 'start', 'end', 'value', 'strand') * 2 + ('midpoint', 'c-w reads sum', 'c-w distance (bp)'))
 if output_plots:
 # Final Plot
-final_plot_path = make_path('%s_%s' % (method, FINAL_PLOTS), PLOT_FORMAT)
+final_plot_path = make_histogram_path(FINAL_PLOTS, fname)
 if output_orphans:
 # Orphans
-orphan_output = td_writer('%s_%s' % (method, ORPHANS), extension=TABULAR_EXT)
+orphan_output = td_writer('data_%s' % ORPHANS, TABULAR_EXT, fname)
 orphan_output.writerow(('chrom', 'strand', 'start', 'end', 'value'))
 if output_plots:
 # Preview Plot
-preview_plot_path = make_path('%s_%s' % (method, PREVIEW_PLOTS), PLOT_FORMAT)
+preview_plot_path = make_histogram_path(PREVIEW_PLOTS, fname)
-# Simple
+# Matched Pairs.
-simple_output = td_writer('%s_%s' % (method, SIMPLES), extension=GFF_EXT)
+matched_pairs_output = td_writer('data_%s' % MATCHED_PAIRS, GFF_EXT, fname)
 statistics['stats_path'] = 'statistics.%s' % TABULAR_EXT
 if output_plots:
-statistics['graph_path'] = make_path('%s_%s' % (method, STATS_GRAPH), PLOT_FORMAT)
+statistics['graph_path'] = make_histogram_path(STATS_GRAPH, fname)
 statistics['perc95'] = perc95(chromosomes)
 if threshold > 0:
 # Apply filter
 filter(chromosomes, threshold)
 if method == 'mode':
 if output_orphans:
 for cpeak in crick:
 orphan_output.writerow((cname, cpeak[0], cpeak[1], cpeak[2], cpeak[3]))
 # Keep track of orphans for statistics.
 orphans += len(crick)
-# Sort output by score if specified.
+# Sort output descending by score.
-if sort_score == "desc":
+x.sort(key=lambda data: float(data[5]), reverse=True)
-x.sort(key=lambda data: float(data[5]), reverse=True)
-elif sort_score == "asc":
-x.sort(key=lambda data: float(data[5]))
 # Writing a summary to gff format file
 for row in x:
 row_tmp = list(row)
 # Tuples are immutable in Python, so the row is converted to a
 # list so that its first field can be rewritten with a 'chr' name.
 elif row_tmp[0] == "997":
 row_tmp[0] = 'chrX'
 else:
 row_tmp[0] = row_tmp[0]
 # Print row_tmp.
-simple_output.writerow(row_tmp)
+matched_pairs_output.writerow(row_tmp)
 statistics['paired'] = dist.size() * 2
 statistics['orphans'] = orphans
 statistics['final_mode'] = dist.mode()
 if output_plots:
 frequency_plot([dist], final_plot_path, title='Frequency distribution')

Mercurial > repos > greg > cwpair2

comparison cwpair2_util.py @ 8:1fc26b8e618d draft