Mercurial > repos > greg > repmatch_gff3
changeset 8:d10ae3aeebc8 draft
Uploaded
author | greg |
---|---|
date | Wed, 02 Dec 2015 16:15:35 -0500 |
parents | 1807688a8a5f |
children | 39cb3eeacdbd |
files | repmatch_gff3_util.py |
diffstat | 1 files changed, 56 insertions(+), 56 deletions(-) [+] |
line wrap: on
line diff
--- a/repmatch_gff3_util.py Sat Nov 28 14:54:42 2015 -0500
+++ b/repmatch_gff3_util.py Wed Dec 02 16:15:35 2015 -0500
@@ -265,8 +265,8 @@


 def process_files(dataset_paths, galaxy_hids, method, distance, step, replicates, up_limit, low_limit, output_files,
-                  output_summary, output_orphan, output_detail, output_key, output_histogram):
-    output_histogram_file = output_files in ["all"] and method in ["all"]
+                  output_matched_peaks, output_unmatched_peaks, output_detail, output_statistics_table, output_statistics_histogram):
+    output_statistics_histogram_file = output_files in ["all"] and method in ["all"]
     if len(dataset_paths) < 2:
         return
     if method == 'all':
@@ -283,25 +283,25 @@
                                      up_limit,
                                      low_limit,
                                      output_files,
-                                     output_summary,
-                                     output_orphan,
+                                     output_matched_peaks,
+                                     output_unmatched_peaks,
                                      output_detail,
-                                     output_key,
-                                     output_histogram)
-    if output_histogram_file:
-        tmp_histogram_path = get_temporary_plot_path()
+                                     output_statistics_table,
+                                     output_statistics_histogram)
+    if output_statistics_histogram_file:
+        tmp_statistics_histogram_path = get_temporary_plot_path()
         frequency_histogram([stat['distribution'] for stat in [statistics]],
-                            tmp_histogram_path,
+                            tmp_statistics_histogram_path,
                             METHODS.keys())
-        shutil.move(tmp_histogram_path, output_histogram)
+        shutil.move(tmp_statistics_histogram_path, output_statistics_histogram)


 def perform_process(dataset_paths, galaxy_hids, method, distance, step, num_required, up_limit, low_limit, output_files,
-                    output_summary, output_orphan, output_detail, output_key, output_histogram):
+                    output_matched_peaks, output_unmatched_peaks, output_detail, output_statistics_table, output_statistics_histogram):
     output_detail_file = output_files in ["all"] and output_detail is not None
-    output_key_file = output_files in ["all"] and output_key is not None
-    output_orphan_file = output_files in ["all", "simple_orphan"] and output_orphan is not None
-    output_histogram_file = output_files in ["all"] and output_histogram is not None
+    output_statistics_table_file = output_files in ["all"] and output_statistics_table is not None
+    output_unmatched_peaks_file = output_files in ["all", "matched_peaks_unmatched_peaks"] and output_unmatched_peaks is not None
+    output_statistics_histogram_file = output_files in ["all"] and output_statistics_histogram is not None
     replicates = []
     for i, dataset_path in enumerate(dataset_paths):
         try:
@@ -336,23 +336,23 @@
                    'c-w sum',
                    'c-w distance',
                    'replicate id')
-    summary_output = td_writer(output_summary)
-    if output_key_file:
-        key_output = td_writer(output_key)
-        key_output.writerow(('data', 'median read count'))
+    matched_peaks_output = td_writer(output_matched_peaks)
+    if output_statistics_table_file:
+        statistics_table_output = td_writer(output_statistics_table)
+        statistics_table_output.writerow(('data', 'median read count'))
     if output_detail_file:
         detail_output = td_writer(output_detail)
         detail_output.writerow(labels)
-    if output_orphan_file:
-        orphan_output = td_writer(output_orphan)
-        orphan_output.writerow(('chrom', 'midpoint', 'midpoint+1', 'c-w sum', 'c-w distance', 'replicate id'))
+    if output_unmatched_peaks_file:
+        unmatched_peaks_output = td_writer(output_unmatched_peaks)
+        unmatched_peaks_output.writerow(('chrom', 'midpoint', 'midpoint+1', 'c-w sum', 'c-w distance', 'replicate id'))
     # Perform filtering
     if up_limit < 1000 or low_limit > -1000:
         for replicate in replicates:
             replicate.filter(up_limit, low_limit)
     # Actually merge the peaks
     peak_groups = []
-    orphans = []
+    unmatched_peaks = []
     freq = FrequencyDistribution()

     def do_match(reps, distance):
@@ -415,48 +415,48 @@
         do_match(replicates, d)
     for group in peak_groups:
         freq.add(group.num_replicates)
-    # Collect together the remaining orphans
+    # Collect together the remaining unmatched_peaks
     for replicate in replicates:
         for chromosome in replicate.chromosomes.values():
             for peak in chromosome.peaks:
                 freq.add(1)
-                orphans.append(peak)
-    # Average the orphan count in the graph by # replicates
+                unmatched_peaks.append(peak)
+    # Average the unmatched_peaks count in the graph by # replicates
     med = median([peak.value for group in peak_groups for peak in group.peaks.values()])
     for replicate in replicates:
         replicate.median = median([peak.value for group in peak_groups for peak in group.peaks.values() if peak.replicate == replicate])
-        key_output.writerow((replicate.id, replicate.median))
+        statistics_table_output.writerow((replicate.id, replicate.median))
     for group in peak_groups:
-        # Output summary (matched pairs).
-        summary_output.writerow(gff_row(cname=group.chrom,
-                                        start=group.midpoint,
-                                        end=group.midpoint+1,
-                                        source='repmatch',
-                                        score=group.normalized_value(med),
-                                        attrs={'median_distance': group.median_distance,
-                                               'replicates': group.num_replicates,
-                                               'value_sum': group.value_sum}))
+        # Output matched_peaks (matched pairs).
+        matched_peaks_output.writerow(gff_row(cname=group.chrom,
+                                              start=group.midpoint,
+                                              end=group.midpoint+1,
+                                              source='repmatch',
+                                              score=group.normalized_value(med),
+                                              attrs={'median_distance': group.median_distance,
+                                                     'replicates': group.num_replicates,
+                                                     'value_sum': group.value_sum}))
         if output_detail_file:
-            summary = (group.chrom,
-                       group.midpoint,
-                       group.midpoint+1,
-                       group.normalized_value(med),
-                       group.num_replicates,
-                       group.median_distance,
-                       group.value_sum)
+            matched_peaks = (group.chrom,
+                             group.midpoint,
+                             group.midpoint+1,
+                             group.normalized_value(med),
+                             group.num_replicates,
+                             group.median_distance,
+                             group.value_sum)
             for peak in group.peaks.values():
-                summary += (peak.chrom, peak.midpoint, peak.midpoint+1, peak.value, peak.distance, peak.replicate.id)
-            detail_output.writerow(summary)
-    if output_orphan_file:
-        for orphan in orphans:
-            orphan_output.writerow((orphan.chrom,
-                                    orphan.midpoint,
-                                    orphan.midpoint+1,
-                                    orphan.value,
-                                    orphan.distance,
-                                    orphan.replicate.id))
-    if output_histogram_file:
-        tmp_histogram_path = get_temporary_plot_path()
-        frequency_histogram([freq], tmp_histogram_path)
-        shutil.move(tmp_histogram_path, output_histogram)
+                matched_peaks += (peak.chrom, peak.midpoint, peak.midpoint+1, peak.value, peak.distance, peak.replicate.id)
+            detail_output.writerow(matched_peaks)
+    if output_unmatched_peaks_file:
+        for unmatched_peak in unmatched_peaks:
+            unmatched_peaks_output.writerow((unmatched_peak.chrom,
+                                             unmatched_peak.midpoint,
+                                             unmatched_peak.midpoint+1,
+                                             unmatched_peak.value,
+                                             unmatched_peak.distance,
+                                             unmatched_peak.replicate.id))
+    if output_statistics_histogram_file:
+        tmp_statistics_histogram_path = get_temporary_plot_path()
+        frequency_histogram([freq], tmp_statistics_histogram_path)
+        shutil.move(tmp_statistics_histogram_path, output_statistics_histogram)
     return {'distribution': freq}